ebk 0.1.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ebk might be problematic. Click here for more details.

Files changed (84) hide show
  1. ebk/__init__.py +35 -0
  2. ebk/ai/__init__.py +23 -0
  3. ebk/ai/knowledge_graph.py +443 -0
  4. ebk/ai/llm_providers/__init__.py +21 -0
  5. ebk/ai/llm_providers/base.py +230 -0
  6. ebk/ai/llm_providers/ollama.py +362 -0
  7. ebk/ai/metadata_enrichment.py +396 -0
  8. ebk/ai/question_generator.py +328 -0
  9. ebk/ai/reading_companion.py +224 -0
  10. ebk/ai/semantic_search.py +434 -0
  11. ebk/ai/text_extractor.py +394 -0
  12. ebk/cli.py +2828 -680
  13. ebk/config.py +260 -22
  14. ebk/db/__init__.py +37 -0
  15. ebk/db/migrations.py +180 -0
  16. ebk/db/models.py +526 -0
  17. ebk/db/session.py +144 -0
  18. ebk/decorators.py +132 -0
  19. ebk/exports/base_exporter.py +218 -0
  20. ebk/exports/html_library.py +1390 -0
  21. ebk/exports/html_utils.py +117 -0
  22. ebk/exports/hugo.py +7 -3
  23. ebk/exports/jinja_export.py +287 -0
  24. ebk/exports/multi_facet_export.py +164 -0
  25. ebk/exports/symlink_dag.py +479 -0
  26. ebk/extract_metadata.py +76 -7
  27. ebk/library_db.py +899 -0
  28. ebk/plugins/__init__.py +42 -0
  29. ebk/plugins/base.py +502 -0
  30. ebk/plugins/hooks.py +444 -0
  31. ebk/plugins/registry.py +500 -0
  32. ebk/repl/__init__.py +9 -0
  33. ebk/repl/find.py +126 -0
  34. ebk/repl/grep.py +174 -0
  35. ebk/repl/shell.py +1677 -0
  36. ebk/repl/text_utils.py +320 -0
  37. ebk/search_parser.py +413 -0
  38. ebk/server.py +1633 -0
  39. ebk/services/__init__.py +11 -0
  40. ebk/services/import_service.py +442 -0
  41. ebk/services/tag_service.py +282 -0
  42. ebk/services/text_extraction.py +317 -0
  43. ebk/similarity/__init__.py +77 -0
  44. ebk/similarity/base.py +154 -0
  45. ebk/similarity/core.py +445 -0
  46. ebk/similarity/extractors.py +168 -0
  47. ebk/similarity/metrics.py +376 -0
  48. ebk/vfs/__init__.py +101 -0
  49. ebk/vfs/base.py +301 -0
  50. ebk/vfs/library_vfs.py +124 -0
  51. ebk/vfs/nodes/__init__.py +54 -0
  52. ebk/vfs/nodes/authors.py +196 -0
  53. ebk/vfs/nodes/books.py +480 -0
  54. ebk/vfs/nodes/files.py +155 -0
  55. ebk/vfs/nodes/metadata.py +385 -0
  56. ebk/vfs/nodes/root.py +100 -0
  57. ebk/vfs/nodes/similar.py +165 -0
  58. ebk/vfs/nodes/subjects.py +184 -0
  59. ebk/vfs/nodes/tags.py +371 -0
  60. ebk/vfs/resolver.py +228 -0
  61. ebk-0.3.2.dist-info/METADATA +755 -0
  62. ebk-0.3.2.dist-info/RECORD +69 -0
  63. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/WHEEL +1 -1
  64. ebk-0.3.2.dist-info/licenses/LICENSE +21 -0
  65. ebk/imports/__init__.py +0 -0
  66. ebk/imports/calibre.py +0 -144
  67. ebk/imports/ebooks.py +0 -116
  68. ebk/llm.py +0 -58
  69. ebk/manager.py +0 -44
  70. ebk/merge.py +0 -308
  71. ebk/streamlit/__init__.py +0 -0
  72. ebk/streamlit/__pycache__/__init__.cpython-310.pyc +0 -0
  73. ebk/streamlit/__pycache__/display.cpython-310.pyc +0 -0
  74. ebk/streamlit/__pycache__/filters.cpython-310.pyc +0 -0
  75. ebk/streamlit/__pycache__/utils.cpython-310.pyc +0 -0
  76. ebk/streamlit/app.py +0 -185
  77. ebk/streamlit/display.py +0 -168
  78. ebk/streamlit/filters.py +0 -151
  79. ebk/streamlit/utils.py +0 -58
  80. ebk/utils.py +0 -311
  81. ebk-0.1.0.dist-info/METADATA +0 -457
  82. ebk-0.1.0.dist-info/RECORD +0 -29
  83. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/entry_points.txt +0 -0
  84. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,42 @@
1
+ """
2
+ EBK Plugin System
3
+
4
+ This module provides the plugin architecture for EBK, allowing extensions
5
+ to add functionality without modifying core code.
6
+ """
7
+
8
+ from .base import (
9
+ Plugin,
10
+ MetadataExtractor,
11
+ TagSuggester,
12
+ ContentAnalyzer,
13
+ SimilarityFinder,
14
+ Deduplicator,
15
+ Validator,
16
+ Exporter
17
+ )
18
+
19
+ from .registry import PluginRegistry, plugin_registry
20
+ from .hooks import HookRegistry, hooks, hook
21
+
22
+ # Public API of the plugin package (the global `plugin_registry` and `hooks` objects are imported above)
23
+ __all__ = [
24
+ # Base classes
25
+ 'Plugin',
26
+ 'MetadataExtractor',
27
+ 'TagSuggester',
28
+ 'ContentAnalyzer',
29
+ 'SimilarityFinder',
30
+ 'Deduplicator',
31
+ 'Validator',
32
+ 'Exporter',
33
+
34
+ # Registry
35
+ 'PluginRegistry',
36
+ 'plugin_registry',
37
+
38
+ # Hooks
39
+ 'HookRegistry',
40
+ 'hooks',
41
+ 'hook'
42
+ ]
ebk/plugins/base.py ADDED
@@ -0,0 +1,502 @@
1
+ """
2
+ Base classes for the EBK plugin system.
3
+
4
+ This module defines abstract base classes that all plugins must inherit from.
5
+ Each plugin type has specific methods that must be implemented.
6
+ """
7
+
8
+ from abc import ABC, abstractmethod
9
+ from typing import Dict, Any, List, Optional, Tuple
10
+ from dataclasses import dataclass
11
+ from pathlib import Path
12
+
13
+
14
@dataclass
class TagSuggestion:
    """A single proposed tag together with where it came from.

    Constructor order is ``tag``, ``confidence``, ``source`` and the
    optional ``reason``.
    """

    # Text of the proposed tag.
    tag: str
    # Confidence score attached to the proposal.
    confidence: float
    # Which plugin suggested the tag.
    source: str
    # Why the tag was suggested; ``None`` when no explanation is given.
    reason: Optional[str] = None
21
+
22
+
23
@dataclass
class ContentAnalysis:
    """Outcome of a content-analysis pass.

    Every field defaults to ``None``; after construction ``key_topics`` is
    always a list (see ``__post_init__``).
    """

    reading_time: Optional[int] = None  # minutes
    difficulty_level: Optional[str] = None  # easy/medium/hard
    word_count: Optional[int] = None
    page_count: Optional[int] = None
    language: Optional[str] = None
    summary: Optional[str] = None
    key_topics: List[str] = None
    sentiment: Optional[float] = None  # -1 to 1
    quality_score: Optional[float] = None  # 0 to 1

    def __post_init__(self):
        # ``None`` is only a sentinel: swap it for a fresh list so that
        # instances never share one mutable default.
        self.key_topics = [] if self.key_topics is None else self.key_topics
39
+
40
+
41
@dataclass
class DuplicateGroup:
    """A set of entries judged to be duplicates of one another."""

    # The entry dictionaries that make up the group.
    entries: List[Dict[str, Any]]
    # Similarity score associated with the group.
    similarity_score: float
    # How the match was made: "isbn", "title_author", "content_hash", etc.
    match_reason: str
47
+
48
+
49
@dataclass
class ValidationResult:
    """Outcome of validating a single entry."""

    # Overall verdict for the entry.
    is_valid: bool
    # Problems found, as ``ValidationError`` objects.
    errors: List['ValidationError']
    # Non-fatal findings, as ``ValidationWarning`` objects.
    warnings: List['ValidationWarning']
    # Completeness of the entry, 0 to 1.
    completeness_score: float
56
+
57
+
58
@dataclass
class ValidationError:
    """Describes one validation problem on a specific field."""

    # Name of the offending field.
    field: str
    # Human-readable description of the problem.
    message: str
    # One of "error", "warning", "info".
    severity: str
64
+
65
+
66
@dataclass
class ValidationWarning:
    """Describes one validation warning on a specific field."""

    # Name of the field the warning refers to.
    field: str
    # Human-readable description of the warning.
    message: str
71
+
72
+
73
@dataclass
class ExportResult:
    """Summary of one export run.

    ``errors`` and ``warnings`` may be omitted (or passed as ``None``); both
    are lists once the instance has been constructed.
    """

    # Whether the export succeeded.
    success: bool
    # Where the export was written.
    output_path: str
    # Number of entries that were exported.
    entries_exported: int
    errors: List[str] = None
    warnings: List[str] = None

    def __post_init__(self):
        # Give each instance its own empty list in place of the ``None``
        # sentinel, so the mutable default is never shared.
        for attr in ("errors", "warnings"):
            if getattr(self, attr) is None:
                setattr(self, attr, [])
87
+
88
+
89
class Plugin(ABC):
    """Abstract root of all EBK plugins.

    Concrete plugins must supply ``name`` and ``version``; the remaining
    properties and lifecycle methods have defaults that can be overridden.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Unique name identifying this plugin."""

    @property
    @abstractmethod
    def version(self) -> str:
        """Version string of this plugin."""

    @property
    def description(self) -> str:
        """Plugin description; empty unless overridden."""
        return ""

    @property
    def author(self) -> str:
        """Plugin author; empty unless overridden."""
        return ""

    @property
    def requires(self) -> List[str]:
        """Required dependencies; an empty list unless overridden."""
        return []

    def initialize(self, config: Dict[str, Any] = None) -> None:
        """
        Store the plugin configuration on ``self.config``.

        Args:
            config: Plugin-specific configuration. A missing or empty value
                is replaced by a new empty dict.
        """
        self.config = config if config else {}

    def cleanup(self) -> None:
        """Release resources held by the plugin (no-op by default)."""

    def validate_config(self) -> bool:
        """
        Check the plugin configuration.

        Returns:
            True if configuration is valid (always, in this base
            implementation)
        """
        return True
140
+
141
+
142
class MetadataExtractor(Plugin):
    """Base class for metadata extraction plugins.

    Subclasses implement :meth:`extract` and :meth:`supported_formats`.
    :meth:`can_extract` consults ``supported_formats()`` to decide whether a
    source string (file path, ISBN or URL) is handled by the extractor.
    """

    @abstractmethod
    async def extract(self,
                      file_path: Optional[str] = None,
                      url: Optional[str] = None,
                      isbn: Optional[str] = None,
                      content: Optional[bytes] = None) -> Dict[str, Any]:
        """
        Extract metadata from various sources.

        Args:
            file_path: Path to file to extract from
            url: URL to fetch metadata from
            isbn: ISBN to lookup
            content: Raw content bytes

        Returns:
            Dictionary with metadata fields like:
            - title, creators, subjects, description
            - publisher, date, language
            - isbn, doi, other identifiers
            - cover_url, thumbnail_url
        """

    @abstractmethod
    def supported_formats(self) -> List[str]:
        """Return list of supported file formats."""

    @staticmethod
    def _looks_like_isbn(source: str) -> bool:
        """Return True if *source* has the shape of an ISBN-10 or ISBN-13.

        Hyphens and spaces are ignored. Only the shape is checked (length
        and allowed characters); the check digit is not verified.
        """
        compact = source.replace('-', '').replace(' ', '')
        if len(compact) == 13:
            return compact.isdigit()
        if len(compact) == 10:
            # The ISBN-10 check digit may be the letter X (value 10).
            last = compact[9]
            return compact[:9].isdigit() and (last.isdigit() or last in 'Xx')
        return False

    def can_extract(self, source: str) -> bool:
        """
        Check if this extractor can handle the given source.

        The checks run in order: file extension, ISBN shape, URL scheme.
        ISBNs and URLs are accepted only when ``supported_formats()`` lists
        ``'isbn'`` or ``'url'`` respectively.

        Args:
            source: File path, URL, or identifier

        Returns:
            True if this extractor can handle the source
        """
        if not source:
            return False

        formats = self.supported_formats()

        # File extension, without the leading dot and case-insensitive.
        if Path(source).suffix.lower()[1:] in formats:
            return True

        # ISBN: the length is measured after separators are stripped, so
        # hyphenated or spaced ISBNs are recognised too.
        if self._looks_like_isbn(source):
            return 'isbn' in formats

        # HTTP(S) URL.
        if source.startswith(('http://', 'https://')):
            return 'url' in formats

        return False
200
+
201
+
202
class TagSuggester(Plugin):
    """Interface for plugins that propose tags for an entry."""

    @abstractmethod
    async def suggest_tags(self,
                           entry: Dict[str, Any],
                           max_tags: int = 10,
                           confidence_threshold: float = 0.5) -> List[TagSuggestion]:
        """
        Propose tags for an entry.

        Args:
            entry: Entry dictionary with metadata
            max_tags: Upper bound on the number of tags returned
            confidence_threshold: Lowest confidence score to accept

        Returns:
            List of TagSuggestion objects with tag and confidence score
        """

    @abstractmethod
    def requires_content(self) -> bool:
        """Tell whether this suggester needs file content."""

    def filter_suggestions(self,
                           suggestions: List[TagSuggestion],
                           max_tags: int,
                           confidence_threshold: float) -> List[TagSuggestion]:
        """
        Keep the most confident suggestions.

        Args:
            suggestions: Candidate suggestions
            max_tags: Upper bound on the number of tags kept
            confidence_threshold: Lowest confidence score to keep

        Returns:
            Suggestions at or above the threshold, ordered by descending
            confidence and truncated to ``max_tags``
        """
        confident = (
            item for item in suggestions
            if item.confidence >= confidence_threshold
        )
        # sorted() is stable, so equally confident items keep input order.
        ranked = sorted(confident, key=lambda item: item.confidence, reverse=True)
        return ranked[:max_tags]
251
+
252
+
253
class ContentAnalyzer(Plugin):
    """Interface for plugins that analyse an entry's content."""

    @abstractmethod
    async def analyze(self, entry: Dict[str, Any]) -> ContentAnalysis:
        """
        Analyse the content of an entry.

        Args:
            entry: Entry dictionary with metadata and content

        Returns:
            ContentAnalysis object with analysis results
        """

    def estimate_reading_time(self, word_count: int, wpm: int = 250) -> int:
        """
        Estimate how long a text takes to read.

        Args:
            word_count: Number of words
            wpm: Reading speed in words per minute (default 250)

        Returns:
            Rounded reading time in minutes, never less than 1
        """
        minutes = round(word_count / wpm)
        return minutes if minutes > 1 else 1
281
+
282
+
283
class SimilarityFinder(Plugin):
    """Interface for plugins that locate entries similar to a given one."""

    @abstractmethod
    def find_similar(self,
                     entry: Dict[str, Any],
                     candidates: List[Dict[str, Any]],
                     threshold: float = 0.8,
                     limit: int = 10) -> List[Tuple[Dict[str, Any], float]]:
        """
        Find the candidates that resemble ``entry``.

        Args:
            entry: Entry to find similar entries for
            candidates: Entries to compare against
            threshold: Minimum similarity score (0-1)
            limit: Maximum number of similar entries

        Returns:
            List of (entry, similarity_score) tuples
        """

    @abstractmethod
    def compute_similarity(self,
                           entry1: Dict[str, Any],
                           entry2: Dict[str, Any]) -> float:
        """
        Score how similar two entries are.

        Args:
            entry1: First entry
            entry2: Second entry

        Returns:
            Similarity score between 0 and 1
        """
321
+
322
+
323
class Deduplicator(Plugin):
    """Interface for plugins that detect and merge duplicate entries."""

    @abstractmethod
    def find_duplicates(self,
                        entries: List[Dict[str, Any]],
                        threshold: float = 0.9) -> List[DuplicateGroup]:
        """
        Detect duplicates among ``entries``.

        Args:
            entries: List of entries to check
            threshold: Similarity threshold for duplicates

        Returns:
            List of DuplicateGroup objects
        """

    @abstractmethod
    def merge_duplicates(self,
                         duplicates: DuplicateGroup,
                         strategy: str = "newest") -> Dict[str, Any]:
        """
        Collapse a group of duplicates into a single entry.

        Args:
            duplicates: Group of duplicate entries
            strategy: Merge strategy ("newest", "oldest", "most_complete")

        Returns:
            Merged entry
        """

    def calculate_completeness(self, entry: Dict[str, Any]) -> float:
        """
        Score how completely an entry's metadata is filled in.

        Required fields contribute 70% of the score and optional fields
        30%; a field counts when its value in ``entry`` is truthy.

        Args:
            entry: Entry to evaluate

        Returns:
            Completeness score between 0 and 1
        """
        def filled_fraction(field_names):
            # Share of the named fields that carry a truthy value.
            present = sum(1 for name in field_names if entry.get(name))
            return present / len(field_names)

        required_part = filled_fraction(
            ('title', 'creators', 'date', 'language', 'subjects')
        ) * 0.7
        optional_part = filled_fraction(
            ('description', 'publisher', 'isbn', 'cover_path')
        ) * 0.3

        return required_part + optional_part
378
+
379
+
380
class Validator(Plugin):
    """Base class for validation plugins.

    Subclasses implement :meth:`validate`; the ``check_*`` helpers provide
    reusable checks that return ``ValidationError`` lists.
    """

    @abstractmethod
    def validate(self, entry: Dict[str, Any]) -> ValidationResult:
        """
        Validate an entry.

        Args:
            entry: Entry to validate

        Returns:
            ValidationResult with errors and warnings
        """

    def check_required_fields(self, entry: Dict[str, Any]) -> List[ValidationError]:
        """
        Check for required fields.

        A field is reported when it is absent or its value is falsy.

        Args:
            entry: Entry to check

        Returns:
            List of validation errors
        """
        required = ['title', 'unique_id']

        return [
            ValidationError(
                field=field,
                message=f"Required field '{field}' is missing",
                severity="error"
            )
            for field in required
            if not entry.get(field)
        ]

    @staticmethod
    def _type_label(expected_type) -> str:
        """Return a readable name for a type or a tuple of types."""
        # A tuple of types has no ``__name__``; join its members' names.
        if isinstance(expected_type, tuple):
            return " or ".join(t.__name__ for t in expected_type)
        return expected_type.__name__

    def check_field_types(self, entry: Dict[str, Any]) -> List[ValidationError]:
        """
        Check field types.

        Fields that are absent or ``None`` are skipped.

        Args:
            entry: Entry to check

        Returns:
            List of validation errors
        """
        # Expected type (or tuple of accepted types) per field.
        field_types = {
            'title': str,
            'creators': list,
            'subjects': list,
            'date': str,
            'language': str,
            'page_count': int,
            'rating': (int, float)
        }

        errors = []
        for field, expected_type in field_types.items():
            value = entry.get(field)
            if value is None or isinstance(value, expected_type):
                continue
            errors.append(ValidationError(
                field=field,
                message=f"Field '{field}' should be {self._type_label(expected_type)}",
                severity="error"
            ))

        return errors
452
+
453
+
454
class Exporter(Plugin):
    """Interface for plugins that write entries out in some format."""

    @abstractmethod
    async def export(self,
                     entries: List[Dict[str, Any]],
                     output_path: str,
                     options: Dict[str, Any] = None) -> ExportResult:
        """
        Write entries out in a specific format.

        Args:
            entries: List of entries to export
            output_path: Output file or directory path
            options: Export options

        Returns:
            ExportResult with status and details
        """

    @abstractmethod
    def supported_formats(self) -> List[str]:
        """Return list of supported export formats."""

    def validate_entries(self, entries: List[Dict[str, Any]]) -> List[str]:
        """
        Check entries before they are exported.

        An empty ``entries`` yields a single "No entries to export" message;
        otherwise each entry is checked for a truthy ``unique_id`` and
        ``title``.

        Args:
            entries: Entries to validate

        Returns:
            List of validation errors
        """
        if not entries:
            return ["No entries to export"]

        problems = []
        for index, entry in enumerate(entries):
            for key in ('unique_id', 'title'):
                if not entry.get(key):
                    problems.append(f"Entry {index} missing {key}")

        return problems