ebk-0.4.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. ebk/__init__.py +35 -0
  2. ebk/ai/__init__.py +23 -0
  3. ebk/ai/knowledge_graph.py +450 -0
  4. ebk/ai/llm_providers/__init__.py +26 -0
  5. ebk/ai/llm_providers/anthropic.py +209 -0
  6. ebk/ai/llm_providers/base.py +295 -0
  7. ebk/ai/llm_providers/gemini.py +285 -0
  8. ebk/ai/llm_providers/ollama.py +294 -0
  9. ebk/ai/metadata_enrichment.py +394 -0
  10. ebk/ai/question_generator.py +328 -0
  11. ebk/ai/reading_companion.py +224 -0
  12. ebk/ai/semantic_search.py +433 -0
  13. ebk/ai/text_extractor.py +393 -0
  14. ebk/calibre_import.py +66 -0
  15. ebk/cli.py +6433 -0
  16. ebk/config.py +230 -0
  17. ebk/db/__init__.py +37 -0
  18. ebk/db/migrations.py +507 -0
  19. ebk/db/models.py +725 -0
  20. ebk/db/session.py +144 -0
  21. ebk/decorators.py +1 -0
  22. ebk/exports/__init__.py +0 -0
  23. ebk/exports/base_exporter.py +218 -0
  24. ebk/exports/echo_export.py +279 -0
  25. ebk/exports/html_library.py +1743 -0
  26. ebk/exports/html_utils.py +87 -0
  27. ebk/exports/hugo.py +59 -0
  28. ebk/exports/jinja_export.py +286 -0
  29. ebk/exports/multi_facet_export.py +159 -0
  30. ebk/exports/opds_export.py +232 -0
  31. ebk/exports/symlink_dag.py +479 -0
  32. ebk/exports/zip.py +25 -0
  33. ebk/extract_metadata.py +341 -0
  34. ebk/ident.py +89 -0
  35. ebk/library_db.py +1440 -0
  36. ebk/opds.py +748 -0
  37. ebk/plugins/__init__.py +42 -0
  38. ebk/plugins/base.py +502 -0
  39. ebk/plugins/hooks.py +442 -0
  40. ebk/plugins/registry.py +499 -0
  41. ebk/repl/__init__.py +9 -0
  42. ebk/repl/find.py +126 -0
  43. ebk/repl/grep.py +173 -0
  44. ebk/repl/shell.py +1677 -0
  45. ebk/repl/text_utils.py +320 -0
  46. ebk/search_parser.py +413 -0
  47. ebk/server.py +3608 -0
  48. ebk/services/__init__.py +28 -0
  49. ebk/services/annotation_extraction.py +351 -0
  50. ebk/services/annotation_service.py +380 -0
  51. ebk/services/export_service.py +577 -0
  52. ebk/services/import_service.py +447 -0
  53. ebk/services/personal_metadata_service.py +347 -0
  54. ebk/services/queue_service.py +253 -0
  55. ebk/services/tag_service.py +281 -0
  56. ebk/services/text_extraction.py +317 -0
  57. ebk/services/view_service.py +12 -0
  58. ebk/similarity/__init__.py +77 -0
  59. ebk/similarity/base.py +154 -0
  60. ebk/similarity/core.py +471 -0
  61. ebk/similarity/extractors.py +168 -0
  62. ebk/similarity/metrics.py +376 -0
  63. ebk/skills/SKILL.md +182 -0
  64. ebk/skills/__init__.py +1 -0
  65. ebk/vfs/__init__.py +101 -0
  66. ebk/vfs/base.py +298 -0
  67. ebk/vfs/library_vfs.py +122 -0
  68. ebk/vfs/nodes/__init__.py +54 -0
  69. ebk/vfs/nodes/authors.py +196 -0
  70. ebk/vfs/nodes/books.py +480 -0
  71. ebk/vfs/nodes/files.py +155 -0
  72. ebk/vfs/nodes/metadata.py +385 -0
  73. ebk/vfs/nodes/root.py +100 -0
  74. ebk/vfs/nodes/similar.py +165 -0
  75. ebk/vfs/nodes/subjects.py +184 -0
  76. ebk/vfs/nodes/tags.py +371 -0
  77. ebk/vfs/resolver.py +228 -0
  78. ebk/vfs_router.py +275 -0
  79. ebk/views/__init__.py +32 -0
  80. ebk/views/dsl.py +668 -0
  81. ebk/views/service.py +619 -0
  82. ebk-0.4.4.dist-info/METADATA +755 -0
  83. ebk-0.4.4.dist-info/RECORD +87 -0
  84. ebk-0.4.4.dist-info/WHEEL +5 -0
  85. ebk-0.4.4.dist-info/entry_points.txt +2 -0
  86. ebk-0.4.4.dist-info/licenses/LICENSE +21 -0
  87. ebk-0.4.4.dist-info/top_level.txt +1 -0
ebk/db/migrations.py ADDED
@@ -0,0 +1,507 @@
+ """
+ Database migration utilities for ebk.
+
+ Since this project uses SQLAlchemy's create_all() approach rather than Alembic,
+ this module provides simple migration functions for schema changes.
+
+ Schema versioning is tracked in the `schema_versions` table, which stores:
+ - version: Sequential version number
+ - migration_name: Name of the migration
+ - applied_at: Timestamp when migration was applied
+ """
+
+ from pathlib import Path
+ from datetime import datetime, timezone
+ from sqlalchemy import create_engine, text, inspect
+ from sqlalchemy.engine import Engine
+ from typing import Set
+
+ import logging
+
+ logger = logging.getLogger(__name__)
+
+ # Current schema version - increment when adding new migrations
+ CURRENT_SCHEMA_VERSION = 6
+
+
+ def get_engine(library_path: Path) -> Engine:
+     """Get database engine for a library."""
+     db_path = library_path / 'library.db'
+     if not db_path.exists():
+         raise FileNotFoundError(f"Database not found at {db_path}")
+
+     db_url = f'sqlite:///{db_path}'
+     return create_engine(db_url, echo=False)
+
+
+ def table_exists(engine: Engine, table_name: str) -> bool:
+     """Check if a table exists in the database."""
+     inspector = inspect(engine)
+     return table_name in inspector.get_table_names()
+
+
+ def ensure_schema_versions_table(engine: Engine) -> None:
+     """Create schema_versions table if it doesn't exist."""
+     if table_exists(engine, 'schema_versions'):
+         return
+
+     with engine.begin() as conn:
+         conn.execute(text("""
+             CREATE TABLE schema_versions (
+                 version INTEGER NOT NULL PRIMARY KEY,
+                 migration_name VARCHAR(200) NOT NULL,
+                 applied_at DATETIME NOT NULL
+             )
+         """))
+     logger.debug("Created schema_versions table")
+
+
+ def get_applied_migrations(engine: Engine) -> Set[str]:
+     """Get set of migration names that have been applied."""
+     if not table_exists(engine, 'schema_versions'):
+         return set()
+
+     with engine.connect() as conn:
+         result = conn.execute(text("SELECT migration_name FROM schema_versions"))
+         return {row[0] for row in result.fetchall()}
+
+
+ def get_schema_version(engine: Engine) -> int:
+     """Get the current schema version number."""
+     if not table_exists(engine, 'schema_versions'):
+         return 0
+
+     with engine.connect() as conn:
+         result = conn.execute(text("SELECT MAX(version) FROM schema_versions"))
+         row = result.fetchone()
+         return row[0] if row and row[0] else 0
+
+
+ def record_migration(engine: Engine, version: int, migration_name: str) -> None:
+     """Record that a migration has been applied."""
+     ensure_schema_versions_table(engine)
+
+     with engine.begin() as conn:
+         conn.execute(
+             text("""
+                 INSERT INTO schema_versions (version, migration_name, applied_at)
+                 VALUES (:version, :migration_name, :applied_at)
+             """),
+             {"version": version, "migration_name": migration_name, "applied_at": datetime.now(timezone.utc)}
+         )
+
+
+ def is_migration_applied(engine: Engine, migration_name: str) -> bool:
+     """Check if a specific migration has been applied."""
+     applied = get_applied_migrations(engine)
+     return migration_name in applied
+
+
+ def migrate_add_tags(library_path: Path, dry_run: bool = False) -> bool:
+     """
+     Add tags table and book_tags association table to existing database.
+
+     This migration adds support for hierarchical user-defined tags,
+     separate from bibliographic subjects.
+
+     Args:
+         library_path: Path to library directory
+         dry_run: If True, only check if migration is needed
+
+     Returns:
+         True if migration was applied (or would be applied in dry_run),
+         False if already up-to-date
+     """
+     engine = get_engine(library_path)
+
+     # Check if migration is needed
+     if table_exists(engine, 'tags'):
+         logger.debug("Tags table already exists, skipping migration")
+         return False
+
+     if dry_run:
+         logger.debug("Migration needed: tags table does not exist")
+         return True
+
+     logger.debug("Applying migration: Adding tags table and book_tags association")
+
+     with engine.begin() as conn:
+         # Create tags table
+         conn.execute(text("""
+             CREATE TABLE tags (
+                 id INTEGER NOT NULL PRIMARY KEY,
+                 name VARCHAR(200) NOT NULL,
+                 path VARCHAR(500) NOT NULL UNIQUE,
+                 parent_id INTEGER,
+                 description TEXT,
+                 color VARCHAR(7),
+                 created_at DATETIME NOT NULL,
+                 FOREIGN KEY(parent_id) REFERENCES tags (id) ON DELETE CASCADE
+             )
+         """))
+
+         # Create indexes on tags table
+         conn.execute(text("CREATE INDEX idx_tag_path ON tags (path)"))
+         conn.execute(text("CREATE INDEX idx_tag_parent ON tags (parent_id)"))
+         conn.execute(text("CREATE INDEX ix_tags_name ON tags (name)"))
+
+         # Create book_tags association table
+         conn.execute(text("""
+             CREATE TABLE book_tags (
+                 book_id INTEGER NOT NULL,
+                 tag_id INTEGER NOT NULL,
+                 created_at DATETIME,
+                 PRIMARY KEY (book_id, tag_id),
+                 FOREIGN KEY(book_id) REFERENCES books (id) ON DELETE CASCADE,
+                 FOREIGN KEY(tag_id) REFERENCES tags (id) ON DELETE CASCADE
+             )
+         """))
+
+     logger.debug("Migration completed successfully")
+
+     return True
+
+
+ def migrate_add_book_color(library_path: Path, dry_run: bool = False) -> bool:
+     """
+     Add color column to books table.
+
+     This migration adds a color field to books for user customization.
+
+     Args:
+         library_path: Path to library directory
+         dry_run: If True, only check if migration is needed
+
+     Returns:
+         True if migration was applied (or would be applied in dry_run),
+         False if already up-to-date
+     """
+     engine = get_engine(library_path)
+     inspector = inspect(engine)
+
+     # Check if migration is needed
+     if 'books' not in inspector.get_table_names():
+         logger.error("Books table does not exist")
+         return False
+
+     columns = [col['name'] for col in inspector.get_columns('books')]
+     if 'color' in columns:
+         logger.debug("Books.color column already exists, skipping migration")
+         return False
+
+     if dry_run:
+         logger.debug("Migration needed: books.color column does not exist")
+         return True
+
+     logger.debug("Applying migration: Adding color column to books table")
+
+     with engine.begin() as conn:
+         conn.execute(text("ALTER TABLE books ADD COLUMN color VARCHAR(7)"))
+     logger.debug("Migration completed successfully")
+
+     return True
+
+
+ def migrate_descriptions_to_markdown(library_path: Path, dry_run: bool = False) -> bool:
+     """
+     Convert HTML descriptions to markdown.
+
+     This migration converts book descriptions containing HTML to clean markdown
+     for better display in console and web interfaces.
+
+     Args:
+         library_path: Path to library directory
+         dry_run: If True, only check if migration is needed
+
+     Returns:
+         True if migration was applied (or would be applied in dry_run),
+         False if already up-to-date
+     """
+     engine = get_engine(library_path)
+
+     # Check if any descriptions contain HTML
+     with engine.connect() as conn:
+         result = conn.execute(text(
+             "SELECT COUNT(*) FROM books WHERE description LIKE '%<%>%'"
+         ))
+         html_count = result.scalar()
+
+     if html_count == 0:
+         logger.debug("No HTML descriptions found, skipping migration")
+         return False
+
+     if dry_run:
+         logger.debug(f"Migration needed: {html_count} descriptions contain HTML")
+         return True
+
+     logger.debug(f"Converting {html_count} HTML descriptions to markdown")
+
+     try:
+         from markdownify import markdownify as md
+     except ImportError:
+         logger.warning("markdownify not installed, using basic HTML stripping")
+         md = None
+
+     with engine.begin() as conn:
+         # Fetch all descriptions with HTML
+         result = conn.execute(text(
+             "SELECT id, description FROM books WHERE description LIKE '%<%>%'"
+         ))
+         rows = result.fetchall()
+
+         for book_id, description in rows:
+             if not description:
+                 continue
+
+             if md:
+                 # Convert HTML to markdown
+                 clean_desc = md(description, strip=['script', 'style'])
+                 # Clean up excessive whitespace
+                 import re
+                 clean_desc = re.sub(r'\n{3,}', '\n\n', clean_desc)
+                 clean_desc = clean_desc.strip()
+             else:
+                 # Fallback: strip HTML tags
+                 from bs4 import BeautifulSoup
+                 soup = BeautifulSoup(description, 'html.parser')
+                 clean_desc = soup.get_text(separator=' ', strip=True)
+
+             conn.execute(
+                 text("UPDATE books SET description = :desc WHERE id = :id"),
+                 {"desc": clean_desc, "id": book_id}
+             )
+
+     logger.debug(f"Converted {len(rows)} descriptions to markdown")
+     return True
+
+
+ def migrate_add_reviews_table(library_path: Path, dry_run: bool = False) -> bool:
+     """
+     Add reviews table for user book reviews.
+
+     This migration adds support for detailed user reviews,
+     separate from simple ratings.
+
+     Args:
+         library_path: Path to library directory
+         dry_run: If True, only check if migration is needed
+
+     Returns:
+         True if migration was applied (or would be applied in dry_run),
+         False if already up-to-date
+     """
+     engine = get_engine(library_path)
+
+     # Check if migration is needed
+     if table_exists(engine, 'reviews'):
+         logger.debug("Reviews table already exists, skipping migration")
+         return False
+
+     if dry_run:
+         logger.debug("Migration needed: reviews table does not exist")
+         return True
+
+     logger.debug("Applying migration: Adding reviews table")
+
+     with engine.begin() as conn:
+         conn.execute(text("""
+             CREATE TABLE reviews (
+                 id INTEGER NOT NULL PRIMARY KEY,
+                 book_id INTEGER NOT NULL,
+                 title VARCHAR(255),
+                 content TEXT NOT NULL,
+                 rating FLOAT,
+                 review_type VARCHAR(50) DEFAULT 'personal',
+                 visibility VARCHAR(20) DEFAULT 'private',
+                 created_at DATETIME NOT NULL,
+                 updated_at DATETIME NOT NULL,
+                 FOREIGN KEY(book_id) REFERENCES books (id) ON DELETE CASCADE
+             )
+         """))
+
+         # Create indexes
+         conn.execute(text("CREATE INDEX idx_review_book ON reviews (book_id)"))
+         conn.execute(text("CREATE INDEX idx_review_type ON reviews (review_type)"))
+         conn.execute(text("CREATE INDEX idx_review_created ON reviews (created_at)"))
+
+     logger.debug("Migration completed successfully")
+
+     return True
+
+
+ def migrate_add_enrichment_history_table(library_path: Path, dry_run: bool = False) -> bool:
+     """
+     Add enrichment_history table for tracking metadata changes.
+
+     This migration adds support for tracking provenance of
+     automated metadata enrichment.
+
+     Args:
+         library_path: Path to library directory
+         dry_run: If True, only check if migration is needed
+
+     Returns:
+         True if migration was applied (or would be applied in dry_run),
+         False if already up-to-date
+     """
+     engine = get_engine(library_path)
+
+     # Check if migration is needed
+     if table_exists(engine, 'enrichment_history'):
+         logger.debug("Enrichment history table already exists, skipping migration")
+         return False
+
+     if dry_run:
+         logger.debug("Migration needed: enrichment_history table does not exist")
+         return True
+
+     logger.debug("Applying migration: Adding enrichment_history table")
+
+     with engine.begin() as conn:
+         conn.execute(text("""
+             CREATE TABLE enrichment_history (
+                 id INTEGER NOT NULL PRIMARY KEY,
+                 book_id INTEGER NOT NULL,
+                 field_name VARCHAR(100) NOT NULL,
+                 old_value TEXT,
+                 new_value TEXT,
+                 source_type VARCHAR(50) NOT NULL,
+                 source_detail VARCHAR(200),
+                 confidence FLOAT DEFAULT 1.0,
+                 applied BOOLEAN DEFAULT 1,
+                 reverted BOOLEAN DEFAULT 0,
+                 enriched_at DATETIME NOT NULL,
+                 FOREIGN KEY(book_id) REFERENCES books (id) ON DELETE CASCADE
+             )
+         """))
+
+         # Create indexes
+         conn.execute(text("CREATE INDEX idx_enrichment_book ON enrichment_history (book_id)"))
+         conn.execute(text("CREATE INDEX idx_enrichment_source ON enrichment_history (source_type)"))
+         conn.execute(text("CREATE INDEX idx_enrichment_field ON enrichment_history (field_name)"))
+         conn.execute(text("CREATE INDEX idx_enrichment_date ON enrichment_history (enriched_at)"))
+
+     logger.debug("Migration completed successfully")
+
+     return True
+
+
+ def migrate_enhance_annotations(library_path: Path, dry_run: bool = False) -> bool:
+     """
+     Add rich content fields to annotations table.
+
+     Adds: title, content_format, category, pinned, updated_at
+
+     Args:
+         library_path: Path to library directory
+         dry_run: If True, only check if migration is needed
+
+     Returns:
+         True if migration was applied (or would be applied in dry_run),
+         False if already up-to-date
+     """
+     engine = get_engine(library_path)
+     inspector = inspect(engine)
+
+     # Check if migration is needed
+     if 'annotations' not in inspector.get_table_names():
+         logger.debug("Annotations table does not exist, skipping migration")
+         return False
+
+     columns = [col['name'] for col in inspector.get_columns('annotations')]
+     if 'content_format' in columns:
+         logger.debug("Annotations.content_format column already exists, skipping migration")
+         return False
+
+     if dry_run:
+         logger.debug("Migration needed: annotations columns missing")
+         return True
+
+     logger.debug("Applying migration: Enhancing annotations table")
+
+     with engine.begin() as conn:
+         # Add new columns
+         conn.execute(text("ALTER TABLE annotations ADD COLUMN title VARCHAR(255)"))
+         conn.execute(text("ALTER TABLE annotations ADD COLUMN content_format VARCHAR(20) DEFAULT 'plain'"))
+         conn.execute(text("ALTER TABLE annotations ADD COLUMN category VARCHAR(100)"))
+         conn.execute(text("ALTER TABLE annotations ADD COLUMN pinned BOOLEAN DEFAULT 0"))
+         conn.execute(text("ALTER TABLE annotations ADD COLUMN updated_at DATETIME"))
+
+         # Create indexes for new columns
+         conn.execute(text("CREATE INDEX idx_annotation_pinned ON annotations (book_id, pinned)"))
+         conn.execute(text("CREATE INDEX idx_annotation_category ON annotations (category)"))
+
+     logger.debug("Migration completed successfully")
+
+     return True
+
+
+ # Migration registry: (version, name, function)
+ # Add new migrations here with incrementing version numbers
+ MIGRATIONS = [
+     (1, 'add_tags', migrate_add_tags),
+     (2, 'add_book_color', migrate_add_book_color),
+     (3, 'descriptions_to_markdown', migrate_descriptions_to_markdown),
+     (4, 'add_reviews_table', migrate_add_reviews_table),
+     (5, 'add_enrichment_history_table', migrate_add_enrichment_history_table),
+     (6, 'enhance_annotations', migrate_enhance_annotations),
+ ]
+
+
+ def run_all_migrations(library_path: Path, dry_run: bool = False) -> dict:
+     """
+     Run all pending migrations on a library database.
+
+     Uses schema version tracking to determine which migrations need to run.
+     Backwards compatible with databases that don't have schema_versions table.
+     Retroactively records migrations that were already applied.
+
+     Args:
+         library_path: Path to library directory
+         dry_run: If True, only check which migrations are needed
+
+     Returns:
+         Dict mapping migration name to whether it was applied
+     """
+     results = {}
+     engine = get_engine(library_path)
+
+     # Get already applied migrations (for backwards compatibility)
+     applied_migrations = get_applied_migrations(engine)
+
+     for version, name, migration_func in MIGRATIONS:
+         try:
+             # Check if already recorded in schema_versions
+             if name in applied_migrations:
+                 results[name] = False
+                 continue
+
+             # Run migration (it will check internally if already applied)
+             applied = migration_func(library_path, dry_run=dry_run)
+
+             if not dry_run:
+                 # Record the migration (whether newly applied or retroactively)
+                 # This ensures proper version tracking even for already-migrated dbs
+                 record_migration(engine, version, name)
+
+             results[name] = applied
+
+         except Exception as e:
+             logger.error(f"Migration '{name}' failed: {e}")
+             results[name] = False
+             raise
+
+     return results
+
+
+ def check_migrations(library_path: Path) -> dict:
+     """
+     Check which migrations need to be applied.
+
+     Args:
+         library_path: Path to library directory
+
+     Returns:
+         Dict mapping migration name to whether it's needed
+     """
+     return run_all_migrations(library_path, dry_run=True)
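
Taken together, run_all_migrations() and check_migrations() are the entry points of this module: a caller points them at a library directory containing library.db and gets back a dict of migration names. The sketch below is illustrative only and is not part of the package; the library location and logging setup are assumptions.

    # Hypothetical caller of ebk.db.migrations (not shipped with the package).
    import logging
    from pathlib import Path

    from ebk.db.migrations import check_migrations, run_all_migrations

    logging.basicConfig(level=logging.DEBUG)  # surfaces the module's debug logging

    library = Path.home() / "ebk-library"  # assumed directory containing library.db

    # Dry run: report which migrations are still pending without modifying the database.
    for name, needed in check_migrations(library).items():
        print(f"{name}: {'pending' if needed else 'up-to-date'}")

    # Apply all pending migrations and record them in schema_versions.
    results = run_all_migrations(library)
    print(results)

Note that get_engine() raises FileNotFoundError if library.db does not exist, so this assumes an already-initialized library.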