ebk 0.1.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ebk might be problematic. Click here for more details.

Files changed (84) hide show
  1. ebk/__init__.py +35 -0
  2. ebk/ai/__init__.py +23 -0
  3. ebk/ai/knowledge_graph.py +443 -0
  4. ebk/ai/llm_providers/__init__.py +21 -0
  5. ebk/ai/llm_providers/base.py +230 -0
  6. ebk/ai/llm_providers/ollama.py +362 -0
  7. ebk/ai/metadata_enrichment.py +396 -0
  8. ebk/ai/question_generator.py +328 -0
  9. ebk/ai/reading_companion.py +224 -0
  10. ebk/ai/semantic_search.py +434 -0
  11. ebk/ai/text_extractor.py +394 -0
  12. ebk/cli.py +2828 -680
  13. ebk/config.py +260 -22
  14. ebk/db/__init__.py +37 -0
  15. ebk/db/migrations.py +180 -0
  16. ebk/db/models.py +526 -0
  17. ebk/db/session.py +144 -0
  18. ebk/decorators.py +132 -0
  19. ebk/exports/base_exporter.py +218 -0
  20. ebk/exports/html_library.py +1390 -0
  21. ebk/exports/html_utils.py +117 -0
  22. ebk/exports/hugo.py +7 -3
  23. ebk/exports/jinja_export.py +287 -0
  24. ebk/exports/multi_facet_export.py +164 -0
  25. ebk/exports/symlink_dag.py +479 -0
  26. ebk/extract_metadata.py +76 -7
  27. ebk/library_db.py +899 -0
  28. ebk/plugins/__init__.py +42 -0
  29. ebk/plugins/base.py +502 -0
  30. ebk/plugins/hooks.py +444 -0
  31. ebk/plugins/registry.py +500 -0
  32. ebk/repl/__init__.py +9 -0
  33. ebk/repl/find.py +126 -0
  34. ebk/repl/grep.py +174 -0
  35. ebk/repl/shell.py +1677 -0
  36. ebk/repl/text_utils.py +320 -0
  37. ebk/search_parser.py +413 -0
  38. ebk/server.py +1633 -0
  39. ebk/services/__init__.py +11 -0
  40. ebk/services/import_service.py +442 -0
  41. ebk/services/tag_service.py +282 -0
  42. ebk/services/text_extraction.py +317 -0
  43. ebk/similarity/__init__.py +77 -0
  44. ebk/similarity/base.py +154 -0
  45. ebk/similarity/core.py +445 -0
  46. ebk/similarity/extractors.py +168 -0
  47. ebk/similarity/metrics.py +376 -0
  48. ebk/vfs/__init__.py +101 -0
  49. ebk/vfs/base.py +301 -0
  50. ebk/vfs/library_vfs.py +124 -0
  51. ebk/vfs/nodes/__init__.py +54 -0
  52. ebk/vfs/nodes/authors.py +196 -0
  53. ebk/vfs/nodes/books.py +480 -0
  54. ebk/vfs/nodes/files.py +155 -0
  55. ebk/vfs/nodes/metadata.py +385 -0
  56. ebk/vfs/nodes/root.py +100 -0
  57. ebk/vfs/nodes/similar.py +165 -0
  58. ebk/vfs/nodes/subjects.py +184 -0
  59. ebk/vfs/nodes/tags.py +371 -0
  60. ebk/vfs/resolver.py +228 -0
  61. ebk-0.3.2.dist-info/METADATA +755 -0
  62. ebk-0.3.2.dist-info/RECORD +69 -0
  63. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/WHEEL +1 -1
  64. ebk-0.3.2.dist-info/licenses/LICENSE +21 -0
  65. ebk/imports/__init__.py +0 -0
  66. ebk/imports/calibre.py +0 -144
  67. ebk/imports/ebooks.py +0 -116
  68. ebk/llm.py +0 -58
  69. ebk/manager.py +0 -44
  70. ebk/merge.py +0 -308
  71. ebk/streamlit/__init__.py +0 -0
  72. ebk/streamlit/__pycache__/__init__.cpython-310.pyc +0 -0
  73. ebk/streamlit/__pycache__/display.cpython-310.pyc +0 -0
  74. ebk/streamlit/__pycache__/filters.cpython-310.pyc +0 -0
  75. ebk/streamlit/__pycache__/utils.cpython-310.pyc +0 -0
  76. ebk/streamlit/app.py +0 -185
  77. ebk/streamlit/display.py +0 -168
  78. ebk/streamlit/filters.py +0 -151
  79. ebk/streamlit/utils.py +0 -58
  80. ebk/utils.py +0 -311
  81. ebk-0.1.0.dist-info/METADATA +0 -457
  82. ebk-0.1.0.dist-info/RECORD +0 -29
  83. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/entry_points.txt +0 -0
  84. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/top_level.txt +0 -0
ebk/library_db.py ADDED
@@ -0,0 +1,899 @@
1
+ """
2
+ Database-backed Library class for ebk.
3
+
4
+ Provides a fluent API for managing ebook libraries using SQLAlchemy + SQLite.
5
+ """
6
+
7
+ from pathlib import Path
8
+ from typing import List, Dict, Any, Optional, Tuple
9
+ from datetime import datetime
10
+ import logging
11
+
12
+ from sqlalchemy import func, or_, and_, text
13
+ from sqlalchemy.orm import Session
14
+
15
+ from .db.models import Book, Author, Subject, File, ExtractedText, PersonalMetadata
16
+ from .db.session import init_db, get_session, close_db
17
+ from .services.import_service import ImportService
18
+ from .services.text_extraction import TextExtractionService
19
+ from .search_parser import parse_search_query
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
class Library:
    """
    Database-backed library for managing ebooks.

    Holds a SQLAlchemy session together with the import and text-extraction
    services, and exposes book CRUD, search, personal metadata (status,
    favorites, tags, virtual libraries), annotations and similarity helpers.

    Usage:
        lib = Library.open("/path/to/library")
        lib.add_book(Path("book.pdf"), {"title": "My Book", "creators": ["Author"]})
        results = lib.search("python programming")
        stats = lib.stats()
        lib.close()
    """

    def __init__(self, library_path: Path, session: Session):
        """
        Bind the library to a directory and an open session.

        Args:
            library_path: Path to library directory
            session: SQLAlchemy session used for every query in this class
        """
        self.library_path = Path(library_path)
        self.session = session
        self.import_service = ImportService(library_path, session)
        self.text_service = TextExtractionService(library_path)

    @classmethod
    def open(cls, library_path: Path, echo: bool = False) -> 'Library':
        """
        Open or create a library.

        Args:
            library_path: Path to library directory
            echo: If True, log all SQL statements

        Returns:
            Library instance
        """
        library_path = Path(library_path)
        init_db(library_path, echo=echo)
        session = get_session()

        logger.info(f"Opened library at {library_path}")
        return cls(library_path, session)

    def close(self):
        """Close library and cleanup database connection."""
        if self.session:
            self.session.close()
        close_db()
        logger.info("Closed library")

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _personal_for(self, book_id: int, create: bool = False) -> Optional[PersonalMetadata]:
        """Return the PersonalMetadata row for ``book_id``, optionally creating it.

        A newly created row is added to the session but not committed; the
        caller is expected to commit.
        """
        personal = self.session.query(PersonalMetadata).filter_by(
            book_id=book_id
        ).first()
        if personal is None and create:
            personal = PersonalMetadata(book_id=book_id)
            self.session.add(personal)
        return personal

    def _fts_book_ids(self, fts_query: str, limit: int) -> List[int]:
        """Return up to ``limit`` book IDs matching ``fts_query`` in books_fts, ordered by rank."""
        result = self.session.execute(
            text("""
                SELECT book_id, rank
                FROM books_fts
                WHERE books_fts MATCH :query
                ORDER BY rank
                LIMIT :limit
            """),
            {"query": fts_query, "limit": limit}
        )
        return [row[0] for row in result]

    @staticmethod
    def _in_rank_order(books: List[Book], ranked_ids: List[int]) -> List[Book]:
        """Reorder ``books`` to follow ``ranked_ids``, dropping IDs with no matching book."""
        by_id = {b.id: b for b in books}
        return [by_id[bid] for bid in ranked_ids if bid in by_id]

    # ------------------------------------------------------------------
    # Importing
    # ------------------------------------------------------------------

    def add_book(self, file_path: Path, metadata: Dict[str, Any],
                 extract_text: bool = True, extract_cover: bool = True) -> Optional[Book]:
        """
        Add a book to the library.

        Args:
            file_path: Path to ebook file
            metadata: Metadata dictionary (title, creators, subjects, etc.)
            extract_text: Whether to extract full text
            extract_cover: Whether to extract cover image

        Returns:
            Book instance or None if import failed
        """
        book = self.import_service.import_file(
            file_path,
            metadata,
            extract_text=extract_text,
            extract_cover=extract_cover
        )

        if book:
            logger.info(f"Added book: {book.title}")

        return book

    def add_calibre_book(self, metadata_opf_path: Path) -> Optional[Book]:
        """
        Add book from Calibre metadata.opf file.

        Args:
            metadata_opf_path: Path to metadata.opf

        Returns:
            Book instance or None
        """
        return self.import_service.import_calibre_book(metadata_opf_path)

    def batch_import(self, files_and_metadata: List[Tuple[Path, Dict[str, Any]]],
                     show_progress: bool = True) -> List[Book]:
        """
        Import multiple books with progress tracking.

        Args:
            files_and_metadata: List of (file_path, metadata) tuples
            show_progress: Whether to show progress bar

        Returns:
            List of imported Book instances
        """
        # The import service takes two parallel lists rather than pairs.
        file_paths = [f for f, _ in files_and_metadata]
        metadata_list = [m for _, m in files_and_metadata]

        return self.import_service.batch_import(
            file_paths,
            metadata_list,
            show_progress=show_progress
        )

    # ------------------------------------------------------------------
    # Lookup and search
    # ------------------------------------------------------------------

    def get_book(self, book_id: int) -> Optional[Book]:
        """Get book by ID."""
        return self.session.query(Book).get(book_id)

    def get_book_by_unique_id(self, unique_id: str) -> Optional[Book]:
        """Get book by unique ID."""
        return self.session.query(Book).filter_by(unique_id=unique_id).first()

    def query(self) -> 'QueryBuilder':
        """Start a fluent query."""
        return QueryBuilder(self.session)

    def search(self, query: str, limit: int = 50) -> List[Book]:
        """
        Advanced search across books with field-specific queries and boolean logic.

        Supports:
            - Field searches: title:Python, author:Knuth, tag:programming
            - Phrases: "machine learning"
            - Boolean: AND (implicit), OR (explicit), NOT/-prefix (negation)
            - Comparisons: rating:>=4, rating:3-5
            - Filters: language:en, format:pdf, favorite:true

        Examples:
            title:Python rating:>=4 format:pdf
            author:"Donald Knuth" series:TAOCP
            tag:programming favorite:true NOT java

        If parsing or the structured search raises, the error is logged and
        the raw query string is retried as a plain FTS5 MATCH; if that also
        fails an empty list is returned.

        Args:
            query: Search query (supports advanced syntax or plain text)
            limit: Maximum number of results

        Returns:
            List of matching books (FTS rank order when FTS terms are present)
        """
        try:
            parsed = parse_search_query(query)

            # Nothing to search on at all
            if not parsed.has_fts_terms() and not parsed.has_filters():
                return []

            book_ids: List[int] = []
            if parsed.has_fts_terms():
                # Over-fetch so that filtering below can still fill `limit`
                book_ids = self._fts_book_ids(parsed.fts_query, limit * 2)
                if not book_ids:
                    return []

            from .search_parser import SearchQueryParser
            where_clause, params = SearchQueryParser().to_sql_conditions(parsed)

            if book_ids:
                books_query = self.session.query(Book).filter(Book.id.in_(book_ids))
                if where_clause:
                    books_query = books_query.filter(
                        text(where_clause).bindparams(**params)
                    )
                # Deliberately no SQL LIMIT here: the SQL result is unordered
                # with respect to FTS rank, so limiting before re-sorting could
                # drop the best-ranked matches. The candidate set is already
                # bounded by the FTS query above.
                return self._in_rank_order(books_query.all(), book_ids)[:limit]

            if where_clause:
                # Filters only (no FTS terms)
                return (
                    self.session.query(Book)
                    .filter(text(where_clause).bindparams(**params))
                    .limit(limit)
                    .all()
                )

            return []

        except Exception as e:
            # logger.exception already records the traceback; log once.
            logger.exception(f"Search error: {e}")
            # Fallback to original simple FTS search
            try:
                book_ids = self._fts_book_ids(query, limit)
                if not book_ids:
                    return []
                books = self.session.query(Book).filter(Book.id.in_(book_ids)).all()
                return self._in_rank_order(books, book_ids)
            except Exception as fallback_error:
                logger.error(f"Fallback search also failed: {fallback_error}")
                return []

    def stats(self) -> Dict[str, Any]:
        """
        Get library statistics.

        Returns:
            Dictionary with keys total_books, total_authors, total_subjects,
            total_files, read_count, reading_count, languages (language ->
            book count) and formats (format -> file count)
        """
        def count(column):
            return self.session.query(func.count(column))

        # Grouped counts come back as (value, count) pairs
        lang_dist = self.session.query(
            Book.language,
            func.count(Book.id)
        ).group_by(Book.language).all()

        format_dist = self.session.query(
            File.format,
            func.count(File.id)
        ).group_by(File.format).all()

        return {
            'total_books': count(Book.id).scalar(),
            'total_authors': count(Author.id).scalar(),
            'total_subjects': count(Subject.id).scalar(),
            'total_files': count(File.id).scalar(),
            'read_count': count(PersonalMetadata.id).filter(
                PersonalMetadata.reading_status == 'read'
            ).scalar(),
            'reading_count': count(PersonalMetadata.id).filter(
                PersonalMetadata.reading_status == 'reading'
            ).scalar(),
            'languages': dict(lang_dist),
            'formats': dict(format_dist)
        }

    def get_all_books(self, limit: Optional[int] = None, offset: int = 0) -> List[Book]:
        """
        Get all books ordered by title, with optional pagination.

        Args:
            limit: Maximum number of books (None or 0 means no limit)
            offset: Starting offset; honored even when no limit is given

        Returns:
            List of books
        """
        query = self.session.query(Book).order_by(Book.title)

        if limit:
            query = query.limit(limit)
        # Previously the offset was dropped whenever limit was not set.
        if offset:
            query = query.offset(offset)

        return query.all()

    def get_books_by_author(self, author_name: str) -> List[Book]:
        """Get all books whose author name contains ``author_name`` (case-insensitive)."""
        return self.session.query(Book).join(Book.authors).filter(
            Author.name.ilike(f"%{author_name}%")
        ).all()

    def get_books_by_subject(self, subject_name: str) -> List[Book]:
        """Get all books whose subject name contains ``subject_name`` (case-insensitive)."""
        return self.session.query(Book).join(Book.subjects).filter(
            Subject.name.ilike(f"%{subject_name}%")
        ).all()

    # ------------------------------------------------------------------
    # Personal metadata
    # ------------------------------------------------------------------

    def update_reading_status(self, book_id: int, status: str,
                              progress: Optional[int] = None,
                              rating: Optional[int] = None):
        """
        Update reading status for a book.

        Creates the personal metadata row if the book has none yet, matching
        set_favorite() and add_tags(). Setting status to 'read' also stamps
        date_finished with the current time.

        Args:
            book_id: Book ID
            status: Reading status (unread, reading, read)
            progress: Reading progress percentage (0-100)
            rating: Rating (1-5)
        """
        personal = self._personal_for(book_id, create=True)

        personal.reading_status = status
        if progress is not None:
            personal.reading_progress = progress
        if rating is not None:
            personal.rating = rating

        if status == 'read':
            personal.date_finished = datetime.now()

        self.session.commit()
        logger.info(f"Updated reading status for book {book_id}: {status}")

    def set_favorite(self, book_id: int, favorite: bool = True):
        """
        Mark/unmark book as favorite.

        Args:
            book_id: Book ID
            favorite: True to mark as favorite, False to unmark
        """
        # Create personal metadata if it doesn't exist
        personal = self._personal_for(book_id, create=True)
        personal.favorite = favorite

        self.session.commit()
        logger.info(f"Set favorite for book {book_id}: {favorite}")

    def add_tags(self, book_id: int, tags: List[str]):
        """
        Add personal tags to a book.

        Existing tags keep their order; new tags are appended in the order
        given, skipping duplicates.

        Args:
            book_id: Book ID
            tags: List of tag strings
        """
        personal = self._personal_for(book_id, create=True)

        existing_tags = personal.personal_tags or []
        # dict.fromkeys de-duplicates while preserving first-seen order
        # (a set would scramble the stored order).
        personal.personal_tags = list(dict.fromkeys([*existing_tags, *tags]))

        self.session.commit()
        logger.info(f"Added tags to book {book_id}: {tags}")

    def remove_tags(self, book_id: int, tags: List[str]):
        """
        Remove personal tags from a book.

        Does nothing if the book has no personal metadata or no tags.

        Args:
            book_id: Book ID
            tags: List of tag strings to remove
        """
        personal = self._personal_for(book_id)

        if personal and personal.personal_tags:
            personal.personal_tags = [t for t in personal.personal_tags if t not in tags]
            self.session.commit()
            logger.info(f"Removed tags from book {book_id}: {tags}")

    def add_subject(self, book_id: int, subject_name: str):
        """
        Add a subject/tag to a book.

        Reuses an existing Subject with the same name, otherwise creates one.
        Logs a warning and returns if the book does not exist.

        Args:
            book_id: Book ID
            subject_name: Subject/tag name to add
        """
        book = self.session.query(Book).filter_by(id=book_id).first()
        if not book:
            logger.warning(f"Book {book_id} not found")
            return

        # Check if subject already exists
        subject = self.session.query(Subject).filter_by(name=subject_name).first()
        if not subject:
            subject = Subject(name=subject_name)
            self.session.add(subject)

        # Add subject to book if not already present
        if subject not in book.subjects:
            book.subjects.append(subject)
            self.session.commit()
            logger.info(f"Added subject '{subject_name}' to book {book_id}")

    # ------------------------------------------------------------------
    # Annotations
    # ------------------------------------------------------------------

    def add_annotation(self, book_id: int, content: str,
                       page: Optional[int] = None,
                       annotation_type: str = 'note'):
        """
        Add an annotation/comment to a book.

        Args:
            book_id: Book ID
            content: Annotation text
            page: Page number (optional)
            annotation_type: Type of annotation (note, highlight, bookmark)

        Returns:
            Annotation ID
        """
        from .db.models import Annotation

        annotation = Annotation(
            book_id=book_id,
            content=content,
            page_number=page,
            annotation_type=annotation_type,
            created_at=datetime.now()
        )
        self.session.add(annotation)
        self.session.commit()

        logger.info(f"Added annotation to book {book_id}")
        return annotation.id

    def get_annotations(self, book_id: int) -> List:
        """
        Get all annotations for a book, newest first.

        Args:
            book_id: Book ID

        Returns:
            List of Annotation objects
        """
        from .db.models import Annotation

        return self.session.query(Annotation).filter_by(
            book_id=book_id
        ).order_by(Annotation.created_at.desc()).all()

    def delete_annotation(self, annotation_id: int):
        """
        Delete an annotation. Does nothing if the ID is unknown.

        Args:
            annotation_id: Annotation ID
        """
        from .db.models import Annotation

        annotation = self.session.query(Annotation).get(annotation_id)
        if annotation:
            self.session.delete(annotation)
            self.session.commit()
            logger.info(f"Deleted annotation {annotation_id}")

    # ------------------------------------------------------------------
    # Virtual libraries (stored in PersonalMetadata.personal_tags)
    # ------------------------------------------------------------------

    def add_to_virtual_library(self, book_id: int, library_name: str):
        """
        Add a book to a virtual library (collection/view).

        Virtual library names are stored in the same personal_tags list that
        add_tags() writes to.

        Args:
            book_id: Book ID
            library_name: Name of the virtual library
        """
        personal = self._personal_for(book_id, create=True)

        existing_libs = personal.personal_tags or []
        if library_name not in existing_libs:
            # Assign a *new* list instead of appending in place: re-assigning
            # the same mutated list object may not be detected as a change.
            # NOTE(review): assumes personal_tags is a plain JSON column
            # without mutation tracking - confirm in db.models.
            personal.personal_tags = [*existing_libs, library_name]

        self.session.commit()
        logger.info(f"Added book {book_id} to virtual library '{library_name}'")

    def remove_from_virtual_library(self, book_id: int, library_name: str):
        """
        Remove a book from a virtual library.

        Does nothing if the book has no personal metadata or no tags.

        Args:
            book_id: Book ID
            library_name: Name of the virtual library
        """
        personal = self._personal_for(book_id)

        if personal and personal.personal_tags:
            personal.personal_tags = [lib for lib in personal.personal_tags if lib != library_name]
            self.session.commit()
            logger.info(f"Removed book {book_id} from virtual library '{library_name}'")

    def get_virtual_library(self, library_name: str) -> List[Book]:
        """
        Get all books in a virtual library.

        Args:
            library_name: Name of the virtual library

        Returns:
            List of books whose personal_tags list contains exactly
            ``library_name``
        """
        # Membership is checked in Python on the decoded list. A SQL
        # `.contains()` on this column is a substring match on the stored
        # value, so e.g. "sci" would also match a "science" library.
        rows = (self.session.query(Book, PersonalMetadata.personal_tags)
                .join(Book.personal)
                .filter(PersonalMetadata.personal_tags.isnot(None))
                .all())

        return [book for book, tags in rows if tags and library_name in tags]

    def list_virtual_libraries(self) -> List[str]:
        """
        Get all unique virtual library names.

        Returns:
            Sorted list of every distinct entry found in any personal_tags list
        """
        # Get all personal_tags arrays and flatten them
        all_metadata = self.session.query(PersonalMetadata).filter(
            PersonalMetadata.personal_tags.isnot(None)
        ).all()

        libraries = set()
        for pm in all_metadata:
            if pm.personal_tags:
                libraries.update(pm.personal_tags)

        return sorted(libraries)

    # ------------------------------------------------------------------
    # Deletion
    # ------------------------------------------------------------------

    def delete_book(self, book_id: int, delete_files: bool = False):
        """
        Delete a book from the library.

        Logs a warning and returns if the book does not exist.

        Args:
            book_id: Book ID
            delete_files: If True, also delete the book's files and covers
                from disk (paths are resolved relative to the library path)
        """
        book = self.get_book(book_id)
        if not book:
            logger.warning(f"Book {book_id} not found")
            return

        # Read the title now so the final log line does not have to touch the
        # instance after it has been deleted and committed.
        title = book.title

        # Delete physical files if requested
        if delete_files:
            for file in book.files:
                file_path = self.library_path / file.path
                if file_path.exists():
                    file_path.unlink()
                    logger.info(f"Deleted file: {file_path}")

            # Delete covers
            for cover in book.covers:
                cover_path = self.library_path / cover.path
                if cover_path.exists():
                    cover_path.unlink()

        # Delete from database (cascade will handle related records)
        self.session.delete(book)
        self.session.commit()
        logger.info(f"Deleted book: {title}")

    # ------------------------------------------------------------------
    # Similarity
    # ------------------------------------------------------------------

    def find_similar(
        self,
        book_id: int,
        top_k: int = 10,
        similarity_config: Optional[Any] = None,
        filter_language: bool = True,
    ) -> List[Tuple[Book, float]]:
        """
        Find books similar to the given book.

        Uses semantic similarity based on content, metadata, etc.

        Args:
            book_id: ID of the query book
            top_k: Number of similar books to return (default 10)
            similarity_config: Optional BookSimilarity instance
                (default: balanced preset)
            filter_language: If True, only return books in same language

        Returns:
            List of (book, similarity_score) tuples, sorted by similarity;
            empty if the book is unknown or there are no candidates

        Example:
            >>> similar = lib.find_similar(42, top_k=5)
            >>> for book, score in similar:
            ...     print(f"{book.title}: {score:.2f}")
        """
        from ebk.similarity import BookSimilarity

        # Get query book
        query_book = self.get_book(book_id)
        if not query_book:
            logger.warning(f"Book {book_id} not found")
            return []

        # Get candidate books
        candidates_query = self.query()
        if filter_language and query_book.language:
            candidates_query = candidates_query.filter_by_language(query_book.language)

        candidates = candidates_query.all()

        if not candidates:
            return []

        # Configure similarity
        if similarity_config is None:
            similarity_config = BookSimilarity().balanced()

        # Fit on all candidates for performance
        similarity_config.fit(candidates)

        # Find similar books
        results = similarity_config.find_similar(query_book, candidates, top_k=top_k)

        logger.info(
            f"Found {len(results)} similar books to '{query_book.title}'"
        )

        return results

    def compute_similarity_matrix(
        self,
        book_ids: Optional[List[int]] = None,
        similarity_config: Optional[Any] = None,
    ) -> Tuple[List[Book], Any]:
        """
        Compute pairwise similarity matrix for books.

        Args:
            book_ids: Optional list of book IDs (default: all books; an empty
                list is treated the same as None). Unknown IDs are skipped.
            similarity_config: Optional BookSimilarity instance
                (default: balanced preset)

        Returns:
            Tuple of (books, similarity_matrix)
            where similarity_matrix[i][j] = similarity(books[i], books[j]);
            ([], None) when no books are found

        Example:
            >>> books, matrix = lib.compute_similarity_matrix()
            >>> # matrix[0][1] is similarity between books[0] and books[1]
        """
        from ebk.similarity import BookSimilarity

        # Get books
        if book_ids:
            books = [self.get_book(book_id) for book_id in book_ids]
            books = [b for b in books if b is not None]  # Filter None
        else:
            books = self.query().all()

        if not books:
            logger.warning("No books found for similarity matrix")
            return [], None

        # Configure similarity
        if similarity_config is None:
            similarity_config = BookSimilarity().balanced()

        # Fit and compute matrix
        similarity_config.fit(books)
        matrix = similarity_config.similarity_matrix(books)

        logger.info(f"Computed {len(books)}x{len(books)} similarity matrix")

        return books, matrix
736
+
737
+
738
class QueryBuilder:
    """
    Fluent query builder for books.

    Each filter/order/paging method narrows the underlying ``Book`` query and
    returns ``self`` so calls can be chained; ``all()``, ``first()`` and
    ``count()`` execute it.
    """

    def __init__(self, session: Session):
        self.session = session
        self._query = session.query(Book)
        # Names of Book relationships already joined into self._query.
        self._joined = set()

    def _join_once(self, relationship_name: str) -> None:
        """Join ``Book.<relationship_name>`` unless an earlier filter already did.

        Chaining two filters on the same relationship (e.g. rating and
        favorite, both on ``Book.personal``) would otherwise join it twice.
        """
        if relationship_name not in self._joined:
            self._query = self._query.join(getattr(Book, relationship_name))
            self._joined.add(relationship_name)

    def filter_by_title(self, title: str, exact: bool = False) -> 'QueryBuilder':
        """Filter by title (exact equality, or case-insensitive substring)."""
        if exact:
            self._query = self._query.filter(Book.title == title)
        else:
            self._query = self._query.filter(Book.title.ilike(f"%{title}%"))
        return self

    def filter_by_author(self, author: str) -> 'QueryBuilder':
        """Filter by author name (case-insensitive substring)."""
        self._join_once('authors')
        self._query = self._query.filter(Author.name.ilike(f"%{author}%"))
        return self

    def filter_by_subject(self, subject: str) -> 'QueryBuilder':
        """Filter by subject (case-insensitive substring)."""
        self._join_once('subjects')
        self._query = self._query.filter(Subject.name.ilike(f"%{subject}%"))
        return self

    def filter_by_language(self, language: str) -> 'QueryBuilder':
        """Filter by language code (exact match)."""
        self._query = self._query.filter(Book.language == language)
        return self

    def filter_by_publisher(self, publisher: str) -> 'QueryBuilder':
        """Filter by publisher (case-insensitive substring)."""
        self._query = self._query.filter(Book.publisher.ilike(f"%{publisher}%"))
        return self

    def filter_by_year(self, year: int) -> 'QueryBuilder':
        """Filter by publication year.

        Args:
            year: Publication year (e.g., 1975)

        Returns:
            Self for chaining
        """
        # publication_date can be "YYYY", "YYYY-MM", or "YYYY-MM-DD"
        # So we match if it starts with the year
        year_str = str(year)
        self._query = self._query.filter(Book.publication_date.like(f"{year_str}%"))
        return self

    def filter_by_text(self, search_text: str) -> 'QueryBuilder':
        """Filter by full-text search.

        Uses FTS5 to search across title, description, and extracted text.
        The FTS lookup runs immediately (not when the builder is executed)
        and its rank order is not carried over to the final result.

        Args:
            search_text: Text to search for

        Returns:
            Self for chaining
        """
        from sqlalchemy import text as sql_text

        # Query FTS5 table for matching book IDs
        result = self.session.execute(
            sql_text("""
                SELECT book_id
                FROM books_fts
                WHERE books_fts MATCH :query
                ORDER BY rank
            """),
            {"query": search_text}
        )
        book_ids = [row[0] for row in result]

        if book_ids:
            self._query = self._query.filter(Book.id.in_(book_ids))
        else:
            # No matches - ensure query returns empty
            self._query = self._query.filter(Book.id == -1)

        return self

    def filter_by_reading_status(self, status: str) -> 'QueryBuilder':
        """Filter by reading status."""
        self._join_once('personal')
        self._query = self._query.filter(PersonalMetadata.reading_status == status)
        return self

    def filter_by_rating(self, min_rating: int, max_rating: int = 5) -> 'QueryBuilder':
        """Filter by rating range (both bounds inclusive)."""
        self._join_once('personal')
        self._query = self._query.filter(
            and_(
                PersonalMetadata.rating >= min_rating,
                PersonalMetadata.rating <= max_rating
            )
        )
        return self

    def filter_by_favorite(self, is_favorite: bool = True) -> 'QueryBuilder':
        """Filter by favorite status."""
        self._join_once('personal')
        self._query = self._query.filter(PersonalMetadata.favorite == is_favorite)
        return self

    def filter_by_format(self, format_name: str) -> 'QueryBuilder':
        """Filter by file format (e.g., 'pdf', 'epub'); case-insensitive substring."""
        self._join_once('files')
        self._query = self._query.filter(File.format.ilike(f'%{format_name}%'))
        return self

    def order_by(self, field: str, desc: bool = False) -> 'QueryBuilder':
        """
        Order results.

        Unknown field names are ignored and leave the ordering unchanged.

        Args:
            field: Field name (title, created_at, publication_date)
            desc: Descending order if True

        Returns:
            Self for chaining
        """
        field_map = {
            'title': Book.title,
            'created_at': Book.created_at,
            'publication_date': Book.publication_date,
        }

        if field in field_map:
            order_field = field_map[field]
            if desc:
                order_field = order_field.desc()
            self._query = self._query.order_by(order_field)

        return self

    def limit(self, limit: int) -> 'QueryBuilder':
        """Limit number of results."""
        self._query = self._query.limit(limit)
        return self

    def offset(self, offset: int) -> 'QueryBuilder':
        """Set result offset."""
        self._query = self._query.offset(offset)
        return self

    def all(self) -> List[Book]:
        """Execute query and return all results."""
        return self._query.all()

    def first(self) -> Optional[Book]:
        """Execute query and return first result."""
        return self._query.first()

    def count(self) -> int:
        """Get count of matching books."""
        return self._query.count()