ebk-0.4.4-py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (87)
  1. ebk/__init__.py +35 -0
  2. ebk/ai/__init__.py +23 -0
  3. ebk/ai/knowledge_graph.py +450 -0
  4. ebk/ai/llm_providers/__init__.py +26 -0
  5. ebk/ai/llm_providers/anthropic.py +209 -0
  6. ebk/ai/llm_providers/base.py +295 -0
  7. ebk/ai/llm_providers/gemini.py +285 -0
  8. ebk/ai/llm_providers/ollama.py +294 -0
  9. ebk/ai/metadata_enrichment.py +394 -0
  10. ebk/ai/question_generator.py +328 -0
  11. ebk/ai/reading_companion.py +224 -0
  12. ebk/ai/semantic_search.py +433 -0
  13. ebk/ai/text_extractor.py +393 -0
  14. ebk/calibre_import.py +66 -0
  15. ebk/cli.py +6433 -0
  16. ebk/config.py +230 -0
  17. ebk/db/__init__.py +37 -0
  18. ebk/db/migrations.py +507 -0
  19. ebk/db/models.py +725 -0
  20. ebk/db/session.py +144 -0
  21. ebk/decorators.py +1 -0
  22. ebk/exports/__init__.py +0 -0
  23. ebk/exports/base_exporter.py +218 -0
  24. ebk/exports/echo_export.py +279 -0
  25. ebk/exports/html_library.py +1743 -0
  26. ebk/exports/html_utils.py +87 -0
  27. ebk/exports/hugo.py +59 -0
  28. ebk/exports/jinja_export.py +286 -0
  29. ebk/exports/multi_facet_export.py +159 -0
  30. ebk/exports/opds_export.py +232 -0
  31. ebk/exports/symlink_dag.py +479 -0
  32. ebk/exports/zip.py +25 -0
  33. ebk/extract_metadata.py +341 -0
  34. ebk/ident.py +89 -0
  35. ebk/library_db.py +1440 -0
  36. ebk/opds.py +748 -0
  37. ebk/plugins/__init__.py +42 -0
  38. ebk/plugins/base.py +502 -0
  39. ebk/plugins/hooks.py +442 -0
  40. ebk/plugins/registry.py +499 -0
  41. ebk/repl/__init__.py +9 -0
  42. ebk/repl/find.py +126 -0
  43. ebk/repl/grep.py +173 -0
  44. ebk/repl/shell.py +1677 -0
  45. ebk/repl/text_utils.py +320 -0
  46. ebk/search_parser.py +413 -0
  47. ebk/server.py +3608 -0
  48. ebk/services/__init__.py +28 -0
  49. ebk/services/annotation_extraction.py +351 -0
  50. ebk/services/annotation_service.py +380 -0
  51. ebk/services/export_service.py +577 -0
  52. ebk/services/import_service.py +447 -0
  53. ebk/services/personal_metadata_service.py +347 -0
  54. ebk/services/queue_service.py +253 -0
  55. ebk/services/tag_service.py +281 -0
  56. ebk/services/text_extraction.py +317 -0
  57. ebk/services/view_service.py +12 -0
  58. ebk/similarity/__init__.py +77 -0
  59. ebk/similarity/base.py +154 -0
  60. ebk/similarity/core.py +471 -0
  61. ebk/similarity/extractors.py +168 -0
  62. ebk/similarity/metrics.py +376 -0
  63. ebk/skills/SKILL.md +182 -0
  64. ebk/skills/__init__.py +1 -0
  65. ebk/vfs/__init__.py +101 -0
  66. ebk/vfs/base.py +298 -0
  67. ebk/vfs/library_vfs.py +122 -0
  68. ebk/vfs/nodes/__init__.py +54 -0
  69. ebk/vfs/nodes/authors.py +196 -0
  70. ebk/vfs/nodes/books.py +480 -0
  71. ebk/vfs/nodes/files.py +155 -0
  72. ebk/vfs/nodes/metadata.py +385 -0
  73. ebk/vfs/nodes/root.py +100 -0
  74. ebk/vfs/nodes/similar.py +165 -0
  75. ebk/vfs/nodes/subjects.py +184 -0
  76. ebk/vfs/nodes/tags.py +371 -0
  77. ebk/vfs/resolver.py +228 -0
  78. ebk/vfs_router.py +275 -0
  79. ebk/views/__init__.py +32 -0
  80. ebk/views/dsl.py +668 -0
  81. ebk/views/service.py +619 -0
  82. ebk-0.4.4.dist-info/METADATA +755 -0
  83. ebk-0.4.4.dist-info/RECORD +87 -0
  84. ebk-0.4.4.dist-info/WHEEL +5 -0
  85. ebk-0.4.4.dist-info/entry_points.txt +2 -0
  86. ebk-0.4.4.dist-info/licenses/LICENSE +21 -0
  87. ebk-0.4.4.dist-info/top_level.txt +1 -0
ebk/library_db.py ADDED
@@ -0,0 +1,1440 @@
1
+ """
2
+ Database-backed Library class for ebk.
3
+
4
+ Provides a fluent API for managing ebook libraries using SQLAlchemy + SQLite.
5
+ """
6
+
7
+ from pathlib import Path
8
+ from typing import List, Dict, Any, Optional, Tuple
9
+ from datetime import datetime
10
+ import logging
11
+
12
+ from sqlalchemy import func, or_, and_, text, update
13
+ from sqlalchemy.orm import Session
14
+
15
+ from .db.models import Book, Author, Subject, File, PersonalMetadata
16
+ from .db.session import init_db, get_session, close_db
17
+ from .services.import_service import ImportService
18
+ from .services.text_extraction import TextExtractionService
19
+ from .search_parser import parse_search_query
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
+ class Library:
25
+ """
26
+ Database-backed library for managing ebooks.
27
+
28
+ Usage:
29
+ lib = Library.open("/path/to/library")
30
+ lib.add_book(Path("book.pdf"), {"title": "My Book", "creators": ["Author"]})
31
+ results = lib.search("python programming")
32
+ stats = lib.stats()
33
+ lib.close()
34
+ """
35
+
36
+ def __init__(self, library_path: Path, session: Session):
37
+ self.library_path = Path(library_path)
38
+ self.session = session
39
+ self.import_service = ImportService(library_path, session)
40
+ self.text_service = TextExtractionService(library_path)
41
+
42
+ @classmethod
43
+ def open(cls, library_path: Path, echo: bool = False) -> 'Library':
44
+ """
45
+ Open or create a library.
46
+
47
+ Args:
48
+ library_path: Path to library directory
49
+ echo: If True, log all SQL statements
50
+
51
+ Returns:
52
+ Library instance
53
+ """
54
+ library_path = Path(library_path)
55
+ init_db(library_path, echo=echo)
56
+ session = get_session()
57
+
58
+ logger.debug(f"Opened library at {library_path}")
59
+ return cls(library_path, session)
60
+
61
+ def close(self):
62
+ """Close library and cleanup database connection."""
63
+ if self.session:
64
+ self.session.close()
65
+ close_db()
66
+ logger.debug("Closed library")
67
+
68
+ def add_book(self, file_path: Path, metadata: Dict[str, Any],
69
+ extract_text: bool = True, extract_cover: bool = True) -> Optional[Book]:
70
+ """
71
+ Add a book to the library.
72
+
73
+ Args:
74
+ file_path: Path to ebook file
75
+ metadata: Metadata dictionary (title, creators, subjects, etc.)
76
+ extract_text: Whether to extract full text
77
+ extract_cover: Whether to extract cover image
78
+
79
+ Returns:
80
+ Book instance or None if import failed
81
+ """
82
+ book = self.import_service.import_file(
83
+ file_path,
84
+ metadata,
85
+ extract_text=extract_text,
86
+ extract_cover=extract_cover
87
+ )
88
+
89
+ if book:
90
+ logger.debug(f"Added book: {book.title}")
91
+
92
+ return book
93
+
94
+ def add_calibre_book(self, metadata_opf_path: Path) -> Optional[Book]:
95
+ """
96
+ Add book from Calibre metadata.opf file.
97
+
98
+ Args:
99
+ metadata_opf_path: Path to metadata.opf
100
+
101
+ Returns:
102
+ Book instance or None
103
+ """
104
+ return self.import_service.import_calibre_book(metadata_opf_path)
105
+
106
+ def batch_import(self, files_and_metadata: List[Tuple[Path, Dict[str, Any]]],
107
+ show_progress: bool = True) -> List[Book]:
108
+ """
109
+ Import multiple books with progress tracking.
110
+
111
+ Args:
112
+ files_and_metadata: List of (file_path, metadata) tuples
113
+ show_progress: Whether to show progress bar
114
+
115
+ Returns:
116
+ List of imported Book instances
117
+ """
118
+ file_paths = [f for f, _ in files_and_metadata]
119
+ metadata_list = [m for _, m in files_and_metadata]
120
+
121
+ return self.import_service.batch_import(
122
+ file_paths,
123
+ metadata_list,
124
+ show_progress=show_progress
125
+ )
126
+
127
+ def get_book(self, book_id: int) -> Optional[Book]:
128
+ """Get book by ID."""
129
+ return self.session.get(Book, book_id)
130
+
131
+ def get_book_by_unique_id(self, unique_id: str) -> Optional[Book]:
132
+ """Get book by unique ID."""
133
+ return self.session.query(Book).filter_by(unique_id=unique_id).first()
134
+
135
+ def query(self) -> 'QueryBuilder':
136
+ """Start a fluent query."""
137
+ return QueryBuilder(self.session)
138
+
139
+ def search(self, query: str, limit: int = 50, offset: int = 0) -> List[Book]:
140
+ """
141
+ Advanced search across books with field-specific queries and boolean logic.
142
+
143
+ Supports:
144
+ - Field searches: title:Python, author:Knuth, tag:programming
145
+ - Phrases: "machine learning"
146
+ - Boolean: AND (implicit), OR (explicit), NOT/-prefix (negation)
147
+ - Comparisons: rating:>=4, rating:3-5
148
+ - Filters: language:en, format:pdf, favorite:true
149
+
150
+ Examples:
151
+ title:Python rating:>=4 format:pdf
152
+ author:"Donald Knuth" series:TAOCP
153
+ tag:programming favorite:true NOT java
154
+
155
+ Args:
156
+ query: Search query (supports advanced syntax or plain text)
157
+ limit: Maximum number of results
158
+ offset: Number of results to skip (for pagination)
159
+
160
+ Returns:
161
+ List of matching books
162
+ """
163
+ try:
164
+ # Parse the query
165
+ parsed = parse_search_query(query)
166
+
167
+ # If no FTS terms and no filters, return empty
168
+ if not parsed.has_fts_terms() and not parsed.has_filters():
169
+ return []
170
+
171
+ # Build the query
172
+ book_ids = []
173
+
174
+ # If we have FTS terms, search FTS5 first
175
+ if parsed.has_fts_terms():
176
+ result = self.session.execute(
177
+ text("""
178
+ SELECT book_id, rank
179
+ FROM books_fts
180
+ WHERE books_fts MATCH :query
181
+ ORDER BY rank
182
+ LIMIT :limit OFFSET :offset
183
+ """),
184
+ {"query": parsed.fts_query, "limit": limit + offset + limit, "offset": 0} # Get more for filtering
185
+ )
186
+ book_ids = [row[0] for row in result]
187
+
188
+ if not book_ids:
189
+ return []
190
+
191
+ # Build filter conditions
192
+ from .search_parser import SearchQueryParser
193
+ parser = SearchQueryParser()
194
+ where_clause, params = parser.to_sql_conditions(parsed)
195
+
196
+ # If we have both FTS and filters, combine them
197
+ if book_ids and where_clause:
198
+ # Start with FTS results and apply filters
199
+ books_query = self.session.query(Book).filter(
200
+ Book.id.in_(book_ids)
201
+ )
202
+
203
+ # Apply additional SQL filters
204
+ books_query = books_query.filter(text(where_clause).bindparams(**params))
205
+
206
+ books = books_query.all()
207
+
208
+ # Maintain FTS ranking order and apply offset/limit
209
+ books_dict = {b.id: b for b in books}
210
+ ordered = [books_dict[bid] for bid in book_ids if bid in books_dict]
211
+ return ordered[offset:offset + limit]
212
+
213
+ # If only FTS (no additional filters)
214
+ elif book_ids:
215
+ books = self.session.query(Book).filter(Book.id.in_(book_ids)).all()
216
+ books_dict = {b.id: b for b in books}
217
+ ordered = [books_dict[bid] for bid in book_ids if bid in books_dict]
218
+ return ordered[offset:offset + limit]
219
+
220
+ # If only filters (no FTS)
221
+ elif where_clause:
222
+ books_query = self.session.query(Book)
223
+ books_query = books_query.filter(text(where_clause).bindparams(**params))
224
+ return books_query.offset(offset).limit(limit).all()
225
+
226
+ return []
227
+
228
+ except Exception as e:
229
+ logger.error(f"Search error: {e}")
230
+ logger.exception(e)
231
+ # Fallback to original simple FTS search
232
+ try:
233
+ result = self.session.execute(
234
+ text("""
235
+ SELECT book_id, rank
236
+ FROM books_fts
237
+ WHERE books_fts MATCH :query
238
+ ORDER BY rank
239
+ LIMIT :limit
240
+ """),
241
+ {"query": query, "limit": limit}
242
+ )
243
+ book_ids = [row[0] for row in result]
244
+ if not book_ids:
245
+ return []
246
+ books = self.session.query(Book).filter(Book.id.in_(book_ids)).all()
247
+ books_dict = {b.id: b for b in books}
248
+ return [books_dict[bid] for bid in book_ids if bid in books_dict]
249
+ except Exception as fallback_error:
250
+ logger.error(f"Fallback search also failed: {fallback_error}")
251
+ return []
252
+
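Reviewer note: a minimal usage sketch of the search API documented above; the import path follows ebk/library_db.py, while the library path, queries, and book handling are illustrative.

from pathlib import Path
from ebk.library_db import Library

lib = Library.open(Path("~/books").expanduser())            # path is illustrative
hits = lib.search('title:Python rating:>=4 format:pdf')     # field filters with a comparison
knuth = lib.search('author:"Donald Knuth" NOT java', limit=20)
for book in hits:
    print(book.title)
lib.close()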
253
+ def stats(self) -> Dict[str, Any]:
254
+ """
255
+ Get library statistics.
256
+
257
+ Returns:
258
+ Dictionary with statistics
259
+ """
260
+ total_books = self.session.query(func.count(Book.id)).scalar()
261
+ total_authors = self.session.query(func.count(Author.id)).scalar()
262
+ total_subjects = self.session.query(func.count(Subject.id)).scalar()
263
+ total_files = self.session.query(func.count(File.id)).scalar()
264
+
265
+ # Reading stats
266
+ read_count = self.session.query(func.count(PersonalMetadata.id)).filter(
267
+ PersonalMetadata.reading_status == 'read'
268
+ ).scalar()
269
+
270
+ reading_count = self.session.query(func.count(PersonalMetadata.id)).filter(
271
+ PersonalMetadata.reading_status == 'reading'
272
+ ).scalar()
273
+
274
+ # Language distribution
275
+ lang_dist = self.session.query(
276
+ Book.language,
277
+ func.count(Book.id)
278
+ ).group_by(Book.language).all()
279
+
280
+ # Format distribution
281
+ format_dist = self.session.query(
282
+ File.format,
283
+ func.count(File.id)
284
+ ).group_by(File.format).all()
285
+
286
+ return {
287
+ 'total_books': total_books,
288
+ 'total_authors': total_authors,
289
+ 'total_subjects': total_subjects,
290
+ 'total_files': total_files,
291
+ 'read_count': read_count,
292
+ 'reading_count': reading_count,
293
+ 'languages': dict(lang_dist),
294
+ 'formats': dict(format_dist)
295
+ }
296
+
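Reviewer note: the keys below come straight from the stats() return statement; a short sketch of reading them (lib is the instance from the earlier sketch, values are placeholders).

stats = lib.stats()
print(f"{stats['total_books']} books by {stats['total_authors']} authors")
print("currently reading:", stats['reading_count'])
print("formats:", stats['formats'])        # e.g. {'pdf': 120, 'epub': 45}, placeholder values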
297
+ def get_all_books(self, limit: Optional[int] = None, offset: int = 0) -> List[Book]:
298
+ """
299
+ Get all books with optional pagination.
300
+
301
+ Args:
302
+ limit: Maximum number of books
303
+ offset: Starting offset
304
+
305
+ Returns:
306
+ List of books
307
+ """
308
+ query = self.session.query(Book).order_by(Book.title)
309
+
310
+ if limit:
311
+ query = query.limit(limit).offset(offset)
312
+
313
+ return query.all()
314
+
315
+ def get_books_by_author(self, author_name: str) -> List[Book]:
316
+ """Get all books by an author."""
317
+ return self.session.query(Book).join(Book.authors).filter(
318
+ Author.name.ilike(f"%{author_name}%")
319
+ ).all()
320
+
321
+ def get_books_by_subject(self, subject_name: str) -> List[Book]:
322
+ """Get all books with a subject."""
323
+ return self.session.query(Book).join(Book.subjects).filter(
324
+ Subject.name.ilike(f"%{subject_name}%")
325
+ ).all()
326
+
327
+ def update_reading_status(self, book_id: int, status: str,
328
+ progress: Optional[int] = None,
329
+ rating: Optional[int] = None):
330
+ """
331
+ Update reading status for a book.
332
+
333
+ Args:
334
+ book_id: Book ID
335
+ status: Reading status (unread, reading, read)
336
+ progress: Reading progress percentage (0-100)
337
+ rating: Rating (1-5)
338
+ """
339
+ personal = self.session.query(PersonalMetadata).filter_by(
340
+ book_id=book_id
341
+ ).first()
342
+
343
+ if personal:
344
+ personal.reading_status = status
345
+ if progress is not None:
346
+ personal.reading_progress = progress
347
+ if rating is not None:
348
+ personal.rating = rating
349
+
350
+ if status == 'read':
351
+ personal.date_finished = datetime.now()
352
+
353
+ self.session.commit()
354
+ logger.debug(f"Updated reading status for book {book_id}: {status}")
355
+
356
+ def set_favorite(self, book_id: int, favorite: bool = True):
357
+ """
358
+ Mark/unmark book as favorite.
359
+
360
+ Args:
361
+ book_id: Book ID
362
+ favorite: True to mark as favorite, False to unmark
363
+ """
364
+
365
+
366
+ personal = self.session.query(PersonalMetadata).filter_by(
367
+ book_id=book_id
368
+ ).first()
369
+
370
+ if not personal:
371
+ # Create personal metadata if it doesn't exist
372
+ personal = PersonalMetadata(book_id=book_id, favorite=favorite)
373
+ self.session.add(personal)
374
+ else:
375
+ personal.favorite = favorite
376
+
377
+ self.session.commit()
378
+ logger.debug(f"Set favorite for book {book_id}: {favorite}")
379
+
380
+ # Reading Queue Methods
381
+
382
+ def get_reading_queue(self) -> List[Book]:
383
+ """
384
+ Get all books in the reading queue, ordered by position.
385
+
386
+ Returns:
387
+ List of books in queue order
388
+ """
389
+
390
+
391
+ return self.session.query(Book).join(Book.personal).filter(
392
+ PersonalMetadata.queue_position.isnot(None)
393
+ ).order_by(PersonalMetadata.queue_position).all()
394
+
395
+ def add_to_queue(self, book_id: int, position: Optional[int] = None):
396
+ """
397
+ Add a book to the reading queue.
398
+
399
+ Args:
400
+ book_id: Book ID to add
401
+ position: Position in queue (1-based). If None, adds to end.
402
+ """
403
+
404
+
405
+ personal = self.session.query(PersonalMetadata).filter_by(
406
+ book_id=book_id
407
+ ).first()
408
+
409
+ if not personal:
410
+ personal = PersonalMetadata(book_id=book_id)
411
+ self.session.add(personal)
412
+ self.session.flush()
413
+
414
+ # Get current max position
415
+ max_pos = self.session.query(func.max(PersonalMetadata.queue_position)).scalar() or 0
416
+
417
+ if position is None:
418
+ # Add to end
419
+ personal.queue_position = max_pos + 1
420
+ else:
421
+ # Insert at specific position, shift others down
422
+ position = max(1, position) # Ensure positive
423
+ self.session.query(PersonalMetadata).filter(
424
+ PersonalMetadata.queue_position >= position,
425
+ PersonalMetadata.queue_position.isnot(None)
426
+ ).update({PersonalMetadata.queue_position: PersonalMetadata.queue_position + 1})
427
+ personal.queue_position = position
428
+
429
+ self.session.commit()
430
+ logger.debug(f"Added book {book_id} to queue at position {personal.queue_position}")
431
+
432
+ def remove_from_queue(self, book_id: int):
433
+ """
434
+ Remove a book from the reading queue.
435
+
436
+ Args:
437
+ book_id: Book ID to remove
438
+ """
439
+
440
+
441
+ personal = self.session.query(PersonalMetadata).filter_by(
442
+ book_id=book_id
443
+ ).first()
444
+
445
+ if personal and personal.queue_position is not None:
446
+ old_position = personal.queue_position
447
+ personal.queue_position = None
448
+
449
+ # Shift other items up to fill gap
450
+ self.session.query(PersonalMetadata).filter(
451
+ PersonalMetadata.queue_position > old_position
452
+ ).update({PersonalMetadata.queue_position: PersonalMetadata.queue_position - 1})
453
+
454
+ self.session.commit()
455
+ logger.debug(f"Removed book {book_id} from queue")
456
+
457
+ def reorder_queue(self, book_id: int, new_position: int):
458
+ """
459
+ Move a book to a new position in the queue.
460
+
461
+ Args:
462
+ book_id: Book ID to move
463
+ new_position: New position (1-based)
464
+ """
465
+
466
+
467
+ personal = self.session.query(PersonalMetadata).filter_by(
468
+ book_id=book_id
469
+ ).first()
470
+
471
+ if not personal or personal.queue_position is None:
472
+ # Not in queue, add it
473
+ self.add_to_queue(book_id, new_position)
474
+ return
475
+
476
+ old_position = personal.queue_position
477
+ new_position = max(1, new_position)
478
+
479
+ if old_position == new_position:
480
+ return # No change needed
481
+
482
+ if old_position < new_position:
483
+ # Moving down: shift items between old and new up
484
+ self.session.query(PersonalMetadata).filter(
485
+ PersonalMetadata.queue_position > old_position,
486
+ PersonalMetadata.queue_position <= new_position,
487
+ PersonalMetadata.queue_position.isnot(None)
488
+ ).update({PersonalMetadata.queue_position: PersonalMetadata.queue_position - 1})
489
+ else:
490
+ # Moving up: shift items between new and old down
491
+ self.session.query(PersonalMetadata).filter(
492
+ PersonalMetadata.queue_position >= new_position,
493
+ PersonalMetadata.queue_position < old_position,
494
+ PersonalMetadata.queue_position.isnot(None)
495
+ ).update({PersonalMetadata.queue_position: PersonalMetadata.queue_position + 1})
496
+
497
+ personal.queue_position = new_position
498
+ self.session.commit()
499
+ logger.debug(f"Moved book {book_id} from position {old_position} to {new_position}")
500
+
501
+ def clear_queue(self):
502
+ """Clear all books from the reading queue."""
503
+
504
+
505
+ self.session.query(PersonalMetadata).filter(
506
+ PersonalMetadata.queue_position.isnot(None)
507
+ ).update({PersonalMetadata.queue_position: None})
508
+ self.session.commit()
509
+ logger.debug("Cleared reading queue")
510
+
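Reviewer note: a sketch of the reading-queue workflow using only the methods defined above; book IDs are illustrative.

lib.add_to_queue(42)                   # append to the end of the queue
lib.add_to_queue(7, position=1)        # insert at the front, shifting others down
lib.reorder_queue(42, new_position=1)  # move book 42 ahead of book 7
for i, book in enumerate(lib.get_reading_queue(), start=1):
    print(i, book.title)
lib.remove_from_queue(7)
# lib.clear_queue()                    # would empty the queue entirely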
511
+ def add_tags(self, book_id: int, tags: List[str]):
512
+ """
513
+ Add personal tags to a book.
514
+
515
+ Args:
516
+ book_id: Book ID
517
+ tags: List of tag strings
518
+ """
519
+
520
+
521
+ personal = self.session.query(PersonalMetadata).filter_by(
522
+ book_id=book_id
523
+ ).first()
524
+
525
+ if not personal:
526
+ personal = PersonalMetadata(book_id=book_id, personal_tags=tags)
527
+ self.session.add(personal)
528
+ else:
529
+ existing_tags = personal.personal_tags or []
530
+ # Add new tags without duplicates
531
+ combined = list(set(existing_tags + tags))
532
+ personal.personal_tags = combined
533
+
534
+ self.session.commit()
535
+ logger.debug(f"Added tags to book {book_id}: {tags}")
536
+
537
+ def remove_tags(self, book_id: int, tags: List[str]):
538
+ """
539
+ Remove personal tags from a book.
540
+
541
+ Args:
542
+ book_id: Book ID
543
+ tags: List of tag strings to remove
544
+ """
545
+
546
+
547
+ personal = self.session.query(PersonalMetadata).filter_by(
548
+ book_id=book_id
549
+ ).first()
550
+
551
+ if personal and personal.personal_tags:
552
+ personal.personal_tags = [t for t in personal.personal_tags if t not in tags]
553
+ self.session.commit()
554
+ logger.debug(f"Removed tags from book {book_id}: {tags}")
555
+
556
+ def add_subject(self, book_id: int, subject_name: str):
557
+ """
558
+ Add a subject/tag to a book.
559
+
560
+ Args:
561
+ book_id: Book ID
562
+ subject_name: Subject/tag name to add
563
+ """
564
+ book = self.session.query(Book).filter_by(id=book_id).first()
565
+ if not book:
566
+ logger.warning(f"Book {book_id} not found")
567
+ return
568
+
569
+ # Check if subject already exists
570
+ subject = self.session.query(Subject).filter_by(name=subject_name).first()
571
+ if not subject:
572
+ subject = Subject(name=subject_name)
573
+ self.session.add(subject)
574
+
575
+ # Add subject to book if not already present
576
+ if subject not in book.subjects:
577
+ book.subjects.append(subject)
578
+ self.session.commit()
579
+ logger.debug(f"Added subject '{subject_name}' to book {book_id}")
580
+
581
+ def add_annotation(self, book_id: int, content: str,
582
+ page: Optional[int] = None,
583
+ annotation_type: str = 'note'):
584
+ """
585
+ Add an annotation/comment to a book.
586
+
587
+ Args:
588
+ book_id: Book ID
589
+ content: Annotation text
590
+ page: Page number (optional)
591
+ annotation_type: Type of annotation (note, highlight, bookmark)
592
+
593
+ Returns:
594
+ Annotation ID
595
+ """
596
+ from .db.models import Annotation
597
+
598
+ annotation = Annotation(
599
+ book_id=book_id,
600
+ content=content,
601
+ page_number=page,
602
+ annotation_type=annotation_type,
603
+ created_at=datetime.now()
604
+ )
605
+ self.session.add(annotation)
606
+ self.session.commit()
607
+
608
+ logger.debug(f"Added annotation to book {book_id}")
609
+ return annotation.id
610
+
611
+ def get_annotations(self, book_id: int) -> List:
612
+ """
613
+ Get all annotations for a book.
614
+
615
+ Args:
616
+ book_id: Book ID
617
+
618
+ Returns:
619
+ List of Annotation objects
620
+ """
621
+ from .db.models import Annotation
622
+
623
+ return self.session.query(Annotation).filter_by(
624
+ book_id=book_id
625
+ ).order_by(Annotation.created_at.desc()).all()
626
+
627
+ def delete_annotation(self, annotation_id: int):
628
+ """
629
+ Delete an annotation.
630
+
631
+ Args:
632
+ annotation_id: Annotation ID
633
+ """
634
+ from .db.models import Annotation
635
+
636
+ annotation = self.session.get(Annotation, annotation_id)
637
+ if annotation:
638
+ self.session.delete(annotation)
639
+ self.session.commit()
640
+ logger.debug(f"Deleted annotation {annotation_id}")
641
+
642
+ # -------------------------------------------------------------------------
643
+ # Review Methods
644
+ # -------------------------------------------------------------------------
645
+
646
+ def add_review(self, book_id: int, content: str,
647
+ title: Optional[str] = None,
648
+ rating: Optional[float] = None,
649
+ review_type: str = 'personal',
650
+ visibility: str = 'private') -> int:
651
+ """
652
+ Add a review to a book.
653
+
654
+ Args:
655
+ book_id: Book ID
656
+ content: Review text (markdown supported)
657
+ title: Review headline/title (optional)
658
+ rating: Rating 1-5 stars (optional, separate from book rating)
659
+ review_type: Type of review (personal, summary, critique, notes)
660
+ visibility: Visibility level (private, public)
661
+
662
+ Returns:
663
+ Review ID
664
+ """
665
+ from .db.models import Review
666
+
667
+ review = Review(
668
+ book_id=book_id,
669
+ content=content,
670
+ title=title,
671
+ rating=rating,
672
+ review_type=review_type,
673
+ visibility=visibility
674
+ )
675
+ self.session.add(review)
676
+ self.session.commit()
677
+
678
+ logger.debug(f"Added review to book {book_id}")
679
+ return review.id
680
+
681
+ def get_reviews(self, book_id: int) -> List:
682
+ """
683
+ Get all reviews for a book.
684
+
685
+ Args:
686
+ book_id: Book ID
687
+
688
+ Returns:
689
+ List of Review objects
690
+ """
691
+ from .db.models import Review
692
+
693
+ return self.session.query(Review).filter_by(
694
+ book_id=book_id
695
+ ).order_by(Review.created_at.desc()).all()
696
+
697
+ def get_review(self, review_id: int):
698
+ """
699
+ Get a specific review by ID.
700
+
701
+ Args:
702
+ review_id: Review ID
703
+
704
+ Returns:
705
+ Review object or None
706
+ """
707
+ from .db.models import Review
708
+ return self.session.get(Review, review_id)
709
+
710
+ def update_review(self, review_id: int,
711
+ content: Optional[str] = None,
712
+ title: Optional[str] = None,
713
+ rating: Optional[float] = None,
714
+ review_type: Optional[str] = None,
715
+ visibility: Optional[str] = None) -> bool:
716
+ """
717
+ Update an existing review.
718
+
719
+ Args:
720
+ review_id: Review ID
721
+ content: New review text
722
+ title: New title
723
+ rating: New rating
724
+ review_type: New review type
725
+ visibility: New visibility
726
+
727
+ Returns:
728
+ True if updated, False if not found
729
+ """
730
+ from .db.models import Review
731
+
732
+ review = self.session.get(Review, review_id)
733
+ if not review:
734
+ return False
735
+
736
+ if content is not None:
737
+ review.content = content
738
+ if title is not None:
739
+ review.title = title
740
+ if rating is not None:
741
+ review.rating = rating
742
+ if review_type is not None:
743
+ review.review_type = review_type
744
+ if visibility is not None:
745
+ review.visibility = visibility
746
+
747
+ self.session.commit()
748
+ logger.debug(f"Updated review {review_id}")
749
+ return True
750
+
751
+ def delete_review(self, review_id: int) -> bool:
752
+ """
753
+ Delete a review.
754
+
755
+ Args:
756
+ review_id: Review ID
757
+
758
+ Returns:
759
+ True if deleted, False if not found
760
+ """
761
+ from .db.models import Review
762
+
763
+ review = self.session.get(Review, review_id)
764
+ if review:
765
+ self.session.delete(review)
766
+ self.session.commit()
767
+ logger.debug(f"Deleted review {review_id}")
768
+ return True
769
+ return False
770
+
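Reviewer note: annotations and reviews share the same add/list/update/delete pattern; a combined sketch with illustrative IDs and text.

note_id = lib.add_annotation(42, "Key proof on this page", page=17, annotation_type='note')
for ann in lib.get_annotations(42):
    print(ann.page_number, ann.content)
lib.delete_annotation(note_id)

review_id = lib.add_review(42, "Dense but rewarding.", title="Worth a re-read",
                           rating=4.5, review_type='personal', visibility='private')
lib.update_review(review_id, rating=5.0)
lib.delete_review(review_id)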
771
+ def add_to_virtual_library(self, book_id: int, library_name: str):
772
+ """
773
+ Add a book to a virtual library (collection/view).
774
+
775
+ Args:
776
+ book_id: Book ID
777
+ library_name: Name of the virtual library
778
+ """
779
+
780
+
781
+ personal = self.session.query(PersonalMetadata).filter_by(
782
+ book_id=book_id
783
+ ).first()
784
+
785
+ if not personal:
786
+ # Use personal_tags as virtual_libraries array
787
+ personal = PersonalMetadata(book_id=book_id, personal_tags=[library_name])
788
+ self.session.add(personal)
789
+ else:
790
+ existing_libs = personal.personal_tags or []
791
+ if library_name not in existing_libs:
792
+ existing_libs.append(library_name)
793
+ personal.personal_tags = existing_libs
794
+
795
+ self.session.commit()
796
+ logger.debug(f"Added book {book_id} to virtual library '{library_name}'")
797
+
798
+ def remove_from_virtual_library(self, book_id: int, library_name: str):
799
+ """
800
+ Remove a book from a virtual library.
801
+
802
+ Args:
803
+ book_id: Book ID
804
+ library_name: Name of the virtual library
805
+ """
806
+
807
+
808
+ personal = self.session.query(PersonalMetadata).filter_by(
809
+ book_id=book_id
810
+ ).first()
811
+
812
+ if personal and personal.personal_tags:
813
+ personal.personal_tags = [lib for lib in personal.personal_tags if lib != library_name]
814
+ self.session.commit()
815
+ logger.debug(f"Removed book {book_id} from virtual library '{library_name}'")
816
+
817
+ def get_virtual_library(self, library_name: str) -> List[Book]:
818
+ """
819
+ Get all books in a virtual library.
820
+
821
+ Args:
822
+ library_name: Name of the virtual library
823
+
824
+ Returns:
825
+ List of books in this virtual library
826
+ """
827
+
828
+ # Query books where personal_tags contains the library_name
829
+ # This works with SQLite's JSON support
830
+ books = (self.session.query(Book)
831
+ .join(Book.personal)
832
+ .filter(PersonalMetadata.personal_tags.contains(library_name))
833
+ .all())
834
+
835
+ return books
836
+
837
+ def list_virtual_libraries(self) -> List[str]:
838
+ """
839
+ Get all unique virtual library names.
840
+
841
+ Returns:
842
+ List of virtual library names
843
+ """
844
+
845
+
846
+ # Get all personal_tags arrays and flatten them
847
+ all_metadata = self.session.query(PersonalMetadata).filter(
848
+ PersonalMetadata.personal_tags.isnot(None)
849
+ ).all()
850
+
851
+ libraries = set()
852
+ for pm in all_metadata:
853
+ if pm.personal_tags:
854
+ libraries.update(pm.personal_tags)
855
+
856
+ return sorted(list(libraries))
857
+
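Reviewer note: virtual libraries are stored in PersonalMetadata.personal_tags (see add_to_virtual_library above), so they share storage with add_tags/remove_tags; names and IDs below are illustrative.

lib.add_to_virtual_library(42, "thesis-reading")
lib.add_to_virtual_library(43, "thesis-reading")
print(lib.list_virtual_libraries())              # ['thesis-reading', ...]
for book in lib.get_virtual_library("thesis-reading"):
    print(book.title)
lib.remove_from_virtual_library(43, "thesis-reading")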
858
+ def delete_book(self, book_id: int, delete_files: bool = False):
859
+ """
860
+ Delete a book from the library.
861
+
862
+ Args:
863
+ book_id: Book ID
864
+ delete_files: If True, also delete physical files
865
+ """
866
+ book = self.get_book(book_id)
867
+ if not book:
868
+ logger.warning(f"Book {book_id} not found")
869
+ return
870
+
871
+ # Delete physical files if requested
872
+ if delete_files:
873
+ for file in book.files:
874
+ file_path = self.library_path / file.path
875
+ if file_path.exists():
876
+ file_path.unlink()
877
+ logger.debug(f"Deleted file: {file_path}")
878
+
879
+ # Delete covers
880
+ for cover in book.covers:
881
+ cover_path = self.library_path / cover.path
882
+ if cover_path.exists():
883
+ cover_path.unlink()
884
+
885
+ # Delete from database (cascade will handle related records)
886
+ self.session.delete(book)
887
+ self.session.commit()
888
+ logger.debug(f"Deleted book: {book.title}")
889
+
890
+ def merge_books(
891
+ self,
892
+ primary_id: int,
893
+ secondary_ids: List[int],
894
+ delete_secondary_files: bool = False,
895
+ ) -> Tuple[Optional[Book], List[int]]:
896
+ """
897
+ Merge multiple books into one, combining metadata and files.
898
+
899
+ The primary book absorbs metadata and files from secondary books.
900
+ Secondary books are deleted after merging.
901
+
902
+ Merge strategy:
903
+ - Scalar fields: Keep primary's value, fallback to secondary if empty
904
+ - Relationships (authors, subjects, tags): Union of all
905
+ - Files/covers: Move all to primary
906
+ - Personal metadata: Keep higher rating, combine dates
907
+
908
+ Args:
909
+ primary_id: ID of the book to keep (receives merged data)
910
+ secondary_ids: IDs of books to merge into primary (will be deleted)
911
+ delete_secondary_files: If True, delete physical files from secondaries
912
+ that duplicate primary's files (by hash)
913
+
914
+ Returns:
915
+ Tuple of (merged book, list of deleted book IDs)
916
+
917
+ Example:
918
+ >>> merged, deleted = lib.merge_books(42, [43, 44])
919
+ >>> print(f"Merged {len(deleted)} books into {merged.title}")
920
+ """
921
+ from .db.models import (
922
+ Book, Author, Subject, Tag, File, Cover, Contributor,
923
+ Identifier, PersonalMetadata, BookConcept, ReadingSession,
924
+ Annotation, utc_now
925
+ )
926
+
927
+ # Get primary book
928
+ primary = self.get_book(primary_id)
929
+ if not primary:
930
+ logger.error(f"Primary book {primary_id} not found")
931
+ return None, []
932
+
933
+ # Get secondary books
934
+ secondaries = []
935
+ for sid in secondary_ids:
936
+ if sid == primary_id:
937
+ continue # Skip if same as primary
938
+ book = self.get_book(sid)
939
+ if book:
940
+ secondaries.append(book)
941
+ else:
942
+ logger.warning(f"Secondary book {sid} not found, skipping")
943
+
944
+ if not secondaries:
945
+ logger.warning("No valid secondary books to merge")
946
+ return primary, []
947
+
948
+ deleted_ids = []
949
+
950
+ # Track existing file hashes to detect duplicates
951
+ primary_hashes = {f.file_hash for f in primary.files}
952
+
953
+ for secondary in secondaries:
954
+ logger.debug(f"Merging '{secondary.title}' into '{primary.title}'")
955
+
956
+ # === Merge scalar fields (prefer non-empty) ===
957
+ if not primary.subtitle and secondary.subtitle:
958
+ primary.subtitle = secondary.subtitle
959
+ if not primary.sort_title and secondary.sort_title:
960
+ primary.sort_title = secondary.sort_title
961
+ if not primary.language and secondary.language:
962
+ primary.language = secondary.language
963
+ if not primary.publisher and secondary.publisher:
964
+ primary.publisher = secondary.publisher
965
+ if not primary.publication_date and secondary.publication_date:
966
+ primary.publication_date = secondary.publication_date
967
+ if not primary.series and secondary.series:
968
+ primary.series = secondary.series
969
+ if primary.series_index is None and secondary.series_index is not None:
970
+ primary.series_index = secondary.series_index
971
+ if not primary.edition and secondary.edition:
972
+ primary.edition = secondary.edition
973
+ if not primary.rights and secondary.rights:
974
+ primary.rights = secondary.rights
975
+ if not primary.source and secondary.source:
976
+ primary.source = secondary.source
977
+ # For description, prefer longer one if both exist
978
+ if secondary.description:
979
+ if not primary.description or len(secondary.description) > len(primary.description):
980
+ primary.description = secondary.description
981
+ if primary.page_count is None and secondary.page_count:
982
+ primary.page_count = secondary.page_count
983
+ if primary.word_count is None and secondary.word_count:
984
+ primary.word_count = secondary.word_count
985
+ # Merge keywords arrays
986
+ if secondary.keywords:
987
+ if primary.keywords:
988
+ primary.keywords = list(set(primary.keywords + secondary.keywords))
989
+ else:
990
+ primary.keywords = secondary.keywords
991
+ if not primary.color and secondary.color:
992
+ primary.color = secondary.color
993
+ # Keep earliest created_at
994
+ if secondary.created_at and (not primary.created_at or secondary.created_at < primary.created_at):
995
+ primary.created_at = secondary.created_at
996
+
997
+ # === Merge relationships (union) ===
998
+ # Authors
999
+ existing_author_ids = {a.id for a in primary.authors}
1000
+ for author in secondary.authors:
1001
+ if author.id not in existing_author_ids:
1002
+ primary.authors.append(author)
1003
+
1004
+ # Subjects
1005
+ existing_subject_ids = {s.id for s in primary.subjects}
1006
+ for subject in secondary.subjects:
1007
+ if subject.id not in existing_subject_ids:
1008
+ primary.subjects.append(subject)
1009
+
1010
+ # Tags
1011
+ existing_tag_ids = {t.id for t in primary.tags}
1012
+ for tag in secondary.tags:
1013
+ if tag.id not in existing_tag_ids:
1014
+ primary.tags.append(tag)
1015
+
1016
+ # Contributors - move to primary using SQL
1017
+ from .db.models import Contributor as ContributorModel
1018
+ for contrib in list(secondary.contributors):
1019
+ self.session.execute(
1020
+ update(ContributorModel).where(ContributorModel.id == contrib.id).values(book_id=primary.id)
1021
+ )
1022
+
1023
+ # Identifiers - move unique ones to primary using SQL
1024
+ from .db.models import Identifier as IdentifierModel
1025
+ existing_identifiers = {(i.scheme, i.value) for i in primary.identifiers}
1026
+ for ident in list(secondary.identifiers):
1027
+ if (ident.scheme, ident.value) not in existing_identifiers:
1028
+ self.session.execute(
1029
+ update(IdentifierModel).where(IdentifierModel.id == ident.id).values(book_id=primary.id)
1030
+ )
1031
+
1032
+ # Files - move to primary (handle duplicates by hash)
1033
+ # Must use SQL UPDATE to bypass cascade delete on the relationship
1034
+ from .db.models import File as FileModel
1035
+
1036
+ for file in list(secondary.files):
1037
+ if file.file_hash in primary_hashes:
1038
+ # Duplicate file - explicitly delete
1039
+ if delete_secondary_files:
1040
+ file_path = self.library_path / file.path
1041
+ if file_path.exists():
1042
+ file_path.unlink()
1043
+ logger.debug(f"Deleted duplicate file: {file_path}")
1044
+ self.session.execute(
1045
+ update(FileModel).where(FileModel.id == file.id).values(book_id=None)
1046
+ )
1047
+ self.session.delete(file)
1048
+ else:
1049
+ # Move file to primary using direct SQL to bypass cascade
1050
+ self.session.execute(
1051
+ update(FileModel).where(FileModel.id == file.id).values(book_id=primary.id)
1052
+ )
1053
+ primary_hashes.add(file.file_hash)
1054
+
1055
+ # Covers - move to primary using SQL to bypass cascade
1056
+ from .db.models import Cover as CoverModel
1057
+ for cover in list(secondary.covers):
1058
+ self.session.execute(
1059
+ update(CoverModel).where(CoverModel.id == cover.id).values(
1060
+ book_id=primary.id,
1061
+ is_primary=False
1062
+ )
1063
+ )
1064
+
1065
+ # Concepts - move to primary using SQL
1066
+ from .db.models import BookConcept
1067
+ for concept in list(secondary.concepts):
1068
+ self.session.execute(
1069
+ update(BookConcept).where(BookConcept.id == concept.id).values(book_id=primary.id)
1070
+ )
1071
+
1072
+ # Reading sessions - move to primary using SQL
1073
+ from .db.models import ReadingSession
1074
+ for sess in list(secondary.sessions):
1075
+ self.session.execute(
1076
+ update(ReadingSession).where(ReadingSession.id == sess.id).values(book_id=primary.id)
1077
+ )
1078
+
1079
+ # Annotations - move to primary using SQL
1080
+ from .db.models import Annotation as AnnotationModel
1081
+ for annotation in list(secondary.annotations):
1082
+ self.session.execute(
1083
+ update(AnnotationModel).where(AnnotationModel.id == annotation.id).values(book_id=primary.id)
1084
+ )
1085
+
1086
+ # Expire secondary so ORM doesn't cascade delete moved items
1087
+ self.session.expire(secondary)
1088
+ self.session.flush()
1089
+
1090
+ # Personal metadata - merge intelligently
1091
+ if secondary.personal:
1092
+ if primary.personal:
1093
+ # Keep higher rating
1094
+ if secondary.personal.rating and (
1095
+ not primary.personal.rating or
1096
+ secondary.personal.rating > primary.personal.rating
1097
+ ):
1098
+ primary.personal.rating = secondary.personal.rating
1099
+ # Keep "read" status over "unread"
1100
+ status_priority = {'read': 0, 'reading': 1, 'abandoned': 2, 'unread': 3}
1101
+ if status_priority.get(secondary.personal.reading_status, 3) < status_priority.get(primary.personal.reading_status, 3):
1102
+ primary.personal.reading_status = secondary.personal.reading_status
1103
+ # Keep higher progress
1104
+ if secondary.personal.reading_progress and (
1105
+ not primary.personal.reading_progress or
1106
+ secondary.personal.reading_progress > primary.personal.reading_progress
1107
+ ):
1108
+ primary.personal.reading_progress = secondary.personal.reading_progress
1109
+ # Keep favorite if either is favorite
1110
+ if secondary.personal.favorite:
1111
+ primary.personal.favorite = True
1112
+ # Keep earliest date_added
1113
+ if secondary.personal.date_added and (
1114
+ not primary.personal.date_added or
1115
+ secondary.personal.date_added < primary.personal.date_added
1116
+ ):
1117
+ primary.personal.date_added = secondary.personal.date_added
1118
+ # Keep dates if set
1119
+ if secondary.personal.date_started and not primary.personal.date_started:
1120
+ primary.personal.date_started = secondary.personal.date_started
1121
+ if secondary.personal.date_finished and not primary.personal.date_finished:
1122
+ primary.personal.date_finished = secondary.personal.date_finished
1123
+ # Merge personal_tags
1124
+ if secondary.personal.personal_tags:
1125
+ if primary.personal.personal_tags:
1126
+ primary.personal.personal_tags = list(set(
1127
+ primary.personal.personal_tags + secondary.personal.personal_tags
1128
+ ))
1129
+ else:
1130
+ primary.personal.personal_tags = secondary.personal.personal_tags
1131
+ else:
1132
+ # Move secondary's personal metadata to primary
1133
+ secondary.personal.book_id = primary.id
1134
+
1135
+ # Delete secondary book
1136
+ deleted_ids.append(secondary.id)
1137
+ self.session.delete(secondary)
1138
+
1139
+ # Update primary's timestamp
1140
+ primary.updated_at = utc_now()
1141
+
1142
+ self.session.commit()
1143
+ logger.info(f"Merged {len(deleted_ids)} books into '{primary.title}' (ID: {primary.id})")
1144
+
1145
+ return primary, deleted_ids
1146
+
1147
+ def find_similar(
1148
+ self,
1149
+ book_id: int,
1150
+ top_k: int = 10,
1151
+ similarity_config: Optional[Any] = None,
1152
+ filter_language: bool = True,
1153
+ ) -> List[Tuple[Book, float]]:
1154
+ """
1155
+ Find books similar to the given book.
1156
+
1157
+ Uses semantic similarity based on content, metadata, etc.
1158
+
1159
+ Args:
1160
+ book_id: ID of the query book
1161
+ top_k: Number of similar books to return (default 10)
1162
+ similarity_config: Optional BookSimilarity instance
1163
+ (default: balanced preset)
1164
+ filter_language: If True, only return books in same language
1165
+
1166
+ Returns:
1167
+ List of (book, similarity_score) tuples, sorted by similarity
1168
+
1169
+ Example:
1170
+ >>> similar = lib.find_similar(42, top_k=5)
1171
+ >>> for book, score in similar:
1172
+ ... print(f"{book.title}: {score:.2f}")
1173
+ """
1174
+ from ebk.similarity import BookSimilarity
1175
+
1176
+ # Get query book
1177
+ query_book = self.get_book(book_id)
1178
+ if not query_book:
1179
+ logger.warning(f"Book {book_id} not found")
1180
+ return []
1181
+
1182
+ # Get candidate books
1183
+ candidates_query = self.query()
1184
+ if filter_language and query_book.language:
1185
+ candidates_query = candidates_query.filter_by_language(query_book.language)
1186
+
1187
+ candidates = candidates_query.all()
1188
+
1189
+ if not candidates:
1190
+ return []
1191
+
1192
+ # Configure similarity - auto-detect sparse data
1193
+ if similarity_config is None:
1194
+ # Check if query book has extracted text
1195
+ has_extracted_text = False
1196
+ for file in query_book.files:
1197
+ if file.extracted_text and file.extracted_text.content:
1198
+ if len(file.extracted_text.content) > 500: # Minimum useful text
1199
+ has_extracted_text = True
1200
+ break
1201
+
1202
+ if has_extracted_text:
1203
+ similarity_config = BookSimilarity().balanced()
1204
+ logger.debug(f"Using balanced preset for book with extracted text")
1205
+ else:
1206
+ similarity_config = BookSimilarity().sparse_friendly()
1207
+ logger.debug(f"Using sparse_friendly preset for book without extracted text")
1208
+
1209
+ # Fit on all candidates for performance
1210
+ similarity_config.fit(candidates)
1211
+
1212
+ # Find similar books
1213
+ results = similarity_config.find_similar(query_book, candidates, top_k=top_k)
1214
+
1215
+ logger.debug(
1216
+ f"Found {len(results)} similar books to '{query_book.title}'"
1217
+ )
1218
+
1219
+ return results
1220
+
1221
+ def compute_similarity_matrix(
1222
+ self,
1223
+ book_ids: Optional[List[int]] = None,
1224
+ similarity_config: Optional[Any] = None,
1225
+ ) -> Tuple[List[Book], Any]:
1226
+ """
1227
+ Compute pairwise similarity matrix for books.
1228
+
1229
+ Args:
1230
+ book_ids: Optional list of book IDs (default: all books)
1231
+ similarity_config: Optional BookSimilarity instance
1232
+ (default: balanced preset)
1233
+
1234
+ Returns:
1235
+ Tuple of (books, similarity_matrix)
1236
+ where similarity_matrix[i][j] = similarity(books[i], books[j])
1237
+
1238
+ Example:
1239
+ >>> books, matrix = lib.compute_similarity_matrix()
1240
+ >>> # matrix[0][1] is similarity between books[0] and books[1]
1241
+ """
1242
+ from ebk.similarity import BookSimilarity
1243
+
1244
+ # Get books
1245
+ if book_ids:
1246
+ books = [self.get_book(book_id) for book_id in book_ids]
1247
+ books = [b for b in books if b is not None] # Filter None
1248
+ else:
1249
+ books = self.query().all()
1250
+
1251
+ if not books:
1252
+ logger.warning("No books found for similarity matrix")
1253
+ return [], None
1254
+
1255
+ # Configure similarity
1256
+ if similarity_config is None:
1257
+ similarity_config = BookSimilarity().balanced()
1258
+
1259
+ # Fit and compute matrix
1260
+ similarity_config.fit(books)
1261
+ matrix = similarity_config.similarity_matrix(books)
1262
+
1263
+ logger.debug(f"Computed {len(books)}x{len(books)} similarity matrix")
1264
+
1265
+ return books, matrix
1266
+
1267
+
1268
+ class QueryBuilder:
1269
+ """Fluent query builder for books."""
1270
+
1271
+ def __init__(self, session: Session):
1272
+ self.session = session
1273
+ self._query = session.query(Book)
1274
+
1275
+ def filter_by_title(self, title: str, exact: bool = False) -> 'QueryBuilder':
1276
+ """Filter by title."""
1277
+ if exact:
1278
+ self._query = self._query.filter(Book.title == title)
1279
+ else:
1280
+ self._query = self._query.filter(Book.title.ilike(f"%{title}%"))
1281
+ return self
1282
+
1283
+ def filter_by_author(self, author: str) -> 'QueryBuilder':
1284
+ """Filter by author name."""
1285
+ self._query = self._query.join(Book.authors).filter(
1286
+ Author.name.ilike(f"%{author}%")
1287
+ )
1288
+ return self
1289
+
1290
+ def filter_by_subject(self, subject: str) -> 'QueryBuilder':
1291
+ """Filter by subject."""
1292
+ self._query = self._query.join(Book.subjects).filter(
1293
+ Subject.name.ilike(f"%{subject}%")
1294
+ )
1295
+ return self
1296
+
1297
+ def filter_by_language(self, language: str) -> 'QueryBuilder':
1298
+ """Filter by language code."""
1299
+ self._query = self._query.filter(Book.language == language)
1300
+ return self
1301
+
1302
+ def filter_by_publisher(self, publisher: str) -> 'QueryBuilder':
1303
+ """Filter by publisher."""
1304
+ self._query = self._query.filter(Book.publisher.ilike(f"%{publisher}%"))
1305
+ return self
1306
+
1307
+ def filter_by_year(self, year: int) -> 'QueryBuilder':
1308
+ """Filter by publication year.
1309
+
1310
+ Args:
1311
+ year: Publication year (e.g., 1975)
1312
+
1313
+ Returns:
1314
+ Self for chaining
1315
+ """
1316
+ # publication_date can be "YYYY", "YYYY-MM", or "YYYY-MM-DD"
1317
+ # So we match if it starts with the year
1318
+ year_str = str(year)
1319
+ self._query = self._query.filter(Book.publication_date.like(f"{year_str}%"))
1320
+ return self
1321
+
1322
+ def filter_by_text(self, search_text: str) -> 'QueryBuilder':
1323
+ """Filter by full-text search.
1324
+
1325
+ Uses FTS5 to search across title, description, and extracted text.
1326
+
1327
+ Args:
1328
+ search_text: Text to search for
1329
+
1330
+ Returns:
1331
+ Self for chaining
1332
+ """
1333
+ from sqlalchemy import text as sql_text
1334
+
1335
+ # Query FTS5 table for matching book IDs
1336
+ result = self.session.execute(
1337
+ sql_text("""
1338
+ SELECT book_id
1339
+ FROM books_fts
1340
+ WHERE books_fts MATCH :query
1341
+ ORDER BY rank
1342
+ """),
1343
+ {"query": search_text}
1344
+ )
1345
+ book_ids = [row[0] for row in result]
1346
+
1347
+ if book_ids:
1348
+ self._query = self._query.filter(Book.id.in_(book_ids))
1349
+ else:
1350
+ # No matches - ensure query returns empty
1351
+ self._query = self._query.filter(Book.id == -1)
1352
+
1353
+ return self
1354
+
1355
+ def filter_by_reading_status(self, status: str) -> 'QueryBuilder':
1356
+ """Filter by reading status."""
1357
+ self._query = self._query.join(Book.personal).filter(
1358
+ PersonalMetadata.reading_status == status
1359
+ )
1360
+ return self
1361
+
1362
+ def filter_by_rating(self, min_rating: int, max_rating: int = 5) -> 'QueryBuilder':
1363
+ """Filter by rating range."""
1364
+ self._query = self._query.join(Book.personal).filter(
1365
+ and_(
1366
+ PersonalMetadata.rating >= min_rating,
1367
+ PersonalMetadata.rating <= max_rating
1368
+ )
1369
+ )
1370
+ return self
1371
+
1372
+ def filter_by_favorite(self, is_favorite: bool = True) -> 'QueryBuilder':
1373
+ """Filter by favorite status."""
1374
+ from sqlalchemy import or_
1375
+ if is_favorite:
1376
+ # Only books explicitly marked as favorite
1377
+ self._query = self._query.join(Book.personal).filter(
1378
+ PersonalMetadata.favorite == True
1379
+ )
1380
+ else:
1381
+ # Books not favorited (including those without PersonalMetadata)
1382
+ self._query = self._query.outerjoin(Book.personal).filter(
1383
+ or_(
1384
+ PersonalMetadata.favorite == False,
1385
+ PersonalMetadata.favorite.is_(None)
1386
+ )
1387
+ )
1388
+ return self
1389
+
1390
+ def filter_by_format(self, format_name: str) -> 'QueryBuilder':
1391
+ """Filter by file format (e.g., 'pdf', 'epub')."""
1392
+ from .db.models import File
1393
+ self._query = self._query.join(Book.files).filter(
1394
+ File.format.ilike(f'%{format_name}%')
1395
+ )
1396
+ return self
1397
+
1398
+ def order_by(self, field: str, desc: bool = False) -> 'QueryBuilder':
1399
+ """
1400
+ Order results.
1401
+
1402
+ Args:
1403
+ field: Field name (title, created_at, publication_date)
1404
+ desc: Descending order if True
1405
+ """
1406
+ field_map = {
1407
+ 'title': Book.title,
1408
+ 'created_at': Book.created_at,
1409
+ 'publication_date': Book.publication_date,
1410
+ }
1411
+
1412
+ if field in field_map:
1413
+ order_field = field_map[field]
1414
+ if desc:
1415
+ order_field = order_field.desc()
1416
+ self._query = self._query.order_by(order_field)
1417
+
1418
+ return self
1419
+
1420
+ def limit(self, limit: int) -> 'QueryBuilder':
1421
+ """Limit number of results."""
1422
+ self._query = self._query.limit(limit)
1423
+ return self
1424
+
1425
+ def offset(self, offset: int) -> 'QueryBuilder':
1426
+ """Set result offset."""
1427
+ self._query = self._query.offset(offset)
1428
+ return self
1429
+
1430
+ def all(self) -> List[Book]:
1431
+ """Execute query and return all results."""
1432
+ return self._query.all()
1433
+
1434
+ def first(self) -> Optional[Book]:
1435
+ """Execute query and return first result."""
1436
+ return self._query.first()
1437
+
1438
+ def count(self) -> int:
1439
+ """Get count of matching books."""
1440
+ return self._query.count()
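Reviewer note: QueryBuilder ships without a usage example; a chained-call sketch using only the filters defined above, with illustrative values.

favorites = (lib.query()
             .filter_by_author("Knuth")
             .filter_by_format("pdf")
             .filter_by_rating(4)
             .filter_by_favorite(True)
             .order_by("created_at", desc=True)
             .limit(10)
             .all())
print(lib.query().filter_by_language("en").count())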