ebk 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. ebk/__init__.py +35 -0
  2. ebk/ai/__init__.py +23 -0
  3. ebk/ai/knowledge_graph.py +450 -0
  4. ebk/ai/llm_providers/__init__.py +26 -0
  5. ebk/ai/llm_providers/anthropic.py +209 -0
  6. ebk/ai/llm_providers/base.py +295 -0
  7. ebk/ai/llm_providers/gemini.py +285 -0
  8. ebk/ai/llm_providers/ollama.py +294 -0
  9. ebk/ai/metadata_enrichment.py +394 -0
  10. ebk/ai/question_generator.py +328 -0
  11. ebk/ai/reading_companion.py +224 -0
  12. ebk/ai/semantic_search.py +433 -0
  13. ebk/ai/text_extractor.py +393 -0
  14. ebk/calibre_import.py +66 -0
  15. ebk/cli.py +6433 -0
  16. ebk/config.py +230 -0
  17. ebk/db/__init__.py +37 -0
  18. ebk/db/migrations.py +507 -0
  19. ebk/db/models.py +725 -0
  20. ebk/db/session.py +144 -0
  21. ebk/decorators.py +1 -0
  22. ebk/exports/__init__.py +0 -0
  23. ebk/exports/base_exporter.py +218 -0
  24. ebk/exports/echo_export.py +279 -0
  25. ebk/exports/html_library.py +1743 -0
  26. ebk/exports/html_utils.py +87 -0
  27. ebk/exports/hugo.py +59 -0
  28. ebk/exports/jinja_export.py +286 -0
  29. ebk/exports/multi_facet_export.py +159 -0
  30. ebk/exports/opds_export.py +232 -0
  31. ebk/exports/symlink_dag.py +479 -0
  32. ebk/exports/zip.py +25 -0
  33. ebk/extract_metadata.py +341 -0
  34. ebk/ident.py +89 -0
  35. ebk/library_db.py +1440 -0
  36. ebk/opds.py +748 -0
  37. ebk/plugins/__init__.py +42 -0
  38. ebk/plugins/base.py +502 -0
  39. ebk/plugins/hooks.py +442 -0
  40. ebk/plugins/registry.py +499 -0
  41. ebk/repl/__init__.py +9 -0
  42. ebk/repl/find.py +126 -0
  43. ebk/repl/grep.py +173 -0
  44. ebk/repl/shell.py +1677 -0
  45. ebk/repl/text_utils.py +320 -0
  46. ebk/search_parser.py +413 -0
  47. ebk/server.py +3608 -0
  48. ebk/services/__init__.py +28 -0
  49. ebk/services/annotation_extraction.py +351 -0
  50. ebk/services/annotation_service.py +380 -0
  51. ebk/services/export_service.py +577 -0
  52. ebk/services/import_service.py +447 -0
  53. ebk/services/personal_metadata_service.py +347 -0
  54. ebk/services/queue_service.py +253 -0
  55. ebk/services/tag_service.py +281 -0
  56. ebk/services/text_extraction.py +317 -0
  57. ebk/services/view_service.py +12 -0
  58. ebk/similarity/__init__.py +77 -0
  59. ebk/similarity/base.py +154 -0
  60. ebk/similarity/core.py +471 -0
  61. ebk/similarity/extractors.py +168 -0
  62. ebk/similarity/metrics.py +376 -0
  63. ebk/skills/SKILL.md +182 -0
  64. ebk/skills/__init__.py +1 -0
  65. ebk/vfs/__init__.py +101 -0
  66. ebk/vfs/base.py +298 -0
  67. ebk/vfs/library_vfs.py +122 -0
  68. ebk/vfs/nodes/__init__.py +54 -0
  69. ebk/vfs/nodes/authors.py +196 -0
  70. ebk/vfs/nodes/books.py +480 -0
  71. ebk/vfs/nodes/files.py +155 -0
  72. ebk/vfs/nodes/metadata.py +385 -0
  73. ebk/vfs/nodes/root.py +100 -0
  74. ebk/vfs/nodes/similar.py +165 -0
  75. ebk/vfs/nodes/subjects.py +184 -0
  76. ebk/vfs/nodes/tags.py +371 -0
  77. ebk/vfs/resolver.py +228 -0
  78. ebk/vfs_router.py +275 -0
  79. ebk/views/__init__.py +32 -0
  80. ebk/views/dsl.py +668 -0
  81. ebk/views/service.py +619 -0
  82. ebk-0.4.4.dist-info/METADATA +755 -0
  83. ebk-0.4.4.dist-info/RECORD +87 -0
  84. ebk-0.4.4.dist-info/WHEEL +5 -0
  85. ebk-0.4.4.dist-info/entry_points.txt +2 -0
  86. ebk-0.4.4.dist-info/licenses/LICENSE +21 -0
  87. ebk-0.4.4.dist-info/top_level.txt +1 -0
ebk/db/models.py ADDED
@@ -0,0 +1,725 @@
1
+ """
2
+ SQLAlchemy models for ebk database.
3
+
4
+ Clean, normalized schema with proper relationships and indexes.
5
+ """
6
+
7
+ from datetime import datetime, timezone
8
+ from typing import List, Optional
9
+ from pathlib import Path
10
+ import hashlib
11
+
12
+ from sqlalchemy import (
13
+ Column, Integer, String, Text, Boolean, Float,
14
+ DateTime, ForeignKey, Table, UniqueConstraint, Index, JSON
15
+ )
16
+ from sqlalchemy.ext.declarative import declarative_base
17
+ from sqlalchemy.orm import relationship
18
+ from sqlalchemy.ext.hybrid import hybrid_property
19
+
20
+ Base = declarative_base()
21
+
22
+
23
def utc_now():
    """Return the current UTC time as a naive ``datetime``.

    The instant is taken in UTC and its ``tzinfo`` is then dropped, so the
    result is timezone-naive (kept that way for SQLite compatibility).
    """
    aware_now = datetime.now(tz=timezone.utc)
    return aware_now.replace(tzinfo=None)
26
+
27
+
28
+ # Association tables for many-to-many relationships
29
book_authors = Table(
    'book_authors',
    Base.metadata,
    # Composite primary key: one row per (book, author) pair.
    Column(
        'book_id', Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        primary_key=True,
    ),
    Column(
        'author_id', Integer,
        ForeignKey('authors.id', ondelete='CASCADE'),
        primary_key=True,
    ),
    # Kind of contribution: author, editor, translator, contributor.
    Column('role', String(50), default='author'),
    # Used for ordering the authors of a book.
    Column('position', Integer, default=0),
)
37
+
38
book_subjects = Table(
    'book_subjects',
    Base.metadata,
    # Composite primary key: one row per (book, subject) pair.
    Column(
        'book_id', Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        primary_key=True,
    ),
    Column(
        'subject_id', Integer,
        ForeignKey('subjects.id', ondelete='CASCADE'),
        primary_key=True,
    ),
    # How central this topic is to the book (0-1).
    Column('relevance_score', Float, default=1.0),
    # Where the link came from: calibre, ai_extracted, user_added.
    Column('source', String(50), default='user'),
)
46
+
47
book_tags = Table(
    'book_tags',
    Base.metadata,
    # Composite primary key: one row per (book, tag) pair.
    Column(
        'book_id', Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        primary_key=True,
    ),
    Column(
        'tag_id', Integer,
        ForeignKey('tags.id', ondelete='CASCADE'),
        primary_key=True,
    ),
    # When the tag was added to the book.
    Column('created_at', DateTime, default=utc_now),
)
54
+
55
+
56
class Book(Base):
    """Core book entity with metadata.

    Holds the bibliographic fields of a book and owns its dependent rows
    (files, covers, identifiers, annotations, ...), which are deleted with it
    through the ``all, delete-orphan`` cascades declared below.
    """
    __tablename__ = 'books'

    id = Column(Integer, primary_key=True)
    unique_id = Column(String(32), unique=True, nullable=False, index=True)  # Hash-based

    # Core metadata
    title = Column(String(500), nullable=False, index=True)
    subtitle = Column(String(500))
    sort_title = Column(String(500), index=True)  # For alphabetical sorting
    language = Column(String(10), index=True)  # ISO 639-1 code
    publisher = Column(String(200), index=True)
    publication_date = Column(String(50))  # Flexible: year, YYYY-MM, or YYYY-MM-DD

    # Series information
    series = Column(String(200), index=True)  # Book series name
    series_index = Column(Float)  # Position in series (e.g., 2.5)

    # Edition and rights
    edition = Column(String(100))  # "2nd Edition", "Revised", etc.
    rights = Column(Text)  # Copyright/license statement
    source = Column(String(500))  # Original source URL or reference

    # Rich content
    description = Column(Text)  # Full text indexed separately
    page_count = Column(Integer)
    word_count = Column(Integer)  # From extracted text
    keywords = Column(JSON)  # Array of keyword strings from PDF/metadata

    # User customization
    color = Column(String(7))  # Hex color code (e.g., #FF5733)

    # Timestamps
    created_at = Column(DateTime, default=utc_now, nullable=False)
    updated_at = Column(DateTime, default=utc_now, onupdate=utc_now, nullable=False)

    # Relationships
    authors = relationship('Author', secondary=book_authors, back_populates='books', lazy='selectin')
    subjects = relationship('Subject', secondary=book_subjects, back_populates='books', lazy='selectin')
    tags = relationship('Tag', secondary=book_tags, back_populates='books', lazy='selectin')
    contributors = relationship('Contributor', back_populates='book', cascade='all, delete-orphan')
    identifiers = relationship('Identifier', back_populates='book', cascade='all, delete-orphan')
    files = relationship('File', back_populates='book', cascade='all, delete-orphan')
    covers = relationship('Cover', back_populates='book', cascade='all, delete-orphan')
    concepts = relationship('BookConcept', back_populates='book', cascade='all, delete-orphan')
    sessions = relationship('ReadingSession', back_populates='book', cascade='all, delete-orphan')
    annotations = relationship('Annotation', back_populates='book', cascade='all, delete-orphan')
    personal = relationship('PersonalMetadata', back_populates='book', uselist=False, cascade='all, delete-orphan')

    # Indexes
    __table_args__ = (
        Index('idx_book_title_lang', 'title', 'language'),
        Index('idx_book_created', 'created_at'),
    )

    @hybrid_property
    def primary_file(self) -> Optional['File']:
        """Return the preferred file for this book, or ``None`` if it has none.

        Preference order is PDF, EPUB, MOBI, AZW3, then any other format.
        When several files share the best priority, the one that comes first
        in ``self.files`` is returned.
        """
        if not self.files:
            return None
        format_priority = {'pdf': 0, 'epub': 1, 'mobi': 2, 'azw3': 3}
        # min() yields the first minimal element, i.e. the head of a stable
        # sort, without sorting the whole list. A missing format is treated
        # as "other" instead of raising on None.lower().
        return min(
            self.files,
            key=lambda f: format_priority.get((f.format or '').lower(), 99),
        )

    @hybrid_property
    def primary_cover(self) -> Optional['Cover']:
        """Return the cover flagged ``is_primary``, else the first cover, else ``None``."""
        for cover in self.covers:
            if cover.is_primary:
                return cover
        return self.covers[0] if self.covers else None

    def __repr__(self):
        # title can still be None on an instance that has not been populated
        # yet; slicing None would make repr() itself raise.
        short_title = (self.title or '')[:50]
        return f"<Book(id={self.id}, title='{short_title}')>"
135
+
136
+
137
class Author(Base):
    """Author/creator entity, linked to books through ``book_authors``."""
    __tablename__ = 'authors'

    id = Column(Integer, primary_key=True)
    name = Column(String(200), nullable=False, index=True)
    # Sortable form of the name, e.g. "Tolkien, J.R.R."
    sort_name = Column(String(200), index=True)
    bio = Column(Text)
    birth_year = Column(Integer)
    death_year = Column(Integer)

    # Many-to-many with Book via the book_authors association table.
    books = relationship(
        'Book',
        secondary=book_authors,
        back_populates='authors',
    )

    # Author names are unique across the table.
    __table_args__ = (
        UniqueConstraint('name', name='uix_author_name'),
    )

    def __repr__(self):
        return f"<Author(id={self.id}, name='{self.name}')>"
157
+
158
+
159
class Subject(Base):
    """Subject/tag/genre with hierarchical support."""
    __tablename__ = 'subjects'

    id = Column(Integer, primary_key=True)
    name = Column(String(200), nullable=False, unique=True, index=True)
    # Optional parent subject; reset to NULL when the parent row is deleted.
    parent_id = Column(
        Integer,
        ForeignKey('subjects.id', ondelete='SET NULL'),
    )
    # One of: genre, topic, keyword, personal_tag.
    type = Column(String(50), default='topic')

    # Self-referential hierarchy; the backref exposes ``children`` on the parent.
    parent = relationship('Subject', remote_side=[id], backref='children')
    # Many-to-many with Book via the book_subjects association table.
    books = relationship(
        'Book',
        secondary=book_subjects,
        back_populates='subjects',
    )

    def __repr__(self):
        return f"<Subject(id={self.id}, name='{self.name}', type='{self.type}')>"
174
+
175
+
176
class Tag(Base):
    """User-defined hierarchical tags for organizing books.

    Tags are separate from Subjects:
    - Subjects: Bibliographic metadata (what the book is about)
    - Tags: User-defined organization (how you use/categorize the book)

    Examples:
    - path="Work/Project-2024"
    - path="Personal/To-Read"
    - path="Reference/Programming/Python"
    """
    __tablename__ = 'tags'

    id = Column(Integer, primary_key=True)
    name = Column(String(200), nullable=False, index=True)  # Name at this level (e.g., "Python")
    path = Column(String(500), nullable=False, unique=True, index=True)  # Full path (e.g., "Programming/Python")
    parent_id = Column(Integer, ForeignKey('tags.id', ondelete='CASCADE'))

    # Metadata
    description = Column(Text)  # Optional description of the tag
    color = Column(String(7))  # Hex color code for UI display (e.g., "#FF5733")
    created_at = Column(DateTime, default=utc_now, nullable=False)

    # Self-referential relationship for hierarchy
    parent = relationship('Tag', remote_side=[id], backref='children')
    books = relationship('Book', secondary=book_tags, back_populates='tags')

    # NOTE(review): ``path`` is already declared with index=True, so
    # idx_tag_path looks like a second index on the same column. Left in place
    # because existing databases/migrations may reference it by name - confirm
    # before removing.
    __table_args__ = (
        Index('idx_tag_path', 'path'),
        Index('idx_tag_parent', 'parent_id'),
    )

    @property
    def depth(self) -> int:
        """Return the depth in the hierarchy (root=0), counted from ``path`` separators."""
        return self.path.count('/')

    @property
    def ancestors(self) -> List['Tag']:
        """Return the ancestor tags ordered from root down to the direct parent."""
        chain = []
        current = self.parent
        while current:
            chain.append(current)
            current = current.parent
        # Collected parent-first; flip once instead of inserting at index 0
        # on every step (which is quadratic in the depth).
        chain.reverse()
        return chain

    @property
    def full_path_parts(self) -> List[str]:
        """Return ``path`` split into its '/'-separated components."""
        return self.path.split('/')

    def __repr__(self):
        return f"<Tag(id={self.id}, path='{self.path}')>"
231
+
232
+
233
class Contributor(Base):
    """Contributors to a book (editors, translators, illustrators, etc)."""
    __tablename__ = 'contributors'

    id = Column(Integer, primary_key=True)
    # Owning book; contributor rows are removed when the book is deleted.
    book_id = Column(
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        nullable=False,
    )

    name = Column(String(200), nullable=False, index=True)
    # e.g. editor, translator, illustrator.
    role = Column(String(50), nullable=False)
    # Name in the form used for sorting.
    file_as = Column(String(200))

    book = relationship('Book', back_populates='contributors')

    __table_args__ = (
        Index('idx_contributor_name', 'name'),
        Index('idx_contributor_role', 'role'),
    )

    def __repr__(self):
        return f"<Contributor(name='{self.name}', role='{self.role}')>"
253
+
254
+
255
class Identifier(Base):
    """Flexible identifiers (ISBN, DOI, etc) attached to a book."""
    __tablename__ = 'identifiers'

    id = Column(Integer, primary_key=True)
    book_id = Column(
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        nullable=False,
    )
    # Identifier scheme, e.g. isbn, doi, arxiv, goodreads.
    scheme = Column(String(50), nullable=False, index=True)
    value = Column(String(200), nullable=False, index=True)

    book = relationship('Book', back_populates='identifiers')

    # A given (scheme, value) pair may appear only once per book.
    __table_args__ = (
        UniqueConstraint('book_id', 'scheme', 'value', name='uix_identifier'),
    )

    def __repr__(self):
        return f"<Identifier(scheme='{self.scheme}', value='{self.value}')>"
272
+
273
+
274
class File(Base):
    """Ebook files with extraction metadata."""
    __tablename__ = 'files'

    id = Column(Integer, primary_key=True)
    book_id = Column(
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        nullable=False,
    )

    # Location, relative to the library root.
    path = Column(String(500), nullable=False)
    # File format such as pdf, epub, mobi.
    format = Column(String(20), nullable=False, index=True)
    size_bytes = Column(Integer)
    # SHA256 hex digest; unique across all files.
    file_hash = Column(String(64), unique=True, nullable=False, index=True)

    # File metadata
    mime_type = Column(String(100))  # Full MIME type (e.g., application/pdf)
    created_date = Column(DateTime)  # File creation time from filesystem
    modified_date = Column(DateTime)  # File modification time from filesystem
    creator_application = Column(String(200))  # PDF: Creator app (e.g., "LaTeX")

    # Text extraction status
    text_extracted = Column(Boolean, default=False)
    extraction_date = Column(DateTime)

    book = relationship('Book', back_populates='files')
    # At most one extracted-text row per file (uselist=False).
    extracted_text = relationship(
        'ExtractedText',
        back_populates='file',
        uselist=False,
        cascade='all, delete-orphan',
    )
    chunks = relationship(
        'TextChunk',
        back_populates='file',
        cascade='all, delete-orphan',
    )

    @staticmethod
    def compute_hash(file_path: Path) -> str:
        """Return the hex SHA256 digest of the file at ``file_path``.

        The file is read in 8192-byte blocks rather than all at once.
        """
        digest = hashlib.sha256()
        with open(file_path, 'rb') as stream:
            while True:
                block = stream.read(8192)
                if not block:
                    # Empty read means end of file.
                    break
                digest.update(block)
        return digest.hexdigest()

    def __repr__(self):
        return f"<File(id={self.id}, format='{self.format}', path='{self.path}')>"
311
+
312
+
313
class ExtractedText(Base):
    """Full extracted text for search (one row per file)."""
    __tablename__ = 'extracted_texts'

    id = Column(Integer, primary_key=True)
    # unique=True enforces at most one extracted text per file.
    file_id = Column(Integer, ForeignKey('files.id', ondelete='CASCADE'), unique=True, nullable=False)

    content = Column(Text, nullable=False)  # Full text - will use FTS5 virtual table
    content_hash = Column(String(64), nullable=False)
    extracted_at = Column(DateTime, default=utc_now, nullable=False)

    file = relationship('File', back_populates='extracted_text')

    def __repr__(self):
        # content is None on an instance that has not been populated yet;
        # report length 0 rather than letting len(None) raise inside repr().
        length = len(self.content) if self.content is not None else 0
        return f"<ExtractedText(file_id={self.file_id}, length={length})>"
328
+
329
+
330
class TextChunk(Base):
    """Chunks for semantic search with embeddings."""
    __tablename__ = 'text_chunks'

    id = Column(Integer, primary_key=True)
    file_id = Column(
        Integer,
        ForeignKey('files.id', ondelete='CASCADE'),
        nullable=False,
    )

    # Order of the chunk within its file.
    chunk_index = Column(Integer, nullable=False)
    # Chunk text, 500-1000 words.
    content = Column(Text, nullable=False)

    # Page range (if available)
    start_page = Column(Integer)
    end_page = Column(Integer)

    # The embedding itself is stored separately (pickle file or vector
    # extension); this flag only records that one exists.
    has_embedding = Column(Boolean, default=False)

    file = relationship('File', back_populates='chunks')

    __table_args__ = (
        UniqueConstraint('file_id', 'chunk_index', name='uix_chunk'),
        Index('idx_chunk_file', 'file_id', 'chunk_index'),
    )

    def __repr__(self):
        return f"<TextChunk(id={self.id}, file_id={self.file_id}, index={self.chunk_index})>"
356
+
357
+
358
class Cover(Base):
    """Cover images belonging to a book."""
    __tablename__ = 'covers'

    id = Column(Integer, primary_key=True)
    book_id = Column(
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        nullable=False,
    )

    # Location, relative to the library root.
    path = Column(String(500), nullable=False)
    width = Column(Integer)
    height = Column(Integer)
    is_primary = Column(Boolean, default=True)
    # Origin of the image: extracted, user_provided, downloaded.
    source = Column(String(50), default='extracted')

    book = relationship('Book', back_populates='covers')

    def __repr__(self):
        return f"<Cover(id={self.id}, book_id={self.book_id}, path='{self.path}')>"
375
+
376
+
377
class Concept(Base):
    """Knowledge graph concepts (the nodes of the graph)."""
    __tablename__ = 'concepts'

    id = Column(Integer, primary_key=True)
    name = Column(String(200), nullable=False, unique=True, index=True)
    description = Column(Text)
    # One of: definition, idea, theory, principle.
    concept_type = Column(String(50), default='idea')
    # PageRank score.
    importance_score = Column(Float, default=0.0, index=True)

    created_at = Column(DateTime, default=utc_now, nullable=False)

    # Relationships
    book_concepts = relationship(
        'BookConcept',
        back_populates='concept',
        cascade='all, delete-orphan',
    )
    # Edges where this concept is the source.
    outgoing_relations = relationship(
        'ConceptRelation',
        foreign_keys='ConceptRelation.source_id',
        back_populates='source',
        cascade='all, delete-orphan',
    )
    # Edges where this concept is the target.
    incoming_relations = relationship(
        'ConceptRelation',
        foreign_keys='ConceptRelation.target_id',
        back_populates='target',
        cascade='all, delete-orphan',
    )

    def __repr__(self):
        return f"<Concept(id={self.id}, name='{self.name}')>"
396
+
397
+
398
class BookConcept(Base):
    """Link between books and concepts they discuss."""
    __tablename__ = 'book_concepts'

    id = Column(Integer, primary_key=True)
    book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), nullable=False)
    concept_id = Column(Integer, ForeignKey('concepts.id', ondelete='CASCADE'), nullable=False)

    page_references = Column(JSON)  # Array of page numbers
    quote_examples = Column(JSON)  # Array of relevant quotes
    confidence_score = Column(Float, default=1.0)

    book = relationship('Book', back_populates='concepts')
    concept = relationship('Concept', back_populates='book_concepts')

    # A concept is linked to a given book at most once.
    __table_args__ = (
        UniqueConstraint('book_id', 'concept_id', name='uix_book_concept'),
    )

    def __repr__(self):
        # Added for consistency with the other models, which all define one.
        return f"<BookConcept(book_id={self.book_id}, concept_id={self.concept_id})>"
416
+
417
+
418
class ConceptRelation(Base):
    """Relationships between concepts (knowledge graph edges)."""
    __tablename__ = 'concept_relations'

    id = Column(Integer, primary_key=True)
    source_id = Column(Integer, ForeignKey('concepts.id', ondelete='CASCADE'), nullable=False)
    target_id = Column(Integer, ForeignKey('concepts.id', ondelete='CASCADE'), nullable=False)

    relation_type = Column(String(50), nullable=False)  # supports, contradicts, extends, examples, causes
    strength = Column(Float, default=1.0)  # 0-1
    # Book offered as evidence; set to NULL if that book is deleted.
    evidence_book_id = Column(Integer, ForeignKey('books.id', ondelete='SET NULL'))

    # Both ends point at Concept, so each side names its own foreign key.
    source = relationship('Concept', foreign_keys=[source_id], back_populates='outgoing_relations')
    target = relationship('Concept', foreign_keys=[target_id], back_populates='incoming_relations')
    evidence_book = relationship('Book')

    __table_args__ = (
        UniqueConstraint('source_id', 'target_id', 'relation_type', name='uix_concept_relation'),
        Index('idx_relation_source', 'source_id'),
        Index('idx_relation_target', 'target_id'),
    )

    def __repr__(self):
        # Added for consistency with the other models, which all define one.
        return (
            f"<ConceptRelation(source_id={self.source_id}, "
            f"target_id={self.target_id}, type='{self.relation_type}')>"
        )
439
+
440
+
441
class ReadingSession(Base):
    """Track reading sessions for active recall."""
    __tablename__ = 'reading_sessions'

    id = Column(Integer, primary_key=True)
    book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), nullable=False)

    start_time = Column(DateTime, default=utc_now, nullable=False)
    end_time = Column(DateTime)
    pages_read = Column(Integer, default=0)

    highlights = Column(JSON)  # Array of highlight texts
    notes = Column(JSON)  # Array of note objects
    comprehension_score = Column(Float)  # From quiz results

    book = relationship('Book', back_populates='sessions')

    @hybrid_property
    def duration_minutes(self) -> Optional[float]:
        """Return the session length in minutes, or ``None`` if either endpoint is missing."""
        if self.end_time and self.start_time:
            return (self.end_time - self.start_time).total_seconds() / 60
        return None

    def __repr__(self):
        # Added for consistency with the other models, which all define one.
        return f"<ReadingSession(id={self.id}, book_id={self.book_id})>"
463
+
464
+
465
class Annotation(Base):
    """Highlights, notes, bookmarks with rich content support."""
    __tablename__ = 'annotations'

    id = Column(Integer, primary_key=True)
    book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), nullable=False)
    # Optional link to the reading session; set to NULL if that session is deleted.
    session_id = Column(Integer, ForeignKey('reading_sessions.id', ondelete='SET NULL'))

    annotation_type = Column(String(20), nullable=False)  # highlight, note, bookmark
    page_number = Column(Integer)
    position = Column(JSON)  # {char_offset: int} or {x: float, y: float}
    content = Column(Text, nullable=False)  # The highlighted text or note content
    color = Column(String(20))  # For highlights

    # Rich content support (new fields)
    title = Column(String(255))  # Optional title for the annotation
    content_format = Column(String(20), default='plain')  # plain, markdown, html
    category = Column(String(100))  # User-defined category
    pinned = Column(Boolean, default=False)  # Pin to top

    created_at = Column(DateTime, default=utc_now, nullable=False)
    updated_at = Column(DateTime, default=utc_now, onupdate=utc_now)

    book = relationship('Book', back_populates='annotations')
    session = relationship('ReadingSession')

    __table_args__ = (
        Index('idx_annotation_book', 'book_id', 'annotation_type'),
        Index('idx_annotation_pinned', 'book_id', 'pinned'),
        Index('idx_annotation_category', 'category'),
    )

    def __repr__(self):
        # Added for consistency with the other models, which all define one.
        return f"<Annotation(id={self.id}, book_id={self.book_id}, type='{self.annotation_type}')>"
496
+
497
+
498
class PersonalMetadata(Base):
    """Personal reading metadata (ratings, status, etc)."""
    __tablename__ = 'personal_metadata'

    id = Column(Integer, primary_key=True)
    # unique=True enforces at most one personal-metadata row per book.
    book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), unique=True, nullable=False)

    # Reading status
    rating = Column(Float)  # 0-5 stars
    reading_status = Column(String(20), default='unread')  # unread, reading, read, abandoned
    reading_progress = Column(Integer, default=0)  # 0-100 percentage

    # Collections
    favorite = Column(Boolean, default=False)
    owned = Column(Boolean, default=True)  # vs borrowed/library

    # Reading queue
    queue_position = Column(Integer)  # Position in reading queue (1-based, NULL = not queued)

    # Dates
    date_added = Column(DateTime, default=utc_now, nullable=False)
    date_started = Column(DateTime)
    date_finished = Column(DateTime)

    # Quick access tags (denormalized for performance)
    personal_tags = Column(JSON)  # Array of tag strings

    book = relationship('Book', back_populates='personal')

    __table_args__ = (
        Index('idx_personal_status', 'reading_status', 'rating'),
        Index('idx_personal_favorite', 'favorite'),
        Index('idx_personal_queue', 'queue_position'),
    )

    def __repr__(self):
        # Added for consistency with the other models, which all define one.
        return f"<PersonalMetadata(book_id={self.book_id}, status='{self.reading_status}')>"
532
+
533
+
534
+ # ============================================================================
535
+ # Views DSL Models
536
+ # ============================================================================
537
+ # Views provide a composable, non-destructive way to define subsets of the
538
+ # library with optional metadata overrides. Following SICP principles:
539
+ # - Primitives: all, none, filter, ids
540
+ # - Combination: union, intersect, difference
541
+ # - Abstraction: named views become new primitives
542
+ # - Closure: combining views yields a view
543
+
544
+
545
class View(Base):
    """
    Named, non-destructive lens over the library.

    A view describes:
    - Selection: which books (via filters, explicit IDs, or references to other views)
    - Transforms: metadata overrides (title, description) per book
    - Ordering: how to sort the results

    The description itself lives in the `definition` field as YAML/JSON.
    """
    __tablename__ = 'views'

    id = Column(Integer, primary_key=True)
    name = Column(String(200), nullable=False, unique=True, index=True)
    description = Column(Text)

    # Full view definition as YAML-compatible JSON, shaped like
    # {select: ..., transform: ..., order: ...}; defaults to an empty dict.
    definition = Column(JSON, nullable=False, default=dict)

    # Count cached for quick display (updated on eval), and when it was cached.
    cached_count = Column(Integer)
    cached_at = Column(DateTime)

    # Timestamps
    created_at = Column(DateTime, default=utc_now, nullable=False)
    updated_at = Column(
        DateTime,
        default=utc_now,
        onupdate=utc_now,
        nullable=False,
    )

    # Per-book overrides, deleted together with the view.
    overrides = relationship(
        'ViewOverride',
        back_populates='view',
        cascade='all, delete-orphan',
    )

    def __repr__(self):
        return f"<View(id={self.id}, name='{self.name}')>"
579
+
580
+
581
class ViewOverride(Base):
    """
    Metadata overrides for one book inside one view.

    Kept in their own table for efficient querying and so the main view
    definition does not grow. Overrides are non-destructive: the original
    book metadata is unchanged.
    """
    __tablename__ = 'view_overrides'

    id = Column(Integer, primary_key=True)
    view_id = Column(
        Integer,
        ForeignKey('views.id', ondelete='CASCADE'),
        nullable=False,
    )
    book_id = Column(
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        nullable=False,
    )

    # Fields that may be overridden.
    title = Column(String(500))
    description = Column(Text)

    # Custom position for manual ordering within the view.
    position = Column(Integer)

    # Timestamps
    created_at = Column(DateTime, default=utc_now, nullable=False)
    updated_at = Column(
        DateTime,
        default=utc_now,
        onupdate=utc_now,
        nullable=False,
    )

    # Relationships
    view = relationship('View', back_populates='overrides')
    book = relationship('Book')

    # At most one override row per (view, book) pair.
    __table_args__ = (
        UniqueConstraint('view_id', 'book_id', name='uix_view_book_override'),
        Index('idx_view_override_view', 'view_id'),
        Index('idx_view_override_book', 'book_id'),
    )

    def __repr__(self):
        return f"<ViewOverride(view_id={self.view_id}, book_id={self.book_id})>"
618
+
619
+
620
+ # ============================================================================
621
+ # Reviews and Enrichment Tracking
622
+ # ============================================================================
623
+
624
class Review(Base):
    """User reviews of books.

    A review is free-form text, kept separate from the simple rating. It can
    be a personal note, a summary, a critique, or reading notes.

    Examples:
    - Personal review: "I found this book particularly helpful for..."
    - Summary: Key takeaways and main points
    - Critique: Critical analysis and evaluation
    - Notes: Reading notes and observations
    """
    __tablename__ = 'reviews'

    id = Column(Integer, primary_key=True)
    book_id = Column(
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        nullable=False,
    )

    # Review content
    title = Column(String(255))  # Review headline/title
    content = Column(Text, nullable=False)  # Full review text (markdown supported)
    rating = Column(Float)  # 1-5 stars (separate from PersonalMetadata.rating)

    # Review metadata
    review_type = Column(String(50), default='personal')  # personal, summary, critique, notes
    visibility = Column(String(20), default='private')  # private, public (for future sharing)

    # Timestamps
    created_at = Column(DateTime, default=utc_now, nullable=False)
    updated_at = Column(
        DateTime,
        default=utc_now,
        onupdate=utc_now,
        nullable=False,
    )

    # The backref adds a ``reviews`` collection on Book.
    book = relationship('Book', backref='reviews')

    __table_args__ = (
        Index('idx_review_book', 'book_id'),
        Index('idx_review_type', 'review_type'),
        Index('idx_review_created', 'created_at'),
    )

    def __repr__(self):
        return f"<Review(id={self.id}, book_id={self.book_id}, type='{self.review_type}')>"
665
+
666
+
667
class EnrichmentHistory(Base):
    """Track metadata enrichment provenance.

    One row per change made to book metadata by automated enrichment, so
    changes can be audited and rolled back if needed.

    Each row records:
    - What field was changed
    - Old and new values
    - Source of the enrichment (LLM, metadata API, user)
    - Confidence level
    """
    __tablename__ = 'enrichment_history'

    id = Column(Integer, primary_key=True)
    book_id = Column(
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        nullable=False,
    )

    # What was enriched
    field_name = Column(String(100), nullable=False)  # description, tags, categories, difficulty
    old_value = Column(Text)  # JSON of previous value
    new_value = Column(Text)  # JSON of new value

    # Source tracking
    source_type = Column(String(50), nullable=False)  # llm, google_books, open_library, user
    source_detail = Column(String(200))  # ollama:llama3.2, anthropic:claude-sonnet-4-20250514, etc.
    confidence = Column(Float, default=1.0)  # 0.0-1.0

    # Status
    applied = Column(Boolean, default=True)  # Was this change applied?
    reverted = Column(Boolean, default=False)  # Was this change reverted?

    # Timestamps
    enriched_at = Column(DateTime, default=utc_now, nullable=False)

    # The backref adds an ``enrichment_history`` collection on Book.
    book = relationship('Book', backref='enrichment_history')

    __table_args__ = (
        Index('idx_enrichment_book', 'book_id'),
        Index('idx_enrichment_source', 'source_type'),
        Index('idx_enrichment_field', 'field_name'),
        Index('idx_enrichment_date', 'enriched_at'),
    )

    def __repr__(self):
        return f"<EnrichmentHistory(id={self.id}, book_id={self.book_id}, field='{self.field_name}')>"
713
+
714
+
715
+ # Full-Text Search Virtual Table (SQLite FTS5)
716
+ # This will be created separately as it's SQLite-specific
717
+ """
718
+ CREATE VIRTUAL TABLE books_fts USING fts5(
719
+ book_id UNINDEXED,
720
+ title,
721
+ description,
722
+ content='extracted_texts',
723
+ content_rowid='id'
724
+ );
725
+ """