ebk 0.1.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ebk might be problematic. Click here for more details.

Files changed (84) hide show
  1. ebk/__init__.py +35 -0
  2. ebk/ai/__init__.py +23 -0
  3. ebk/ai/knowledge_graph.py +443 -0
  4. ebk/ai/llm_providers/__init__.py +21 -0
  5. ebk/ai/llm_providers/base.py +230 -0
  6. ebk/ai/llm_providers/ollama.py +362 -0
  7. ebk/ai/metadata_enrichment.py +396 -0
  8. ebk/ai/question_generator.py +328 -0
  9. ebk/ai/reading_companion.py +224 -0
  10. ebk/ai/semantic_search.py +434 -0
  11. ebk/ai/text_extractor.py +394 -0
  12. ebk/cli.py +2828 -680
  13. ebk/config.py +260 -22
  14. ebk/db/__init__.py +37 -0
  15. ebk/db/migrations.py +180 -0
  16. ebk/db/models.py +526 -0
  17. ebk/db/session.py +144 -0
  18. ebk/decorators.py +132 -0
  19. ebk/exports/base_exporter.py +218 -0
  20. ebk/exports/html_library.py +1390 -0
  21. ebk/exports/html_utils.py +117 -0
  22. ebk/exports/hugo.py +7 -3
  23. ebk/exports/jinja_export.py +287 -0
  24. ebk/exports/multi_facet_export.py +164 -0
  25. ebk/exports/symlink_dag.py +479 -0
  26. ebk/extract_metadata.py +76 -7
  27. ebk/library_db.py +899 -0
  28. ebk/plugins/__init__.py +42 -0
  29. ebk/plugins/base.py +502 -0
  30. ebk/plugins/hooks.py +444 -0
  31. ebk/plugins/registry.py +500 -0
  32. ebk/repl/__init__.py +9 -0
  33. ebk/repl/find.py +126 -0
  34. ebk/repl/grep.py +174 -0
  35. ebk/repl/shell.py +1677 -0
  36. ebk/repl/text_utils.py +320 -0
  37. ebk/search_parser.py +413 -0
  38. ebk/server.py +1633 -0
  39. ebk/services/__init__.py +11 -0
  40. ebk/services/import_service.py +442 -0
  41. ebk/services/tag_service.py +282 -0
  42. ebk/services/text_extraction.py +317 -0
  43. ebk/similarity/__init__.py +77 -0
  44. ebk/similarity/base.py +154 -0
  45. ebk/similarity/core.py +445 -0
  46. ebk/similarity/extractors.py +168 -0
  47. ebk/similarity/metrics.py +376 -0
  48. ebk/vfs/__init__.py +101 -0
  49. ebk/vfs/base.py +301 -0
  50. ebk/vfs/library_vfs.py +124 -0
  51. ebk/vfs/nodes/__init__.py +54 -0
  52. ebk/vfs/nodes/authors.py +196 -0
  53. ebk/vfs/nodes/books.py +480 -0
  54. ebk/vfs/nodes/files.py +155 -0
  55. ebk/vfs/nodes/metadata.py +385 -0
  56. ebk/vfs/nodes/root.py +100 -0
  57. ebk/vfs/nodes/similar.py +165 -0
  58. ebk/vfs/nodes/subjects.py +184 -0
  59. ebk/vfs/nodes/tags.py +371 -0
  60. ebk/vfs/resolver.py +228 -0
  61. ebk-0.3.2.dist-info/METADATA +755 -0
  62. ebk-0.3.2.dist-info/RECORD +69 -0
  63. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/WHEEL +1 -1
  64. ebk-0.3.2.dist-info/licenses/LICENSE +21 -0
  65. ebk/imports/__init__.py +0 -0
  66. ebk/imports/calibre.py +0 -144
  67. ebk/imports/ebooks.py +0 -116
  68. ebk/llm.py +0 -58
  69. ebk/manager.py +0 -44
  70. ebk/merge.py +0 -308
  71. ebk/streamlit/__init__.py +0 -0
  72. ebk/streamlit/__pycache__/__init__.cpython-310.pyc +0 -0
  73. ebk/streamlit/__pycache__/display.cpython-310.pyc +0 -0
  74. ebk/streamlit/__pycache__/filters.cpython-310.pyc +0 -0
  75. ebk/streamlit/__pycache__/utils.cpython-310.pyc +0 -0
  76. ebk/streamlit/app.py +0 -185
  77. ebk/streamlit/display.py +0 -168
  78. ebk/streamlit/filters.py +0 -151
  79. ebk/streamlit/utils.py +0 -58
  80. ebk/utils.py +0 -311
  81. ebk-0.1.0.dist-info/METADATA +0 -457
  82. ebk-0.1.0.dist-info/RECORD +0 -29
  83. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/entry_points.txt +0 -0
  84. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/top_level.txt +0 -0
ebk/db/models.py ADDED
@@ -0,0 +1,526 @@
1
+ """
2
+ SQLAlchemy models for ebk database.
3
+
4
+ Clean, normalized schema with proper relationships and indexes.
5
+ """
6
+
7
+ from datetime import datetime
8
+ from typing import List, Optional
9
+ from pathlib import Path
10
+ import hashlib
11
+
12
+ from sqlalchemy import (
13
+ create_engine, Column, Integer, String, Text, Boolean, Float,
14
+ DateTime, ForeignKey, Table, UniqueConstraint, Index, JSON
15
+ )
16
+ from sqlalchemy.ext.declarative import declarative_base
17
+ from sqlalchemy.orm import relationship, sessionmaker
18
+ from sqlalchemy.ext.hybrid import hybrid_property
19
+
20
+ Base = declarative_base()
21
+
22
+
23
+ # Association tables for many-to-many relationships
24
+ book_authors = Table(
25
+ 'book_authors',
26
+ Base.metadata,
27
+ Column('book_id', Integer, ForeignKey('books.id', ondelete='CASCADE'), primary_key=True),
28
+ Column('author_id', Integer, ForeignKey('authors.id', ondelete='CASCADE'), primary_key=True),
29
+ Column('role', String(50), default='author'), # author, editor, translator, contributor
30
+ Column('position', Integer, default=0) # For ordering
31
+ )
32
+
33
+ book_subjects = Table(
34
+ 'book_subjects',
35
+ Base.metadata,
36
+ Column('book_id', Integer, ForeignKey('books.id', ondelete='CASCADE'), primary_key=True),
37
+ Column('subject_id', Integer, ForeignKey('subjects.id', ondelete='CASCADE'), primary_key=True),
38
+ Column('relevance_score', Float, default=1.0), # How central is this topic (0-1)
39
+ Column('source', String(50), default='user') # calibre, ai_extracted, user_added
40
+ )
41
+
42
+ book_tags = Table(
43
+ 'book_tags',
44
+ Base.metadata,
45
+ Column('book_id', Integer, ForeignKey('books.id', ondelete='CASCADE'), primary_key=True),
46
+ Column('tag_id', Integer, ForeignKey('tags.id', ondelete='CASCADE'), primary_key=True),
47
+ Column('created_at', DateTime, default=datetime.utcnow) # When tag was added
48
+ )
49
+
50
+
51
class Book(Base):
    """Core book entity with metadata.

    Holds the bibliographic fields of a single book and owns its dependent
    rows (files, covers, identifiers, annotations, ...) through
    ``delete-orphan`` cascades. Authors, subjects and tags are shared
    entities linked through association tables.
    """
    __tablename__ = 'books'

    id = Column(Integer, primary_key=True)
    unique_id = Column(String(32), unique=True, nullable=False, index=True)  # Hash-based

    # Core metadata
    title = Column(String(500), nullable=False, index=True)
    subtitle = Column(String(500))
    sort_title = Column(String(500), index=True)  # For alphabetical sorting
    language = Column(String(10), index=True)  # ISO 639-1 code
    publisher = Column(String(200), index=True)
    publication_date = Column(String(50))  # Flexible: year, YYYY-MM, or YYYY-MM-DD

    # Series information
    series = Column(String(200), index=True)  # Book series name
    series_index = Column(Float)  # Position in series (e.g., 2.5)

    # Edition and rights
    edition = Column(String(100))  # "2nd Edition", "Revised", etc.
    rights = Column(Text)  # Copyright/license statement
    source = Column(String(500))  # Original source URL or reference

    # Rich content
    description = Column(Text)  # Full text indexed separately
    page_count = Column(Integer)
    word_count = Column(Integer)  # From extracted text
    keywords = Column(JSON)  # Array of keyword strings from PDF/metadata

    # User customization
    color = Column(String(7))  # Hex color code (e.g., #FF5733)

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)

    # Relationships
    authors = relationship('Author', secondary=book_authors, back_populates='books', lazy='selectin')
    subjects = relationship('Subject', secondary=book_subjects, back_populates='books', lazy='selectin')
    tags = relationship('Tag', secondary=book_tags, back_populates='books', lazy='selectin')
    contributors = relationship('Contributor', back_populates='book', cascade='all, delete-orphan')
    identifiers = relationship('Identifier', back_populates='book', cascade='all, delete-orphan')
    files = relationship('File', back_populates='book', cascade='all, delete-orphan')
    covers = relationship('Cover', back_populates='book', cascade='all, delete-orphan')
    concepts = relationship('BookConcept', back_populates='book', cascade='all, delete-orphan')
    sessions = relationship('ReadingSession', back_populates='book', cascade='all, delete-orphan')
    annotations = relationship('Annotation', back_populates='book', cascade='all, delete-orphan')
    personal = relationship('PersonalMetadata', back_populates='book', uselist=False, cascade='all, delete-orphan')

    # Indexes
    __table_args__ = (
        Index('idx_book_title_lang', 'title', 'language'),
        Index('idx_book_created', 'created_at'),
    )

    # NOTE: the two helpers below are plain properties, not hybrid
    # properties. They iterate loaded relationship collections, which has no
    # SQL-expression equivalent, so class-level (query-time) evaluation could
    # never work.
    @property
    def primary_file(self) -> Optional['File']:
        """Get the primary file (prefer PDF > EPUB > others).

        Returns:
            The attached file with the best format rank, the first one in
            collection order on ties, or None when the book has no files.
        """
        if not self.files:
            return None
        format_priority = {'pdf': 0, 'epub': 1, 'mobi': 2, 'azw3': 3}
        # min() returns the first minimal element, matching what a stable
        # sort followed by [0] would pick. A not-yet-populated format ranks
        # last instead of raising.
        return min(
            self.files,
            key=lambda f: format_priority.get((f.format or '').lower(), 99),
        )

    @property
    def primary_cover(self) -> Optional['Cover']:
        """Get the primary cover image.

        Returns:
            The first cover flagged ``is_primary``; otherwise the first cover
            of any kind; None when the book has no covers.
        """
        for cover in self.covers:
            if cover.is_primary:
                return cover
        return self.covers[0] if self.covers else None

    def __repr__(self):
        # title may still be None on an instance that has not been populated.
        title = (self.title or '')[:50]
        return f"<Book(id={self.id}, title='{title}')>"
130
+
131
+
132
+ class Author(Base):
133
+ """Author/creator entity."""
134
+ __tablename__ = 'authors'
135
+
136
+ id = Column(Integer, primary_key=True)
137
+ name = Column(String(200), nullable=False, index=True)
138
+ sort_name = Column(String(200), index=True) # "Tolkien, J.R.R."
139
+ bio = Column(Text)
140
+ birth_year = Column(Integer)
141
+ death_year = Column(Integer)
142
+
143
+ # Relationships
144
+ books = relationship('Book', secondary=book_authors, back_populates='authors')
145
+
146
+ __table_args__ = (
147
+ UniqueConstraint('name', name='uix_author_name'),
148
+ )
149
+
150
+ def __repr__(self):
151
+ return f"<Author(id={self.id}, name='{self.name}')>"
152
+
153
+
154
+ class Subject(Base):
155
+ """Subject/tag/genre with hierarchical support."""
156
+ __tablename__ = 'subjects'
157
+
158
+ id = Column(Integer, primary_key=True)
159
+ name = Column(String(200), nullable=False, unique=True, index=True)
160
+ parent_id = Column(Integer, ForeignKey('subjects.id', ondelete='SET NULL'))
161
+ type = Column(String(50), default='topic') # genre, topic, keyword, personal_tag
162
+
163
+ # Self-referential relationship for hierarchy
164
+ parent = relationship('Subject', remote_side=[id], backref='children')
165
+ books = relationship('Book', secondary=book_subjects, back_populates='subjects')
166
+
167
+ def __repr__(self):
168
+ return f"<Subject(id={self.id}, name='{self.name}', type='{self.type}')>"
169
+
170
+
171
class Tag(Base):
    """User-defined hierarchical tags for organizing books.

    Tags are separate from Subjects:
    - Subjects: Bibliographic metadata (what the book is about)
    - Tags: User-defined organization (how you use/categorize the book)

    Examples:
    - path="Work/Project-2024"
    - path="Personal/To-Read"
    - path="Reference/Programming/Python"
    """
    __tablename__ = 'tags'

    id = Column(Integer, primary_key=True)
    name = Column(String(200), nullable=False, index=True)  # Name at this level (e.g., "Python")
    path = Column(String(500), nullable=False, unique=True, index=True)  # Full path (e.g., "Programming/Python")
    parent_id = Column(Integer, ForeignKey('tags.id', ondelete='CASCADE'))

    # Metadata
    description = Column(Text)  # Optional description of the tag
    color = Column(String(7))  # Hex color code for UI display (e.g., "#FF5733")
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    # Self-referential relationship for hierarchy
    parent = relationship('Tag', remote_side=[id], backref='children')
    books = relationship('Book', secondary=book_tags, back_populates='tags')

    __table_args__ = (
        Index('idx_tag_path', 'path'),
        Index('idx_tag_parent', 'parent_id'),
    )

    @property
    def depth(self) -> int:
        """Number of '/' separators in ``path`` (a root tag has depth 0)."""
        return len(self.path.split('/')) - 1

    @property
    def ancestors(self) -> List['Tag']:
        """Ancestor tags ordered from the root down to the direct parent."""
        chain = []
        node = self.parent
        # Walk upward collecting nearest-first, then flip to root-first.
        while node:
            chain.append(node)
            node = node.parent
        chain.reverse()
        return chain

    @property
    def full_path_parts(self) -> List[str]:
        """Components of ``path`` split on '/'."""
        parts = self.path.split('/')
        return parts

    def __repr__(self):
        return f"<Tag(id={self.id}, path='{self.path}')>"
226
+
227
+
228
+ class Contributor(Base):
229
+ """Contributors to a book (editors, translators, illustrators, etc)."""
230
+ __tablename__ = 'contributors'
231
+
232
+ id = Column(Integer, primary_key=True)
233
+ book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), nullable=False)
234
+
235
+ name = Column(String(200), nullable=False, index=True)
236
+ role = Column(String(50), nullable=False) # editor, translator, illustrator, etc.
237
+ file_as = Column(String(200)) # Sorting name
238
+
239
+ book = relationship('Book', back_populates='contributors')
240
+
241
+ __table_args__ = (
242
+ Index('idx_contributor_name', 'name'),
243
+ Index('idx_contributor_role', 'role'),
244
+ )
245
+
246
+ def __repr__(self):
247
+ return f"<Contributor(name='{self.name}', role='{self.role}')>"
248
+
249
+
250
+ class Identifier(Base):
251
+ """Flexible identifiers (ISBN, DOI, etc)."""
252
+ __tablename__ = 'identifiers'
253
+
254
+ id = Column(Integer, primary_key=True)
255
+ book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), nullable=False)
256
+ scheme = Column(String(50), nullable=False, index=True) # isbn, doi, arxiv, goodreads
257
+ value = Column(String(200), nullable=False, index=True)
258
+
259
+ book = relationship('Book', back_populates='identifiers')
260
+
261
+ __table_args__ = (
262
+ UniqueConstraint('book_id', 'scheme', 'value', name='uix_identifier'),
263
+ )
264
+
265
+ def __repr__(self):
266
+ return f"<Identifier(scheme='{self.scheme}', value='{self.value}')>"
267
+
268
+
269
class File(Base):
    """Ebook files with extraction metadata."""
    __tablename__ = 'files'

    id = Column(Integer, primary_key=True)
    book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), nullable=False)

    path = Column(String(500), nullable=False)  # Relative to library root
    format = Column(String(20), nullable=False, index=True)  # pdf, epub, mobi
    size_bytes = Column(Integer)
    file_hash = Column(String(64), unique=True, nullable=False, index=True)  # SHA256

    # File metadata
    mime_type = Column(String(100))  # Full MIME type (e.g., application/pdf)
    created_date = Column(DateTime)  # File creation time from filesystem
    modified_date = Column(DateTime)  # File modification time from filesystem
    creator_application = Column(String(200))  # PDF: Creator app (e.g., "LaTeX")

    # Text extraction status
    text_extracted = Column(Boolean, default=False)
    extraction_date = Column(DateTime)

    book = relationship('Book', back_populates='files')
    extracted_text = relationship('ExtractedText', back_populates='file', uselist=False, cascade='all, delete-orphan')
    chunks = relationship('TextChunk', back_populates='file', cascade='all, delete-orphan')

    @staticmethod
    def compute_hash(file_path: Path) -> str:
        """Return the hex SHA256 digest of the file at ``file_path``.

        The file is read in 8192-byte blocks, so it is never held in memory
        as a whole.
        """
        digest = hashlib.sha256()
        with open(file_path, 'rb') as stream:
            while True:
                block = stream.read(8192)
                if not block:  # empty read means end of file
                    break
                digest.update(block)
        return digest.hexdigest()

    def __repr__(self):
        return f"<File(id={self.id}, format='{self.format}', path='{self.path}')>"
306
+
307
+
308
class ExtractedText(Base):
    """Full extracted text for search.

    At most one row per file: ``file_id`` is unique.
    """
    __tablename__ = 'extracted_texts'

    id = Column(Integer, primary_key=True)
    file_id = Column(Integer, ForeignKey('files.id', ondelete='CASCADE'), unique=True, nullable=False)

    content = Column(Text, nullable=False)  # Full text - will use FTS5 virtual table
    content_hash = Column(String(64), nullable=False)
    extracted_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    file = relationship('File', back_populates='extracted_text')

    def __repr__(self):
        # content is None on an instance that has not been populated yet;
        # repr() must not raise in that state.
        length = len(self.content) if self.content is not None else 0
        return f"<ExtractedText(file_id={self.file_id}, length={length})>"
323
+
324
+
325
+ class TextChunk(Base):
326
+ """Chunks for semantic search with embeddings."""
327
+ __tablename__ = 'text_chunks'
328
+
329
+ id = Column(Integer, primary_key=True)
330
+ file_id = Column(Integer, ForeignKey('files.id', ondelete='CASCADE'), nullable=False)
331
+
332
+ chunk_index = Column(Integer, nullable=False) # Order within file
333
+ content = Column(Text, nullable=False) # 500-1000 words
334
+
335
+ # Page range (if available)
336
+ start_page = Column(Integer)
337
+ end_page = Column(Integer)
338
+
339
+ # Embedding stored separately (pickle file or vector extension)
340
+ has_embedding = Column(Boolean, default=False)
341
+
342
+ file = relationship('File', back_populates='chunks')
343
+
344
+ __table_args__ = (
345
+ UniqueConstraint('file_id', 'chunk_index', name='uix_chunk'),
346
+ Index('idx_chunk_file', 'file_id', 'chunk_index'),
347
+ )
348
+
349
+ def __repr__(self):
350
+ return f"<TextChunk(id={self.id}, file_id={self.file_id}, index={self.chunk_index})>"
351
+
352
+
353
+ class Cover(Base):
354
+ """Cover images."""
355
+ __tablename__ = 'covers'
356
+
357
+ id = Column(Integer, primary_key=True)
358
+ book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), nullable=False)
359
+
360
+ path = Column(String(500), nullable=False) # Relative to library root
361
+ width = Column(Integer)
362
+ height = Column(Integer)
363
+ is_primary = Column(Boolean, default=True)
364
+ source = Column(String(50), default='extracted') # extracted, user_provided, downloaded
365
+
366
+ book = relationship('Book', back_populates='covers')
367
+
368
+ def __repr__(self):
369
+ return f"<Cover(id={self.id}, book_id={self.book_id}, path='{self.path}')>"
370
+
371
+
372
+ class Concept(Base):
373
+ """Knowledge graph concepts."""
374
+ __tablename__ = 'concepts'
375
+
376
+ id = Column(Integer, primary_key=True)
377
+ name = Column(String(200), nullable=False, unique=True, index=True)
378
+ description = Column(Text)
379
+ concept_type = Column(String(50), default='idea') # definition, idea, theory, principle
380
+ importance_score = Column(Float, default=0.0, index=True) # PageRank score
381
+
382
+ created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
383
+
384
+ # Relationships
385
+ book_concepts = relationship('BookConcept', back_populates='concept', cascade='all, delete-orphan')
386
+ outgoing_relations = relationship('ConceptRelation', foreign_keys='ConceptRelation.source_id', back_populates='source', cascade='all, delete-orphan')
387
+ incoming_relations = relationship('ConceptRelation', foreign_keys='ConceptRelation.target_id', back_populates='target', cascade='all, delete-orphan')
388
+
389
+ def __repr__(self):
390
+ return f"<Concept(id={self.id}, name='{self.name}')>"
391
+
392
+
393
+ class BookConcept(Base):
394
+ """Link between books and concepts they discuss."""
395
+ __tablename__ = 'book_concepts'
396
+
397
+ id = Column(Integer, primary_key=True)
398
+ book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), nullable=False)
399
+ concept_id = Column(Integer, ForeignKey('concepts.id', ondelete='CASCADE'), nullable=False)
400
+
401
+ page_references = Column(JSON) # Array of page numbers
402
+ quote_examples = Column(JSON) # Array of relevant quotes
403
+ confidence_score = Column(Float, default=1.0)
404
+
405
+ book = relationship('Book', back_populates='concepts')
406
+ concept = relationship('Concept', back_populates='book_concepts')
407
+
408
+ __table_args__ = (
409
+ UniqueConstraint('book_id', 'concept_id', name='uix_book_concept'),
410
+ )
411
+
412
+
413
+ class ConceptRelation(Base):
414
+ """Relationships between concepts (knowledge graph edges)."""
415
+ __tablename__ = 'concept_relations'
416
+
417
+ id = Column(Integer, primary_key=True)
418
+ source_id = Column(Integer, ForeignKey('concepts.id', ondelete='CASCADE'), nullable=False)
419
+ target_id = Column(Integer, ForeignKey('concepts.id', ondelete='CASCADE'), nullable=False)
420
+
421
+ relation_type = Column(String(50), nullable=False) # supports, contradicts, extends, examples, causes
422
+ strength = Column(Float, default=1.0) # 0-1
423
+ evidence_book_id = Column(Integer, ForeignKey('books.id', ondelete='SET NULL'))
424
+
425
+ source = relationship('Concept', foreign_keys=[source_id], back_populates='outgoing_relations')
426
+ target = relationship('Concept', foreign_keys=[target_id], back_populates='incoming_relations')
427
+ evidence_book = relationship('Book')
428
+
429
+ __table_args__ = (
430
+ UniqueConstraint('source_id', 'target_id', 'relation_type', name='uix_concept_relation'),
431
+ Index('idx_relation_source', 'source_id'),
432
+ Index('idx_relation_target', 'target_id'),
433
+ )
434
+
435
+
436
class ReadingSession(Base):
    """Track reading sessions for active recall."""
    __tablename__ = 'reading_sessions'

    id = Column(Integer, primary_key=True)
    book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), nullable=False)

    start_time = Column(DateTime, default=datetime.utcnow, nullable=False)
    end_time = Column(DateTime)
    pages_read = Column(Integer, default=0)

    highlights = Column(JSON)  # Array of highlight texts
    notes = Column(JSON)  # Array of note objects
    comprehension_score = Column(Float)  # From quiz results

    book = relationship('Book', back_populates='sessions')

    # Plain property rather than hybrid_property: the body calls
    # timedelta.total_seconds(), which has no meaning on a SQL expression,
    # so evaluating it on the class could never work.
    @property
    def duration_minutes(self) -> Optional[float]:
        """Length of the session in minutes.

        Returns:
            ``(end_time - start_time)`` in minutes, or None while either
            timestamp is missing (e.g. the session is still open).
        """
        if self.end_time is None or self.start_time is None:
            return None
        return (self.end_time - self.start_time).total_seconds() / 60
458
+
459
+
460
+ class Annotation(Base):
461
+ """Highlights, notes, bookmarks."""
462
+ __tablename__ = 'annotations'
463
+
464
+ id = Column(Integer, primary_key=True)
465
+ book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), nullable=False)
466
+ session_id = Column(Integer, ForeignKey('reading_sessions.id', ondelete='SET NULL'))
467
+
468
+ annotation_type = Column(String(20), nullable=False) # highlight, note, bookmark
469
+ page_number = Column(Integer)
470
+ position = Column(JSON) # {char_offset: int} or {x: float, y: float}
471
+ content = Column(Text, nullable=False) # The highlighted text or note content
472
+ color = Column(String(20)) # For highlights
473
+
474
+ created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
475
+
476
+ book = relationship('Book', back_populates='annotations')
477
+ session = relationship('ReadingSession')
478
+
479
+ __table_args__ = (
480
+ Index('idx_annotation_book', 'book_id', 'annotation_type'),
481
+ )
482
+
483
+
484
+ class PersonalMetadata(Base):
485
+ """Personal reading metadata (ratings, status, etc)."""
486
+ __tablename__ = 'personal_metadata'
487
+
488
+ id = Column(Integer, primary_key=True)
489
+ book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), unique=True, nullable=False)
490
+
491
+ # Reading status
492
+ rating = Column(Float) # 0-5 stars
493
+ reading_status = Column(String(20), default='unread') # unread, reading, read, abandoned
494
+ reading_progress = Column(Integer, default=0) # 0-100 percentage
495
+
496
+ # Collections
497
+ favorite = Column(Boolean, default=False)
498
+ owned = Column(Boolean, default=True) # vs borrowed/library
499
+
500
+ # Dates
501
+ date_added = Column(DateTime, default=datetime.utcnow, nullable=False)
502
+ date_started = Column(DateTime)
503
+ date_finished = Column(DateTime)
504
+
505
+ # Quick access tags (denormalized for performance)
506
+ personal_tags = Column(JSON) # Array of tag strings
507
+
508
+ book = relationship('Book', back_populates='personal')
509
+
510
+ __table_args__ = (
511
+ Index('idx_personal_status', 'reading_status', 'rating'),
512
+ Index('idx_personal_favorite', 'favorite'),
513
+ )
514
+
515
+
516
# Full-Text Search virtual table (SQLite FTS5)
# Not declared as a model because it is SQLite-specific. init_db() in
# session.py creates it after the regular tables, using this DDL:
#
#     CREATE VIRTUAL TABLE books_fts USING fts5(
#         book_id UNINDEXED,
#         title,
#         description,
#         extracted_text,
#         tokenize='porter unicode61'
#     )
ebk/db/session.py ADDED
@@ -0,0 +1,144 @@
1
+ """
2
+ Database session management for ebk.
3
+
4
+ Provides session factory and initialization utilities.
5
+ """
6
+
7
+ from pathlib import Path
8
+ from typing import Optional
9
+ from contextlib import contextmanager
10
+
11
+ from sqlalchemy import create_engine, event, text
12
+ from sqlalchemy.orm import sessionmaker, Session
13
+ from sqlalchemy.engine import Engine
14
+
15
+ from .models import Base
16
+
17
+ # Global session factory
18
+ _SessionFactory: Optional[sessionmaker] = None
19
+ _engine: Optional[Engine] = None
20
+
21
+
22
def init_db(library_path: Path, echo: bool = False) -> Engine:
    """
    Initialize database and create all tables.

    Creates ``library_path`` if needed, opens (or creates) ``library.db``
    inside it, creates every table declared on ``Base`` plus the
    ``books_fts`` FTS5 virtual table, and installs the module-level engine
    and session factory.

    Args:
        library_path: Path to library directory
        echo: If True, log all SQL statements (debug mode)

    Returns:
        SQLAlchemy engine
    """
    global _engine, _SessionFactory

    library_path = Path(library_path)
    library_path.mkdir(parents=True, exist_ok=True)

    db_path = library_path / 'library.db'
    db_url = f'sqlite:///{db_path}'

    engine = create_engine(db_url, echo=echo)

    # Enable foreign keys for SQLite.
    # The listener is attached to this engine instance only. Attaching it to
    # the Engine class would add one more global listener per init_db() call
    # and run the SQLite PRAGMA on every engine in the process.
    @event.listens_for(engine, "connect")
    def set_sqlite_pragma(dbapi_conn, connection_record):
        cursor = dbapi_conn.cursor()
        try:
            cursor.execute("PRAGMA foreign_keys=ON")
        finally:
            cursor.close()

    # Create all tables
    Base.metadata.create_all(engine)

    # Create FTS5 virtual table for full-text search
    with engine.connect() as conn:
        # Check if FTS table exists
        result = conn.execute(
            text("SELECT name FROM sqlite_master WHERE type='table' AND name='books_fts'")
        )
        if not result.fetchone():
            conn.execute(text("""
                CREATE VIRTUAL TABLE books_fts USING fts5(
                    book_id UNINDEXED,
                    title,
                    description,
                    extracted_text,
                    tokenize='porter unicode61'
                )
            """))
            conn.commit()

    # Publish the new engine only after setup succeeded, then release the
    # pooled connections of any engine left over from an earlier init_db().
    previous_engine = _engine
    _engine = engine
    _SessionFactory = sessionmaker(bind=engine)
    if previous_engine is not None:
        previous_engine.dispose()

    return engine
75
+
76
+
77
def get_session() -> Session:
    """
    Open a fresh session from the module-level session factory.

    Returns:
        SQLAlchemy session

    Raises:
        RuntimeError: If database not initialized
    """
    factory = _SessionFactory
    if factory is not None:
        return factory()
    raise RuntimeError("Database not initialized. Call init_db() first.")
92
+
93
+
94
@contextmanager
def session_scope():
    """
    Context manager wrapping a unit of work in one transaction.

    The session is committed when the ``with`` body finishes normally,
    rolled back (and the exception re-raised) when the body or the commit
    fails, and closed in every case.

    Usage:
        with session_scope() as session:
            session.add(book)
            # Automatically commits or rolls back
    """
    db_session = get_session()
    try:
        yield db_session
        # Commit stays inside the try so a failing commit is rolled back too.
        db_session.commit()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()
113
+
114
+
115
def close_db():
    """Dispose the module-level engine, if any, and reset the session factory."""
    global _engine, _SessionFactory

    if _engine is not None:
        # dispose() runs before the global is cleared.
        _engine.dispose()
        _engine = None

    # After this, get_session() raises until init_db() is called again.
    _SessionFactory = None
124
+
125
+
126
def get_or_create(session: Session, model, *, defaults=None, **kwargs):
    """
    Get existing instance or create new one.

    The lookup uses ``kwargs`` only. When nothing matches, a new instance is
    built from ``kwargs`` plus ``defaults`` and added to the session; it is
    neither flushed nor committed here.

    Args:
        session: Database session
        model: SQLAlchemy model class
        defaults: Optional mapping of extra attribute values applied only
            when a new instance is created; never used for the lookup
        **kwargs: Filter criteria; also set on a newly created instance

    Returns:
        Tuple of (instance, created: bool)
    """
    instance = session.query(model).filter_by(**kwargs).first()
    # Identity check: a model defining __len__/__bool__ must not make an
    # existing row look missing.
    if instance is not None:
        return instance, False

    values = dict(kwargs)
    if defaults:
        values.update(defaults)
    instance = model(**values)
    session.add(instance)
    return instance, True