ebk 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ebk/__init__.py +35 -0
- ebk/ai/__init__.py +23 -0
- ebk/ai/knowledge_graph.py +450 -0
- ebk/ai/llm_providers/__init__.py +26 -0
- ebk/ai/llm_providers/anthropic.py +209 -0
- ebk/ai/llm_providers/base.py +295 -0
- ebk/ai/llm_providers/gemini.py +285 -0
- ebk/ai/llm_providers/ollama.py +294 -0
- ebk/ai/metadata_enrichment.py +394 -0
- ebk/ai/question_generator.py +328 -0
- ebk/ai/reading_companion.py +224 -0
- ebk/ai/semantic_search.py +433 -0
- ebk/ai/text_extractor.py +393 -0
- ebk/calibre_import.py +66 -0
- ebk/cli.py +6433 -0
- ebk/config.py +230 -0
- ebk/db/__init__.py +37 -0
- ebk/db/migrations.py +507 -0
- ebk/db/models.py +725 -0
- ebk/db/session.py +144 -0
- ebk/decorators.py +1 -0
- ebk/exports/__init__.py +0 -0
- ebk/exports/base_exporter.py +218 -0
- ebk/exports/echo_export.py +279 -0
- ebk/exports/html_library.py +1743 -0
- ebk/exports/html_utils.py +87 -0
- ebk/exports/hugo.py +59 -0
- ebk/exports/jinja_export.py +286 -0
- ebk/exports/multi_facet_export.py +159 -0
- ebk/exports/opds_export.py +232 -0
- ebk/exports/symlink_dag.py +479 -0
- ebk/exports/zip.py +25 -0
- ebk/extract_metadata.py +341 -0
- ebk/ident.py +89 -0
- ebk/library_db.py +1440 -0
- ebk/opds.py +748 -0
- ebk/plugins/__init__.py +42 -0
- ebk/plugins/base.py +502 -0
- ebk/plugins/hooks.py +442 -0
- ebk/plugins/registry.py +499 -0
- ebk/repl/__init__.py +9 -0
- ebk/repl/find.py +126 -0
- ebk/repl/grep.py +173 -0
- ebk/repl/shell.py +1677 -0
- ebk/repl/text_utils.py +320 -0
- ebk/search_parser.py +413 -0
- ebk/server.py +3608 -0
- ebk/services/__init__.py +28 -0
- ebk/services/annotation_extraction.py +351 -0
- ebk/services/annotation_service.py +380 -0
- ebk/services/export_service.py +577 -0
- ebk/services/import_service.py +447 -0
- ebk/services/personal_metadata_service.py +347 -0
- ebk/services/queue_service.py +253 -0
- ebk/services/tag_service.py +281 -0
- ebk/services/text_extraction.py +317 -0
- ebk/services/view_service.py +12 -0
- ebk/similarity/__init__.py +77 -0
- ebk/similarity/base.py +154 -0
- ebk/similarity/core.py +471 -0
- ebk/similarity/extractors.py +168 -0
- ebk/similarity/metrics.py +376 -0
- ebk/skills/SKILL.md +182 -0
- ebk/skills/__init__.py +1 -0
- ebk/vfs/__init__.py +101 -0
- ebk/vfs/base.py +298 -0
- ebk/vfs/library_vfs.py +122 -0
- ebk/vfs/nodes/__init__.py +54 -0
- ebk/vfs/nodes/authors.py +196 -0
- ebk/vfs/nodes/books.py +480 -0
- ebk/vfs/nodes/files.py +155 -0
- ebk/vfs/nodes/metadata.py +385 -0
- ebk/vfs/nodes/root.py +100 -0
- ebk/vfs/nodes/similar.py +165 -0
- ebk/vfs/nodes/subjects.py +184 -0
- ebk/vfs/nodes/tags.py +371 -0
- ebk/vfs/resolver.py +228 -0
- ebk/vfs_router.py +275 -0
- ebk/views/__init__.py +32 -0
- ebk/views/dsl.py +668 -0
- ebk/views/service.py +619 -0
- ebk-0.4.4.dist-info/METADATA +755 -0
- ebk-0.4.4.dist-info/RECORD +87 -0
- ebk-0.4.4.dist-info/WHEEL +5 -0
- ebk-0.4.4.dist-info/entry_points.txt +2 -0
- ebk-0.4.4.dist-info/licenses/LICENSE +21 -0
- ebk-0.4.4.dist-info/top_level.txt +1 -0
ebk/db/models.py
ADDED
|
@@ -0,0 +1,725 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SQLAlchemy models for ebk database.
|
|
3
|
+
|
|
4
|
+
Clean, normalized schema with proper relationships and indexes.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from datetime import datetime, timezone
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
import hashlib
|
|
11
|
+
|
|
12
|
+
from sqlalchemy import (
|
|
13
|
+
Column, Integer, String, Text, Boolean, Float,
|
|
14
|
+
DateTime, ForeignKey, Table, UniqueConstraint, Index, JSON
|
|
15
|
+
)
|
|
16
|
+
from sqlalchemy.ext.declarative import declarative_base
|
|
17
|
+
from sqlalchemy.orm import relationship
|
|
18
|
+
from sqlalchemy.ext.hybrid import hybrid_property
|
|
19
|
+
|
|
20
|
+
# Declarative base shared by every model class in this module; the association
# tables below are registered on its metadata as well.
Base = declarative_base()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def utc_now():
    """Give the current UTC instant as a timezone-naive ``datetime``.

    The clock is read as an aware UTC value and the ``tzinfo`` is then
    dropped, so the result carries no timezone (for SQLite compatibility).
    """
    aware_now = datetime.now(timezone.utc)
    return aware_now.replace(tzinfo=None)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Association tables for many-to-many relationships
|
|
29
|
+
# Book <-> Author link table. Besides the two foreign keys (which together
# form the primary key) each row records the credit's role and its ordering
# position.
book_authors = Table(
    'book_authors',
    Base.metadata,
    Column(
        'book_id',
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        primary_key=True,
    ),
    Column(
        'author_id',
        Integer,
        ForeignKey('authors.id', ondelete='CASCADE'),
        primary_key=True,
    ),
    # Role values: author, editor, translator, contributor
    Column('role', String(50), default='author'),
    # Used for ordering
    Column('position', Integer, default=0),
)
|
|
37
|
+
|
|
38
|
+
# Book <-> Subject link table. The two foreign keys form the primary key;
# each row also stores a relevance score and where the link came from.
book_subjects = Table(
    'book_subjects',
    Base.metadata,
    Column(
        'book_id',
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        primary_key=True,
    ),
    Column(
        'subject_id',
        Integer,
        ForeignKey('subjects.id', ondelete='CASCADE'),
        primary_key=True,
    ),
    # How central this topic is to the book (0-1)
    Column('relevance_score', Float, default=1.0),
    # Origin of the link: calibre, ai_extracted, user_added
    Column('source', String(50), default='user'),
)
|
|
46
|
+
|
|
47
|
+
# Book <-> Tag link table. The two foreign keys form the primary key; the
# timestamp records when the tag was attached.
book_tags = Table(
    'book_tags',
    Base.metadata,
    Column(
        'book_id',
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        primary_key=True,
    ),
    Column(
        'tag_id',
        Integer,
        ForeignKey('tags.id', ondelete='CASCADE'),
        primary_key=True,
    ),
    # When the tag was added
    Column('created_at', DateTime, default=utc_now),
)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class Book(Base):
    """Core book entity with metadata.

    Maps the ``books`` table: bibliographic fields, series and edition
    details, content statistics, timestamps, and the relationships to
    authors, subjects, tags, files, covers and the other per-book records.
    """
    __tablename__ = 'books'

    id = Column(Integer, primary_key=True)
    unique_id = Column(String(32), unique=True, nullable=False, index=True)  # Hash-based

    # Core metadata
    title = Column(String(500), nullable=False, index=True)
    subtitle = Column(String(500))
    sort_title = Column(String(500), index=True)  # For alphabetical sorting
    language = Column(String(10), index=True)  # ISO 639-1 code
    publisher = Column(String(200), index=True)
    publication_date = Column(String(50))  # Flexible: year, YYYY-MM, or YYYY-MM-DD

    # Series information
    series = Column(String(200), index=True)  # Book series name
    series_index = Column(Float)  # Position in series (e.g., 2.5)

    # Edition and rights
    edition = Column(String(100))  # "2nd Edition", "Revised", etc.
    rights = Column(Text)  # Copyright/license statement
    source = Column(String(500))  # Original source URL or reference

    # Rich content
    description = Column(Text)  # Full text indexed separately
    page_count = Column(Integer)
    word_count = Column(Integer)  # From extracted text
    keywords = Column(JSON)  # Array of keyword strings from PDF/metadata

    # User customization
    color = Column(String(7))  # Hex color code (e.g., #FF5733)

    # Timestamps
    created_at = Column(DateTime, default=utc_now, nullable=False)
    updated_at = Column(DateTime, default=utc_now, onupdate=utc_now, nullable=False)

    # Many-to-many relationships (through the association tables), loaded
    # with the 'selectin' strategy.
    authors = relationship('Author', secondary=book_authors, back_populates='books', lazy='selectin')
    subjects = relationship('Subject', secondary=book_subjects, back_populates='books', lazy='selectin')
    tags = relationship('Tag', secondary=book_tags, back_populates='books', lazy='selectin')

    # One-to-many child records, all configured with 'all, delete-orphan'.
    contributors = relationship('Contributor', back_populates='book', cascade='all, delete-orphan')
    identifiers = relationship('Identifier', back_populates='book', cascade='all, delete-orphan')
    files = relationship('File', back_populates='book', cascade='all, delete-orphan')
    covers = relationship('Cover', back_populates='book', cascade='all, delete-orphan')
    concepts = relationship('BookConcept', back_populates='book', cascade='all, delete-orphan')
    sessions = relationship('ReadingSession', back_populates='book', cascade='all, delete-orphan')
    annotations = relationship('Annotation', back_populates='book', cascade='all, delete-orphan')

    # One-to-one personal metadata record (uselist=False).
    personal = relationship('PersonalMetadata', back_populates='book', uselist=False, cascade='all, delete-orphan')

    # Indexes
    __table_args__ = (
        Index('idx_book_title_lang', 'title', 'language'),
        Index('idx_book_created', 'created_at'),
    )

    # NOTE(review): the two hybrids below define no SQL expression, so they
    # appear intended for instance-level use only — confirm before using
    # them inside queries.
    @hybrid_property
    def primary_file(self) -> Optional['File']:
        """Get the primary file (prefer PDF > EPUB > MOBI > AZW3 > others).

        Returns:
            The attached file whose format ranks best, or ``None`` when the
            book has no files. Among equally ranked files the one that comes
            first in ``self.files`` wins.
        """
        if not self.files:
            return None
        format_priority = {'pdf': 0, 'epub': 1, 'mobi': 2, 'azw3': 3}
        # min() yields the first lowest-ranked element, which is exactly what
        # a stable sort followed by [0] would pick. A file whose format has
        # not been set yet is ranked last instead of raising AttributeError.
        return min(
            self.files,
            key=lambda f: format_priority.get((f.format or '').lower(), 99),
        )

    @hybrid_property
    def primary_cover(self) -> Optional['Cover']:
        """Get the primary cover image.

        Returns:
            The first cover flagged ``is_primary``; otherwise the first cover
            in ``self.covers``; ``None`` when the book has no covers.
        """
        for cover in self.covers:
            if cover.is_primary:
                return cover
        return self.covers[0] if self.covers else None

    def __repr__(self):
        """Return a debug representation with the title capped at 50 characters."""
        # The title can still be None on an instance that has not been fully
        # populated; repr() must not raise in that case.
        title = self.title or ''
        return f"<Book(id={self.id}, title='{title[:50]}')>"
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
class Author(Base):
    """Author/creator entity stored in the ``authors`` table.

    Linked many-to-many to books through the ``book_authors`` association
    table. Names are unique (constraint ``uix_author_name``).
    """
    __tablename__ = 'authors'

    id = Column(Integer, primary_key=True)
    name = Column(String(200), nullable=False, index=True)
    # Sortable form of the name, e.g. "Tolkien, J.R.R."
    sort_name = Column(String(200), index=True)
    bio = Column(Text)
    birth_year = Column(Integer)
    death_year = Column(Integer)

    # Counterpart of Book.authors
    books = relationship(
        'Book',
        secondary=book_authors,
        back_populates='authors',
    )

    __table_args__ = (
        UniqueConstraint('name', name='uix_author_name'),
    )

    def __repr__(self):
        """Return a short debug representation (id and name)."""
        return f"<Author(id={self.id}, name='{self.name}')>"
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class Subject(Base):
    """Subject/tag/genre with hierarchical support.

    Rows live in the ``subjects`` table. A subject may point at a parent
    subject; the foreign key is declared with ``ondelete='SET NULL'``.
    """
    __tablename__ = 'subjects'

    id = Column(Integer, primary_key=True)
    name = Column(String(200), nullable=False, unique=True, index=True)
    parent_id = Column(
        Integer,
        ForeignKey('subjects.id', ondelete='SET NULL'),
    )
    # Kind of subject: genre, topic, keyword, personal_tag
    type = Column(String(50), default='topic')

    # Self-referential hierarchy; the backref exposes ``children``.
    parent = relationship('Subject', remote_side=[id], backref='children')
    # Counterpart of Book.subjects
    books = relationship(
        'Book',
        secondary=book_subjects,
        back_populates='subjects',
    )

    def __repr__(self):
        """Return a short debug representation (id, name and type)."""
        return f"<Subject(id={self.id}, name='{self.name}', type='{self.type}')>"
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class Tag(Base):
    """User-defined hierarchical tags for organizing books.

    Tags are distinct from Subjects:
    - Subjects: bibliographic metadata (what the book is about)
    - Tags: user-defined organization (how you use/categorize the book)

    Each tag stores both its own ``name`` and the full slash-separated
    ``path``. Example paths:
    - path="Work/Project-2024"
    - path="Personal/To-Read"
    - path="Reference/Programming/Python"
    """
    __tablename__ = 'tags'

    id = Column(Integer, primary_key=True)
    # Name at this level (e.g., "Python")
    name = Column(String(200), nullable=False, index=True)
    # Full path (e.g., "Programming/Python")
    path = Column(String(500), nullable=False, unique=True, index=True)
    parent_id = Column(Integer, ForeignKey('tags.id', ondelete='CASCADE'))

    # Metadata
    description = Column(Text)  # Optional description of the tag
    color = Column(String(7))  # Hex color code for UI display (e.g., "#FF5733")
    created_at = Column(DateTime, default=utc_now, nullable=False)

    # Self-referential hierarchy; the backref exposes ``children``.
    parent = relationship('Tag', remote_side=[id], backref='children')
    # Counterpart of Book.tags
    books = relationship('Book', secondary=book_tags, back_populates='tags')

    # NOTE(review): 'path' is already declared with index=True above, so
    # 'idx_tag_path' looks redundant — confirm against the migrations before
    # dropping either one.
    __table_args__ = (
        Index('idx_tag_path', 'path'),
        Index('idx_tag_parent', 'parent_id'),
    )

    @property
    def depth(self) -> int:
        """Depth in the hierarchy (root=0), i.e. the number of '/' in ``path``."""
        separators = self.path.count('/')
        return separators

    @property
    def ancestors(self) -> List['Tag']:
        """Ancestor tags ordered from the root down to the direct parent."""
        chain = []
        node = self.parent
        # Walk upwards collecting nearest-first, then flip to root-first.
        while node:
            chain.append(node)
            node = node.parent
        chain.reverse()
        return chain

    @property
    def full_path_parts(self) -> List[str]:
        """Components of ``path`` obtained by splitting on '/'."""
        parts = self.path.split('/')
        return parts

    def __repr__(self):
        """Return a short debug representation (id and path)."""
        return f"<Tag(id={self.id}, path='{self.path}')>"
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
class Contributor(Base):
    """Contributors to a book (editors, translators, illustrators, etc).

    Each row in ``contributors`` belongs to exactly one book.
    """
    __tablename__ = 'contributors'

    id = Column(Integer, primary_key=True)
    book_id = Column(
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        nullable=False,
    )

    name = Column(String(200), nullable=False, index=True)
    # Role values: editor, translator, illustrator, etc.
    role = Column(String(50), nullable=False)
    # Sorting name
    file_as = Column(String(200))

    # Counterpart of Book.contributors
    book = relationship('Book', back_populates='contributors')

    # NOTE(review): 'name' already has index=True, so 'idx_contributor_name'
    # looks like a second index on the same column — confirm before changing.
    __table_args__ = (
        Index('idx_contributor_name', 'name'),
        Index('idx_contributor_role', 'role'),
    )

    def __repr__(self):
        """Return a short debug representation (name and role)."""
        return f"<Contributor(name='{self.name}', role='{self.role}')>"
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
class Identifier(Base):
    """Flexible identifiers (ISBN, DOI, etc).

    An identifier is a (scheme, value) pair attached to one book; the
    combination of book, scheme and value is unique (``uix_identifier``).
    """
    __tablename__ = 'identifiers'

    id = Column(Integer, primary_key=True)
    book_id = Column(
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        nullable=False,
    )
    # Scheme examples: isbn, doi, arxiv, goodreads
    scheme = Column(String(50), nullable=False, index=True)
    value = Column(String(200), nullable=False, index=True)

    # Counterpart of Book.identifiers
    book = relationship('Book', back_populates='identifiers')

    __table_args__ = (
        UniqueConstraint('book_id', 'scheme', 'value', name='uix_identifier'),
    )

    def __repr__(self):
        """Return a short debug representation (scheme and value)."""
        return f"<Identifier(scheme='{self.scheme}', value='{self.value}')>"
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
class File(Base):
    """Ebook files with extraction metadata.

    Each row in ``files`` belongs to one book and is identified by a unique
    SHA256 ``file_hash``.
    """
    __tablename__ = 'files'

    id = Column(Integer, primary_key=True)
    book_id = Column(
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        nullable=False,
    )

    path = Column(String(500), nullable=False)  # Relative to library root
    format = Column(String(20), nullable=False, index=True)  # pdf, epub, mobi
    size_bytes = Column(Integer)
    file_hash = Column(String(64), unique=True, nullable=False, index=True)  # SHA256

    # File metadata
    mime_type = Column(String(100))  # Full MIME type (e.g., application/pdf)
    created_date = Column(DateTime)  # File creation time from filesystem
    modified_date = Column(DateTime)  # File modification time from filesystem
    creator_application = Column(String(200))  # PDF: Creator app (e.g., "LaTeX")

    # Text extraction status
    text_extracted = Column(Boolean, default=False)
    extraction_date = Column(DateTime)

    # Counterpart of Book.files
    book = relationship('Book', back_populates='files')
    # At most one extracted-text record per file (uselist=False)
    extracted_text = relationship(
        'ExtractedText',
        back_populates='file',
        uselist=False,
        cascade='all, delete-orphan',
    )
    chunks = relationship(
        'TextChunk',
        back_populates='file',
        cascade='all, delete-orphan',
    )

    @staticmethod
    def compute_hash(file_path: Path) -> str:
        """Compute the SHA256 hash of a file.

        Args:
            file_path: Location of the file to read (opened in binary mode).

        Returns:
            The hexadecimal digest of the file's contents.
        """
        digest = hashlib.sha256()
        with open(file_path, 'rb') as stream:
            # Feed the hash in 8192-byte reads until the file is exhausted.
            while True:
                block = stream.read(8192)
                if not block:
                    break
                digest.update(block)
        return digest.hexdigest()

    def __repr__(self):
        """Return a short debug representation (id, format and path)."""
        return f"<File(id={self.id}, format='{self.format}', path='{self.path}')>"
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
class ExtractedText(Base):
    """Full extracted text for search.

    One row per file (``file_id`` is unique), holding the complete text
    together with a hash of that content and the extraction timestamp.
    """
    __tablename__ = 'extracted_texts'

    id = Column(Integer, primary_key=True)
    file_id = Column(Integer, ForeignKey('files.id', ondelete='CASCADE'), unique=True, nullable=False)

    content = Column(Text, nullable=False)  # Full text - will use FTS5 virtual table
    content_hash = Column(String(64), nullable=False)
    extracted_at = Column(DateTime, default=utc_now, nullable=False)

    # Counterpart of File.extracted_text
    file = relationship('File', back_populates='extracted_text')

    def __repr__(self):
        """Return a debug representation showing the file id and text length."""
        # content can still be None on an instance that has not been
        # populated yet; report length 0 rather than raising from repr().
        length = len(self.content) if self.content is not None else 0
        return f"<ExtractedText(file_id={self.file_id}, length={length})>"
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
class TextChunk(Base):
    """Chunks for semantic search with embeddings.

    A file is split into ordered chunks; ``(file_id, chunk_index)`` is
    unique (``uix_chunk``).
    """
    __tablename__ = 'text_chunks'

    id = Column(Integer, primary_key=True)
    file_id = Column(
        Integer,
        ForeignKey('files.id', ondelete='CASCADE'),
        nullable=False,
    )

    # Order within file
    chunk_index = Column(Integer, nullable=False)
    # 500-1000 words
    content = Column(Text, nullable=False)

    # Page range (if available)
    start_page = Column(Integer)
    end_page = Column(Integer)

    # Embedding stored separately (pickle file or vector extension)
    has_embedding = Column(Boolean, default=False)

    # Counterpart of File.chunks
    file = relationship('File', back_populates='chunks')

    # NOTE(review): 'idx_chunk_file' covers the same columns as the unique
    # constraint 'uix_chunk' — possibly redundant; confirm before changing.
    __table_args__ = (
        UniqueConstraint('file_id', 'chunk_index', name='uix_chunk'),
        Index('idx_chunk_file', 'file_id', 'chunk_index'),
    )

    def __repr__(self):
        """Return a short debug representation (id, file id and chunk index)."""
        return f"<TextChunk(id={self.id}, file_id={self.file_id}, index={self.chunk_index})>"
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
class Cover(Base):
    """Cover images.

    Each row in ``covers`` belongs to one book; ``is_primary`` defaults to
    True.
    """
    __tablename__ = 'covers'

    id = Column(Integer, primary_key=True)
    book_id = Column(
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        nullable=False,
    )

    # Relative to library root
    path = Column(String(500), nullable=False)
    width = Column(Integer)
    height = Column(Integer)
    is_primary = Column(Boolean, default=True)
    # Origin of the image: extracted, user_provided, downloaded
    source = Column(String(50), default='extracted')

    # Counterpart of Book.covers
    book = relationship('Book', back_populates='covers')

    def __repr__(self):
        """Return a short debug representation (id, book id and path)."""
        return f"<Cover(id={self.id}, book_id={self.book_id}, path='{self.path}')>"
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
class Concept(Base):
    """Knowledge graph concepts.

    A concept is a uniquely named node; its edges are ``ConceptRelation``
    rows and its links to books are ``BookConcept`` rows.
    """
    __tablename__ = 'concepts'

    id = Column(Integer, primary_key=True)
    name = Column(String(200), nullable=False, unique=True, index=True)
    description = Column(Text)
    # Kind of concept: definition, idea, theory, principle
    concept_type = Column(String(50), default='idea')
    # PageRank score
    importance_score = Column(Float, default=0.0, index=True)

    created_at = Column(DateTime, default=utc_now, nullable=False)

    # Links to the books discussing this concept
    book_concepts = relationship(
        'BookConcept',
        back_populates='concept',
        cascade='all, delete-orphan',
    )
    # Edges where this concept is the source
    outgoing_relations = relationship(
        'ConceptRelation',
        foreign_keys='ConceptRelation.source_id',
        back_populates='source',
        cascade='all, delete-orphan',
    )
    # Edges where this concept is the target
    incoming_relations = relationship(
        'ConceptRelation',
        foreign_keys='ConceptRelation.target_id',
        back_populates='target',
        cascade='all, delete-orphan',
    )

    def __repr__(self):
        """Return a short debug representation (id and name)."""
        return f"<Concept(id={self.id}, name='{self.name}')>"
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
class BookConcept(Base):
    """Link between books and concepts they discuss.

    At most one row exists per (book, concept) pair (``uix_book_concept``).
    """
    __tablename__ = 'book_concepts'

    id = Column(Integer, primary_key=True)
    book_id = Column(
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        nullable=False,
    )
    concept_id = Column(
        Integer,
        ForeignKey('concepts.id', ondelete='CASCADE'),
        nullable=False,
    )

    # Array of page numbers
    page_references = Column(JSON)
    # Array of relevant quotes
    quote_examples = Column(JSON)
    confidence_score = Column(Float, default=1.0)

    # Counterparts of Book.concepts and Concept.book_concepts
    book = relationship('Book', back_populates='concepts')
    concept = relationship('Concept', back_populates='book_concepts')

    __table_args__ = (
        UniqueConstraint('book_id', 'concept_id', name='uix_book_concept'),
    )
|
|
416
|
+
|
|
417
|
+
|
|
418
|
+
class ConceptRelation(Base):
    """Relationships between concepts (knowledge graph edges).

    An edge runs from a source concept to a target concept with a relation
    type; the (source, target, type) triple is unique
    (``uix_concept_relation``).
    """
    __tablename__ = 'concept_relations'

    id = Column(Integer, primary_key=True)
    source_id = Column(
        Integer,
        ForeignKey('concepts.id', ondelete='CASCADE'),
        nullable=False,
    )
    target_id = Column(
        Integer,
        ForeignKey('concepts.id', ondelete='CASCADE'),
        nullable=False,
    )

    # Relation kinds: supports, contradicts, extends, examples, causes
    relation_type = Column(String(50), nullable=False)
    # 0-1
    strength = Column(Float, default=1.0)
    evidence_book_id = Column(
        Integer,
        ForeignKey('books.id', ondelete='SET NULL'),
    )

    # Both endpoints reference 'concepts', so each relationship names the
    # foreign key it uses.
    source = relationship(
        'Concept',
        foreign_keys=[source_id],
        back_populates='outgoing_relations',
    )
    target = relationship(
        'Concept',
        foreign_keys=[target_id],
        back_populates='incoming_relations',
    )
    evidence_book = relationship('Book')

    __table_args__ = (
        UniqueConstraint('source_id', 'target_id', 'relation_type', name='uix_concept_relation'),
        Index('idx_relation_source', 'source_id'),
        Index('idx_relation_target', 'target_id'),
    )
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
class ReadingSession(Base):
    """Track reading sessions for active recall.

    Each row in ``reading_sessions`` belongs to one book and records when
    the session started and (optionally) ended.
    """
    __tablename__ = 'reading_sessions'

    id = Column(Integer, primary_key=True)
    book_id = Column(
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        nullable=False,
    )

    start_time = Column(DateTime, default=utc_now, nullable=False)
    end_time = Column(DateTime)
    pages_read = Column(Integer, default=0)

    # Array of highlight texts
    highlights = Column(JSON)
    # Array of note objects
    notes = Column(JSON)
    # From quiz results
    comprehension_score = Column(Float)

    # Counterpart of Book.sessions
    book = relationship('Book', back_populates='sessions')

    @hybrid_property
    def duration_minutes(self) -> Optional[float]:
        """Session length in minutes, or ``None`` if either timestamp is unset."""
        if not (self.end_time and self.start_time):
            return None
        elapsed = self.end_time - self.start_time
        return elapsed.total_seconds() / 60
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
class Annotation(Base):
    """Highlights, notes, bookmarks with rich content support.

    Each row in ``annotations`` belongs to one book and may optionally
    reference the reading session it was made in.
    """
    __tablename__ = 'annotations'

    id = Column(Integer, primary_key=True)
    book_id = Column(
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        nullable=False,
    )
    session_id = Column(
        Integer,
        ForeignKey('reading_sessions.id', ondelete='SET NULL'),
    )

    # Kind of annotation: highlight, note, bookmark
    annotation_type = Column(String(20), nullable=False)
    page_number = Column(Integer)
    # {char_offset: int} or {x: float, y: float}
    position = Column(JSON)
    # The highlighted text or note content
    content = Column(Text, nullable=False)
    # For highlights
    color = Column(String(20))

    # Rich content support (new fields)
    title = Column(String(255))  # Optional title for the annotation
    content_format = Column(String(20), default='plain')  # plain, markdown, html
    category = Column(String(100))  # User-defined category
    pinned = Column(Boolean, default=False)  # Pin to top

    created_at = Column(DateTime, default=utc_now, nullable=False)
    updated_at = Column(DateTime, default=utc_now, onupdate=utc_now)

    # Counterpart of Book.annotations
    book = relationship('Book', back_populates='annotations')
    session = relationship('ReadingSession')

    __table_args__ = (
        Index('idx_annotation_book', 'book_id', 'annotation_type'),
        Index('idx_annotation_pinned', 'book_id', 'pinned'),
        Index('idx_annotation_category', 'category'),
    )
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
class PersonalMetadata(Base):
    """Personal reading metadata (ratings, status, etc).

    At most one row per book (``book_id`` is unique).
    """
    __tablename__ = 'personal_metadata'

    id = Column(Integer, primary_key=True)
    book_id = Column(
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        unique=True,
        nullable=False,
    )

    # Reading status
    rating = Column(Float)  # 0-5 stars
    reading_status = Column(String(20), default='unread')  # unread, reading, read, abandoned
    reading_progress = Column(Integer, default=0)  # 0-100 percentage

    # Collections
    favorite = Column(Boolean, default=False)
    owned = Column(Boolean, default=True)  # vs borrowed/library

    # Reading queue: position is 1-based, NULL = not queued
    queue_position = Column(Integer)

    # Dates
    date_added = Column(DateTime, default=utc_now, nullable=False)
    date_started = Column(DateTime)
    date_finished = Column(DateTime)

    # Quick access tags (denormalized for performance): array of tag strings
    personal_tags = Column(JSON)

    # Counterpart of Book.personal
    book = relationship('Book', back_populates='personal')

    __table_args__ = (
        Index('idx_personal_status', 'reading_status', 'rating'),
        Index('idx_personal_favorite', 'favorite'),
        Index('idx_personal_queue', 'queue_position'),
    )
|
|
532
|
+
|
|
533
|
+
|
|
534
|
+
# ============================================================================
|
|
535
|
+
# Views DSL Models
|
|
536
|
+
# ============================================================================
|
|
537
|
+
# Views provide a composable, non-destructive way to define subsets of the
|
|
538
|
+
# library with optional metadata overrides. Following SICP principles:
|
|
539
|
+
# - Primitives: all, none, filter, ids
|
|
540
|
+
# - Combination: union, intersect, difference
|
|
541
|
+
# - Abstraction: named views become new primitives
|
|
542
|
+
# - Closure: combining views yields a view
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
class View(Base):
    """
    A named view defining a subset of the library with optional transforms.

    Views are non-destructive lenses over the library. A view describes:
    - Selection: which books (via filters, explicit IDs, or references to other views)
    - Transforms: metadata overrides (title, description) per book
    - Ordering: how to sort the results

    The view definition is stored as YAML/JSON in the `definition` field.
    """
    __tablename__ = 'views'

    id = Column(Integer, primary_key=True)
    name = Column(String(200), nullable=False, unique=True, index=True)
    description = Column(Text)

    # Full view definition as YAML-compatible JSON, shaped like
    # {select: ..., transform: ..., order: ...}; defaults to an empty dict.
    definition = Column(JSON, nullable=False, default=dict)

    # Cached count for quick display (updated on eval)
    cached_count = Column(Integer)
    cached_at = Column(DateTime)

    # Timestamps
    created_at = Column(DateTime, default=utc_now, nullable=False)
    updated_at = Column(
        DateTime,
        default=utc_now,
        onupdate=utc_now,
        nullable=False,
    )

    # Per-book overrides belonging to this view
    overrides = relationship(
        'ViewOverride',
        back_populates='view',
        cascade='all, delete-orphan',
    )

    def __repr__(self):
        """Return a short debug representation (id and name)."""
        return f"<View(id={self.id}, name='{self.name}')>"
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
class ViewOverride(Base):
    """
    Per-book metadata overrides within a view.

    Overrides are kept in their own table for efficient querying and so the
    main view definition does not grow. They are non-destructive: the
    original book metadata is unchanged. A view holds at most one override
    per book (``uix_view_book_override``).
    """
    __tablename__ = 'view_overrides'

    id = Column(Integer, primary_key=True)
    view_id = Column(
        Integer,
        ForeignKey('views.id', ondelete='CASCADE'),
        nullable=False,
    )
    book_id = Column(
        Integer,
        ForeignKey('books.id', ondelete='CASCADE'),
        nullable=False,
    )

    # Overrideable fields
    title = Column(String(500))
    description = Column(Text)

    # Custom position for manual ordering within the view
    position = Column(Integer)

    # Timestamps
    created_at = Column(DateTime, default=utc_now, nullable=False)
    updated_at = Column(
        DateTime,
        default=utc_now,
        onupdate=utc_now,
        nullable=False,
    )

    # Counterpart of View.overrides, plus the overridden book
    view = relationship('View', back_populates='overrides')
    book = relationship('Book')

    __table_args__ = (
        UniqueConstraint('view_id', 'book_id', name='uix_view_book_override'),
        Index('idx_view_override_view', 'view_id'),
        Index('idx_view_override_book', 'book_id'),
    )

    def __repr__(self):
        """Return a short debug representation (view id and book id)."""
        return f"<ViewOverride(view_id={self.view_id}, book_id={self.book_id})>"
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
# ============================================================================
|
|
621
|
+
# Reviews and Enrichment Tracking
|
|
622
|
+
# ============================================================================
|
|
623
|
+
|
|
624
|
+
class Review(Base):
    """A user-written review attached to a book.

    Reviews hold longer-form text and are kept apart from simple ratings.
    A review may be a personal note, a summary, a critique, or reading
    notes.

    Examples:
        - Personal review: "I found this book particularly helpful for..."
        - Summary: key takeaways and main points
        - Critique: critical analysis and evaluation
        - Notes: reading notes and observations
    """

    __tablename__ = "reviews"
    __table_args__ = (
        Index("idx_review_book", "book_id"),
        Index("idx_review_type", "review_type"),
        Index("idx_review_created", "created_at"),
    )

    id = Column(Integer, primary_key=True)
    book_id = Column(
        Integer,
        ForeignKey("books.id", ondelete="CASCADE"),
        nullable=False,
    )

    # --- Review content ---
    # Headline / title of the review.
    title = Column(String(255))
    # Full review text (markdown supported); required.
    content = Column(Text, nullable=False)
    # 1-5 stars (separate from PersonalMetadata.rating).
    rating = Column(Float)

    # --- Review metadata ---
    # One of: personal, summary, critique, notes.
    review_type = Column(String(50), default="personal")
    # One of: private, public (for future sharing).
    visibility = Column(String(20), default="private")

    # Row timestamps: both default to utc_now; updated_at is also
    # refreshed through onupdate.
    created_at = Column(DateTime, default=utc_now, nullable=False)
    updated_at = Column(
        DateTime,
        default=utc_now,
        onupdate=utc_now,
        nullable=False,
    )

    # Owning book; the backref exposes the reverse side as Book.reviews.
    book = relationship("Book", backref="reviews")

    def __repr__(self):
        """Return a compact debug string with the id, book id and review type."""
        return "<Review(id={}, book_id={}, type='{}')>".format(
            self.id, self.book_id, self.review_type
        )
|
|
665
|
+
|
|
666
|
+
|
|
667
|
+
class EnrichmentHistory(Base):
    """Provenance log for metadata enrichment.

    Each row records one change made to a book's metadata by automated
    enrichment, so that changes can be audited and rolled back if needed.

    A row captures:
        - which field was changed
        - the old and new values
        - where the enrichment came from (LLM, metadata API, user)
        - the confidence level
    """

    __tablename__ = "enrichment_history"
    __table_args__ = (
        Index("idx_enrichment_book", "book_id"),
        Index("idx_enrichment_source", "source_type"),
        Index("idx_enrichment_field", "field_name"),
        Index("idx_enrichment_date", "enriched_at"),
    )

    id = Column(Integer, primary_key=True)
    book_id = Column(
        Integer,
        ForeignKey("books.id", ondelete="CASCADE"),
        nullable=False,
    )

    # --- What was enriched ---
    # e.g. description, tags, categories, difficulty.
    field_name = Column(String(100), nullable=False)
    # JSON of the previous value.
    old_value = Column(Text)
    # JSON of the new value.
    new_value = Column(Text)

    # --- Source tracking ---
    # e.g. llm, google_books, open_library, user.
    source_type = Column(String(50), nullable=False)
    # e.g. ollama:llama3.2, anthropic:claude-sonnet-4-20250514, etc.
    source_detail = Column(String(200))
    # Confidence in the range 0.0-1.0.
    confidence = Column(Float, default=1.0)

    # --- Status ---
    # Whether this change was applied.
    applied = Column(Boolean, default=True)
    # Whether this change was reverted.
    reverted = Column(Boolean, default=False)

    # When the enrichment was recorded; defaults to utc_now.
    enriched_at = Column(DateTime, default=utc_now, nullable=False)

    # Owning book; the backref exposes the reverse side as
    # Book.enrichment_history.
    book = relationship("Book", backref="enrichment_history")

    def __repr__(self):
        """Return a compact debug string with the id, book id and field name."""
        return "<EnrichmentHistory(id={}, book_id={}, field='{}')>".format(
            self.id, self.book_id, self.field_name
        )
|
|
713
|
+
|
|
714
|
+
|
|
715
|
+
# Full-Text Search Virtual Table (SQLite FTS5)
|
|
716
|
+
# It is created separately because it is SQLite-specific; the DDL in the string below is kept for reference only and is never executed here.
|
|
717
|
+
"""
|
|
718
|
+
CREATE VIRTUAL TABLE books_fts USING fts5(
|
|
719
|
+
book_id UNINDEXED,
|
|
720
|
+
title,
|
|
721
|
+
description,
|
|
722
|
+
content='extracted_texts',
|
|
723
|
+
content_rowid='id'
|
|
724
|
+
);
|
|
725
|
+
"""
|