ebk 0.1.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ebk might be problematic. Click here for more details.
- ebk/__init__.py +35 -0
- ebk/ai/__init__.py +23 -0
- ebk/ai/knowledge_graph.py +443 -0
- ebk/ai/llm_providers/__init__.py +21 -0
- ebk/ai/llm_providers/base.py +230 -0
- ebk/ai/llm_providers/ollama.py +362 -0
- ebk/ai/metadata_enrichment.py +396 -0
- ebk/ai/question_generator.py +328 -0
- ebk/ai/reading_companion.py +224 -0
- ebk/ai/semantic_search.py +434 -0
- ebk/ai/text_extractor.py +394 -0
- ebk/cli.py +2828 -680
- ebk/config.py +260 -22
- ebk/db/__init__.py +37 -0
- ebk/db/migrations.py +180 -0
- ebk/db/models.py +526 -0
- ebk/db/session.py +144 -0
- ebk/decorators.py +132 -0
- ebk/exports/base_exporter.py +218 -0
- ebk/exports/html_library.py +1390 -0
- ebk/exports/html_utils.py +117 -0
- ebk/exports/hugo.py +7 -3
- ebk/exports/jinja_export.py +287 -0
- ebk/exports/multi_facet_export.py +164 -0
- ebk/exports/symlink_dag.py +479 -0
- ebk/extract_metadata.py +76 -7
- ebk/library_db.py +899 -0
- ebk/plugins/__init__.py +42 -0
- ebk/plugins/base.py +502 -0
- ebk/plugins/hooks.py +444 -0
- ebk/plugins/registry.py +500 -0
- ebk/repl/__init__.py +9 -0
- ebk/repl/find.py +126 -0
- ebk/repl/grep.py +174 -0
- ebk/repl/shell.py +1677 -0
- ebk/repl/text_utils.py +320 -0
- ebk/search_parser.py +413 -0
- ebk/server.py +1633 -0
- ebk/services/__init__.py +11 -0
- ebk/services/import_service.py +442 -0
- ebk/services/tag_service.py +282 -0
- ebk/services/text_extraction.py +317 -0
- ebk/similarity/__init__.py +77 -0
- ebk/similarity/base.py +154 -0
- ebk/similarity/core.py +445 -0
- ebk/similarity/extractors.py +168 -0
- ebk/similarity/metrics.py +376 -0
- ebk/vfs/__init__.py +101 -0
- ebk/vfs/base.py +301 -0
- ebk/vfs/library_vfs.py +124 -0
- ebk/vfs/nodes/__init__.py +54 -0
- ebk/vfs/nodes/authors.py +196 -0
- ebk/vfs/nodes/books.py +480 -0
- ebk/vfs/nodes/files.py +155 -0
- ebk/vfs/nodes/metadata.py +385 -0
- ebk/vfs/nodes/root.py +100 -0
- ebk/vfs/nodes/similar.py +165 -0
- ebk/vfs/nodes/subjects.py +184 -0
- ebk/vfs/nodes/tags.py +371 -0
- ebk/vfs/resolver.py +228 -0
- ebk-0.3.2.dist-info/METADATA +755 -0
- ebk-0.3.2.dist-info/RECORD +69 -0
- {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/WHEEL +1 -1
- ebk-0.3.2.dist-info/licenses/LICENSE +21 -0
- ebk/imports/__init__.py +0 -0
- ebk/imports/calibre.py +0 -144
- ebk/imports/ebooks.py +0 -116
- ebk/llm.py +0 -58
- ebk/manager.py +0 -44
- ebk/merge.py +0 -308
- ebk/streamlit/__init__.py +0 -0
- ebk/streamlit/__pycache__/__init__.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/display.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/filters.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/utils.cpython-310.pyc +0 -0
- ebk/streamlit/app.py +0 -185
- ebk/streamlit/display.py +0 -168
- ebk/streamlit/filters.py +0 -151
- ebk/streamlit/utils.py +0 -58
- ebk/utils.py +0 -311
- ebk-0.1.0.dist-info/METADATA +0 -457
- ebk-0.1.0.dist-info/RECORD +0 -29
- {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/entry_points.txt +0 -0
- {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/top_level.txt +0 -0
ebk/db/models.py
ADDED
|
@@ -0,0 +1,526 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SQLAlchemy models for ebk database.
|
|
3
|
+
|
|
4
|
+
Clean, normalized schema with proper relationships and indexes.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
import hashlib
|
|
11
|
+
|
|
12
|
+
from sqlalchemy import (
|
|
13
|
+
create_engine, Column, Integer, String, Text, Boolean, Float,
|
|
14
|
+
DateTime, ForeignKey, Table, UniqueConstraint, Index, JSON
|
|
15
|
+
)
|
|
16
|
+
from sqlalchemy.ext.declarative import declarative_base
|
|
17
|
+
from sqlalchemy.orm import relationship, sessionmaker
|
|
18
|
+
from sqlalchemy.ext.hybrid import hybrid_property
|
|
19
|
+
|
|
20
|
+
Base = declarative_base()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Association tables for many-to-many relationships.
#
# Each table uses its two foreign keys as a composite primary key, so a given
# pair can be linked at most once. Both foreign keys declare ON DELETE CASCADE.
# The remaining columns carry per-link data.

# Book <-> Author links.
book_authors = Table(
    'book_authors',
    Base.metadata,
    Column('book_id', Integer, ForeignKey('books.id', ondelete='CASCADE'), primary_key=True),
    Column('author_id', Integer, ForeignKey('authors.id', ondelete='CASCADE'), primary_key=True),
    Column('role', String(50), default='author'),  # author, editor, translator, contributor
    Column('position', Integer, default=0)  # For ordering
)

# Book <-> Subject links.
book_subjects = Table(
    'book_subjects',
    Base.metadata,
    Column('book_id', Integer, ForeignKey('books.id', ondelete='CASCADE'), primary_key=True),
    Column('subject_id', Integer, ForeignKey('subjects.id', ondelete='CASCADE'), primary_key=True),
    Column('relevance_score', Float, default=1.0),  # How central is this topic (0-1)
    Column('source', String(50), default='user')  # calibre, ai_extracted, user_added
)

# Book <-> Tag links.
book_tags = Table(
    'book_tags',
    Base.metadata,
    Column('book_id', Integer, ForeignKey('books.id', ondelete='CASCADE'), primary_key=True),
    Column('tag_id', Integer, ForeignKey('tags.id', ondelete='CASCADE'), primary_key=True),
    # default is the callable itself, so it is evaluated per inserted row
    Column('created_at', DateTime, default=datetime.utcnow)  # When tag was added
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class Book(Base):
    """Core book entity with metadata.

    Authors, subjects and tags are many-to-many (via the association tables)
    and are loaded with ``lazy='selectin'``. The one-to-many children
    (contributors, identifiers, files, covers, concepts, sessions, annotations)
    and the one-to-one ``personal`` record use ``cascade='all, delete-orphan'``.
    """
    __tablename__ = 'books'

    id = Column(Integer, primary_key=True)
    unique_id = Column(String(32), unique=True, nullable=False, index=True)  # Hash-based

    # Core metadata
    title = Column(String(500), nullable=False, index=True)
    subtitle = Column(String(500))
    sort_title = Column(String(500), index=True)  # For alphabetical sorting
    language = Column(String(10), index=True)  # ISO 639-1 code
    publisher = Column(String(200), index=True)
    publication_date = Column(String(50))  # Flexible: year, YYYY-MM, or YYYY-MM-DD

    # Series information
    series = Column(String(200), index=True)  # Book series name
    series_index = Column(Float)  # Position in series (e.g., 2.5)

    # Edition and rights
    edition = Column(String(100))  # "2nd Edition", "Revised", etc.
    rights = Column(Text)  # Copyright/license statement
    source = Column(String(500))  # Original source URL or reference

    # Rich content
    description = Column(Text)  # Full text indexed separately
    page_count = Column(Integer)
    word_count = Column(Integer)  # From extracted text
    keywords = Column(JSON)  # Array of keyword strings from PDF/metadata

    # User customization
    color = Column(String(7))  # Hex color code (e.g., #FF5733)

    # Timestamps (callables, so they are evaluated per insert/update)
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow, nullable=False)

    # Relationships
    authors = relationship('Author', secondary=book_authors, back_populates='books', lazy='selectin')
    subjects = relationship('Subject', secondary=book_subjects, back_populates='books', lazy='selectin')
    tags = relationship('Tag', secondary=book_tags, back_populates='books', lazy='selectin')
    contributors = relationship('Contributor', back_populates='book', cascade='all, delete-orphan')
    identifiers = relationship('Identifier', back_populates='book', cascade='all, delete-orphan')
    files = relationship('File', back_populates='book', cascade='all, delete-orphan')
    covers = relationship('Cover', back_populates='book', cascade='all, delete-orphan')
    concepts = relationship('BookConcept', back_populates='book', cascade='all, delete-orphan')
    sessions = relationship('ReadingSession', back_populates='book', cascade='all, delete-orphan')
    annotations = relationship('Annotation', back_populates='book', uselist=True, cascade='all, delete-orphan')
    personal = relationship('PersonalMetadata', back_populates='book', uselist=False, cascade='all, delete-orphan')

    # Indexes
    __table_args__ = (
        Index('idx_book_title_lang', 'title', 'language'),
        Index('idx_book_created', 'created_at'),
    )

    # NOTE(review): no SQL expression is defined for the two hybrids below, so
    # they are only meaningful on instances, not in query filters.
    @hybrid_property
    def primary_file(self) -> Optional['File']:
        """Get the primary file (prefer PDF > EPUB > others).

        Returns:
            The attached file with the best format priority, or None when the
            book has no files. Ties keep the order of ``self.files``.
        """
        if not self.files:
            return None
        format_priority = {'pdf': 0, 'epub': 1, 'mobi': 2, 'azw3': 3}
        # min() returns the first minimal element, matching what a stable
        # sort followed by [0] would give. A missing format (e.g. on an
        # unflushed File) is treated as lowest priority instead of raising.
        return min(
            self.files,
            key=lambda f: format_priority.get((f.format or '').lower(), 99)
        )

    @hybrid_property
    def primary_cover(self) -> Optional['Cover']:
        """Get the primary cover image.

        Returns:
            The first cover flagged ``is_primary``; otherwise the first cover;
            None when the book has no covers.
        """
        for cover in self.covers:
            if cover.is_primary:
                return cover
        return self.covers[0] if self.covers else None

    def __repr__(self):
        # title is None on an instance that has not been populated yet;
        # repr() must not raise in that case.
        title = (self.title or '')[:50]
        return f"<Book(id={self.id}, title='{title}')>"
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class Author(Base):
    """Author/creator entity.

    Linked to books many-to-many through the ``book_authors`` table.
    """
    __tablename__ = 'authors'

    id = Column(Integer, primary_key=True)
    name = Column(String(200), nullable=False, index=True)
    sort_name = Column(String(200), index=True)  # "Tolkien, J.R.R."
    bio = Column(Text)
    birth_year = Column(Integer)
    death_year = Column(Integer)

    # Relationships
    books = relationship('Book', secondary=book_authors, back_populates='authors')

    __table_args__ = (
        # One row per distinct author name.
        UniqueConstraint('name', name='uix_author_name'),
    )

    def __repr__(self) -> str:
        return f"<Author(id={self.id}, name='{self.name}')>"
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class Subject(Base):
    """Subject/tag/genre with hierarchical support.

    A subject may point at a parent subject through ``parent_id``. The
    ``children`` collection is created by the backref on ``parent``.
    """
    __tablename__ = 'subjects'

    id = Column(Integer, primary_key=True)
    name = Column(String(200), nullable=False, unique=True, index=True)
    # The foreign key declares ON DELETE SET NULL rather than CASCADE.
    parent_id = Column(Integer, ForeignKey('subjects.id', ondelete='SET NULL'))
    type = Column(String(50), default='topic')  # genre, topic, keyword, personal_tag

    # Self-referential relationship for hierarchy.
    # `id` below is the Column defined above in this class body.
    parent = relationship('Subject', remote_side=[id], backref='children')
    books = relationship('Book', secondary=book_subjects, back_populates='subjects')

    def __repr__(self) -> str:
        return f"<Subject(id={self.id}, name='{self.name}', type='{self.type}')>"
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
class Tag(Base):
    """User-defined hierarchical tag used to organize books.

    Tags are distinct from Subjects:
    - Subjects: bibliographic metadata (what the book is about)
    - Tags: user-defined organization (how you use/categorize the book)

    The hierarchy is encoded twice: by ``parent_id`` and by the
    slash-separated ``path``.

    Examples:
    - path="Work/Project-2024"
    - path="Personal/To-Read"
    - path="Reference/Programming/Python"
    """
    __tablename__ = 'tags'

    id = Column(Integer, primary_key=True)
    name = Column(String(200), nullable=False, index=True)  # Name at this level (e.g., "Python")
    path = Column(String(500), nullable=False, unique=True, index=True)  # Full path (e.g., "Programming/Python")
    parent_id = Column(Integer, ForeignKey('tags.id', ondelete='CASCADE'))

    # Metadata
    description = Column(Text)  # Optional description of the tag
    color = Column(String(7))  # Hex color code for UI display (e.g., "#FF5733")
    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    # Self-referential relationship for hierarchy
    parent = relationship('Tag', remote_side=[id], backref='children')
    books = relationship('Book', secondary=book_tags, back_populates='tags')

    __table_args__ = (
        Index('idx_tag_path', 'path'),
        Index('idx_tag_parent', 'parent_id'),
    )

    @property
    def depth(self) -> int:
        """Depth in the hierarchy, derived from ``path`` (root=0)."""
        # N separators split the path into N + 1 parts.
        return len(self.path.split('/')) - 1

    @property
    def ancestors(self) -> List['Tag']:
        """Ancestor tags ordered from the root down to the direct parent."""
        lineage = []
        node = self.parent
        # Walk upward collecting parent-first, then flip to root-first.
        while node:
            lineage.append(node)
            node = node.parent
        lineage.reverse()
        return lineage

    @property
    def full_path_parts(self) -> List[str]:
        """Components of ``path`` split on '/'."""
        parts = self.path.split('/')
        return parts

    def __repr__(self):
        return "<Tag(id={}, path='{}')>".format(self.id, self.path)
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
class Contributor(Base):
    """Contributors to a book (editors, translators, illustrators, etc).

    Each row belongs to exactly one book (``book_id`` is NOT NULL).
    """
    __tablename__ = 'contributors'

    id = Column(Integer, primary_key=True)
    book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), nullable=False)

    name = Column(String(200), nullable=False, index=True)
    role = Column(String(50), nullable=False)  # editor, translator, illustrator, etc.
    file_as = Column(String(200))  # Sorting name

    book = relationship('Book', back_populates='contributors')

    __table_args__ = (
        # NOTE(review): `name` already sets index=True above, so this looks
        # like a second index on the same column — confirm both are wanted.
        Index('idx_contributor_name', 'name'),
        Index('idx_contributor_role', 'role'),
    )

    def __repr__(self) -> str:
        return f"<Contributor(name='{self.name}', role='{self.role}')>"
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
class Identifier(Base):
    """Flexible identifiers (ISBN, DOI, etc).

    Stored as (scheme, value) pairs attached to a single book.
    """
    __tablename__ = 'identifiers'

    id = Column(Integer, primary_key=True)
    book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), nullable=False)
    scheme = Column(String(50), nullable=False, index=True)  # isbn, doi, arxiv, goodreads
    value = Column(String(200), nullable=False, index=True)

    book = relationship('Book', back_populates='identifiers')

    __table_args__ = (
        # Uniqueness is per book: the same (scheme, value) may still appear
        # on different books.
        UniqueConstraint('book_id', 'scheme', 'value', name='uix_identifier'),
    )

    def __repr__(self) -> str:
        return f"<Identifier(scheme='{self.scheme}', value='{self.value}')>"
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
class File(Base):
    """An ebook file belonging to a book, plus text-extraction bookkeeping."""
    __tablename__ = 'files'

    id = Column(Integer, primary_key=True)
    book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), nullable=False)

    path = Column(String(500), nullable=False)  # Relative to library root
    format = Column(String(20), nullable=False, index=True)  # pdf, epub, mobi
    size_bytes = Column(Integer)
    file_hash = Column(String(64), unique=True, nullable=False, index=True)  # SHA256

    # File metadata
    mime_type = Column(String(100))  # Full MIME type (e.g., application/pdf)
    created_date = Column(DateTime)  # File creation time from filesystem
    modified_date = Column(DateTime)  # File modification time from filesystem
    creator_application = Column(String(200))  # PDF: Creator app (e.g., "LaTeX")

    # Text extraction status
    text_extracted = Column(Boolean, default=False)
    extraction_date = Column(DateTime)

    book = relationship('Book', back_populates='files')
    extracted_text = relationship('ExtractedText', back_populates='file', uselist=False, cascade='all, delete-orphan')
    chunks = relationship('TextChunk', back_populates='file', cascade='all, delete-orphan')

    @staticmethod
    def compute_hash(file_path: Path) -> str:
        """Return the hex SHA256 digest of the file at ``file_path``.

        The file is read in 8192-byte blocks, so it is never held in memory
        as a whole.
        """
        digest = hashlib.sha256()
        with open(file_path, 'rb') as stream:
            while True:
                piece = stream.read(8192)
                if piece == b'':
                    break
                digest.update(piece)
        return digest.hexdigest()

    def __repr__(self):
        return "<File(id={}, format='{}', path='{}')>".format(self.id, self.format, self.path)
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
class ExtractedText(Base):
    """Full extracted text for search.

    At most one row per file: ``file_id`` is unique and NOT NULL.
    """
    __tablename__ = 'extracted_texts'

    id = Column(Integer, primary_key=True)
    file_id = Column(Integer, ForeignKey('files.id', ondelete='CASCADE'), unique=True, nullable=False)

    content = Column(Text, nullable=False)  # Full text - will use FTS5 virtual table
    content_hash = Column(String(64), nullable=False)
    extracted_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    file = relationship('File', back_populates='extracted_text')

    def __repr__(self):
        # content is None on an instance that has not been populated yet;
        # report length 0 instead of raising TypeError from len(None).
        length = len(self.content) if self.content is not None else 0
        return f"<ExtractedText(file_id={self.file_id}, length={length})>"
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
class TextChunk(Base):
    """Chunks for semantic search with embeddings.

    A file's text is stored as ordered chunks. The embedding vector itself is
    not stored here; ``has_embedding`` only flags that one exists.
    """
    __tablename__ = 'text_chunks'

    id = Column(Integer, primary_key=True)
    file_id = Column(Integer, ForeignKey('files.id', ondelete='CASCADE'), nullable=False)

    chunk_index = Column(Integer, nullable=False)  # Order within file
    content = Column(Text, nullable=False)  # 500-1000 words

    # Page range (if available)
    start_page = Column(Integer)
    end_page = Column(Integer)

    # Embedding stored separately (pickle file or vector extension)
    has_embedding = Column(Boolean, default=False)

    file = relationship('File', back_populates='chunks')

    __table_args__ = (
        # A chunk index may appear only once per file.
        UniqueConstraint('file_id', 'chunk_index', name='uix_chunk'),
        # NOTE(review): same columns as the unique constraint above — confirm
        # the separate index is still needed.
        Index('idx_chunk_file', 'file_id', 'chunk_index'),
    )

    def __repr__(self) -> str:
        return f"<TextChunk(id={self.id}, file_id={self.file_id}, index={self.chunk_index})>"
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
class Cover(Base):
    """Cover images.

    A book may have several covers. ``is_primary`` defaults to True, and
    nothing in this model limits a book to a single primary cover.
    """
    __tablename__ = 'covers'

    id = Column(Integer, primary_key=True)
    book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), nullable=False)

    path = Column(String(500), nullable=False)  # Relative to library root
    width = Column(Integer)
    height = Column(Integer)
    is_primary = Column(Boolean, default=True)
    source = Column(String(50), default='extracted')  # extracted, user_provided, downloaded

    book = relationship('Book', back_populates='covers')

    def __repr__(self) -> str:
        return f"<Cover(id={self.id}, book_id={self.book_id}, path='{self.path}')>"
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
class Concept(Base):
    """Knowledge graph concepts.

    A concept is a graph node. Edges are ``ConceptRelation`` rows, reachable
    as ``outgoing_relations`` (this concept is the source) and
    ``incoming_relations`` (this concept is the target).
    """
    __tablename__ = 'concepts'

    id = Column(Integer, primary_key=True)
    name = Column(String(200), nullable=False, unique=True, index=True)
    description = Column(Text)
    concept_type = Column(String(50), default='idea')  # definition, idea, theory, principle
    importance_score = Column(Float, default=0.0, index=True)  # PageRank score

    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    # Relationships
    book_concepts = relationship('BookConcept', back_populates='concept', cascade='all, delete-orphan')
    # ConceptRelation has two foreign keys to concepts, so each relationship
    # names the one it uses.
    outgoing_relations = relationship('ConceptRelation', foreign_keys='ConceptRelation.source_id', back_populates='source', cascade='all, delete-orphan')
    incoming_relations = relationship('ConceptRelation', foreign_keys='ConceptRelation.target_id', back_populates='target', cascade='all, delete-orphan')

    def __repr__(self) -> str:
        return f"<Concept(id={self.id}, name='{self.name}')>"
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
class BookConcept(Base):
    """Link between books and concepts they discuss.

    An association object rather than a plain association table: each link
    has its own primary key and carries supporting data.
    """
    __tablename__ = 'book_concepts'

    id = Column(Integer, primary_key=True)
    book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), nullable=False)
    concept_id = Column(Integer, ForeignKey('concepts.id', ondelete='CASCADE'), nullable=False)

    page_references = Column(JSON)  # Array of page numbers
    quote_examples = Column(JSON)  # Array of relevant quotes
    confidence_score = Column(Float, default=1.0)

    book = relationship('Book', back_populates='concepts')
    concept = relationship('Concept', back_populates='book_concepts')

    __table_args__ = (
        # A concept can be linked to a given book only once.
        UniqueConstraint('book_id', 'concept_id', name='uix_book_concept'),
    )
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
class ConceptRelation(Base):
    """Relationships between concepts (knowledge graph edges).

    A directed, typed edge from ``source`` to ``target``, optionally tied to
    the book that evidences it.
    """
    __tablename__ = 'concept_relations'

    id = Column(Integer, primary_key=True)
    source_id = Column(Integer, ForeignKey('concepts.id', ondelete='CASCADE'), nullable=False)
    target_id = Column(Integer, ForeignKey('concepts.id', ondelete='CASCADE'), nullable=False)

    relation_type = Column(String(50), nullable=False)  # supports, contradicts, extends, examples, causes
    strength = Column(Float, default=1.0)  # 0-1
    # Nullable, and the foreign key declares ON DELETE SET NULL (not CASCADE).
    evidence_book_id = Column(Integer, ForeignKey('books.id', ondelete='SET NULL'))

    # Two foreign keys point at concepts, so each side names its own column.
    source = relationship('Concept', foreign_keys=[source_id], back_populates='outgoing_relations')
    target = relationship('Concept', foreign_keys=[target_id], back_populates='incoming_relations')
    evidence_book = relationship('Book')

    __table_args__ = (
        # At most one edge of a given type between an ordered pair of concepts.
        UniqueConstraint('source_id', 'target_id', 'relation_type', name='uix_concept_relation'),
        Index('idx_relation_source', 'source_id'),
        Index('idx_relation_target', 'target_id'),
    )
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
class ReadingSession(Base):
    """A single reading session for a book, tracked for active recall."""
    __tablename__ = 'reading_sessions'

    id = Column(Integer, primary_key=True)
    book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), nullable=False)

    start_time = Column(DateTime, default=datetime.utcnow, nullable=False)
    end_time = Column(DateTime)
    pages_read = Column(Integer, default=0)

    highlights = Column(JSON)  # Array of highlight texts
    notes = Column(JSON)  # Array of note objects
    comprehension_score = Column(Float)  # From quiz results

    book = relationship('Book', back_populates='sessions')

    @hybrid_property
    def duration_minutes(self) -> Optional[float]:
        """Session length in minutes, or None while either timestamp is unset."""
        if not (self.end_time and self.start_time):
            return None
        elapsed = self.end_time - self.start_time
        return elapsed.total_seconds() / 60
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
class Annotation(Base):
    """Highlights, notes, bookmarks.

    Always belongs to a book; may also reference the reading session it was
    made in.
    """
    __tablename__ = 'annotations'

    id = Column(Integer, primary_key=True)
    book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), nullable=False)
    # Nullable, and the foreign key declares ON DELETE SET NULL (not CASCADE).
    session_id = Column(Integer, ForeignKey('reading_sessions.id', ondelete='SET NULL'))

    annotation_type = Column(String(20), nullable=False)  # highlight, note, bookmark
    page_number = Column(Integer)
    position = Column(JSON)  # {char_offset: int} or {x: float, y: float}
    content = Column(Text, nullable=False)  # The highlighted text or note content
    color = Column(String(20))  # For highlights

    created_at = Column(DateTime, default=datetime.utcnow, nullable=False)

    book = relationship('Book', back_populates='annotations')
    # One-directional: no back_populates is declared on ReadingSession.
    session = relationship('ReadingSession')

    __table_args__ = (
        Index('idx_annotation_book', 'book_id', 'annotation_type'),
    )
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
class PersonalMetadata(Base):
    """Personal reading metadata (ratings, status, etc).

    One-to-one with Book: ``book_id`` is unique and NOT NULL.
    """
    __tablename__ = 'personal_metadata'

    id = Column(Integer, primary_key=True)
    book_id = Column(Integer, ForeignKey('books.id', ondelete='CASCADE'), unique=True, nullable=False)

    # Reading status
    rating = Column(Float)  # 0-5 stars
    reading_status = Column(String(20), default='unread')  # unread, reading, read, abandoned
    reading_progress = Column(Integer, default=0)  # 0-100 percentage

    # Collections
    favorite = Column(Boolean, default=False)
    owned = Column(Boolean, default=True)  # vs borrowed/library

    # Dates
    date_added = Column(DateTime, default=datetime.utcnow, nullable=False)
    date_started = Column(DateTime)
    date_finished = Column(DateTime)

    # Quick access tags (denormalized for performance)
    personal_tags = Column(JSON)  # Array of tag strings

    book = relationship('Book', back_populates='personal')

    __table_args__ = (
        Index('idx_personal_status', 'reading_status', 'rating'),
        Index('idx_personal_favorite', 'favorite'),
    )
|
|
514
|
+
|
|
515
|
+
|
|
516
|
+
# Full-Text Search Virtual Table (SQLite FTS5)
|
|
517
|
+
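# It is SQLite-specific, so it is not part of Base.metadata and is created separately by init_db() in ebk/db/session.py. The DDL below is a reference sketch only and is not identical to the statement init_db() executes.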
|
|
518
|
+
"""
|
|
519
|
+
CREATE VIRTUAL TABLE books_fts USING fts5(
|
|
520
|
+
book_id UNINDEXED,
|
|
521
|
+
title,
|
|
522
|
+
description,
|
|
523
|
+
content='extracted_texts',
|
|
524
|
+
content_rowid='id'
|
|
525
|
+
);
|
|
526
|
+
"""
|
ebk/db/session.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Database session management for ebk.
|
|
3
|
+
|
|
4
|
+
Provides session factory and initialization utilities.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional
|
|
9
|
+
from contextlib import contextmanager
|
|
10
|
+
|
|
11
|
+
from sqlalchemy import create_engine, event, text
|
|
12
|
+
from sqlalchemy.orm import sessionmaker, Session
|
|
13
|
+
from sqlalchemy.engine import Engine
|
|
14
|
+
|
|
15
|
+
from .models import Base
|
|
16
|
+
|
|
17
|
+
# Global session factory and engine, shared by the functions in this module.
# Both are None until init_db() assigns them; close_db() resets them to None.
_SessionFactory: Optional[sessionmaker] = None
_engine: Optional[Engine] = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def init_db(library_path: Path, echo: bool = False) -> Engine:
    """
    Initialize database and create all tables.

    Creates ``library_path`` if needed, opens (or creates) the SQLite file
    ``library.db`` inside it, creates every table registered on
    ``Base.metadata``, makes sure the ``books_fts`` FTS5 table exists, and
    installs the module-level engine and session factory.

    Args:
        library_path: Path to library directory
        echo: If True, log all SQL statements (debug mode)

    Returns:
        SQLAlchemy engine
    """
    global _engine, _SessionFactory

    library_path = Path(library_path)
    library_path.mkdir(parents=True, exist_ok=True)

    db_path = library_path / 'library.db'
    db_url = f'sqlite:///{db_path}'

    _engine = create_engine(db_url, echo=echo)

    # Enable foreign keys for SQLite.
    # The listener is attached to this engine instance, not to the Engine
    # class: a class-level listener would be added again on every init_db()
    # call and would also fire for every other engine in the process.
    @event.listens_for(_engine, "connect")
    def set_sqlite_pragma(dbapi_conn, connection_record):
        cursor = dbapi_conn.cursor()
        try:
            cursor.execute("PRAGMA foreign_keys=ON")
        finally:
            cursor.close()

    # Create all tables
    Base.metadata.create_all(_engine)

    # Create FTS5 virtual table for full-text search. It is not part of
    # Base.metadata, so create_all() above does not handle it.
    with _engine.connect() as conn:
        # Check if FTS table exists
        result = conn.execute(
            text("SELECT name FROM sqlite_master WHERE type='table' AND name='books_fts'")
        )
        if not result.fetchone():
            conn.execute(text("""
                CREATE VIRTUAL TABLE books_fts USING fts5(
                    book_id UNINDEXED,
                    title,
                    description,
                    extracted_text,
                    tokenize='porter unicode61'
                )
            """))
            conn.commit()

    # Create session factory
    _SessionFactory = sessionmaker(bind=_engine)

    return _engine
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_session() -> Session:
    """
    Open a new session from the module-level session factory.

    Returns:
        A new SQLAlchemy session; the caller is responsible for closing it.

    Raises:
        RuntimeError: If init_db() has not installed a session factory
    """
    factory = _SessionFactory
    if factory is not None:
        return factory()
    raise RuntimeError(
        "Database not initialized. Call init_db() first."
    )
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@contextmanager
def session_scope():
    """
    Provide a transactional scope around a series of operations.

    Yields a session obtained from get_session(). When the ``with`` body
    finishes normally the session is committed. When the body (or the commit)
    raises an ``Exception`` the session is rolled back and the exception is
    re-raised. The session is closed in every case.

    Usage:
        with session_scope() as session:
            session.add(book)
            # Automatically commits or rolls back

    Raises:
        RuntimeError: Propagated from get_session() if the database has not
            been initialized.
    """
    session = get_session()
    try:
        yield session
        # Reached only when the with-body completed without raising.
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def close_db() -> None:
    """Close database connection and cleanup.

    Disposes the module-level engine, if one is set, and resets both the
    engine and the session factory to None, so get_session() raises until
    init_db() is called again. Safe to call when nothing was initialized.
    """
    global _engine, _SessionFactory

    if _engine:
        _engine.dispose()
        _engine = None

    _SessionFactory = None
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def get_or_create(session: Session, model, *, defaults: Optional[dict] = None, **kwargs):
    """
    Get existing instance or create new one.

    The lookup uses ``kwargs`` only. When no row matches, a new instance is
    built from ``kwargs`` plus ``defaults`` and added to the session. It is
    not flushed or committed here, so database-generated values such as the
    primary key are not yet populated on the returned object.

    Args:
        session: Database session
        model: SQLAlchemy model class
        defaults: Optional extra attribute values applied only when a new
            instance is created; they are never used as filter criteria
        **kwargs: Filter criteria, also used as constructor values when
            creating

    Returns:
        Tuple of (instance, created: bool)
    """
    instance = session.query(model).filter_by(**kwargs).first()
    # Compare against None so an existing row is never mistaken for missing
    # because of its truthiness.
    if instance is not None:
        return instance, False

    values = dict(kwargs)
    if defaults:
        values.update(defaults)
    instance = model(**values)
    session.add(instance)
    return instance, True
|