ebk 0.1.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ebk might be problematic. Click here for more details.
- ebk/__init__.py +35 -0
- ebk/ai/__init__.py +23 -0
- ebk/ai/knowledge_graph.py +443 -0
- ebk/ai/llm_providers/__init__.py +21 -0
- ebk/ai/llm_providers/base.py +230 -0
- ebk/ai/llm_providers/ollama.py +362 -0
- ebk/ai/metadata_enrichment.py +396 -0
- ebk/ai/question_generator.py +328 -0
- ebk/ai/reading_companion.py +224 -0
- ebk/ai/semantic_search.py +434 -0
- ebk/ai/text_extractor.py +394 -0
- ebk/cli.py +2828 -680
- ebk/config.py +260 -22
- ebk/db/__init__.py +37 -0
- ebk/db/migrations.py +180 -0
- ebk/db/models.py +526 -0
- ebk/db/session.py +144 -0
- ebk/decorators.py +132 -0
- ebk/exports/base_exporter.py +218 -0
- ebk/exports/html_library.py +1390 -0
- ebk/exports/html_utils.py +117 -0
- ebk/exports/hugo.py +7 -3
- ebk/exports/jinja_export.py +287 -0
- ebk/exports/multi_facet_export.py +164 -0
- ebk/exports/symlink_dag.py +479 -0
- ebk/extract_metadata.py +76 -7
- ebk/library_db.py +899 -0
- ebk/plugins/__init__.py +42 -0
- ebk/plugins/base.py +502 -0
- ebk/plugins/hooks.py +444 -0
- ebk/plugins/registry.py +500 -0
- ebk/repl/__init__.py +9 -0
- ebk/repl/find.py +126 -0
- ebk/repl/grep.py +174 -0
- ebk/repl/shell.py +1677 -0
- ebk/repl/text_utils.py +320 -0
- ebk/search_parser.py +413 -0
- ebk/server.py +1633 -0
- ebk/services/__init__.py +11 -0
- ebk/services/import_service.py +442 -0
- ebk/services/tag_service.py +282 -0
- ebk/services/text_extraction.py +317 -0
- ebk/similarity/__init__.py +77 -0
- ebk/similarity/base.py +154 -0
- ebk/similarity/core.py +445 -0
- ebk/similarity/extractors.py +168 -0
- ebk/similarity/metrics.py +376 -0
- ebk/vfs/__init__.py +101 -0
- ebk/vfs/base.py +301 -0
- ebk/vfs/library_vfs.py +124 -0
- ebk/vfs/nodes/__init__.py +54 -0
- ebk/vfs/nodes/authors.py +196 -0
- ebk/vfs/nodes/books.py +480 -0
- ebk/vfs/nodes/files.py +155 -0
- ebk/vfs/nodes/metadata.py +385 -0
- ebk/vfs/nodes/root.py +100 -0
- ebk/vfs/nodes/similar.py +165 -0
- ebk/vfs/nodes/subjects.py +184 -0
- ebk/vfs/nodes/tags.py +371 -0
- ebk/vfs/resolver.py +228 -0
- ebk-0.3.2.dist-info/METADATA +755 -0
- ebk-0.3.2.dist-info/RECORD +69 -0
- {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/WHEEL +1 -1
- ebk-0.3.2.dist-info/licenses/LICENSE +21 -0
- ebk/imports/__init__.py +0 -0
- ebk/imports/calibre.py +0 -144
- ebk/imports/ebooks.py +0 -116
- ebk/llm.py +0 -58
- ebk/manager.py +0 -44
- ebk/merge.py +0 -308
- ebk/streamlit/__init__.py +0 -0
- ebk/streamlit/__pycache__/__init__.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/display.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/filters.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/utils.cpython-310.pyc +0 -0
- ebk/streamlit/app.py +0 -185
- ebk/streamlit/display.py +0 -168
- ebk/streamlit/filters.py +0 -151
- ebk/streamlit/utils.py +0 -58
- ebk/utils.py +0 -311
- ebk-0.1.0.dist-info/METADATA +0 -457
- ebk-0.1.0.dist-info/RECORD +0 -29
- {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/entry_points.txt +0 -0
- {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/top_level.txt +0 -0
ebk/library_db.py
ADDED
|
@@ -0,0 +1,899 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Database-backed Library class for ebk.
|
|
3
|
+
|
|
4
|
+
Provides a fluent API for managing ebook libraries using SQLAlchemy + SQLite.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import List, Dict, Any, Optional, Tuple
|
|
9
|
+
from datetime import datetime
|
|
10
|
+
import logging
|
|
11
|
+
|
|
12
|
+
from sqlalchemy import func, or_, and_, text
|
|
13
|
+
from sqlalchemy.orm import Session
|
|
14
|
+
|
|
15
|
+
from .db.models import Book, Author, Subject, File, ExtractedText, PersonalMetadata
|
|
16
|
+
from .db.session import init_db, get_session, close_db
|
|
17
|
+
from .services.import_service import ImportService
|
|
18
|
+
from .services.text_extraction import TextExtractionService
|
|
19
|
+
from .search_parser import parse_search_query
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Library:
|
|
25
|
+
"""
|
|
26
|
+
Database-backed library for managing ebooks.
|
|
27
|
+
|
|
28
|
+
Usage:
|
|
29
|
+
lib = Library.open("/path/to/library")
|
|
30
|
+
lib.add_book(Path("book.pdf"), {"title": "My Book", "creators": ["Author"]})
|
|
31
|
+
results = lib.search("python programming")
|
|
32
|
+
stats = lib.stats()
|
|
33
|
+
lib.close()
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
def __init__(self, library_path: Path, session: Session):
    """
    Bind a library directory to an already-open database session.

    Args:
        library_path: Path to library directory (coerced to ``Path``)
        session: SQLAlchemy session used for all queries and commits
    """
    self.library_path = Path(library_path)
    self.session = session
    # Hand the services the normalized Path rather than the raw argument,
    # so a caller passing a plain string gets the same behaviour as one
    # passing a Path.
    self.import_service = ImportService(self.library_path, session)
    self.text_service = TextExtractionService(self.library_path)
|
|
41
|
+
|
|
42
|
+
@classmethod
def open(cls, library_path: Path, echo: bool = False) -> 'Library':
    """
    Open a library, initializing its database first.

    Args:
        library_path: Path to library directory
        echo: If True, log all SQL statements

    Returns:
        Library instance bound to a fresh session
    """
    library_path = Path(library_path)

    # The database must be initialized before a session can be requested.
    init_db(library_path, echo=echo)
    db_session = get_session()

    logger.info(f"Opened library at {library_path}")
    return cls(library_path, db_session)
|
|
60
|
+
|
|
61
|
+
def close(self):
    """Close the session (if any), then shut down the database layer."""
    active_session = self.session
    if active_session:
        active_session.close()

    close_db()
    logger.info("Closed library")
|
|
67
|
+
|
|
68
|
+
def add_book(self, file_path: Path, metadata: Dict[str, Any],
             extract_text: bool = True, extract_cover: bool = True) -> Optional[Book]:
    """
    Import a single ebook file through the import service.

    Args:
        file_path: Path to ebook file
        metadata: Metadata dictionary (title, creators, subjects, etc.)
        extract_text: Whether to extract full text
        extract_cover: Whether to extract cover image

    Returns:
        Whatever the import service returns: a Book, or None on failure
    """
    import_options = {
        "extract_text": extract_text,
        "extract_cover": extract_cover,
    }
    book = self.import_service.import_file(file_path, metadata, **import_options)

    # Nothing to report when the import did not produce a book.
    if not book:
        return book

    logger.info(f"Added book: {book.title}")
    return book
|
|
93
|
+
|
|
94
|
+
def add_calibre_book(self, metadata_opf_path: Path) -> Optional[Book]:
    """
    Import a book described by a Calibre ``metadata.opf`` file.

    Args:
        metadata_opf_path: Path to metadata.opf

    Returns:
        Result of the import service (Book instance or None)
    """
    imported = self.import_service.import_calibre_book(metadata_opf_path)
    return imported
|
|
105
|
+
|
|
106
|
+
def batch_import(self, files_and_metadata: List[Tuple[Path, Dict[str, Any]]],
                 show_progress: bool = True) -> List[Book]:
    """
    Import several books in one call.

    Args:
        files_and_metadata: List of (file_path, metadata) tuples
        show_progress: Whether to show progress bar

    Returns:
        List of imported Book instances, as returned by the import service
    """
    # The import service takes two parallel lists, so split the pairs.
    file_paths = []
    metadata_list = []
    for path, meta in files_and_metadata:
        file_paths.append(path)
        metadata_list.append(meta)

    return self.import_service.batch_import(
        file_paths, metadata_list, show_progress=show_progress
    )
|
|
126
|
+
|
|
127
|
+
def get_book(self, book_id: int) -> Optional[Book]:
    """Look up a book by primary key; returns None when no row matches."""
    book_query = self.session.query(Book)
    return book_query.get(book_id)
|
|
130
|
+
|
|
131
|
+
def get_book_by_unique_id(self, unique_id: str) -> Optional[Book]:
    """Return the first book whose ``unique_id`` matches, or None."""
    matching = self.session.query(Book).filter_by(unique_id=unique_id)
    return matching.first()
|
|
134
|
+
|
|
135
|
+
def query(self) -> 'QueryBuilder':
    """Return a new fluent QueryBuilder bound to this library's session."""
    builder = QueryBuilder(self.session)
    return builder
|
|
138
|
+
|
|
139
|
+
def search(self, query: str, limit: int = 50) -> List[Book]:
    """
    Advanced search across books with field-specific queries and boolean logic.

    Supports:
        - Field searches: title:Python, author:Knuth, tag:programming
        - Phrases: "machine learning"
        - Boolean: AND (implicit), OR (explicit), NOT/-prefix (negation)
        - Comparisons: rating:>=4, rating:3-5
        - Filters: language:en, format:pdf, favorite:true

    Examples:
        title:Python rating:>=4 format:pdf
        author:"Donald Knuth" series:TAOCP
        tag:programming favorite:true NOT java

    If the parsed search raises, the error is logged once (with traceback)
    and the raw query string is retried as a plain FTS5 MATCH. If that also
    fails, an empty list is returned.

    Args:
        query: Search query (supports advanced syntax or plain text)
        limit: Maximum number of results

    Returns:
        List of matching books. When full-text terms are present the list
        is in FTS rank order.
    """
    try:
        parsed = parse_search_query(query)

        has_fts = parsed.has_fts_terms()
        if not has_fts and not parsed.has_filters():
            return []

        ranked_ids: List[int] = []
        if has_fts:
            # Over-fetch so the SQL filters below can discard some hits
            # and still leave up to `limit` results.
            ranked_ids = self._fts_ranked_ids(parsed.fts_query, limit * 2)
            if not ranked_ids:
                return []

        from .search_parser import SearchQueryParser
        where_clause, params = SearchQueryParser().to_sql_conditions(parsed)

        if ranked_ids:
            # FTS hits, optionally narrowed by the SQL filters. The limit
            # is applied only after rank ordering, never in SQL: an
            # unordered SQL LIMIT could drop the best-ranked matches.
            return self._books_in_rank_order(
                ranked_ids, limit, where_clause, params
            )

        if where_clause:
            # Filters only (no full-text terms).
            return (
                self.session.query(Book)
                .filter(text(where_clause).bindparams(**params))
                .limit(limit)
                .all()
            )

        return []

    except Exception as e:
        # logger.exception records the message and the traceback once.
        logger.exception(f"Search error: {e}")
        # Fallback to original simple FTS search
        try:
            ranked_ids = self._fts_ranked_ids(query, limit)
            if not ranked_ids:
                return []
            return self._books_in_rank_order(ranked_ids, limit)
        except Exception as fallback_error:
            logger.error(f"Fallback search also failed: {fallback_error}")
            return []

def _fts_ranked_ids(self, match_expression: str, max_rows: int) -> List[int]:
    """Return book IDs matching an FTS5 expression, best rank first."""
    result = self.session.execute(
        text("""
            SELECT book_id, rank
            FROM books_fts
            WHERE books_fts MATCH :query
            ORDER BY rank
            LIMIT :limit
        """),
        {"query": match_expression, "limit": max_rows}
    )
    return [row[0] for row in result]

def _books_in_rank_order(self, ranked_ids: List[int], limit: int,
                         where_clause: Optional[str] = None,
                         params: Optional[Dict[str, Any]] = None) -> List[Book]:
    """Load the books for ranked_ids (optionally filtered), keeping rank order."""
    books_query = self.session.query(Book).filter(Book.id.in_(ranked_ids))
    if where_clause:
        books_query = books_query.filter(
            text(where_clause).bindparams(**(params or {}))
        )

    books_by_id = {b.id: b for b in books_query.all()}
    ordered = [books_by_id[bid] for bid in ranked_ids if bid in books_by_id]
    return ordered[:limit]
|
|
250
|
+
|
|
251
|
+
def stats(self) -> Dict[str, Any]:
    """
    Collect summary counts and distributions for the library.

    Returns:
        Dictionary with total counts (books, authors, subjects, files),
        the number of personal-metadata rows marked 'read' and 'reading',
        and per-language / per-format counts.
    """
    session = self.session

    def count_of(column, *criteria):
        # COUNT(column), optionally restricted by filter criteria.
        return session.query(func.count(column)).filter(*criteria).scalar()

    def grouped_counts(group_column, counted_column):
        # Rows of (group value, count) for a GROUP BY on group_column.
        return (
            session.query(group_column, func.count(counted_column))
            .group_by(group_column)
            .all()
        )

    summary = {
        'total_books': count_of(Book.id),
        'total_authors': count_of(Author.id),
        'total_subjects': count_of(Subject.id),
        'total_files': count_of(File.id),
        'read_count': count_of(
            PersonalMetadata.id, PersonalMetadata.reading_status == 'read'
        ),
        'reading_count': count_of(
            PersonalMetadata.id, PersonalMetadata.reading_status == 'reading'
        ),
        'languages': dict(grouped_counts(Book.language, Book.id)),
        'formats': dict(grouped_counts(File.format, File.id)),
    }
    return summary
|
|
294
|
+
|
|
295
|
+
def get_all_books(self, limit: Optional[int] = None, offset: int = 0) -> List[Book]:
    """
    Get all books ordered by title, with optional pagination.

    Args:
        limit: Maximum number of books. None (or 0) means no limit.
        offset: Number of leading books to skip. Applied whether or not
            a limit is given.

    Returns:
        List of books
    """
    query = self.session.query(Book).order_by(Book.title)

    # Offset is applied independently of limit. Previously it was silently
    # dropped whenever no limit was supplied.
    if offset:
        query = query.offset(offset)

    if limit:
        query = query.limit(limit)

    return query.all()
|
|
312
|
+
|
|
313
|
+
def get_books_by_author(self, author_name: str) -> List[Book]:
    """Return books having an author whose name contains ``author_name`` (case-insensitive)."""
    name_pattern = f"%{author_name}%"
    by_author = self.session.query(Book).join(Book.authors)
    return by_author.filter(Author.name.ilike(name_pattern)).all()
|
|
318
|
+
|
|
319
|
+
def get_books_by_subject(self, subject_name: str) -> List[Book]:
    """Return books having a subject whose name contains ``subject_name`` (case-insensitive)."""
    name_pattern = f"%{subject_name}%"
    by_subject = self.session.query(Book).join(Book.subjects)
    return by_subject.filter(Subject.name.ilike(name_pattern)).all()
|
|
324
|
+
|
|
325
|
+
def update_reading_status(self, book_id: int, status: str,
                          progress: Optional[int] = None,
                          rating: Optional[int] = None):
    """
    Update reading status for a book.

    If the book has no personal-metadata row yet, one is created, in the
    same way set_favorite() and add_tags() do. Previously the update was
    silently dropped while a success message was still logged.

    Args:
        book_id: Book ID
        status: Reading status (unread, reading, read)
        progress: Reading progress percentage (0-100); left unchanged
            when None
        rating: Rating (1-5); left unchanged when None
    """
    personal = self.session.query(PersonalMetadata).filter_by(
        book_id=book_id
    ).first()

    if not personal:
        personal = PersonalMetadata(book_id=book_id)
        self.session.add(personal)

    personal.reading_status = status
    if progress is not None:
        personal.reading_progress = progress
    if rating is not None:
        personal.rating = rating

    # Stamp the finish time whenever the book is marked as read.
    if status == 'read':
        personal.date_finished = datetime.now()

    self.session.commit()
    logger.info(f"Updated reading status for book {book_id}: {status}")
|
|
353
|
+
|
|
354
|
+
def set_favorite(self, book_id: int, favorite: bool = True):
    """
    Set or clear the favorite flag on a book's personal metadata.

    Args:
        book_id: Book ID
        favorite: True to mark as favorite, False to unmark
    """
    from .db.models import PersonalMetadata

    lookup = self.session.query(PersonalMetadata).filter_by(book_id=book_id)
    personal = lookup.first()

    if personal:
        personal.favorite = favorite
    else:
        # No personal metadata yet: create the row with the flag set.
        personal = PersonalMetadata(book_id=book_id, favorite=favorite)
        self.session.add(personal)

    self.session.commit()
    logger.info(f"Set favorite for book {book_id}: {favorite}")
|
|
377
|
+
|
|
378
|
+
def add_tags(self, book_id: int, tags: List[str]):
    """
    Add personal tags to a book.

    Existing tags keep their order, new tags are appended in the order
    given, and duplicates are dropped. A personal-metadata row is created
    if the book does not have one.

    Args:
        book_id: Book ID
        tags: List of tag strings
    """
    from .db.models import PersonalMetadata

    personal = self.session.query(PersonalMetadata).filter_by(
        book_id=book_id
    ).first()

    existing_tags = (personal.personal_tags or []) if personal else []
    # dict.fromkeys de-duplicates while preserving first-seen order, so the
    # stored list is deterministic (a set would scramble it). Building a
    # new list also avoids storing the caller's list object.
    combined = list(dict.fromkeys([*existing_tags, *tags]))

    if not personal:
        personal = PersonalMetadata(book_id=book_id, personal_tags=combined)
        self.session.add(personal)
    else:
        personal.personal_tags = combined

    self.session.commit()
    logger.info(f"Added tags to book {book_id}: {tags}")
|
|
403
|
+
|
|
404
|
+
def remove_tags(self, book_id: int, tags: List[str]):
    """
    Strip the given personal tags from a book.

    Does nothing when the book has no personal metadata or no tags.

    Args:
        book_id: Book ID
        tags: List of tag strings to remove
    """
    from .db.models import PersonalMetadata

    lookup = self.session.query(PersonalMetadata).filter_by(book_id=book_id)
    personal = lookup.first()

    # Guard clause: nothing stored, nothing to remove.
    if not (personal and personal.personal_tags):
        return

    remaining = []
    for tag in personal.personal_tags:
        if tag not in tags:
            remaining.append(tag)
    personal.personal_tags = remaining

    self.session.commit()
    logger.info(f"Removed tags from book {book_id}: {tags}")
|
|
422
|
+
|
|
423
|
+
def add_subject(self, book_id: int, subject_name: str):
    """
    Attach a subject to a book, creating the subject if needed.

    Logs a warning and returns when the book does not exist. Nothing is
    committed when the book already has the subject.

    Args:
        book_id: Book ID
        subject_name: Subject/tag name to add
    """
    session = self.session

    book = session.query(Book).filter_by(id=book_id).first()
    if not book:
        logger.warning(f"Book {book_id} not found")
        return

    # Reuse the subject row with this exact name, or stage a new one.
    subject = session.query(Subject).filter_by(name=subject_name).first()
    if not subject:
        subject = Subject(name=subject_name)
        session.add(subject)

    if subject in book.subjects:
        return

    book.subjects.append(subject)
    session.commit()
    logger.info(f"Added subject '{subject_name}' to book {book_id}")
|
|
447
|
+
|
|
448
|
+
def add_annotation(self, book_id: int, content: str,
                   page: Optional[int] = None,
                   annotation_type: str = 'note'):
    """
    Store a new annotation for a book and commit it.

    Args:
        book_id: Book ID
        content: Annotation text
        page: Page number (optional)
        annotation_type: Type of annotation (note, highlight, bookmark)

    Returns:
        Annotation ID
    """
    from .db.models import Annotation

    fields = {
        "book_id": book_id,
        "content": content,
        "page_number": page,
        "annotation_type": annotation_type,
        "created_at": datetime.now(),
    }
    annotation = Annotation(**fields)

    self.session.add(annotation)
    self.session.commit()
    logger.info(f"Added annotation to book {book_id}")

    return annotation.id
|
|
477
|
+
|
|
478
|
+
def get_annotations(self, book_id: int) -> List:
    """
    Fetch a book's annotations, newest first.

    Args:
        book_id: Book ID

    Returns:
        List of Annotation objects ordered by ``created_at`` descending
    """
    from .db.models import Annotation

    for_book = self.session.query(Annotation).filter_by(book_id=book_id)
    newest_first = for_book.order_by(Annotation.created_at.desc())
    return newest_first.all()
|
|
493
|
+
|
|
494
|
+
def delete_annotation(self, annotation_id: int):
    """
    Remove an annotation by ID; does nothing when the ID is unknown.

    Args:
        annotation_id: Annotation ID
    """
    from .db.models import Annotation

    annotation = self.session.query(Annotation).get(annotation_id)
    if not annotation:
        return

    self.session.delete(annotation)
    self.session.commit()
    logger.info(f"Deleted annotation {annotation_id}")
|
|
508
|
+
|
|
509
|
+
def add_to_virtual_library(self, book_id: int, library_name: str):
    """
    Add a book to a virtual library (collection/view).

    Virtual library names are stored in the ``personal_tags`` list of the
    book's personal metadata. A personal-metadata row is created when the
    book does not have one.

    Args:
        book_id: Book ID
        library_name: Name of the virtual library
    """
    from .db.models import PersonalMetadata

    personal = self.session.query(PersonalMetadata).filter_by(
        book_id=book_id
    ).first()

    if not personal:
        # Use personal_tags as virtual_libraries array
        personal = PersonalMetadata(book_id=book_id, personal_tags=[library_name])
        self.session.add(personal)
    else:
        existing_libs = personal.personal_tags or []
        if library_name not in existing_libs:
            # Assign a NEW list instead of appending in place. Mutating
            # the loaded list and re-assigning the same object leaves the
            # "old" and "new" values equal, so without mutation tracking
            # on the column the change would not be flushed.
            personal.personal_tags = [*existing_libs, library_name]

    self.session.commit()
    logger.info(f"Added book {book_id} to virtual library '{library_name}'")
|
|
535
|
+
|
|
536
|
+
def remove_from_virtual_library(self, book_id: int, library_name: str):
    """
    Take a book out of a virtual library.

    Does nothing when the book has no personal metadata or no tags.

    Args:
        book_id: Book ID
        library_name: Name of the virtual library
    """
    from .db.models import PersonalMetadata

    lookup = self.session.query(PersonalMetadata).filter_by(book_id=book_id)
    personal = lookup.first()

    if not (personal and personal.personal_tags):
        return

    # Keep every stored name except the one being removed.
    kept = []
    for lib in personal.personal_tags:
        if lib != library_name:
            kept.append(lib)
    personal.personal_tags = kept

    self.session.commit()
    logger.info(f"Removed book {book_id} from virtual library '{library_name}'")
|
|
554
|
+
|
|
555
|
+
def get_virtual_library(self, library_name: str) -> List[Book]:
    """
    Get all books in a virtual library.

    Args:
        library_name: Name of the virtual library

    Returns:
        List of books whose personal_tags list contains exactly
        ``library_name``
    """
    from .db.models import PersonalMetadata

    # SQL pre-filter on the stored tags column. On its own, .contains()
    # is a substring test on the serialized value, so "sci" would also
    # select a book tagged "science".
    # NOTE(review): assumes personal_tags is a JSON-serialized list and
    # that names survive serialization unescaped - confirm against the
    # model definition.
    rows = (self.session.query(Book, PersonalMetadata)
            .join(Book.personal)
            .filter(PersonalMetadata.personal_tags.contains(library_name))
            .all())

    # Exact membership check in Python removes the substring false
    # positives. seen_ids keeps each book at most once.
    books = []
    seen_ids = set()
    for book, personal in rows:
        if book.id in seen_ids:
            continue
        if library_name in (personal.personal_tags or []):
            seen_ids.add(book.id)
            books.append(book)

    return books
|
|
576
|
+
|
|
577
|
+
def list_virtual_libraries(self) -> List[str]:
    """
    Collect every distinct name stored in any book's personal_tags.

    Returns:
        Sorted list of virtual library names
    """
    from .db.models import PersonalMetadata

    tagged_rows = self.session.query(PersonalMetadata).filter(
        PersonalMetadata.personal_tags.isnot(None)
    ).all()

    # Flatten all non-empty tag lists into one set of unique names.
    libraries = {
        name
        for pm in tagged_rows
        if pm.personal_tags
        for name in pm.personal_tags
    }
    return sorted(libraries)
|
|
597
|
+
|
|
598
|
+
def delete_book(self, book_id: int, delete_files: bool = False):
    """
    Delete a book from the library.

    The database row is removed and committed first. Physical files are
    unlinked only afterwards, so a failed commit never leaves database
    records pointing at files that were already deleted. A file that
    cannot be removed is logged and skipped; it does not abort the rest.

    Args:
        book_id: Book ID
        delete_files: If True, also delete physical files
    """
    book = self.get_book(book_id)
    if not book:
        logger.warning(f"Book {book_id} not found")
        return

    # Capture everything needed from the ORM object before it is deleted.
    title = book.title
    file_paths: List[Path] = []
    cover_paths: List[Path] = []
    if delete_files:
        file_paths = [self.library_path / file.path for file in book.files]
        cover_paths = [self.library_path / cover.path for cover in book.covers]

    # Delete from database (cascade will handle related records)
    self.session.delete(book)
    self.session.commit()

    for file_path in file_paths:
        if self._unlink_if_present(file_path):
            logger.info(f"Deleted file: {file_path}")

    for cover_path in cover_paths:
        self._unlink_if_present(cover_path)

    logger.info(f"Deleted book: {title}")

def _unlink_if_present(self, path: Path) -> bool:
    """Remove path if it exists; return True only when a file was removed."""
    try:
        path.unlink()
    except FileNotFoundError:
        return False
    except OSError as exc:
        logger.warning(f"Could not delete {path}: {exc}")
        return False
    return True
|
|
629
|
+
|
|
630
|
+
def find_similar(
    self,
    book_id: int,
    top_k: int = 10,
    similarity_config: Optional[Any] = None,
    filter_language: bool = True,
) -> List[Tuple[Book, float]]:
    """
    Rank other books by similarity to a given book.

    Args:
        book_id: ID of the query book
        top_k: Number of similar books to return (default 10)
        similarity_config: Optional BookSimilarity instance
            (default: balanced preset)
        filter_language: If True, only consider books in the query
            book's language (when it has one)

    Returns:
        List of (book, similarity_score) tuples as produced by the
        similarity object; empty when the book is unknown or there are
        no candidates

    Example:
        >>> similar = lib.find_similar(42, top_k=5)
        >>> for book, score in similar:
        ...     print(f"{book.title}: {score:.2f}")
    """
    from ebk.similarity import BookSimilarity

    query_book = self.get_book(book_id)
    if not query_book:
        logger.warning(f"Book {book_id} not found")
        return []

    # Build the candidate set, optionally restricted to the same language.
    builder = self.query()
    if filter_language and query_book.language:
        builder = builder.filter_by_language(query_book.language)

    candidates = builder.all()
    if not candidates:
        return []

    # Fall back to the balanced preset when no configuration is supplied.
    scorer = similarity_config
    if scorer is None:
        scorer = BookSimilarity().balanced()

    # The scorer is fitted on the candidates before it is queried.
    scorer.fit(candidates)
    results = scorer.find_similar(query_book, candidates, top_k=top_k)

    logger.info(
        f"Found {len(results)} similar books to '{query_book.title}'"
    )
    return results
|
|
690
|
+
|
|
691
|
+
def compute_similarity_matrix(
    self,
    book_ids: Optional[List[int]] = None,
    similarity_config: Optional[Any] = None,
) -> Tuple[List[Book], Any]:
    """
    Build a pairwise similarity matrix over a set of books.

    Args:
        book_ids: Optional list of book IDs (default: all books);
            unknown IDs are skipped
        similarity_config: Optional BookSimilarity instance
            (default: balanced preset)

    Returns:
        Tuple of (books, similarity_matrix)
        where similarity_matrix[i][j] = similarity(books[i], books[j]);
        ([], None) when no books are found

    Example:
        >>> books, matrix = lib.compute_similarity_matrix()
        >>> # matrix[0][1] is similarity between books[0] and books[1]
    """
    from ebk.similarity import BookSimilarity

    if book_ids:
        # Resolve each ID individually and drop the ones that do not exist.
        books = [b for b in map(self.get_book, book_ids) if b is not None]
    else:
        books = self.query().all()

    if not books:
        logger.warning("No books found for similarity matrix")
        return [], None

    scorer = similarity_config
    if scorer is None:
        scorer = BookSimilarity().balanced()

    scorer.fit(books)
    matrix = scorer.similarity_matrix(books)

    logger.info(f"Computed {len(books)}x{len(books)} similarity matrix")
    return books, matrix
|
|
736
|
+
|
|
737
|
+
|
|
738
|
+
class QueryBuilder:
|
|
739
|
+
"""Fluent query builder for books."""
|
|
740
|
+
|
|
741
|
+
def __init__(self, session: Session):
    """Start from an unfiltered Book query on the given session."""
    base_query = session.query(Book)
    self.session = session
    self._query = base_query
|
|
744
|
+
|
|
745
|
+
def filter_by_title(self, title: str, exact: bool = False) -> 'QueryBuilder':
    """Restrict to books whose title equals (exact) or contains ``title`` (case-insensitive)."""
    condition = Book.title == title if exact else Book.title.ilike(f"%{title}%")
    self._query = self._query.filter(condition)
    return self
|
|
752
|
+
|
|
753
|
+
def filter_by_author(self, author: str) -> 'QueryBuilder':
    """Restrict to books with an author whose name contains ``author`` (case-insensitive)."""
    name_pattern = f"%{author}%"
    joined = self._query.join(Book.authors)
    self._query = joined.filter(Author.name.ilike(name_pattern))
    return self
|
|
759
|
+
|
|
760
|
+
def filter_by_subject(self, subject: str) -> 'QueryBuilder':
    """Restrict to books with a subject whose name contains ``subject`` (case-insensitive)."""
    name_pattern = f"%{subject}%"
    joined = self._query.join(Book.subjects)
    self._query = joined.filter(Subject.name.ilike(name_pattern))
    return self
|
|
766
|
+
|
|
767
|
+
def filter_by_language(self, language: str) -> 'QueryBuilder':
    """Restrict to books whose language code equals ``language`` exactly."""
    same_language = Book.language == language
    self._query = self._query.filter(same_language)
    return self
|
|
771
|
+
|
|
772
|
+
def filter_by_publisher(self, publisher: str) -> 'QueryBuilder':
    """Restrict to books whose publisher contains ``publisher`` (case-insensitive)."""
    name_pattern = f"%{publisher}%"
    self._query = self._query.filter(Book.publisher.ilike(name_pattern))
    return self
|
|
776
|
+
|
|
777
|
+
def filter_by_year(self, year: int) -> 'QueryBuilder':
|
|
778
|
+
"""Filter by publication year.
|
|
779
|
+
|
|
780
|
+
Args:
|
|
781
|
+
year: Publication year (e.g., 1975)
|
|
782
|
+
|
|
783
|
+
Returns:
|
|
784
|
+
Self for chaining
|
|
785
|
+
"""
|
|
786
|
+
# publication_date can be "YYYY", "YYYY-MM", or "YYYY-MM-DD"
|
|
787
|
+
# So we match if it starts with the year
|
|
788
|
+
year_str = str(year)
|
|
789
|
+
self._query = self._query.filter(Book.publication_date.like(f"{year_str}%"))
|
|
790
|
+
return self
|
|
791
|
+
|
|
792
|
+
def filter_by_text(self, search_text: str) -> 'QueryBuilder':
|
|
793
|
+
"""Filter by full-text search.
|
|
794
|
+
|
|
795
|
+
Uses FTS5 to search across title, description, and extracted text.
|
|
796
|
+
|
|
797
|
+
Args:
|
|
798
|
+
search_text: Text to search for
|
|
799
|
+
|
|
800
|
+
Returns:
|
|
801
|
+
Self for chaining
|
|
802
|
+
"""
|
|
803
|
+
from sqlalchemy import text as sql_text
|
|
804
|
+
|
|
805
|
+
# Query FTS5 table for matching book IDs
|
|
806
|
+
result = self.session.execute(
|
|
807
|
+
sql_text("""
|
|
808
|
+
SELECT book_id
|
|
809
|
+
FROM books_fts
|
|
810
|
+
WHERE books_fts MATCH :query
|
|
811
|
+
ORDER BY rank
|
|
812
|
+
"""),
|
|
813
|
+
{"query": search_text}
|
|
814
|
+
)
|
|
815
|
+
book_ids = [row[0] for row in result]
|
|
816
|
+
|
|
817
|
+
if book_ids:
|
|
818
|
+
self._query = self._query.filter(Book.id.in_(book_ids))
|
|
819
|
+
else:
|
|
820
|
+
# No matches - ensure query returns empty
|
|
821
|
+
self._query = self._query.filter(Book.id == -1)
|
|
822
|
+
|
|
823
|
+
return self
|
|
824
|
+
|
|
825
|
+
def filter_by_reading_status(self, status: str) -> 'QueryBuilder':
|
|
826
|
+
"""Filter by reading status."""
|
|
827
|
+
self._query = self._query.join(Book.personal).filter(
|
|
828
|
+
PersonalMetadata.reading_status == status
|
|
829
|
+
)
|
|
830
|
+
return self
|
|
831
|
+
|
|
832
|
+
def filter_by_rating(self, min_rating: int, max_rating: int = 5) -> 'QueryBuilder':
|
|
833
|
+
"""Filter by rating range."""
|
|
834
|
+
self._query = self._query.join(Book.personal).filter(
|
|
835
|
+
and_(
|
|
836
|
+
PersonalMetadata.rating >= min_rating,
|
|
837
|
+
PersonalMetadata.rating <= max_rating
|
|
838
|
+
)
|
|
839
|
+
)
|
|
840
|
+
return self
|
|
841
|
+
|
|
842
|
+
def filter_by_favorite(self, is_favorite: bool = True) -> 'QueryBuilder':
|
|
843
|
+
"""Filter by favorite status."""
|
|
844
|
+
self._query = self._query.join(Book.personal).filter(
|
|
845
|
+
PersonalMetadata.favorite == is_favorite
|
|
846
|
+
)
|
|
847
|
+
return self
|
|
848
|
+
|
|
849
|
+
def filter_by_format(self, format_name: str) -> 'QueryBuilder':
|
|
850
|
+
"""Filter by file format (e.g., 'pdf', 'epub')."""
|
|
851
|
+
from .db.models import File
|
|
852
|
+
self._query = self._query.join(Book.files).filter(
|
|
853
|
+
File.format.ilike(f'%{format_name}%')
|
|
854
|
+
)
|
|
855
|
+
return self
|
|
856
|
+
|
|
857
|
+
def order_by(self, field: str, desc: bool = False) -> 'QueryBuilder':
|
|
858
|
+
"""
|
|
859
|
+
Order results.
|
|
860
|
+
|
|
861
|
+
Args:
|
|
862
|
+
field: Field name (title, created_at, publication_date)
|
|
863
|
+
desc: Descending order if True
|
|
864
|
+
"""
|
|
865
|
+
field_map = {
|
|
866
|
+
'title': Book.title,
|
|
867
|
+
'created_at': Book.created_at,
|
|
868
|
+
'publication_date': Book.publication_date,
|
|
869
|
+
}
|
|
870
|
+
|
|
871
|
+
if field in field_map:
|
|
872
|
+
order_field = field_map[field]
|
|
873
|
+
if desc:
|
|
874
|
+
order_field = order_field.desc()
|
|
875
|
+
self._query = self._query.order_by(order_field)
|
|
876
|
+
|
|
877
|
+
return self
|
|
878
|
+
|
|
879
|
+
def limit(self, limit: int) -> 'QueryBuilder':
|
|
880
|
+
"""Limit number of results."""
|
|
881
|
+
self._query = self._query.limit(limit)
|
|
882
|
+
return self
|
|
883
|
+
|
|
884
|
+
def offset(self, offset: int) -> 'QueryBuilder':
|
|
885
|
+
"""Set result offset."""
|
|
886
|
+
self._query = self._query.offset(offset)
|
|
887
|
+
return self
|
|
888
|
+
|
|
889
|
+
def all(self) -> List[Book]:
|
|
890
|
+
"""Execute query and return all results."""
|
|
891
|
+
return self._query.all()
|
|
892
|
+
|
|
893
|
+
def first(self) -> Optional[Book]:
|
|
894
|
+
"""Execute query and return first result."""
|
|
895
|
+
return self._query.first()
|
|
896
|
+
|
|
897
|
+
def count(self) -> int:
|
|
898
|
+
"""Get count of matching books."""
|
|
899
|
+
return self._query.count()
|