ebk 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ebk might be problematic. Click here for more details.

Files changed (61) hide show
  1. ebk/ai/__init__.py +23 -0
  2. ebk/ai/knowledge_graph.py +443 -0
  3. ebk/ai/llm_providers/__init__.py +21 -0
  4. ebk/ai/llm_providers/base.py +230 -0
  5. ebk/ai/llm_providers/ollama.py +362 -0
  6. ebk/ai/metadata_enrichment.py +396 -0
  7. ebk/ai/question_generator.py +328 -0
  8. ebk/ai/reading_companion.py +224 -0
  9. ebk/ai/semantic_search.py +434 -0
  10. ebk/ai/text_extractor.py +394 -0
  11. ebk/cli.py +1097 -9
  12. ebk/db/__init__.py +37 -0
  13. ebk/db/migrations.py +180 -0
  14. ebk/db/models.py +526 -0
  15. ebk/db/session.py +144 -0
  16. ebk/exports/__init__.py +0 -0
  17. ebk/exports/base_exporter.py +218 -0
  18. ebk/exports/html_library.py +1390 -0
  19. ebk/exports/html_utils.py +117 -0
  20. ebk/exports/hugo.py +59 -0
  21. ebk/exports/jinja_export.py +287 -0
  22. ebk/exports/multi_facet_export.py +164 -0
  23. ebk/exports/symlink_dag.py +479 -0
  24. ebk/exports/zip.py +25 -0
  25. ebk/library_db.py +155 -0
  26. ebk/repl/__init__.py +9 -0
  27. ebk/repl/find.py +126 -0
  28. ebk/repl/grep.py +174 -0
  29. ebk/repl/shell.py +1677 -0
  30. ebk/repl/text_utils.py +320 -0
  31. ebk/services/__init__.py +11 -0
  32. ebk/services/import_service.py +442 -0
  33. ebk/services/tag_service.py +282 -0
  34. ebk/services/text_extraction.py +317 -0
  35. ebk/similarity/__init__.py +77 -0
  36. ebk/similarity/base.py +154 -0
  37. ebk/similarity/core.py +445 -0
  38. ebk/similarity/extractors.py +168 -0
  39. ebk/similarity/metrics.py +376 -0
  40. ebk/vfs/__init__.py +101 -0
  41. ebk/vfs/base.py +301 -0
  42. ebk/vfs/library_vfs.py +124 -0
  43. ebk/vfs/nodes/__init__.py +54 -0
  44. ebk/vfs/nodes/authors.py +196 -0
  45. ebk/vfs/nodes/books.py +480 -0
  46. ebk/vfs/nodes/files.py +155 -0
  47. ebk/vfs/nodes/metadata.py +385 -0
  48. ebk/vfs/nodes/root.py +100 -0
  49. ebk/vfs/nodes/similar.py +165 -0
  50. ebk/vfs/nodes/subjects.py +184 -0
  51. ebk/vfs/nodes/tags.py +371 -0
  52. ebk/vfs/resolver.py +228 -0
  53. {ebk-0.3.1.dist-info → ebk-0.3.2.dist-info}/METADATA +1 -1
  54. ebk-0.3.2.dist-info/RECORD +69 -0
  55. ebk-0.3.2.dist-info/entry_points.txt +2 -0
  56. ebk-0.3.2.dist-info/top_level.txt +1 -0
  57. ebk-0.3.1.dist-info/RECORD +0 -19
  58. ebk-0.3.1.dist-info/entry_points.txt +0 -6
  59. ebk-0.3.1.dist-info/top_level.txt +0 -2
  60. {ebk-0.3.1.dist-info → ebk-0.3.2.dist-info}/WHEEL +0 -0
  61. {ebk-0.3.1.dist-info → ebk-0.3.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,385 @@
1
+ """Metadata file nodes for books."""
2
+
3
+ from typing import Optional, Dict, Any
4
+
5
+ from ebk.vfs.base import FileNode, DirectoryNode
6
+ from ebk.db.models import Book
7
+
8
+
9
class TitleFileNode(FileNode):
    """Virtual file named ``title`` that exposes a book's title."""

    def __init__(self, book: Book, parent: Optional[DirectoryNode] = None):
        super().__init__(name="title", parent=parent)
        self.book = book

    def read_content(self) -> str:
        """Return the title text.

        Returns:
            The book title, or the placeholder "(No title)" when the
            title is empty or missing.
        """
        title = self.book.title
        if title:
            return title
        return "(No title)"

    def get_info(self) -> Dict[str, Any]:
        """Describe this file, with a title preview capped at 60 characters."""
        full_title = self.book.title or ""
        if len(full_title) > 60:
            # Long titles are cut and marked with an ellipsis.
            preview = full_title[:60] + "..."
        else:
            preview = full_title
        return {"type": "file", "name": "title", "preview": preview}
34
+
35
+
36
class AuthorsFileNode(FileNode):
    """Virtual file named ``authors`` listing a book's authors."""

    def __init__(self, book: Book, parent: Optional[DirectoryNode] = None):
        super().__init__(name="authors", parent=parent)
        self.book = book

    def read_content(self) -> str:
        """Return the author names, one per line.

        Returns:
            Newline-separated author names, or the placeholder
            "(No authors)" when the book has none.
        """
        authors = self.book.authors
        if authors:
            return "\n".join([entry.name for entry in authors])
        return "(No authors)"

    def get_info(self) -> Dict[str, Any]:
        """Describe this file, with a comma-separated author preview (max 60 chars)."""
        preview = ""
        authors = self.book.authors
        if authors:
            joined = ", ".join([entry.name for entry in authors])
            # Anything beyond 60 characters is cut and marked with an ellipsis.
            preview = joined if len(joined) <= 60 else joined[:60] + "..."
        return {"type": "file", "name": "authors", "preview": preview}
66
+
67
+
68
class SubjectsFileNode(FileNode):
    """Virtual file named ``subjects`` listing a book's subjects."""

    def __init__(self, book: Book, parent: Optional[DirectoryNode] = None):
        super().__init__(name="subjects", parent=parent)
        self.book = book

    def read_content(self) -> str:
        """Return the subject names, one per line.

        Returns:
            Newline-separated subject names, or the placeholder
            "(No subjects)" when the book has none.
        """
        subjects = self.book.subjects
        if subjects:
            return "\n".join([entry.name for entry in subjects])
        return "(No subjects)"

    def get_info(self) -> Dict[str, Any]:
        """Describe this file, with a comma-separated subject preview (max 60 chars)."""
        preview = ""
        subjects = self.book.subjects
        if subjects:
            joined = ", ".join([entry.name for entry in subjects])
            # Anything beyond 60 characters is cut and marked with an ellipsis.
            preview = joined if len(joined) <= 60 else joined[:60] + "..."
        return {"type": "file", "name": "subjects", "preview": preview}
98
+
99
+
100
class DescriptionFileNode(FileNode):
    """Virtual file named ``description`` holding a book's description."""

    def __init__(self, book: Book, parent: Optional[DirectoryNode] = None):
        super().__init__(name="description", parent=parent)
        self.book = book

    def read_content(self) -> str:
        """Return the description text.

        Returns:
            The book description, or the placeholder "(No description)"
            when it is empty or missing.
        """
        description = self.book.description
        if description:
            return description
        return "(No description)"
114
+
115
+
116
class TextFileNode(FileNode):
    """Virtual file named ``text`` serving a book's extracted full text."""

    def __init__(self, book: Book, parent: Optional[DirectoryNode] = None):
        super().__init__(name="text", parent=parent)
        self.book = book

    def _first_extracted_text(self) -> Optional[str]:
        """Return the first non-empty extracted text among the book's files.

        Files are scanned in the order of ``book.files``; None is
        returned when no file carries extracted content.
        """
        for book_file in self.book.files or ():
            extracted = book_file.extracted_text
            if extracted and extracted.content:
                return extracted.content
        return None

    def read_content(self) -> str:
        """Return the extracted text.

        Returns:
            The first available extracted text, or the placeholder
            "(No text extracted)" when no file has any.
        """
        text = self._first_extracted_text()
        if text is None:
            return "(No text extracted)"
        return text

    def get_info(self) -> Dict[str, Any]:
        """Return the base file info plus the text size when text is available.

        Returns:
            The dict produced by the parent class, with ``size`` set to
            the character length of the extracted text if any exists.
        """
        info = super().get_info()
        text = self._first_extracted_text()
        if text is not None:
            info["size"] = len(text)
        return info
153
+
154
+
155
class YearFileNode(FileNode):
    """Virtual file named ``year`` exposing a book's publication year."""

    def __init__(self, book: Book, parent: Optional[DirectoryNode] = None):
        super().__init__(name="year", parent=parent)
        self.book = book

    def _year_text(self) -> Optional[str]:
        """Return the leading four characters of the publication date.

        Returns None when the book has no publication date. Slicing a
        string shorter than four characters yields the whole string, so
        short dates come back unchanged.
        """
        published = self.book.publication_date
        if not published:
            return None
        return str(published)[:4]

    def read_content(self) -> str:
        """Return the publication year.

        Returns:
            The first four characters of the publication date, or the
            placeholder "(Unknown)" when no date is set.
        """
        year = self._year_text()
        return "(Unknown)" if year is None else year

    def get_info(self) -> Dict[str, Any]:
        """Describe this file, using the year (or an empty string) as the preview."""
        year = self._year_text()
        return {
            "type": "file",
            "name": "year",
            "preview": "" if year is None else year,
        }
189
+
190
+
191
class LanguageFileNode(FileNode):
    """Virtual file named ``language`` exposing a book's language code."""

    def __init__(self, book: Book, parent: Optional[DirectoryNode] = None):
        super().__init__(name="language", parent=parent)
        self.book = book

    def read_content(self) -> str:
        """Return the language code.

        Returns:
            The book's language value, or the placeholder "(Unknown)"
            when it is empty or missing.
        """
        language = self.book.language
        if language:
            return language
        return "(Unknown)"

    def get_info(self) -> Dict[str, Any]:
        """Describe this file, using the language (or an empty string) as the preview."""
        preview = self.book.language or ""
        return {"type": "file", "name": "language", "preview": preview}
213
+
214
+
215
class PublisherFileNode(FileNode):
    """Virtual file named ``publisher`` exposing a book's publisher."""

    def __init__(self, book: Book, parent: Optional[DirectoryNode] = None):
        super().__init__(name="publisher", parent=parent)
        self.book = book

    def read_content(self) -> str:
        """Return the publisher name.

        Returns:
            The publisher, or the placeholder "(Unknown)" when it is
            empty or missing.
        """
        publisher = self.book.publisher
        if publisher:
            return publisher
        return "(Unknown)"

    def get_info(self) -> Dict[str, Any]:
        """Describe this file, with a publisher preview capped at 60 characters."""
        name = self.book.publisher or ""
        if len(name) > 60:
            # Long names are cut and marked with an ellipsis.
            preview = name[:60] + "..."
        else:
            preview = name
        return {"type": "file", "name": "publisher", "preview": preview}
239
+
240
+
241
class MetadataFileNode(FileNode):
    """Virtual file named ``metadata`` with all book metadata in readable form."""

    def __init__(self, book: Book, parent: Optional[DirectoryNode] = None):
        super().__init__(name="metadata", parent=parent)
        self.book = book

    def read_content(self) -> str:
        """Render the book's metadata as ``Label: value`` lines.

        The title line is always present; every other field is emitted
        only when the book has a value for it. The description, when
        present, is appended last after a blank line.

        Returns:
            The formatted metadata, lines joined with newlines.
        """
        book = self.book
        lines = [f"Title: {book.title or '(No title)'}"]

        if book.subtitle:
            lines.append(f"Subtitle: {book.subtitle}")

        if book.authors:
            authors_str = ", ".join(a.name for a in book.authors)
            lines.append(f"Authors: {authors_str}")

        # Publication info
        if book.publication_date:
            lines.append(f"Published: {book.publication_date}")

        if book.publisher:
            lines.append(f"Publisher: {book.publisher}")

        # Language and series
        if book.language:
            lines.append(f"Language: {book.language}")

        if book.series:
            series_str = book.series
            # Compare against None rather than relying on truthiness:
            # a series index of 0 is falsy but is still a real position
            # that should be shown.
            if book.series_index is not None:
                series_str += f" #{book.series_index}"
            lines.append(f"Series: {series_str}")

        # Physical info
        if book.page_count:
            lines.append(f"Pages: {book.page_count}")

        if book.subjects:
            subjects_str = ", ".join(s.name for s in book.subjects)
            lines.append(f"Subjects: {subjects_str}")

        if book.files:
            formats = ", ".join(f.format.upper() for f in book.files)
            lines.append(f"Formats: {formats}")
            lines.append(f"Files: {len(book.files)}")

        if book.description:
            # The leading newline puts a blank line before the description.
            lines.append("\nDescription:")
            lines.append(book.description)

        return "\n".join(lines)
305
+
306
+
307
class BookColorFile(FileNode):
    """Virtual file named ``color`` that reads and writes a book's color."""

    def __init__(self, book: Book, library, parent: Optional[DirectoryNode] = None):
        """Set up the color file.

        Args:
            book: Book database model whose ``color`` attribute is exposed
            library: Library instance; its ``session`` is used to commit writes
            parent: Parent node
        """
        super().__init__(name="color", parent=parent)
        self.book = book
        self.library = library

    def read_content(self) -> str:
        """Return the stored color.

        Returns:
            The book's color value, or an empty string when none is set.
        """
        stored = self.book.color
        return stored if stored else ""

    def write_content(self, content: str) -> None:
        """Validate and store a new color, then commit the session.

        Args:
            content: Hex color code (e.g., "#FF5733" or "FF5733"), a
                supported color name, or blank text to clear the color.

        Raises:
            ValueError: If the text is neither a known color name nor a
                3- or 6-digit hex code.
        """
        import re

        # Names are matched case-insensitively against this table.
        named_colors = {
            'red': '#FF0000',
            'green': '#00FF00',
            'blue': '#0000FF',
            'yellow': '#FFFF00',
            'orange': '#FFA500',
            'purple': '#800080',
            'pink': '#FFC0CB',
            'cyan': '#00FFFF',
            'magenta': '#FF00FF',
            'lime': '#00FF00',
            'navy': '#000080',
            'teal': '#008080',
            'gray': '#808080',
            'grey': '#808080',
            'black': '#000000',
            'white': '#FFFFFF',
        }

        requested = content.strip()

        if not requested:
            # Blank input clears the color.
            self.book.color = None
            self.library.session.commit()
            return

        resolved = named_colors.get(requested.lower())
        if resolved is None:
            # Not a known name: treat it as hex, adding '#' when absent.
            candidate = requested if requested.startswith('#') else '#' + requested

            # Accept only #RGB or #RRGGBB.
            hex_pattern = r'^#[0-9A-Fa-f]{3}$|^#[0-9A-Fa-f]{6}$'
            if re.match(hex_pattern, candidate) is None:
                raise ValueError(
                    f"Invalid color format: '{content}'. "
                    f"Use hex codes (#FF5733 or #F73) or named colors "
                    f"({', '.join(sorted(named_colors))})"
                )
            resolved = candidate

        self.book.color = resolved
        self.library.session.commit()
ebk/vfs/nodes/root.py ADDED
@@ -0,0 +1,100 @@
1
+ """Root VFS node and top-level directories."""
2
+
3
+ from typing import List, Optional, Dict, Any
4
+
5
+ from ebk.vfs.base import DirectoryNode, Node
6
+ from ebk.library_db import Library
7
+
8
+
9
class RootNode(DirectoryNode):
    """Root directory (/) of the VFS.

    Top-level directories currently built:
    - books/ - All books
    - authors/ - Browse by author
    - subjects/ - Browse by subject
    - tags/ - Browse by user-defined hierarchical tags

    Not yet implemented (see the TODO in ``_build_children``):
    series/, recent/, favorites/, unread/.
    """

    def __init__(self, library: Library):
        """Create the root node.

        Args:
            library: Library instance for database access
        """
        super().__init__(name="", parent=None)  # Root has empty name
        self.library = library
        # Filled on first access by _build_children().
        self._children_cache: Optional[Dict[str, Node]] = None

    def _ensure_children(self) -> Dict[str, Node]:
        """Return the child mapping, building it on first use."""
        if self._children_cache is None:
            self._build_children()
        return self._children_cache

    def list_children(self) -> List[Node]:
        """Return all top-level directory nodes.

        Returns:
            List of top-level directory nodes
        """
        return list(self._ensure_children().values())

    def get_child(self, name: str) -> Optional[Node]:
        """Look up a top-level directory by name.

        Args:
            name: Directory name

        Returns:
            Directory node or None
        """
        return self._ensure_children().get(name)

    def _build_children(self) -> None:
        """Create the top-level directory nodes and store them in the cache."""
        from ebk.vfs.nodes.books import BooksDirectoryNode
        from ebk.vfs.nodes.authors import AuthorsDirectoryNode
        from ebk.vfs.nodes.subjects import SubjectsDirectoryNode
        from ebk.vfs.nodes.tags import TagsDirectoryNode

        children: Dict[str, Node] = {}
        children["books"] = BooksDirectoryNode(self.library, parent=self)
        children["authors"] = AuthorsDirectoryNode(self.library, parent=self)
        children["subjects"] = SubjectsDirectoryNode(self.library, parent=self)
        children["tags"] = TagsDirectoryNode(self.library, parent=self)
        # TODO: Add series, recent, favorites, unread
        self._children_cache = children

    def get_info(self) -> Dict[str, Any]:
        """Return summary information about the root directory.

        Returns:
            Dict with the root's type, name, path and library-wide
            counts of books, authors, subjects and tags
        """
        from ebk.db.models import Tag

        library_stats = self.library.stats()
        tag_count = self.library.session.query(Tag).count()

        info: Dict[str, Any] = {"type": "directory", "name": "/"}
        for key in ("total_books", "total_authors", "total_subjects"):
            # Counts absent from the stats dict are reported as 0.
            info[key] = library_stats.get(key, 0)
        info["total_tags"] = tag_count
        info["path"] = "/"
        return info

    def get_path(self) -> str:
        """Return the root path.

        Returns:
            Always "/"
        """
        return "/"
@@ -0,0 +1,165 @@
1
+ """Similar books VFS node."""
2
+
3
+ from typing import List, Optional, Dict, Any
4
+
5
+ from ebk.vfs.base import VirtualNode, DirectoryNode, SymlinkNode, Node
6
+ from ebk.library_db import Library
7
+ from ebk.db.models import Book
8
+
9
+
10
class SimilarDirectoryNode(VirtualNode):
    """/books/42/similar/ - Virtual directory of similar books.

    Similar books are looked up through ``library.find_similar`` on
    first access and cached on the node. Each child is a symlink to
    another book, carrying its similarity score.
    """

    def __init__(
        self,
        book: Book,
        library: Library,
        parent: Optional[DirectoryNode] = None,
        top_k: int = 10,
    ):
        """Initialize similar books directory.

        Args:
            book: Query book
            library: Library instance
            parent: Parent node (usually BookNode)
            top_k: Number of similar books to show (default 10)
        """
        super().__init__(name="similar", parent=parent)
        self.book = book
        self.library = library
        self.top_k = top_k
        # (book, score) pairs; None until the first lookup has run.
        self._similar_cache: Optional[List[tuple]] = None

    def _ensure_similar(self) -> List[tuple]:
        """Return the cached (book, score) pairs, computing them on first use."""
        if self._similar_cache is None:
            self._compute_similar()
        return self._similar_cache

    def _make_symlink(self, name: str, similar_book: Book, score: float) -> "SimilarBookSymlink":
        """Build the symlink child for one similar book."""
        return SimilarBookSymlink(
            name=name,
            target_path=f"/books/{similar_book.id}",
            similar_book=similar_book,
            score=score,
            parent=self,
        )

    def list_children(self) -> List[Node]:
        """List similar books as symlinks.

        Returns:
            List of SimilarBookSymlink instances, each named after the
            target book's ID
        """
        return [
            self._make_symlink(str(similar_book.id), similar_book, score)
            for similar_book, score in self._ensure_similar()
        ]

    def get_child(self, name: str) -> Optional[Node]:
        """Get a similar book symlink by ID.

        Args:
            name: Book ID as string

        Returns:
            SimilarBookSymlink, or None when the name is not an integer
            or no similar book has that ID
        """
        similar = self._ensure_similar()

        try:
            book_id = int(name)
        except ValueError:
            return None

        for similar_book, score in similar:
            if similar_book.id == book_id:
                # Keep the caller's spelling of the name on the symlink.
                return self._make_symlink(name, similar_book, score)

        return None

    def _compute_similar(self) -> None:
        """Look up similar books and populate the cache.

        Any failure in the lookup is logged and leaves the cache as an
        empty list, so the directory appears empty rather than raising.
        """
        # Imported locally so this block does not depend on the
        # module-level imports, which do not visibly include logging.
        import logging

        try:
            self._similar_cache = self.library.find_similar(
                self.book.id,
                top_k=self.top_k,
                filter_language=True,
            )
        except Exception:
            # Best-effort fallback, but keep the traceback: without it a
            # broken lookup looks the same as "no similar books".
            logging.getLogger(__name__).warning(
                "Similarity lookup failed; showing no similar books",
                exc_info=True,
            )
            self._similar_cache = []

    def get_info(self) -> Dict[str, Any]:
        """Get similar directory info.

        Returns:
            Dict with the node type, name, number of similar books and path
        """
        return {
            "type": "virtual",
            "name": "similar",
            "count": len(self._ensure_similar()),
            "path": self.get_path(),
        }
125
+
126
+
127
class SimilarBookSymlink(SymlinkNode):
    """Symlink to a similar book, annotated with its similarity score."""

    def __init__(
        self,
        name: str,
        target_path: str,
        similar_book: Book,
        score: float,
        parent: Optional[DirectoryNode] = None,
    ):
        """Create the symlink.

        Args:
            name: Link name (book ID)
            target_path: Path to target book
            similar_book: The similar book
            score: Similarity score [0, 1]
            parent: Parent node
        """
        super().__init__(name, target_path, parent)
        self.similar_book = similar_book
        self.score = score

    def get_info(self) -> Dict[str, Any]:
        """Return the base symlink info extended with score, title and authors.

        Returns:
            Dict from the parent class plus ``score``, ``title`` and a
            comma-separated ``authors`` string (empty when there are none)
        """
        info = super().get_info()
        target = self.similar_book
        info["score"] = self.score
        info["title"] = target.title
        if target.authors:
            info["authors"] = ", ".join([person.name for person in target.authors])
        else:
            info["authors"] = ""
        return info