ebk 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. ebk/__init__.py +35 -0
  2. ebk/ai/__init__.py +23 -0
  3. ebk/ai/knowledge_graph.py +450 -0
  4. ebk/ai/llm_providers/__init__.py +26 -0
  5. ebk/ai/llm_providers/anthropic.py +209 -0
  6. ebk/ai/llm_providers/base.py +295 -0
  7. ebk/ai/llm_providers/gemini.py +285 -0
  8. ebk/ai/llm_providers/ollama.py +294 -0
  9. ebk/ai/metadata_enrichment.py +394 -0
  10. ebk/ai/question_generator.py +328 -0
  11. ebk/ai/reading_companion.py +224 -0
  12. ebk/ai/semantic_search.py +433 -0
  13. ebk/ai/text_extractor.py +393 -0
  14. ebk/calibre_import.py +66 -0
  15. ebk/cli.py +6433 -0
  16. ebk/config.py +230 -0
  17. ebk/db/__init__.py +37 -0
  18. ebk/db/migrations.py +507 -0
  19. ebk/db/models.py +725 -0
  20. ebk/db/session.py +144 -0
  21. ebk/decorators.py +1 -0
  22. ebk/exports/__init__.py +0 -0
  23. ebk/exports/base_exporter.py +218 -0
  24. ebk/exports/echo_export.py +279 -0
  25. ebk/exports/html_library.py +1743 -0
  26. ebk/exports/html_utils.py +87 -0
  27. ebk/exports/hugo.py +59 -0
  28. ebk/exports/jinja_export.py +286 -0
  29. ebk/exports/multi_facet_export.py +159 -0
  30. ebk/exports/opds_export.py +232 -0
  31. ebk/exports/symlink_dag.py +479 -0
  32. ebk/exports/zip.py +25 -0
  33. ebk/extract_metadata.py +341 -0
  34. ebk/ident.py +89 -0
  35. ebk/library_db.py +1440 -0
  36. ebk/opds.py +748 -0
  37. ebk/plugins/__init__.py +42 -0
  38. ebk/plugins/base.py +502 -0
  39. ebk/plugins/hooks.py +442 -0
  40. ebk/plugins/registry.py +499 -0
  41. ebk/repl/__init__.py +9 -0
  42. ebk/repl/find.py +126 -0
  43. ebk/repl/grep.py +173 -0
  44. ebk/repl/shell.py +1677 -0
  45. ebk/repl/text_utils.py +320 -0
  46. ebk/search_parser.py +413 -0
  47. ebk/server.py +3608 -0
  48. ebk/services/__init__.py +28 -0
  49. ebk/services/annotation_extraction.py +351 -0
  50. ebk/services/annotation_service.py +380 -0
  51. ebk/services/export_service.py +577 -0
  52. ebk/services/import_service.py +447 -0
  53. ebk/services/personal_metadata_service.py +347 -0
  54. ebk/services/queue_service.py +253 -0
  55. ebk/services/tag_service.py +281 -0
  56. ebk/services/text_extraction.py +317 -0
  57. ebk/services/view_service.py +12 -0
  58. ebk/similarity/__init__.py +77 -0
  59. ebk/similarity/base.py +154 -0
  60. ebk/similarity/core.py +471 -0
  61. ebk/similarity/extractors.py +168 -0
  62. ebk/similarity/metrics.py +376 -0
  63. ebk/skills/SKILL.md +182 -0
  64. ebk/skills/__init__.py +1 -0
  65. ebk/vfs/__init__.py +101 -0
  66. ebk/vfs/base.py +298 -0
  67. ebk/vfs/library_vfs.py +122 -0
  68. ebk/vfs/nodes/__init__.py +54 -0
  69. ebk/vfs/nodes/authors.py +196 -0
  70. ebk/vfs/nodes/books.py +480 -0
  71. ebk/vfs/nodes/files.py +155 -0
  72. ebk/vfs/nodes/metadata.py +385 -0
  73. ebk/vfs/nodes/root.py +100 -0
  74. ebk/vfs/nodes/similar.py +165 -0
  75. ebk/vfs/nodes/subjects.py +184 -0
  76. ebk/vfs/nodes/tags.py +371 -0
  77. ebk/vfs/resolver.py +228 -0
  78. ebk/vfs_router.py +275 -0
  79. ebk/views/__init__.py +32 -0
  80. ebk/views/dsl.py +668 -0
  81. ebk/views/service.py +619 -0
  82. ebk-0.4.4.dist-info/METADATA +755 -0
  83. ebk-0.4.4.dist-info/RECORD +87 -0
  84. ebk-0.4.4.dist-info/WHEEL +5 -0
  85. ebk-0.4.4.dist-info/entry_points.txt +2 -0
  86. ebk-0.4.4.dist-info/licenses/LICENSE +21 -0
  87. ebk-0.4.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,394 @@
1
+ """
2
+ Metadata Enrichment Service using LLM providers.
3
+
4
+ Provides:
5
+ - Metadata inference from text
6
+ - Auto-tagging
7
+ - Subject categorization
8
+ - Description generation
9
+ - Difficulty level assessment
10
+ """
11
+
12
+ from typing import Dict, Any, List, Optional
13
+ from dataclasses import dataclass
14
+
15
+ from .llm_providers.base import BaseLLMProvider
16
+
17
+
18
@dataclass
class EnrichedMetadata:
    """Enriched metadata generated by LLM."""

    # Inferred metadata
    title: Optional[str] = None
    authors: Optional[List[str]] = None
    subjects: Optional[List[str]] = None
    description: Optional[str] = None
    keywords: Optional[List[str]] = None

    # Auto-generated tags
    tags: Optional[List[str]] = None
    categories: Optional[List[str]] = None

    # Content analysis
    difficulty_level: Optional[str] = None  # beginner, intermediate, advanced
    reading_time_minutes: Optional[int] = None
    target_audience: Optional[str] = None

    # Quality metrics
    confidence_score: float = 0.0  # 0.0 to 1.0

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary, omitting every field whose value is None.

        Note that ``confidence_score`` defaults to 0.0 (not None), so it is
        always present in the result.
        """
        pairs = (
            ("title", self.title),
            ("authors", self.authors),
            ("subjects", self.subjects),
            ("description", self.description),
            ("keywords", self.keywords),
            ("tags", self.tags),
            ("categories", self.categories),
            ("difficulty_level", self.difficulty_level),
            ("reading_time_minutes", self.reading_time_minutes),
            ("target_audience", self.target_audience),
            ("confidence_score", self.confidence_score),
        )
        return {name: value for name, value in pairs if value is not None}
58
+
59
+
60
class MetadataEnrichmentService:
    """
    Service for enriching book metadata using LLM.

    Can work with:
    - Extracted text from PDFs/EPUBs
    - Existing metadata (title, authors, etc.)
    - Combination of both

    All public methods are best-effort: LLM/provider failures are logged and
    a safe fallback value is returned instead of raising.
    """

    def __init__(self, provider: BaseLLMProvider):
        """
        Initialize service with LLM provider.

        Args:
            provider: LLM provider instance (e.g., OllamaProvider)
        """
        self.provider = provider

    @staticmethod
    def _coerce_str_list(result: Any, key: str) -> List[str]:
        """Normalize an LLM JSON response into a list of stripped strings.

        The provider may return either a bare JSON array or an object that
        wraps the array under ``key``; anything else yields an empty list.
        Falsy items are dropped.
        """
        if isinstance(result, list):
            items = result
        elif isinstance(result, dict) and key in result:
            items = result[key]
        else:
            items = []
        return [str(item).strip() for item in items if item]

    async def infer_metadata_from_text(
        self,
        text: str,
        existing_metadata: Optional[Dict[str, Any]] = None,
        max_text_length: int = 5000
    ) -> EnrichedMetadata:
        """
        Infer metadata from extracted text.

        Args:
            text: Extracted text from book (truncated if too long)
            existing_metadata: Any existing metadata to enhance
            max_text_length: Maximum text length to send to LLM

        Returns:
            EnrichedMetadata with inferred fields. On provider failure,
            an empty EnrichedMetadata with confidence_score=0.0.
        """
        # Truncate text if too long to keep the prompt within context limits
        if len(text) > max_text_length:
            text = text[:max_text_length] + "..."

        prompt = self._build_metadata_inference_prompt(text, existing_metadata)

        # Expected response schema (passed to providers that support it)
        schema = {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "authors": {"type": "array", "items": {"type": "string"}},
                "subjects": {"type": "array", "items": {"type": "string"}},
                "description": {"type": "string"},
                "keywords": {"type": "array", "items": {"type": "string"}},
                "difficulty_level": {"type": "string", "enum": ["beginner", "intermediate", "advanced"]},
                "target_audience": {"type": "string"},
                "confidence_score": {"type": "number", "minimum": 0, "maximum": 1},
            }
        }

        try:
            result = await self.provider.complete_json(
                prompt=prompt,
                schema=schema,
                temperature=0.3,  # Lower temperature for more consistent metadata
            )

            return EnrichedMetadata(
                title=result.get("title"),
                authors=result.get("authors"),
                subjects=result.get("subjects"),
                description=result.get("description"),
                keywords=result.get("keywords"),
                difficulty_level=result.get("difficulty_level"),
                target_audience=result.get("target_audience"),
                confidence_score=result.get("confidence_score", 0.7),
            )

        except Exception as e:
            logging.getLogger(__name__).warning("Metadata inference failed: %s", e)
            return EnrichedMetadata(confidence_score=0.0)

    async def generate_tags(
        self,
        title: str,
        authors: Optional[List[str]] = None,
        subjects: Optional[List[str]] = None,
        description: Optional[str] = None,
        text_sample: Optional[str] = None,
        max_tags: int = 15
    ) -> List[str]:
        """
        Generate relevant tags for a book.

        Args:
            title: Book title
            authors: List of authors
            subjects: Existing subjects/topics
            description: Book description
            text_sample: Sample text from book
            max_tags: Maximum number of tags to generate

        Returns:
            List of lowercase, deduplicated tags (at most max_tags);
            empty list on provider failure.
        """
        prompt = f"""Generate up to {max_tags} relevant tags for this book.

Title: {title}
Authors: {', '.join(authors or [])}
Subjects: {', '.join(subjects or [])}
Description: {description or 'N/A'}

{f'Text Sample: {text_sample[:1000]}...' if text_sample else ''}

Generate tags that would help someone find this book. Include:
- Genre (fiction, non-fiction, textbook, reference, etc.)
- Topics and themes
- Reading level (introductory, advanced, comprehensive, etc.)
- Format type (handbook, guide, manual, tutorial, etc.)
- Domain (programming, mathematics, history, science, etc.)

Return as JSON array: ["tag1", "tag2", ...]"""

        try:
            result = await self.provider.complete_json(prompt, temperature=0.5)

            # Normalize response shape, then lowercase and deduplicate
            # while preserving first-seen order.
            tags = [tag.lower() for tag in self._coerce_str_list(result, "tags")]
            tags = list(dict.fromkeys(tags))

            return tags[:max_tags]

        except Exception as e:
            logging.getLogger(__name__).warning("Tag generation failed: %s", e)
            return []

    async def categorize(
        self,
        title: str,
        subjects: Optional[List[str]] = None,
        description: Optional[str] = None,
    ) -> List[str]:
        """
        Categorize book into standard categories.

        Args:
            title: Book title
            subjects: Existing subjects
            description: Book description

        Returns:
            List of categories (e.g., ['Computer Science', 'Programming',
            'Software Engineering']); empty list on provider failure.
        """
        prompt = f"""Categorize this book into standard academic/library categories.

Title: {title}
Subjects: {', '.join(subjects or [])}
Description: {description or 'N/A'}

Use standard categories like:
- Computer Science, Mathematics, Physics, Chemistry, Biology
- Engineering, Medicine, Law, Business
- History, Philosophy, Psychology, Sociology
- Literature, Art, Music
- Programming, Data Science, Machine Learning, AI

Return 2-5 relevant categories as JSON array: ["Category1", "Category2", ...]"""

        try:
            result = await self.provider.complete_json(prompt, temperature=0.3)
            return self._coerce_str_list(result, "categories")

        except Exception as e:
            logging.getLogger(__name__).warning("Categorization failed: %s", e)
            return []

    async def enhance_description(
        self,
        title: str,
        existing_description: Optional[str] = None,
        text_sample: Optional[str] = None,
        max_length: int = 500
    ) -> Optional[str]:
        """
        Generate or enhance book description.

        Args:
            title: Book title
            existing_description: Current description (if any)
            text_sample: Sample text from book
            max_length: Maximum description length

        Returns:
            Enhanced description truncated to max_length, or the existing
            description on failure (or when it already exceeds max_length).
        """
        if existing_description and len(existing_description) > max_length:
            # Already have a substantial description; keep it untouched.
            return existing_description

        prompt = f"""Write a clear, informative description for this book.

Title: {title}
{f'Current Description: {existing_description}' if existing_description else ''}
{f'Text Sample: {text_sample[:2000]}...' if text_sample else ''}

Write a {max_length}-character description that:
1. Explains what the book is about
2. Identifies the target audience
3. Highlights key topics covered
4. Mentions the approach/style (e.g., practical guide, theoretical text, reference manual)

Return just the description text, no JSON."""

        try:
            response = await self.provider.complete(prompt, temperature=0.5)
            description = response.content.strip()

            # Strip surrounding quotes if the LLM wrapped its answer in them
            if description.startswith('"') and description.endswith('"'):
                description = description[1:-1]

            return description[:max_length]

        except Exception as e:
            logging.getLogger(__name__).warning("Description generation failed: %s", e)
            return existing_description

    async def assess_difficulty(
        self,
        text_sample: str,
        subjects: Optional[List[str]] = None
    ) -> str:
        """
        Assess difficulty level of book.

        Args:
            text_sample: Sample text from book
            subjects: Book subjects/topics

        Returns:
            Difficulty level: 'beginner', 'intermediate', or 'advanced'.
            Defaults to 'intermediate' on any failure or unexpected response.
        """
        prompt = f"""Assess the difficulty level of this text.

Subjects: {', '.join(subjects or [])}

Text Sample:
{text_sample[:2000]}

Consider:
- Vocabulary complexity
- Concept difficulty
- Prerequisites assumed
- Mathematical/technical content

Return one of: "beginner", "intermediate", "advanced"

Return as JSON: {{"level": "..."}}"""

        try:
            result = await self.provider.complete_json(prompt, temperature=0.2)

            # The provider may return a non-dict (e.g. a bare array); guard
            # before .get() instead of relying on the exception handler.
            level = result.get("level", "intermediate") if isinstance(result, dict) else "intermediate"

            if level not in ["beginner", "intermediate", "advanced"]:
                level = "intermediate"

            return level

        except Exception as e:
            logging.getLogger(__name__).warning("Difficulty assessment failed: %s", e)
            return "intermediate"

    def _build_metadata_inference_prompt(
        self,
        text: str,
        existing_metadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """Build prompt for metadata inference.

        Args:
            text: (Possibly truncated) text sample to analyze.
            existing_metadata: Optional known metadata to embed in the prompt
                so the LLM can enhance rather than re-derive it.

        Returns:
            The fully formatted prompt string.
        """
        # Template uses str.format: {text}/{existing} are placeholders,
        # {{ }} are literal braces for the JSON example.
        prompt = """Analyze this text and infer metadata about the book.

Text Sample:
---
{text}
---

{existing}

Extract or infer:
1. Title (if not provided)
2. Authors (if mentioned)
3. Subjects/topics (main themes and topics covered)
4. Description (2-3 sentence summary)
5. Keywords (10-15 relevant search terms)
6. Difficulty level (beginner/intermediate/advanced)
7. Target audience (who should read this)

Respond with JSON matching this schema:
{{
    "title": "string",
    "authors": ["author1", "author2"],
    "subjects": ["subject1", "subject2"],
    "description": "string",
    "keywords": ["keyword1", "keyword2"],
    "difficulty_level": "beginner|intermediate|advanced",
    "target_audience": "string",
    "confidence_score": 0.0-1.0
}}

Only include fields you can confidently infer. Set confidence_score based on how much information was available."""

        existing_str = ""
        if existing_metadata:
            existing_str = f"""
Existing Metadata (enhance if possible):
Title: {existing_metadata.get('title', 'Unknown')}
Authors: {', '.join(existing_metadata.get('authors', []))}
Subjects: {', '.join(existing_metadata.get('subjects', []))}
"""

        return prompt.format(text=text, existing=existing_str)