ebk 0.3.1-py3-none-any.whl → 0.3.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ebk might be problematic.

Files changed (61)
  1. ebk/ai/__init__.py +23 -0
  2. ebk/ai/knowledge_graph.py +443 -0
  3. ebk/ai/llm_providers/__init__.py +21 -0
  4. ebk/ai/llm_providers/base.py +230 -0
  5. ebk/ai/llm_providers/ollama.py +362 -0
  6. ebk/ai/metadata_enrichment.py +396 -0
  7. ebk/ai/question_generator.py +328 -0
  8. ebk/ai/reading_companion.py +224 -0
  9. ebk/ai/semantic_search.py +434 -0
  10. ebk/ai/text_extractor.py +394 -0
  11. ebk/cli.py +1097 -9
  12. ebk/db/__init__.py +37 -0
  13. ebk/db/migrations.py +180 -0
  14. ebk/db/models.py +526 -0
  15. ebk/db/session.py +144 -0
  16. ebk/exports/__init__.py +0 -0
  17. ebk/exports/base_exporter.py +218 -0
  18. ebk/exports/html_library.py +1390 -0
  19. ebk/exports/html_utils.py +117 -0
  20. ebk/exports/hugo.py +59 -0
  21. ebk/exports/jinja_export.py +287 -0
  22. ebk/exports/multi_facet_export.py +164 -0
  23. ebk/exports/symlink_dag.py +479 -0
  24. ebk/exports/zip.py +25 -0
  25. ebk/library_db.py +155 -0
  26. ebk/repl/__init__.py +9 -0
  27. ebk/repl/find.py +126 -0
  28. ebk/repl/grep.py +174 -0
  29. ebk/repl/shell.py +1677 -0
  30. ebk/repl/text_utils.py +320 -0
  31. ebk/services/__init__.py +11 -0
  32. ebk/services/import_service.py +442 -0
  33. ebk/services/tag_service.py +282 -0
  34. ebk/services/text_extraction.py +317 -0
  35. ebk/similarity/__init__.py +77 -0
  36. ebk/similarity/base.py +154 -0
  37. ebk/similarity/core.py +445 -0
  38. ebk/similarity/extractors.py +168 -0
  39. ebk/similarity/metrics.py +376 -0
  40. ebk/vfs/__init__.py +101 -0
  41. ebk/vfs/base.py +301 -0
  42. ebk/vfs/library_vfs.py +124 -0
  43. ebk/vfs/nodes/__init__.py +54 -0
  44. ebk/vfs/nodes/authors.py +196 -0
  45. ebk/vfs/nodes/books.py +480 -0
  46. ebk/vfs/nodes/files.py +155 -0
  47. ebk/vfs/nodes/metadata.py +385 -0
  48. ebk/vfs/nodes/root.py +100 -0
  49. ebk/vfs/nodes/similar.py +165 -0
  50. ebk/vfs/nodes/subjects.py +184 -0
  51. ebk/vfs/nodes/tags.py +371 -0
  52. ebk/vfs/resolver.py +228 -0
  53. {ebk-0.3.1.dist-info → ebk-0.3.2.dist-info}/METADATA +1 -1
  54. ebk-0.3.2.dist-info/RECORD +69 -0
  55. ebk-0.3.2.dist-info/entry_points.txt +2 -0
  56. ebk-0.3.2.dist-info/top_level.txt +1 -0
  57. ebk-0.3.1.dist-info/RECORD +0 -19
  58. ebk-0.3.1.dist-info/entry_points.txt +0 -6
  59. ebk-0.3.1.dist-info/top_level.txt +0 -2
  60. {ebk-0.3.1.dist-info → ebk-0.3.2.dist-info}/WHEEL +0 -0
  61. {ebk-0.3.1.dist-info → ebk-0.3.2.dist-info}/licenses/LICENSE +0 -0
ebk/ai/__init__.py ADDED
@@ -0,0 +1,23 @@
+ """
+ AI-powered features for ebk: Knowledge graphs, semantic search, and intelligent reading assistance.
+ """
+
+ from .knowledge_graph import KnowledgeGraph, ConceptNode, ConceptRelation
+ from .text_extractor import TextExtractor, ChapterExtractor
+ from .semantic_search import SemanticSearch, EmbeddingStore
+ from .reading_companion import ReadingCompanion, ReadingSession
+ from .question_generator import QuestionGenerator, QuizBuilder
+
+ __all__ = [
+     'KnowledgeGraph',
+     'ConceptNode',
+     'ConceptRelation',
+     'TextExtractor',
+     'ChapterExtractor',
+     'SemanticSearch',
+     'EmbeddingStore',
+     'ReadingCompanion',
+     'ReadingSession',
+     'QuestionGenerator',
+     'QuizBuilder'
+ ]
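The new ebk.ai package re-exports its public classes at the top level, so callers can import them without knowing the submodule layout. A minimal import sketch (the library directory name here is hypothetical):

    from pathlib import Path
    from ebk.ai import KnowledgeGraph, SemanticSearch

    # KnowledgeGraph persists its state under <library>/.knowledge_graph/,
    # so the library directory itself must already exist.
    kg = KnowledgeGraph(Path("my_library"))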
ebk/ai/knowledge_graph.py ADDED
@@ -0,0 +1,443 @@
+ """
+ Knowledge Graph implementation for connecting concepts across books.
+ """
+
+ import json
+ import hashlib
+ from pathlib import Path
+ from typing import Dict, List, Optional, Set, Tuple, Any
+ from dataclasses import dataclass, field
+ from datetime import datetime
+ import networkx as nx
+ import numpy as np
+ from collections import defaultdict
+
+
+ @dataclass
+ class ConceptNode:
+     """Represents a concept/idea extracted from books."""
+     id: str
+     name: str
+     description: str
+     source_books: List[str] = field(default_factory=list)
+     contexts: List[Dict[str, Any]] = field(default_factory=list)
+     keywords: List[str] = field(default_factory=list)
+     importance_score: float = 0.0
+     created_at: datetime = field(default_factory=datetime.now)
+
+     def add_context(self, book_id: str, page: int, quote: str, chapter: Optional[str] = None):
+         """Add a context where this concept appears."""
+         self.contexts.append({
+             'book_id': book_id,
+             'page': page,
+             'quote': quote,
+             'chapter': chapter,
+             'timestamp': datetime.now().isoformat()
+         })
+         if book_id not in self.source_books:
+             self.source_books.append(book_id)
+
+     def to_dict(self) -> Dict:
+         """Convert to dictionary for serialization."""
+         return {
+             'id': self.id,
+             'name': self.name,
+             'description': self.description,
+             'source_books': self.source_books,
+             'contexts': self.contexts,
+             'keywords': self.keywords,
+             'importance_score': self.importance_score,
+             'created_at': self.created_at.isoformat()
+         }
+
+
+ @dataclass
+ class ConceptRelation:
+     """Represents a relationship between two concepts."""
+     source_id: str
+     target_id: str
+     relation_type: str  # 'supports', 'contradicts', 'extends', 'examples', 'causes', etc.
+     strength: float = 1.0
+     evidence: List[Dict[str, Any]] = field(default_factory=list)
+
+     def add_evidence(self, book_id: str, description: str):
+         """Add evidence for this relationship."""
+         self.evidence.append({
+             'book_id': book_id,
+             'description': description,
+             'timestamp': datetime.now().isoformat()
+         })
+
+
+ class KnowledgeGraph:
+     """
+     A knowledge graph that connects concepts across multiple books.
+     Uses NetworkX for graph operations and provides rich querying capabilities.
+     """
+
+     def __init__(self, library_path: Path):
+         self.library_path = Path(library_path)
+         self.graph_path = self.library_path / '.knowledge_graph'
+         self.graph_path.mkdir(exist_ok=True)
+
+         self.graph = nx.DiGraph()
+         self.concepts: Dict[str, ConceptNode] = {}
+         self.concept_index: Dict[str, List[str]] = defaultdict(list)  # keyword -> concept_ids
+         self.book_concepts: Dict[str, Set[str]] = defaultdict(set)  # book_id -> concept_ids
+
+         self.load_graph()
+
+     def generate_concept_id(self, name: str, context: str = "") -> str:
+         """Generate a unique ID for a concept."""
+         content = f"{name.lower()}:{context}"
+         return hashlib.md5(content.encode()).hexdigest()[:12]
+
+     def add_concept(self, name: str, description: str,
+                     book_id: Optional[str] = None, page: Optional[int] = None,
+                     quote: Optional[str] = None, keywords: Optional[List[str]] = None) -> ConceptNode:
+         """Add a new concept or update an existing one."""
+         concept_id = self.generate_concept_id(name)  # name-keyed so add_relation/find_concept_path resolve the same ID
+
+         if concept_id in self.concepts:
+             concept = self.concepts[concept_id]
+             if book_id and quote:
+                 concept.add_context(book_id, page, quote)
+         else:
+             concept = ConceptNode(
+                 id=concept_id,
+                 name=name,
+                 description=description,
+                 keywords=keywords or self._extract_keywords(name, description)
+             )
+             if book_id and quote:
+                 concept.add_context(book_id, page, quote)
+
+         self.concepts[concept_id] = concept
+         self.graph.add_node(concept_id, **concept.to_dict())
+
+         # Update indices
+         for keyword in concept.keywords:
+             self.concept_index[keyword.lower()].append(concept_id)
+         if book_id:
+             self.book_concepts[book_id].add(concept_id)
+
+         return concept
+
+     def add_relation(self, source_name: str, target_name: str,
+                      relation_type: str, strength: float = 1.0,
+                      book_id: Optional[str] = None, evidence: Optional[str] = None) -> ConceptRelation:
+         """Add a relationship between two concepts."""
+         source_id = self.generate_concept_id(source_name, "")
+         target_id = self.generate_concept_id(target_name, "")
+
+         # Ensure both concepts exist
+         if source_id not in self.concepts or target_id not in self.concepts:
+             raise ValueError("Both concepts must exist before creating a relation")
+
+         relation = ConceptRelation(
+             source_id=source_id,
+             target_id=target_id,
+             relation_type=relation_type,
+             strength=strength
+         )
+
+         if book_id and evidence:
+             relation.add_evidence(book_id, evidence)
+
+         self.graph.add_edge(
+             source_id, target_id,
+             type=relation_type,
+             strength=strength,
+             evidence=relation.evidence
+         )
+
+         return relation
+
+     def find_concept_path(self, start_concept: str, end_concept: str) -> List[str]:
+         """Find the shortest path between two concepts."""
+         start_id = self.generate_concept_id(start_concept, "")
+         end_id = self.generate_concept_id(end_concept, "")
+
+         if start_id not in self.graph or end_id not in self.graph:
+             return []
+
+         try:
+             path = nx.shortest_path(self.graph, start_id, end_id)
+             return [self.concepts[node_id].name for node_id in path]
+         except nx.NetworkXNoPath:
+             return []
+
+     def find_related_concepts(self, concept_name: str,
+                               max_distance: int = 2,
+                               min_strength: float = 0.5) -> List[Tuple[str, float]]:
+         """Find concepts related to a given concept within a certain distance."""
+         concept_id = self.generate_concept_id(concept_name, "")
+
+         if concept_id not in self.graph:
+             # Try fuzzy matching
+             concept_id = self._fuzzy_find_concept(concept_name)
+             if not concept_id:
+                 return []
+
+         related = []
+         visited = set()
+
+         # BFS with distance tracking
+         queue = [(concept_id, 0, 1.0)]
+
+         while queue:
+             current_id, distance, accumulated_strength = queue.pop(0)
+
+             if current_id in visited or distance > max_distance:
+                 continue
+
+             visited.add(current_id)
+
+             if current_id != concept_id and accumulated_strength >= min_strength:
+                 concept = self.concepts[current_id]
+                 related.append((concept.name, accumulated_strength))
+
+             # Explore neighbors
+             for neighbor in self.graph.neighbors(current_id):
+                 edge_data = self.graph[current_id][neighbor]
+                 new_strength = accumulated_strength * edge_data.get('strength', 1.0)
+                 queue.append((neighbor, distance + 1, new_strength))
+
+         # Sort by relevance (accumulated strength)
+         related.sort(key=lambda x: x[1], reverse=True)
+         return related
+
+     def get_concept_connections(self, book_id: str) -> Dict[str, List[Dict[str, Any]]]:
+         """Get all concept connections for a specific book."""
+         book_concept_ids = self.book_concepts.get(book_id, set())
+         connections = {}
+
+         for concept_id in book_concept_ids:
+             concept = self.concepts[concept_id]
+             neighbors = []
+
+             for neighbor_id in self.graph.neighbors(concept_id):
+                 neighbor = self.concepts[neighbor_id]
+                 edge_data = self.graph[concept_id][neighbor_id]
+                 neighbors.append({
+                     'name': neighbor.name,
+                     'relation': edge_data.get('type', 'related'),
+                     'strength': edge_data.get('strength', 1.0)
+                 })
+
+             if neighbors:
+                 connections[concept.name] = neighbors
+
+         return connections
+
+     def generate_reading_path(self, start_topic: str,
+                               end_topic: str,
+                               available_books: List[str]) -> List[Dict[str, Any]]:
+         """
+         Generate a reading path from one topic to another using available books.
+         Returns a sequence of books and the concepts they'll teach.
+         """
+         start_concepts = self._find_concepts_by_topic(start_topic)
+         end_concepts = self._find_concepts_by_topic(end_topic)
+
+         if not start_concepts or not end_concepts:
+             return []
+
+         # Find paths between all concept pairs
+         all_paths = []
+         for start_id in start_concepts:
+             for end_id in end_concepts:
+                 try:
+                     path = nx.shortest_path(self.graph, start_id, end_id)
+                     all_paths.append(path)
+                 except nx.NetworkXNoPath:
+                     continue
+
+         if not all_paths:
+             return []
+
+         # Select the best path (shortest with most book coverage)
+         best_path = min(all_paths, key=len)
+
+         # Map concepts to books
+         reading_sequence = []
+         covered_concepts = set()
+
+         for concept_id in best_path:
+             if concept_id in covered_concepts:
+                 continue
+
+             concept = self.concepts[concept_id]
+             # Find which available book best covers this concept
+             best_book = None
+             max_coverage = 0
+
+             for book_id in concept.source_books:
+                 if book_id in available_books:
+                     coverage = len([c for c in concept.contexts if c['book_id'] == book_id])
+                     if coverage > max_coverage:
+                         max_coverage = coverage
+                         best_book = book_id
+
+             if best_book:
+                 reading_sequence.append({
+                     'book_id': best_book,
+                     'concept': concept.name,
+                     'description': concept.description,
+                     'why': f"Bridges understanding from {start_topic} towards {end_topic}"
+                 })
+                 covered_concepts.add(concept_id)
+
+         return reading_sequence
+
+     def calculate_concept_importance(self) -> Dict[str, float]:
+         """
+         Calculate importance scores for all concepts using a PageRank-like algorithm.
+         """
+         if not self.graph.nodes():
+             return {}
+
+         # Calculate PageRank
+         pagerank_scores = nx.pagerank(self.graph, weight='strength')
+
+         # Update concept importance scores
+         for concept_id, score in pagerank_scores.items():
+             if concept_id in self.concepts:
+                 self.concepts[concept_id].importance_score = score
+
+         return pagerank_scores
+
+     def get_key_concepts(self, top_n: int = 10) -> List[ConceptNode]:
+         """Get the most important concepts in the knowledge graph."""
+         self.calculate_concept_importance()
+
+         sorted_concepts = sorted(
+             self.concepts.values(),
+             key=lambda c: c.importance_score,
+             reverse=True
+         )
+
+         return sorted_concepts[:top_n]
+
+     def export_for_visualization(self) -> Dict[str, Any]:
+         """Export graph data for visualization tools."""
+         nodes = []
+         edges = []
+
+         for concept_id, concept in self.concepts.items():
+             nodes.append({
+                 'id': concept_id,
+                 'label': concept.name,
+                 'title': concept.description,
+                 'value': concept.importance_score * 100,
+                 'group': len(concept.source_books)  # Group by number of source books
+             })
+
+         for source, target, data in self.graph.edges(data=True):
+             edges.append({
+                 'from': source,
+                 'to': target,
+                 'label': data.get('type', 'related'),
+                 'value': data.get('strength', 1.0)
+             })
+
+         return {
+             'nodes': nodes,
+             'edges': edges,
+             'metadata': {
+                 'total_concepts': len(self.concepts),
+                 'total_relations': self.graph.number_of_edges(),
+                 'books_indexed': len(self.book_concepts)
+             }
+         }
+
+     def save_graph(self):
+         """Persist the knowledge graph to disk."""
+         # Save concepts
+         concepts_data = {
+             cid: concept.to_dict()
+             for cid, concept in self.concepts.items()
+         }
+         with open(self.graph_path / 'concepts.json', 'w') as f:
+             json.dump(concepts_data, f, indent=2)
+
+         # Save graph structure
+         graph_data = nx.node_link_data(self.graph)
+         with open(self.graph_path / 'graph.json', 'w') as f:
+             json.dump(graph_data, f, indent=2)
+
+         # Save indices
+         indices = {
+             'concept_index': dict(self.concept_index),
+             'book_concepts': {k: list(v) for k, v in self.book_concepts.items()}
+         }
+         with open(self.graph_path / 'indices.json', 'w') as f:
+             json.dump(indices, f, indent=2)
+
+     def load_graph(self):
+         """Load the knowledge graph from disk."""
+         concepts_file = self.graph_path / 'concepts.json'
+         graph_file = self.graph_path / 'graph.json'
+         indices_file = self.graph_path / 'indices.json'
+
+         if concepts_file.exists():
+             with open(concepts_file, 'r') as f:
+                 concepts_data = json.load(f)
+             for cid, cdata in concepts_data.items():
+                 # Reconstruct ConceptNode
+                 cdata['created_at'] = datetime.fromisoformat(cdata['created_at'])
+                 self.concepts[cid] = ConceptNode(**{
+                     k: v for k, v in cdata.items()
+                     if k in ConceptNode.__dataclass_fields__
+                 })
+
+         if graph_file.exists():
+             with open(graph_file, 'r') as f:
+                 graph_data = json.load(f)
+             self.graph = nx.node_link_graph(graph_data)
+
+         if indices_file.exists():
+             with open(indices_file, 'r') as f:
+                 indices = json.load(f)
+             self.concept_index = defaultdict(list, indices.get('concept_index', {}))
+             self.book_concepts = defaultdict(
+                 set,
+                 {k: set(v) for k, v in indices.get('book_concepts', {}).items()}
+             )
+
+     def _extract_keywords(self, name: str, description: str) -> List[str]:
+         """Extract keywords from concept name and description."""
+         # Simple keyword extraction - can be enhanced with NLP
+         import re
+         text = f"{name} {description}".lower()
+         words = re.findall(r'\b[a-z]+\b', text)
+         # Filter common words and return unique keywords
+         stopwords = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'as', 'is', 'was', 'are', 'been'}
+         keywords = list(set(w for w in words if w not in stopwords and len(w) > 3))
+         return keywords[:10]  # Limit to 10 keywords
+
+     def _fuzzy_find_concept(self, name: str) -> Optional[str]:
+         """Find a concept by fuzzy-matching the name."""
+         name_lower = name.lower()
+         for concept_id, concept in self.concepts.items():
+             if name_lower in concept.name.lower() or concept.name.lower() in name_lower:
+                 return concept_id
+         return None
+
+     def _find_concepts_by_topic(self, topic: str) -> List[str]:
+         """Find all concepts related to a topic."""
+         topic_lower = topic.lower()
+         related_concepts = []
+
+         # Search in concept names and descriptions
+         for concept_id, concept in self.concepts.items():
+             if (topic_lower in concept.name.lower() or
+                     topic_lower in concept.description.lower() or
+                     any(topic_lower in kw.lower() for kw in concept.keywords)):
+                 related_concepts.append(concept_id)
+
+         # Search in concept index
+         for keyword in topic_lower.split():
+             related_concepts.extend(self.concept_index.get(keyword, []))
+
+         return list(set(related_concepts))
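Taken together, KnowledgeGraph supports a build/query/persist workflow: add concepts with their source contexts, link them, then query paths and neighborhoods. A usage sketch with hypothetical book IDs and concepts, assuming networkx is installed and the library directory exists:

    from pathlib import Path
    from ebk.ai.knowledge_graph import KnowledgeGraph

    kg = KnowledgeGraph(Path("my_library"))  # reloads any previously saved graph

    # Index two concepts from a hypothetical book, then relate them.
    kg.add_concept("Entropy", "Measure of uncertainty in a distribution",
                   book_id="book-001", page=42, quote="Entropy quantifies surprise.")
    kg.add_concept("Cross-entropy", "Expected code length under a mismatched model",
                   book_id="book-001", page=57, quote="Cross-entropy bounds entropy.")
    kg.add_relation("Entropy", "Cross-entropy", relation_type="extends",
                    strength=0.9, book_id="book-001", evidence="Chapter 3 derivation")

    print(kg.find_concept_path("Entropy", "Cross-entropy"))  # ['Entropy', 'Cross-entropy']
    print(kg.find_related_concepts("Entropy"))               # [('Cross-entropy', 0.9)]
    kg.save_graph()  # writes concepts.json, graph.json, indices.json under my_library/.knowledge_graph/

Note that the graph is directed, so paths and neighbor queries follow the source-to-target direction given in add_relation.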
ebk/ai/llm_providers/__init__.py ADDED
@@ -0,0 +1,21 @@
+ """
+ LLM Provider Abstractions for EBK.
+
+ Provides a unified interface for various LLM providers including:
+ - Ollama (local and remote)
+ - OpenAI
+ - Anthropic
+ - Any OpenAI-compatible API
+
+ Future: MCP client support for tool calling and web search.
+ """
+
+ from .base import BaseLLMProvider, LLMConfig, LLMResponse
+ from .ollama import OllamaProvider
+
+ __all__ = [
+     'BaseLLMProvider',
+     'LLMConfig',
+     'LLMResponse',
+     'OllamaProvider',
+ ]
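base.py and ollama.py appear later in the diff (entries 4 and 5 in the file list), so the provider API itself is not visible at this point; the sketch below is inferred from the exported names only, and every field and method name in it is an assumption:

    from ebk.ai.llm_providers import LLMConfig, OllamaProvider

    # Hypothetical signatures; the real ones are defined in
    # ebk/ai/llm_providers/base.py and ollama.py.
    config = LLMConfig(model="llama3")                        # assumed field name
    provider = OllamaProvider(config)                         # assumed constructor
    response = provider.complete("Summarize chapter one")     # assumed method
    print(response.text)                                      # LLMResponse payload, assumed attribute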