ebk 0.1.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release.
- ebk/__init__.py +35 -0
- ebk/ai/__init__.py +23 -0
- ebk/ai/knowledge_graph.py +443 -0
- ebk/ai/llm_providers/__init__.py +21 -0
- ebk/ai/llm_providers/base.py +230 -0
- ebk/ai/llm_providers/ollama.py +362 -0
- ebk/ai/metadata_enrichment.py +396 -0
- ebk/ai/question_generator.py +328 -0
- ebk/ai/reading_companion.py +224 -0
- ebk/ai/semantic_search.py +434 -0
- ebk/ai/text_extractor.py +394 -0
- ebk/cli.py +2828 -680
- ebk/config.py +260 -22
- ebk/db/__init__.py +37 -0
- ebk/db/migrations.py +180 -0
- ebk/db/models.py +526 -0
- ebk/db/session.py +144 -0
- ebk/decorators.py +132 -0
- ebk/exports/base_exporter.py +218 -0
- ebk/exports/html_library.py +1390 -0
- ebk/exports/html_utils.py +117 -0
- ebk/exports/hugo.py +7 -3
- ebk/exports/jinja_export.py +287 -0
- ebk/exports/multi_facet_export.py +164 -0
- ebk/exports/symlink_dag.py +479 -0
- ebk/extract_metadata.py +76 -7
- ebk/library_db.py +899 -0
- ebk/plugins/__init__.py +42 -0
- ebk/plugins/base.py +502 -0
- ebk/plugins/hooks.py +444 -0
- ebk/plugins/registry.py +500 -0
- ebk/repl/__init__.py +9 -0
- ebk/repl/find.py +126 -0
- ebk/repl/grep.py +174 -0
- ebk/repl/shell.py +1677 -0
- ebk/repl/text_utils.py +320 -0
- ebk/search_parser.py +413 -0
- ebk/server.py +1633 -0
- ebk/services/__init__.py +11 -0
- ebk/services/import_service.py +442 -0
- ebk/services/tag_service.py +282 -0
- ebk/services/text_extraction.py +317 -0
- ebk/similarity/__init__.py +77 -0
- ebk/similarity/base.py +154 -0
- ebk/similarity/core.py +445 -0
- ebk/similarity/extractors.py +168 -0
- ebk/similarity/metrics.py +376 -0
- ebk/vfs/__init__.py +101 -0
- ebk/vfs/base.py +301 -0
- ebk/vfs/library_vfs.py +124 -0
- ebk/vfs/nodes/__init__.py +54 -0
- ebk/vfs/nodes/authors.py +196 -0
- ebk/vfs/nodes/books.py +480 -0
- ebk/vfs/nodes/files.py +155 -0
- ebk/vfs/nodes/metadata.py +385 -0
- ebk/vfs/nodes/root.py +100 -0
- ebk/vfs/nodes/similar.py +165 -0
- ebk/vfs/nodes/subjects.py +184 -0
- ebk/vfs/nodes/tags.py +371 -0
- ebk/vfs/resolver.py +228 -0
- ebk-0.3.2.dist-info/METADATA +755 -0
- ebk-0.3.2.dist-info/RECORD +69 -0
- {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/WHEEL +1 -1
- ebk-0.3.2.dist-info/licenses/LICENSE +21 -0
- ebk/imports/__init__.py +0 -0
- ebk/imports/calibre.py +0 -144
- ebk/imports/ebooks.py +0 -116
- ebk/llm.py +0 -58
- ebk/manager.py +0 -44
- ebk/merge.py +0 -308
- ebk/streamlit/__init__.py +0 -0
- ebk/streamlit/__pycache__/__init__.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/display.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/filters.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/utils.cpython-310.pyc +0 -0
- ebk/streamlit/app.py +0 -185
- ebk/streamlit/display.py +0 -168
- ebk/streamlit/filters.py +0 -151
- ebk/streamlit/utils.py +0 -58
- ebk/utils.py +0 -311
- ebk-0.1.0.dist-info/METADATA +0 -457
- ebk-0.1.0.dist-info/RECORD +0 -29
- {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/entry_points.txt +0 -0
- {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/top_level.txt +0 -0
ebk/__init__.py
CHANGED
@@ -0,0 +1,35 @@
+"""
+ebk - A powerful eBook metadata management tool with SQLAlchemy + SQLite backend.
+
+Main API:
+    from ebk.library_db import Library
+    from pathlib import Path
+
+    # Open or create a library
+    lib = Library.open(Path("/path/to/library"))
+
+    # Add a book
+    book = lib.add_book(
+        Path("book.pdf"),
+        metadata={"title": "My Book", "creators": ["Author"]},
+        extract_text=True
+    )
+
+    # Search with full-text search
+    results = lib.search("python programming", limit=50)
+
+    # Query with fluent API
+    results = (lib.query()
+               .filter_by_language("en")
+               .filter_by_author("Knuth")
+               .limit(20)
+               .all())
+
+    # Always close when done
+    lib.close()
+"""
+
+from .library_db import Library
+
+__version__ = "0.3.1"
+__all__ = ["Library"]
ebk/ai/__init__.py
ADDED
@@ -0,0 +1,23 @@
+"""
+AI-powered features for ebk: Knowledge graphs, semantic search, and intelligent reading assistance.
+"""
+
+from .knowledge_graph import KnowledgeGraph, ConceptNode, ConceptRelation
+from .text_extractor import TextExtractor, ChapterExtractor
+from .semantic_search import SemanticSearch, EmbeddingStore
+from .reading_companion import ReadingCompanion, ReadingSession
+from .question_generator import QuestionGenerator, QuizBuilder
+
+__all__ = [
+    'KnowledgeGraph',
+    'ConceptNode',
+    'ConceptRelation',
+    'TextExtractor',
+    'ChapterExtractor',
+    'SemanticSearch',
+    'EmbeddingStore',
+    'ReadingCompanion',
+    'ReadingSession',
+    'QuestionGenerator',
+    'QuizBuilder'
+]
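The re-exports above define the public AI surface of the subpackage. As a quick smoke test of the import path — a sketch, assuming the AI dependencies (networkx, numpy) are installed:

from ebk.ai import (KnowledgeGraph, ConceptNode, ConceptRelation,
                    TextExtractor, SemanticSearch, ReadingCompanion,
                    QuestionGenerator, QuizBuilder)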
ebk/ai/knowledge_graph.py
ADDED
@@ -0,0 +1,443 @@
+"""
+Knowledge Graph implementation for connecting concepts across books.
+"""
+
+import json
+import hashlib
+from pathlib import Path
+from typing import Dict, List, Optional, Set, Tuple, Any
+from dataclasses import dataclass, field
+from datetime import datetime
+import networkx as nx
+import numpy as np
+from collections import defaultdict
+
+
+@dataclass
+class ConceptNode:
+    """Represents a concept/idea extracted from books."""
+    id: str
+    name: str
+    description: str
+    source_books: List[str] = field(default_factory=list)
+    contexts: List[Dict[str, Any]] = field(default_factory=list)
+    keywords: List[str] = field(default_factory=list)
+    importance_score: float = 0.0
+    created_at: datetime = field(default_factory=datetime.now)
+
+    def add_context(self, book_id: str, page: int, quote: str, chapter: str = None):
+        """Add a context where this concept appears."""
+        self.contexts.append({
+            'book_id': book_id,
+            'page': page,
+            'quote': quote,
+            'chapter': chapter,
+            'timestamp': datetime.now().isoformat()
+        })
+        if book_id not in self.source_books:
+            self.source_books.append(book_id)
+
+    def to_dict(self) -> Dict:
+        """Convert to dictionary for serialization."""
+        return {
+            'id': self.id,
+            'name': self.name,
+            'description': self.description,
+            'source_books': self.source_books,
+            'contexts': self.contexts,
+            'keywords': self.keywords,
+            'importance_score': self.importance_score,
+            'created_at': self.created_at.isoformat()
+        }
+
+
+@dataclass
+class ConceptRelation:
+    """Represents a relationship between two concepts."""
+    source_id: str
+    target_id: str
+    relation_type: str  # 'supports', 'contradicts', 'extends', 'examples', 'causes', etc.
+    strength: float = 1.0
+    evidence: List[Dict[str, Any]] = field(default_factory=list)
+
+    def add_evidence(self, book_id: str, description: str):
+        """Add evidence for this relationship."""
+        self.evidence.append({
+            'book_id': book_id,
+            'description': description,
+            'timestamp': datetime.now().isoformat()
+        })
+
+
+class KnowledgeGraph:
+    """
+    A knowledge graph that connects concepts across multiple books.
+    Uses NetworkX for graph operations and provides rich querying capabilities.
+    """
+
+    def __init__(self, library_path: Path):
+        self.library_path = Path(library_path)
+        self.graph_path = self.library_path / '.knowledge_graph'
+        self.graph_path.mkdir(exist_ok=True)
+
+        self.graph = nx.DiGraph()
+        self.concepts: Dict[str, ConceptNode] = {}
+        self.concept_index: Dict[str, List[str]] = defaultdict(list)  # keyword -> concept_ids
+        self.book_concepts: Dict[str, Set[str]] = defaultdict(set)  # book_id -> concept_ids
+
+        self.load_graph()
+
+    def generate_concept_id(self, name: str, context: str = "") -> str:
+        """Generate a unique ID for a concept."""
+        content = f"{name.lower()}:{context}"
+        return hashlib.md5(content.encode()).hexdigest()[:12]
+
+    def add_concept(self, name: str, description: str,
+                    book_id: str = None, page: int = None,
+                    quote: str = None, keywords: List[str] = None) -> ConceptNode:
+        """Add a new concept or update existing one."""
+        concept_id = self.generate_concept_id(name, description[:50])
+
+        if concept_id in self.concepts:
+            concept = self.concepts[concept_id]
+            if book_id and quote:
+                concept.add_context(book_id, page, quote)
+        else:
+            concept = ConceptNode(
+                id=concept_id,
+                name=name,
+                description=description,
+                keywords=keywords or self._extract_keywords(name, description)
+            )
+            if book_id and quote:
+                concept.add_context(book_id, page, quote)
+
+        self.concepts[concept_id] = concept
+        self.graph.add_node(concept_id, **concept.to_dict())
+
+        # Update indices
+        for keyword in concept.keywords:
+            self.concept_index[keyword.lower()].append(concept_id)
+        if book_id:
+            self.book_concepts[book_id].add(concept_id)
+
+        return concept
+
+    def add_relation(self, source_name: str, target_name: str,
+                     relation_type: str, strength: float = 1.0,
+                     book_id: str = None, evidence: str = None) -> ConceptRelation:
+        """Add a relationship between two concepts."""
+        source_id = self.generate_concept_id(source_name, "")
+        target_id = self.generate_concept_id(target_name, "")
+
+        # Ensure both concepts exist
+        if source_id not in self.concepts or target_id not in self.concepts:
+            raise ValueError(f"Both concepts must exist before creating a relation")
+
+        relation = ConceptRelation(
+            source_id=source_id,
+            target_id=target_id,
+            relation_type=relation_type,
+            strength=strength
+        )
+
+        if book_id and evidence:
+            relation.add_evidence(book_id, evidence)
+
+        self.graph.add_edge(
+            source_id, target_id,
+            type=relation_type,
+            strength=strength,
+            evidence=relation.evidence
+        )
+
+        return relation
+
+    def find_concept_path(self, start_concept: str, end_concept: str) -> List[str]:
+        """Find the shortest path between two concepts."""
+        start_id = self.generate_concept_id(start_concept, "")
+        end_id = self.generate_concept_id(end_concept, "")
+
+        if start_id not in self.graph or end_id not in self.graph:
+            return []
+
+        try:
+            path = nx.shortest_path(self.graph, start_id, end_id)
+            return [self.concepts[node_id].name for node_id in path]
+        except nx.NetworkXNoPath:
+            return []
+
+    def find_related_concepts(self, concept_name: str,
+                              max_distance: int = 2,
+                              min_strength: float = 0.5) -> List[Tuple[str, float]]:
+        """Find concepts related to a given concept within a certain distance."""
+        concept_id = self.generate_concept_id(concept_name, "")
+
+        if concept_id not in self.graph:
+            # Try fuzzy matching
+            concept_id = self._fuzzy_find_concept(concept_name)
+            if not concept_id:
+                return []
+
+        related = []
+        visited = set()
+
+        # BFS with distance tracking
+        queue = [(concept_id, 0, 1.0)]
+
+        while queue:
+            current_id, distance, accumulated_strength = queue.pop(0)
+
+            if current_id in visited or distance > max_distance:
+                continue
+
+            visited.add(current_id)
+
+            if current_id != concept_id and accumulated_strength >= min_strength:
+                concept = self.concepts[current_id]
+                related.append((concept.name, accumulated_strength))
+
+            # Explore neighbors
+            for neighbor in self.graph.neighbors(current_id):
+                edge_data = self.graph[current_id][neighbor]
+                new_strength = accumulated_strength * edge_data.get('strength', 1.0)
+                queue.append((neighbor, distance + 1, new_strength))
+
+        # Sort by relevance (accumulated strength)
+        related.sort(key=lambda x: x[1], reverse=True)
+        return related
+
+    def get_concept_connections(self, book_id: str) -> Dict[str, List[str]]:
+        """Get all concept connections for a specific book."""
+        book_concept_ids = self.book_concepts.get(book_id, set())
+        connections = {}
+
+        for concept_id in book_concept_ids:
+            concept = self.concepts[concept_id]
+            neighbors = []
+
+            for neighbor_id in self.graph.neighbors(concept_id):
+                neighbor = self.concepts[neighbor_id]
+                edge_data = self.graph[concept_id][neighbor_id]
+                neighbors.append({
+                    'name': neighbor.name,
+                    'relation': edge_data.get('type', 'related'),
+                    'strength': edge_data.get('strength', 1.0)
+                })
+
+            if neighbors:
+                connections[concept.name] = neighbors
+
+        return connections
+
+    def generate_reading_path(self, start_topic: str,
+                              end_topic: str,
+                              available_books: List[str]) -> List[Dict[str, Any]]:
+        """
+        Generate a reading path from one topic to another using available books.
+        Returns a sequence of books and the concepts they'll teach.
+        """
+        start_concepts = self._find_concepts_by_topic(start_topic)
+        end_concepts = self._find_concepts_by_topic(end_topic)
+
+        if not start_concepts or not end_concepts:
+            return []
+
+        # Find paths between all concept pairs
+        all_paths = []
+        for start_id in start_concepts:
+            for end_id in end_concepts:
+                try:
+                    path = nx.shortest_path(self.graph, start_id, end_id)
+                    all_paths.append(path)
+                except nx.NetworkXNoPath:
+                    continue
+
+        if not all_paths:
+            return []
+
+        # Select the best path (shortest with most book coverage)
+        best_path = min(all_paths, key=len)
+
+        # Map concepts to books
+        reading_sequence = []
+        covered_concepts = set()
+
+        for concept_id in best_path:
+            if concept_id in covered_concepts:
+                continue
+
+            concept = self.concepts[concept_id]
+            # Find which available book best covers this concept
+            best_book = None
+            max_coverage = 0
+
+            for book_id in concept.source_books:
+                if book_id in available_books:
+                    coverage = len([c for c in concept.contexts if c['book_id'] == book_id])
+                    if coverage > max_coverage:
+                        max_coverage = coverage
+                        best_book = book_id
+
+            if best_book:
+                reading_sequence.append({
+                    'book_id': best_book,
+                    'concept': concept.name,
+                    'description': concept.description,
+                    'why': f"Bridges understanding from {start_topic} towards {end_topic}"
+                })
+                covered_concepts.add(concept_id)
+
+        return reading_sequence
+
+    def calculate_concept_importance(self) -> Dict[str, float]:
+        """
+        Calculate importance scores for all concepts using PageRank-like algorithm.
+        """
+        if not self.graph.nodes():
+            return {}
+
+        # Calculate PageRank
+        pagerank_scores = nx.pagerank(self.graph, weight='strength')
+
+        # Update concept importance scores
+        for concept_id, score in pagerank_scores.items():
+            if concept_id in self.concepts:
+                self.concepts[concept_id].importance_score = score
+
+        return pagerank_scores
+
+    def get_key_concepts(self, top_n: int = 10) -> List[ConceptNode]:
+        """Get the most important concepts in the knowledge graph."""
+        self.calculate_concept_importance()
+
+        sorted_concepts = sorted(
+            self.concepts.values(),
+            key=lambda c: c.importance_score,
+            reverse=True
+        )
+
+        return sorted_concepts[:top_n]
+
+    def export_for_visualization(self) -> Dict[str, Any]:
+        """Export graph data for visualization tools."""
+        nodes = []
+        edges = []
+
+        for concept_id, concept in self.concepts.items():
+            nodes.append({
+                'id': concept_id,
+                'label': concept.name,
+                'title': concept.description,
+                'value': concept.importance_score * 100,
+                'group': len(concept.source_books)  # Group by number of source books
+            })
+
+        for source, target, data in self.graph.edges(data=True):
+            edges.append({
+                'from': source,
+                'to': target,
+                'label': data.get('type', 'related'),
+                'value': data.get('strength', 1.0)
+            })
+
+        return {
+            'nodes': nodes,
+            'edges': edges,
+            'metadata': {
+                'total_concepts': len(self.concepts),
+                'total_relations': self.graph.number_of_edges(),
+                'books_indexed': len(self.book_concepts)
+            }
+        }
+
+    def save_graph(self):
+        """Persist the knowledge graph to disk."""
+        # Save concepts
+        concepts_data = {
+            cid: concept.to_dict()
+            for cid, concept in self.concepts.items()
+        }
+        with open(self.graph_path / 'concepts.json', 'w') as f:
+            json.dump(concepts_data, f, indent=2)
+
+        # Save graph structure
+        graph_data = nx.node_link_data(self.graph)
+        with open(self.graph_path / 'graph.json', 'w') as f:
+            json.dump(graph_data, f, indent=2)
+
+        # Save indices
+        indices = {
+            'concept_index': dict(self.concept_index),
+            'book_concepts': {k: list(v) for k, v in self.book_concepts.items()}
+        }
+        with open(self.graph_path / 'indices.json', 'w') as f:
+            json.dump(indices, f, indent=2)
+
+    def load_graph(self):
+        """Load the knowledge graph from disk."""
+        concepts_file = self.graph_path / 'concepts.json'
+        graph_file = self.graph_path / 'graph.json'
+        indices_file = self.graph_path / 'indices.json'
+
+        if concepts_file.exists():
+            with open(concepts_file, 'r') as f:
+                concepts_data = json.load(f)
+                for cid, cdata in concepts_data.items():
+                    # Reconstruct ConceptNode
+                    cdata['created_at'] = datetime.fromisoformat(cdata['created_at'])
+                    self.concepts[cid] = ConceptNode(**{
+                        k: v for k, v in cdata.items()
+                        if k in ConceptNode.__dataclass_fields__
+                    })
+
+        if graph_file.exists():
+            with open(graph_file, 'r') as f:
+                graph_data = json.load(f)
+                self.graph = nx.node_link_graph(graph_data)
+
+        if indices_file.exists():
+            with open(indices_file, 'r') as f:
+                indices = json.load(f)
+                self.concept_index = defaultdict(list, indices.get('concept_index', {}))
+                self.book_concepts = defaultdict(
+                    set,
+                    {k: set(v) for k, v in indices.get('book_concepts', {}).items()}
+                )
+
+    def _extract_keywords(self, name: str, description: str) -> List[str]:
+        """Extract keywords from concept name and description."""
+        # Simple keyword extraction - can be enhanced with NLP
+        import re
+        text = f"{name} {description}".lower()
+        words = re.findall(r'\b[a-z]+\b', text)
+        # Filter common words and return unique keywords
+        stopwords = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'as', 'is', 'was', 'are', 'been'}
+        keywords = list(set(w for w in words if w not in stopwords and len(w) > 3))
+        return keywords[:10]  # Limit to 10 keywords
+
+    def _fuzzy_find_concept(self, name: str) -> Optional[str]:
+        """Find concept by fuzzy matching the name."""
+        name_lower = name.lower()
+        for concept_id, concept in self.concepts.items():
+            if name_lower in concept.name.lower() or concept.name.lower() in name_lower:
+                return concept_id
+        return None
+
+    def _find_concepts_by_topic(self, topic: str) -> List[str]:
+        """Find all concepts related to a topic."""
+        topic_lower = topic.lower()
+        related_concepts = []
+
+        # Search in concept names and descriptions
+        for concept_id, concept in self.concepts.items():
+            if (topic_lower in concept.name.lower() or
+                topic_lower in concept.description.lower() or
+                any(topic_lower in kw.lower() for kw in concept.keywords)):
+                related_concepts.append(concept_id)
+
+        # Search in concept index
+        for keyword in topic_lower.split():
+            related_concepts.extend(self.concept_index.get(keyword, []))
+
+        return list(set(related_concepts))
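For orientation, a minimal usage sketch of the KnowledgeGraph API above; the library path, book IDs, pages, and quotes are placeholders. One quirk visible in the diff: add_concept hashes the name plus the first 50 characters of the description to form an ID, while add_relation and find_concept_path hash the name with an empty context, so those lookups only resolve concepts created with an empty description (find_related_concepts at least falls back to fuzzy matching). The sketch uses empty descriptions to keep the IDs consistent:

from pathlib import Path
from ebk.ai.knowledge_graph import KnowledgeGraph

kg = KnowledgeGraph(Path("/tmp/demo-library"))  # placeholder path

# Empty descriptions keep generate_concept_id(name, "") consistent with
# the IDs recomputed inside add_relation / find_concept_path (see note above).
kg.add_concept("Recursion", "", book_id="book-001", page=12,
               quote="A function that calls itself...")
kg.add_concept("Dynamic Programming", "", book_id="book-002", page=88,
               quote="Overlapping subproblems...")

kg.add_relation("Recursion", "Dynamic Programming", relation_type="extends",
                strength=0.8, book_id="book-002",
                evidence="DP builds on recursive structure")

print(kg.find_concept_path("Recursion", "Dynamic Programming"))
print(kg.find_related_concepts("Recursion", max_distance=2, min_strength=0.3))
print([c.name for c in kg.get_key_concepts(top_n=5)])

kg.save_graph()  # writes concepts.json, graph.json, indices.json under .knowledge_graph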
ebk/ai/llm_providers/__init__.py
ADDED
@@ -0,0 +1,21 @@
+"""
+LLM Provider Abstractions for EBK.
+
+Provides a unified interface for various LLM providers including:
+- Ollama (local and remote)
+- OpenAI
+- Anthropic
+- Any OpenAI-compatible API
+
+Future: MCP client support for tool calling and web search.
+"""
+
+from .base import BaseLLMProvider, LLMConfig, LLMResponse
+from .ollama import OllamaProvider
+
+__all__ = [
+    'BaseLLMProvider',
+    'LLMConfig',
+    'LLMResponse',
+    'OllamaProvider',
+]
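Only the re-exported names are visible in this hunk; the concrete signatures live in base.py (+230) and ollama.py (+362), which are not reproduced in this section. The import surface, as confirmed by __all__ above:

from ebk.ai.llm_providers import BaseLLMProvider, LLMConfig, LLMResponse, OllamaProvider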