agent-brain-rag 1.1.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {agent_brain_rag-1.1.0.dist-info → agent_brain_rag-2.0.0.dist-info}/METADATA +68 -27
- agent_brain_rag-2.0.0.dist-info/RECORD +50 -0
- agent_brain_rag-2.0.0.dist-info/entry_points.txt +4 -0
- {doc_serve_server → agent_brain_server}/__init__.py +1 -1
- {doc_serve_server → agent_brain_server}/api/main.py +90 -26
- {doc_serve_server → agent_brain_server}/api/routers/health.py +4 -2
- {doc_serve_server → agent_brain_server}/api/routers/index.py +1 -1
- {doc_serve_server → agent_brain_server}/api/routers/query.py +3 -3
- agent_brain_server/config/provider_config.py +308 -0
- {doc_serve_server → agent_brain_server}/config/settings.py +12 -1
- agent_brain_server/indexing/__init__.py +40 -0
- {doc_serve_server → agent_brain_server}/indexing/bm25_index.py +1 -1
- {doc_serve_server → agent_brain_server}/indexing/chunking.py +1 -1
- agent_brain_server/indexing/embedding.py +225 -0
- agent_brain_server/indexing/graph_extractors.py +582 -0
- agent_brain_server/indexing/graph_index.py +536 -0
- {doc_serve_server → agent_brain_server}/models/__init__.py +9 -0
- agent_brain_server/models/graph.py +253 -0
- {doc_serve_server → agent_brain_server}/models/health.py +15 -3
- {doc_serve_server → agent_brain_server}/models/query.py +14 -1
- agent_brain_server/providers/__init__.py +64 -0
- agent_brain_server/providers/base.py +251 -0
- agent_brain_server/providers/embedding/__init__.py +23 -0
- agent_brain_server/providers/embedding/cohere.py +163 -0
- agent_brain_server/providers/embedding/ollama.py +150 -0
- agent_brain_server/providers/embedding/openai.py +118 -0
- agent_brain_server/providers/exceptions.py +95 -0
- agent_brain_server/providers/factory.py +157 -0
- agent_brain_server/providers/summarization/__init__.py +41 -0
- agent_brain_server/providers/summarization/anthropic.py +87 -0
- agent_brain_server/providers/summarization/gemini.py +96 -0
- agent_brain_server/providers/summarization/grok.py +95 -0
- agent_brain_server/providers/summarization/ollama.py +114 -0
- agent_brain_server/providers/summarization/openai.py +87 -0
- {doc_serve_server → agent_brain_server}/services/indexing_service.py +43 -4
- {doc_serve_server → agent_brain_server}/services/query_service.py +212 -4
- agent_brain_server/storage/__init__.py +21 -0
- agent_brain_server/storage/graph_store.py +519 -0
- {doc_serve_server → agent_brain_server}/storage/vector_store.py +36 -1
- {doc_serve_server → agent_brain_server}/storage_paths.py +2 -0
- agent_brain_rag-1.1.0.dist-info/RECORD +0 -31
- agent_brain_rag-1.1.0.dist-info/entry_points.txt +0 -3
- doc_serve_server/indexing/__init__.py +0 -19
- doc_serve_server/indexing/embedding.py +0 -274
- doc_serve_server/storage/__init__.py +0 -5
- {agent_brain_rag-1.1.0.dist-info → agent_brain_rag-2.0.0.dist-info}/WHEEL +0 -0
- {doc_serve_server → agent_brain_server}/api/__init__.py +0 -0
- {doc_serve_server → agent_brain_server}/api/routers/__init__.py +0 -0
- {doc_serve_server → agent_brain_server}/config/__init__.py +0 -0
- {doc_serve_server → agent_brain_server}/indexing/document_loader.py +0 -0
- {doc_serve_server → agent_brain_server}/locking.py +0 -0
- {doc_serve_server → agent_brain_server}/models/index.py +0 -0
- {doc_serve_server → agent_brain_server}/project_root.py +0 -0
- {doc_serve_server → agent_brain_server}/runtime.py +0 -0
- {doc_serve_server → agent_brain_server}/services/__init__.py +0 -0
|
@@ -0,0 +1,519 @@
|
|
|
1
|
+
"""Graph store manager for GraphRAG feature (Feature 113).
|
|
2
|
+
|
|
3
|
+
Provides abstraction over graph storage backends:
|
|
4
|
+
- SimplePropertyGraphStore: In-memory graph with JSON persistence (default)
|
|
5
|
+
- Kuzu: High-performance embedded graph database (optional)
|
|
6
|
+
|
|
7
|
+
All graph operations are no-ops when ENABLE_GRAPH_INDEX is False.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
from datetime import datetime, timezone
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Optional
|
|
15
|
+
|
|
16
|
+
from agent_brain_server.config import settings
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class GraphStoreManager:
    """Manages graph storage backends for GraphRAG (Feature 113).

    Supports SimplePropertyGraphStore (default) and Kuzu (optional).
    Implements singleton pattern for consistent graph access. All public
    operations are no-ops when ``settings.ENABLE_GRAPH_INDEX`` is False.

    Attributes:
        persist_dir: Directory for graph persistence.
        store_type: Backend type - "simple" or "kuzu".
    """

    _instance: Optional["GraphStoreManager"] = None

    def __init__(self, persist_dir: Path, store_type: str = "simple") -> None:
        """Initialize graph store manager.

        Args:
            persist_dir: Directory for graph persistence.
            store_type: Backend type - "simple" or "kuzu".
        """
        self.persist_dir = persist_dir
        self.store_type = store_type
        self._graph_store: Optional[Any] = None
        self._initialized = False
        self._entity_count = 0
        self._relationship_count = 0
        self._last_updated: Optional[datetime] = None

    @classmethod
    def get_instance(
        cls,
        persist_dir: Optional[Path] = None,
        store_type: Optional[str] = None,
    ) -> "GraphStoreManager":
        """Get or create singleton instance.

        Args:
            persist_dir: Directory for graph persistence. Defaults to
                ``settings.GRAPH_INDEX_PATH`` when omitted.
            store_type: Backend type - "simple" or "kuzu". Defaults to
                ``settings.GRAPH_STORE_TYPE`` when omitted.

        Returns:
            The singleton GraphStoreManager instance.
        """
        if cls._instance is None:
            if persist_dir is None:
                persist_dir = Path(settings.GRAPH_INDEX_PATH)
            if store_type is None:
                store_type = settings.GRAPH_STORE_TYPE
            cls._instance = cls(persist_dir, store_type)
        return cls._instance

    @classmethod
    def reset_instance(cls) -> None:
        """Reset the singleton instance. Used for testing."""
        cls._instance = None

    def initialize(self) -> None:
        """Initialize the graph store based on store_type.

        For "simple": Uses SimplePropertyGraphStore with JSON persistence.
        For "kuzu": Attempts to use Kuzu, falls back to simple with warning.

        This is a no-op when ENABLE_GRAPH_INDEX is False.
        """
        if not settings.ENABLE_GRAPH_INDEX:
            logger.debug("Graph indexing disabled, skipping initialization")
            return

        if self._initialized:
            logger.debug("Graph store already initialized")
            return

        # Ensure persistence directory exists
        self.persist_dir.mkdir(parents=True, exist_ok=True)

        if self.store_type == "kuzu":
            self._initialize_kuzu_store()
        else:
            self._initialize_simple_store()

        # Try to load existing graph data
        self.load()

        self._initialized = True
        logger.info(
            f"Graph store initialized: type={self.store_type}, "
            f"entities={self._entity_count}, relationships={self._relationship_count}"
        )

    def _initialize_simple_store(self) -> None:
        """Initialize SimplePropertyGraphStore backend."""
        try:
            from llama_index.core.graph_stores import SimplePropertyGraphStore

            self._graph_store = SimplePropertyGraphStore()
            logger.debug("Initialized SimplePropertyGraphStore")
        except ImportError as e:
            logger.warning(f"Failed to import SimplePropertyGraphStore: {e}")
            # Create a minimal fallback store
            self._graph_store = _MinimalGraphStore()
            logger.debug("Using minimal fallback graph store")

    def _initialize_kuzu_store(self) -> None:
        """Initialize Kuzu graph store with fallback to simple."""
        try:
            import kuzu  # noqa: F401 - just check import
            from llama_index.graph_stores.kuzu import KuzuPropertyGraphStore

            kuzu_db_path = self.persist_dir / "kuzu_db"
            kuzu_db_path.mkdir(parents=True, exist_ok=True)

            self._graph_store = KuzuPropertyGraphStore(
                database_path=str(kuzu_db_path),
            )
            logger.debug(f"Initialized KuzuPropertyGraphStore at {kuzu_db_path}")
        except ImportError as e:
            logger.warning(
                f"Kuzu not available ({e}), falling back to SimplePropertyGraphStore. "
                "Install with: pip install llama-index-graph-stores-kuzu"
            )
            # Downgrade the backend so persist()/load() follow the simple path.
            self.store_type = "simple"
            self._initialize_simple_store()

    def persist(self) -> None:
        """Persist graph to disk.

        For SimplePropertyGraphStore, serializes to JSON.
        For Kuzu, data is automatically persisted.

        This is a no-op when ENABLE_GRAPH_INDEX is False or not initialized.
        """
        if not settings.ENABLE_GRAPH_INDEX:
            return

        if not self._initialized or self._graph_store is None:
            logger.debug("Graph store not initialized, skipping persist")
            return

        # Stamp the update time BEFORE writing, so the metadata file below
        # records this persist rather than the previous one (previously the
        # timestamp was set after writing and was always one persist stale).
        self._last_updated = datetime.now(timezone.utc)

        if self.store_type == "simple":
            self._persist_simple_store()

        logger.debug(
            f"Graph persisted: entities={self._entity_count}, "
            f"relationships={self._relationship_count}"
        )

    def _persist_simple_store(self) -> None:
        """Persist SimplePropertyGraphStore to JSON."""
        persist_path = self.persist_dir / "graph_store.json"
        llamaindex_persist_path = self.persist_dir / "graph_store_llamaindex.json"

        try:
            # Try LlamaIndex native persistence first
            graph_store = self._graph_store
            if graph_store is not None and hasattr(graph_store, "persist"):
                graph_store.persist(str(llamaindex_persist_path))
                logger.debug(
                    f"Graph persisted via LlamaIndex to {llamaindex_persist_path}"
                )
            elif graph_store is not None and hasattr(graph_store, "_data"):
                # Minimal store fallback - use our own format
                data = getattr(graph_store, "_data", {})
                with open(persist_path, "w") as f:
                    json.dump(data, f, indent=2, default=str)
                logger.debug(f"Graph persisted to {persist_path}")

            # Always persist metadata separately
            metadata = {
                "entity_count": self._entity_count,
                "relationship_count": self._relationship_count,
                "last_updated": (
                    self._last_updated.isoformat() if self._last_updated else None
                ),
                "store_type": self.store_type,
            }
            metadata_path = self.persist_dir / "graph_metadata.json"
            with open(metadata_path, "w") as f:
                json.dump(metadata, f, indent=2)

        except (OSError, TypeError) as e:
            logger.error(f"Failed to persist graph store: {e}")

    def load(self) -> bool:
        """Load graph from disk.

        For SimplePropertyGraphStore, loads from JSON.
        For Kuzu, data is automatically loaded.

        Returns:
            True if loaded successfully, False otherwise.
        """
        if not settings.ENABLE_GRAPH_INDEX:
            return False

        if self._graph_store is None:
            return False

        if self.store_type == "simple":
            return self._load_simple_store()

        # Kuzu loads automatically, just update counts
        self._update_counts()
        return True

    def _load_simple_store(self) -> bool:
        """Load SimplePropertyGraphStore from persisted data."""
        llamaindex_persist_path = self.persist_dir / "graph_store_llamaindex.json"
        persist_path = self.persist_dir / "graph_store.json"
        metadata_path = self.persist_dir / "graph_metadata.json"

        # Load metadata if available
        if metadata_path.exists():
            try:
                with open(metadata_path) as f:
                    metadata = json.load(f)
                self._entity_count = metadata.get("entity_count", 0)
                self._relationship_count = metadata.get("relationship_count", 0)
                last_updated_str = metadata.get("last_updated")
                if last_updated_str:
                    self._last_updated = datetime.fromisoformat(last_updated_str)
            except (OSError, json.JSONDecodeError) as e:
                logger.warning(f"Failed to load graph metadata: {e}")

        # Try LlamaIndex native load first
        if llamaindex_persist_path.exists():
            try:
                from llama_index.core.graph_stores import SimplePropertyGraphStore

                self._graph_store = SimplePropertyGraphStore.from_persist_path(
                    str(llamaindex_persist_path)
                )
                logger.debug(
                    f"Graph loaded from {llamaindex_persist_path}: "
                    f"entities={self._entity_count}, "
                    f"relationships={self._relationship_count}"
                )
                return True
            except Exception as e:
                logger.warning(f"Failed to load via LlamaIndex: {e}")

        # Fall back to minimal store format
        if persist_path.exists():
            try:
                with open(persist_path) as f:
                    data = json.load(f)

                # Restore minimal store data
                graph_store = self._graph_store
                if graph_store is not None and hasattr(graph_store, "_data"):
                    graph_store._data = data
                    if "entities" in data:
                        graph_store._entities = data.get("entities", {})
                    if "relationships" in data:
                        graph_store._relationships = data.get("relationships", [])

                logger.debug(
                    f"Graph loaded from {persist_path}: "
                    f"entities={self._entity_count}, "
                    f"relationships={self._relationship_count}"
                )
                return True
            except (OSError, json.JSONDecodeError) as e:
                logger.error(f"Failed to load graph store: {e}")
                return False

        logger.debug("No graph data found to load")
        return False

    def _update_counts(self) -> None:
        """Update entity and relationship counts from graph store."""
        if self._graph_store is None:
            return

        try:
            # Try to get counts from graph store
            if hasattr(self._graph_store, "get_triplets"):
                triplets = self._graph_store.get_triplets()
                entities: set[str] = set()
                for triplet in triplets:
                    if hasattr(triplet, "subject"):
                        entities.add(triplet.subject)
                    if hasattr(triplet, "object"):
                        entities.add(triplet.object)
                self._entity_count = len(entities)
                self._relationship_count = len(triplets)
            elif hasattr(self._graph_store, "_entities"):
                self._entity_count = len(self._graph_store._entities)
                self._relationship_count = len(
                    getattr(self._graph_store, "_relationships", [])
                )
        except Exception as e:
            logger.warning(f"Failed to update graph counts: {e}")

    def add_triplet(
        self,
        subject: str,
        predicate: str,
        obj: str,
        subject_type: Optional[str] = None,
        object_type: Optional[str] = None,
        source_chunk_id: Optional[str] = None,
    ) -> bool:
        """Add a triplet to the graph.

        Args:
            subject: Subject entity.
            predicate: Relationship type.
            obj: Object entity.
            subject_type: Optional type for subject.
            object_type: Optional type for object.
            source_chunk_id: Optional source chunk ID.

        Returns:
            True if added successfully, False otherwise.
        """
        if not settings.ENABLE_GRAPH_INDEX:
            return False

        if not self._initialized or self._graph_store is None:
            return False

        try:
            if hasattr(self._graph_store, "upsert_triplet"):
                self._graph_store.upsert_triplet(
                    subject=subject,
                    predicate=predicate,
                    object_=obj,
                )
            elif hasattr(self._graph_store, "add_triplet"):
                self._graph_store.add_triplet(subject, predicate, obj)
            elif hasattr(self._graph_store, "_add_triplet"):
                # Minimal store fallback
                self._graph_store._add_triplet(
                    subject, predicate, obj, subject_type, object_type, source_chunk_id
                )

            # Update counts. The minimal store tracks entities exactly, so
            # read from it when possible; otherwise fall back to a one-new-
            # entity-per-triplet approximation (previously an obfuscated
            # `max(c, c + 1)` that always over-counted by exactly one).
            if hasattr(self._graph_store, "_entities"):
                self._entity_count = len(self._graph_store._entities)
                self._relationship_count = len(
                    getattr(self._graph_store, "_relationships", [])
                )
            else:
                self._entity_count += 1  # Approximate
                self._relationship_count += 1
            self._last_updated = datetime.now(timezone.utc)

            return True
        except Exception as e:
            logger.error(f"Failed to add triplet: {e}")
            return False

    def clear(self) -> None:
        """Clear all graph data.

        This is a no-op when ENABLE_GRAPH_INDEX is False.
        """
        if not settings.ENABLE_GRAPH_INDEX:
            return

        if self._graph_store is not None:
            if hasattr(self._graph_store, "clear"):
                self._graph_store.clear()
            elif hasattr(self._graph_store, "_data"):
                self._graph_store._data = {}

        self._entity_count = 0
        self._relationship_count = 0
        self._last_updated = None

        # Remove ALL persisted artifacts. Previously only graph_store.json
        # was deleted, so the LlamaIndex snapshot and metadata survived and
        # the "cleared" graph was silently reloaded on the next initialize().
        for filename in (
            "graph_store.json",
            "graph_store_llamaindex.json",
            "graph_metadata.json",
        ):
            stale_path = self.persist_dir / filename
            if stale_path.exists():
                stale_path.unlink()

        logger.info("Graph store cleared")

    @property
    def is_initialized(self) -> bool:
        """Check if the graph store is initialized."""
        return self._initialized

    @property
    def entity_count(self) -> int:
        """Return number of entities in graph."""
        return self._entity_count

    @property
    def relationship_count(self) -> int:
        """Return number of relationships in graph."""
        return self._relationship_count

    @property
    def last_updated(self) -> Optional[datetime]:
        """Return timestamp of last update."""
        return self._last_updated

    @property
    def graph_store(self) -> Optional[Any]:
        """Return the underlying graph store instance."""
        return self._graph_store
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
class _MinimalGraphStore:
|
|
423
|
+
"""Minimal fallback graph store when LlamaIndex is not available.
|
|
424
|
+
|
|
425
|
+
Provides basic in-memory graph storage with JSON serialization.
|
|
426
|
+
"""
|
|
427
|
+
|
|
428
|
+
def __init__(self) -> None:
|
|
429
|
+
"""Initialize minimal graph store."""
|
|
430
|
+
self._data: dict[str, Any] = {
|
|
431
|
+
"entities": {},
|
|
432
|
+
"relationships": [],
|
|
433
|
+
}
|
|
434
|
+
self._entities: dict[str, dict[str, Any]] = {}
|
|
435
|
+
self._relationships: list[dict[str, Any]] = []
|
|
436
|
+
|
|
437
|
+
def _add_triplet(
|
|
438
|
+
self,
|
|
439
|
+
subject: str,
|
|
440
|
+
predicate: str,
|
|
441
|
+
obj: str,
|
|
442
|
+
subject_type: Optional[str] = None,
|
|
443
|
+
object_type: Optional[str] = None,
|
|
444
|
+
source_chunk_id: Optional[str] = None,
|
|
445
|
+
) -> None:
|
|
446
|
+
"""Add a triplet to the minimal store."""
|
|
447
|
+
# Add entities
|
|
448
|
+
if subject not in self._entities:
|
|
449
|
+
self._entities[subject] = {"name": subject, "type": subject_type}
|
|
450
|
+
if obj not in self._entities:
|
|
451
|
+
self._entities[obj] = {"name": obj, "type": object_type}
|
|
452
|
+
|
|
453
|
+
# Add relationship
|
|
454
|
+
self._relationships.append(
|
|
455
|
+
{
|
|
456
|
+
"subject": subject,
|
|
457
|
+
"predicate": predicate,
|
|
458
|
+
"object": obj,
|
|
459
|
+
"source_chunk_id": source_chunk_id,
|
|
460
|
+
}
|
|
461
|
+
)
|
|
462
|
+
|
|
463
|
+
# Update data dict
|
|
464
|
+
self._data["entities"] = self._entities
|
|
465
|
+
self._data["relationships"] = self._relationships
|
|
466
|
+
|
|
467
|
+
def clear(self) -> None:
|
|
468
|
+
"""Clear all data."""
|
|
469
|
+
self._data = {"entities": {}, "relationships": []}
|
|
470
|
+
self._entities = {}
|
|
471
|
+
self._relationships = []
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
# Module-level singleton access
_graph_store_manager: Optional[GraphStoreManager] = None


def get_graph_store_manager(
    persist_dir: Optional[Path] = None,
    store_type: Optional[str] = None,
) -> GraphStoreManager:
    """Return the process-wide graph store manager, creating it on first use.

    Args:
        persist_dir: Directory for graph persistence.
        store_type: Backend type - "simple" or "kuzu".

    Returns:
        The singleton GraphStoreManager instance.
    """
    global _graph_store_manager
    if _graph_store_manager is not None:
        return _graph_store_manager
    _graph_store_manager = GraphStoreManager.get_instance(persist_dir, store_type)
    return _graph_store_manager
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
def initialize_graph_store(
    persist_dir: Optional[Path] = None,
    store_type: Optional[str] = None,
) -> GraphStoreManager:
    """Fetch the global graph store manager and run its initialization.

    Args:
        persist_dir: Directory for graph persistence.
        store_type: Backend type - "simple" or "kuzu".

    Returns:
        The initialized GraphStoreManager instance.
    """
    mgr = get_graph_store_manager(persist_dir, store_type)
    mgr.initialize()
    return mgr
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
def reset_graph_store_manager() -> None:
    """Discard the module-level and class-level singletons. Used for testing."""
    global _graph_store_manager
    GraphStoreManager.reset_instance()
    _graph_store_manager = None
|
|
@@ -9,7 +9,7 @@ from typing import Any, Optional
|
|
|
9
9
|
import chromadb
|
|
10
10
|
from chromadb.config import Settings as ChromaSettings
|
|
11
11
|
|
|
12
|
-
from
|
|
12
|
+
from agent_brain_server.config import settings
|
|
13
13
|
|
|
14
14
|
logger = logging.getLogger(__name__)
|
|
15
15
|
|
|
@@ -260,6 +260,41 @@ class VectorStoreManager:
|
|
|
260
260
|
return 0
|
|
261
261
|
return self._collection.count()
|
|
262
262
|
|
|
263
|
+
async def get_by_id(self, chunk_id: str) -> Optional[dict[str, Any]]:
|
|
264
|
+
"""
|
|
265
|
+
Get a document by its chunk ID.
|
|
266
|
+
|
|
267
|
+
Args:
|
|
268
|
+
chunk_id: The unique identifier of the chunk.
|
|
269
|
+
|
|
270
|
+
Returns:
|
|
271
|
+
Dictionary with 'text' and 'metadata' keys, or None if not found.
|
|
272
|
+
"""
|
|
273
|
+
if not self.is_initialized:
|
|
274
|
+
return None
|
|
275
|
+
|
|
276
|
+
async with self._lock:
|
|
277
|
+
assert self._collection is not None
|
|
278
|
+
try:
|
|
279
|
+
results = self._collection.get(
|
|
280
|
+
ids=[chunk_id],
|
|
281
|
+
include=["documents", "metadatas"], # type: ignore[list-item]
|
|
282
|
+
)
|
|
283
|
+
|
|
284
|
+
if results["ids"] and results["ids"]:
|
|
285
|
+
documents = results.get("documents", [[]])
|
|
286
|
+
metadatas = results.get("metadatas", [[]])
|
|
287
|
+
text = documents[0] if documents else ""
|
|
288
|
+
metadata = metadatas[0] if metadatas else {}
|
|
289
|
+
return {
|
|
290
|
+
"text": text,
|
|
291
|
+
"metadata": metadata if metadata else {},
|
|
292
|
+
}
|
|
293
|
+
except Exception as e:
|
|
294
|
+
logger.warning(f"Failed to get document by ID {chunk_id}: {e}")
|
|
295
|
+
|
|
296
|
+
return None
|
|
297
|
+
|
|
263
298
|
async def delete_collection(self) -> None:
|
|
264
299
|
"""
|
|
265
300
|
Delete the entire collection.
|
|
@@ -12,6 +12,7 @@ SUBDIRECTORIES = [
|
|
|
12
12
|
"data/chroma_db",
|
|
13
13
|
"data/bm25_index",
|
|
14
14
|
"data/llamaindex",
|
|
15
|
+
"data/graph_index",
|
|
15
16
|
"logs",
|
|
16
17
|
]
|
|
17
18
|
|
|
@@ -48,6 +49,7 @@ def resolve_storage_paths(state_dir: Path) -> dict[str, Path]:
|
|
|
48
49
|
"chroma_db": state_dir / "data" / "chroma_db",
|
|
49
50
|
"bm25_index": state_dir / "data" / "bm25_index",
|
|
50
51
|
"llamaindex": state_dir / "data" / "llamaindex",
|
|
52
|
+
"graph_index": state_dir / "data" / "graph_index",
|
|
51
53
|
"logs": state_dir / "logs",
|
|
52
54
|
}
|
|
53
55
|
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
doc_serve_server/__init__.py,sha256=CWV76BF1YvkQVzexuEm1sl4Zn4z27wRd_9dWbAZNWa0,95
|
|
2
|
-
doc_serve_server/api/__init__.py,sha256=nvTvO_ahHAAsRDlV3dL_JlNruSdan4kav_P5sT_1PFk,93
|
|
3
|
-
doc_serve_server/api/main.py,sha256=a3g_N9VEY-VAG-HvNXMW7u-xWvwjP3P9BtPlxNgC900,10628
|
|
4
|
-
doc_serve_server/api/routers/__init__.py,sha256=Z3PUKDwxeI8T88xMQeTcQ8nq1bQnu0nYjRnqNIsDUyY,254
|
|
5
|
-
doc_serve_server/api/routers/health.py,sha256=cbiBooeKMKoTpje8xd2Yywpr7fSrCJZP9ubl8EJOjEo,3442
|
|
6
|
-
doc_serve_server/api/routers/index.py,sha256=M4zOlXX_pmMYDcmfK6fh9PX5qqw-7xHuwG-Mp-0CZw0,6754
|
|
7
|
-
doc_serve_server/api/routers/query.py,sha256=zHyzQBPOz2Xz8MAF1rU2reWopYJsXCBWFW2kQe_f4hg,2787
|
|
8
|
-
doc_serve_server/config/__init__.py,sha256=zzDErZGUBwUm5Fk43OHJN7eWpeIw_1kWdnhsN6QQqSc,84
|
|
9
|
-
doc_serve_server/config/settings.py,sha256=ePpGSJ2VVov_LOtQtaOedi_HM-Yf8YcpJQzSyLHfyZQ,2629
|
|
10
|
-
doc_serve_server/indexing/__init__.py,sha256=7P1zcYAQFO4ME6uLtc75LL-GT7dEzXk2yY0JAcP0kRc,587
|
|
11
|
-
doc_serve_server/indexing/bm25_index.py,sha256=0JZ4_T1d9H7FLUDqnnXHcoO2_qrztzXrtNj7HNMg2WQ,5266
|
|
12
|
-
doc_serve_server/indexing/chunking.py,sha256=uGeCRRT19eVcFPpK9KkxI0_1OXjFRQZZI6B5CwCVR-4,29846
|
|
13
|
-
doc_serve_server/indexing/document_loader.py,sha256=D6U3mDl_2jFfT_JxD-5yudtqlJW8Fh0zj5lhz0V0tPs,16090
|
|
14
|
-
doc_serve_server/indexing/embedding.py,sha256=MtL9SLpBQxh99hJ_dufah0xyIGwznHGwnfnqcdyhSCc,9053
|
|
15
|
-
doc_serve_server/locking.py,sha256=yRpswpBem4964gTh1VH4VUPT-vBIRnNEVgWdorJA4Hg,3365
|
|
16
|
-
doc_serve_server/models/__init__.py,sha256=JLVsiVet-JDVC9s6ZefBcTXWZT5TVXrbI0E2B3ejpWM,478
|
|
17
|
-
doc_serve_server/models/health.py,sha256=-NWwyPI0cUK1Sqhf5x0WO-ilyJbwJxvROzeaFn12WDc,3661
|
|
18
|
-
doc_serve_server/models/index.py,sha256=pjDv7phLS6dpiHLlEtcAuXQN_aHIfA_4lMkAZ-NkXZQ,5400
|
|
19
|
-
doc_serve_server/models/query.py,sha256=phl-bJFxhu83k1iP4Cx3GH5qJUQ12Y1Ah_bAwXOJVZM,6054
|
|
20
|
-
doc_serve_server/project_root.py,sha256=HIY5NMRDYWYIT7K6B_UMOGo1zXn9zwAdGI6ApViKI_8,2194
|
|
21
|
-
doc_serve_server/runtime.py,sha256=bcchfDBt_tn2_r-lFhxqKp0RncKrz152D-oM5RcKGrU,3076
|
|
22
|
-
doc_serve_server/services/__init__.py,sha256=E4VPN9Rqa2mxGQQEQn-5IYj63LSPTrA8aIx8ENO5xcc,296
|
|
23
|
-
doc_serve_server/services/indexing_service.py,sha256=rOs0xlMy_1RQuoPt62w428-CshPWj91CFSpkL251TEI,18555
|
|
24
|
-
doc_serve_server/services/query_service.py,sha256=elyKWl02v8BzjdBr8S7KOiUUEdS2Rp3V85ifUU5Bi-g,14852
|
|
25
|
-
doc_serve_server/storage/__init__.py,sha256=T4N0DhDT3av7zn3Ra6uLhMsjL9N5wakqC4cl-QXWvmM,222
|
|
26
|
-
doc_serve_server/storage/vector_store.py,sha256=9pdEPvlf2JAUnwxDpb9CwY_A7sIqv2iu5ddy5SQm-ys,10881
|
|
27
|
-
doc_serve_server/storage_paths.py,sha256=aHSsTwELwIk1VOLO0L6O-RsbRJ1U20jvJ-EQsFKG1a8,1761
|
|
28
|
-
agent_brain_rag-1.1.0.dist-info/METADATA,sha256=dOzH_3y00uehax1LiI5QBHToTwnN09tGyoRygBw_uVA,5230
|
|
29
|
-
agent_brain_rag-1.1.0.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
|
30
|
-
agent_brain_rag-1.1.0.dist-info/entry_points.txt,sha256=DOPhlYyScH9BTPXr9_FJzgRplzHe6bR9q2pTCCg2QOI,59
|
|
31
|
-
agent_brain_rag-1.1.0.dist-info/RECORD,,
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
"""Indexing pipeline components for document processing."""
|
|
2
|
-
|
|
3
|
-
from doc_serve_server.indexing.bm25_index import BM25IndexManager, get_bm25_manager
|
|
4
|
-
from doc_serve_server.indexing.chunking import CodeChunker, ContextAwareChunker
|
|
5
|
-
from doc_serve_server.indexing.document_loader import DocumentLoader
|
|
6
|
-
from doc_serve_server.indexing.embedding import (
|
|
7
|
-
EmbeddingGenerator,
|
|
8
|
-
get_embedding_generator,
|
|
9
|
-
)
|
|
10
|
-
|
|
11
|
-
__all__ = [
|
|
12
|
-
"DocumentLoader",
|
|
13
|
-
"ContextAwareChunker",
|
|
14
|
-
"CodeChunker",
|
|
15
|
-
"EmbeddingGenerator",
|
|
16
|
-
"get_embedding_generator",
|
|
17
|
-
"BM25IndexManager",
|
|
18
|
-
"get_bm25_manager",
|
|
19
|
-
]
|