ltcai 4.3.3 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -16
- package/docs/CHANGELOG.md +37 -0
- package/docs/V4_4_0_EXTRACTION_REPORT.md +239 -0
- package/lattice_brain/__init__.py +38 -23
- package/lattice_brain/_kg_common.py +11 -1
- package/lattice_brain/context.py +212 -2
- package/lattice_brain/conversations.py +234 -1
- package/lattice_brain/discovery.py +11 -1
- package/lattice_brain/documents.py +11 -1
- package/lattice_brain/graph/__init__.py +28 -0
- package/lattice_brain/graph/_kg_common.py +1123 -0
- package/lattice_brain/graph/curator.py +473 -0
- package/lattice_brain/graph/discovery.py +1455 -0
- package/lattice_brain/graph/documents.py +218 -0
- package/lattice_brain/graph/identity.py +175 -0
- package/lattice_brain/graph/ingest.py +644 -0
- package/lattice_brain/graph/network.py +205 -0
- package/lattice_brain/graph/projection.py +571 -0
- package/lattice_brain/graph/provenance.py +401 -0
- package/lattice_brain/graph/retrieval.py +1341 -0
- package/lattice_brain/graph/schema.py +640 -0
- package/lattice_brain/graph/store.py +237 -0
- package/lattice_brain/graph/write_master.py +225 -0
- package/lattice_brain/identity.py +11 -13
- package/lattice_brain/ingest.py +11 -1
- package/lattice_brain/ingestion.py +318 -0
- package/lattice_brain/memory.py +100 -1
- package/lattice_brain/network.py +11 -1
- package/lattice_brain/portability.py +431 -0
- package/lattice_brain/projection.py +11 -1
- package/lattice_brain/provenance.py +11 -1
- package/lattice_brain/retrieval.py +11 -1
- package/lattice_brain/runtime/__init__.py +32 -0
- package/lattice_brain/runtime/agent_runtime.py +569 -0
- package/lattice_brain/runtime/hooks.py +754 -0
- package/lattice_brain/runtime/multi_agent.py +795 -0
- package/lattice_brain/schema.py +11 -1
- package/lattice_brain/store.py +10 -2
- package/lattice_brain/workflow.py +461 -0
- package/lattice_brain/write_master.py +11 -1
- package/latticeai/__init__.py +1 -1
- package/latticeai/api/agents.py +2 -2
- package/latticeai/api/browser.py +1 -1
- package/latticeai/api/chat.py +1 -1
- package/latticeai/api/computer_use.py +1 -1
- package/latticeai/api/hooks.py +2 -2
- package/latticeai/api/mcp.py +1 -1
- package/latticeai/api/tools.py +1 -1
- package/latticeai/api/workflow_designer.py +2 -2
- package/latticeai/app_factory.py +4 -4
- package/latticeai/brain/__init__.py +24 -6
- package/latticeai/brain/_kg_common.py +11 -1117
- package/latticeai/brain/context.py +12 -208
- package/latticeai/brain/conversations.py +12 -231
- package/latticeai/brain/discovery.py +13 -1451
- package/latticeai/brain/documents.py +13 -214
- package/latticeai/brain/identity.py +11 -169
- package/latticeai/brain/ingest.py +13 -640
- package/latticeai/brain/memory.py +12 -97
- package/latticeai/brain/network.py +12 -200
- package/latticeai/brain/projection.py +13 -567
- package/latticeai/brain/provenance.py +13 -397
- package/latticeai/brain/retrieval.py +13 -1337
- package/latticeai/brain/schema.py +12 -635
- package/latticeai/brain/store.py +13 -233
- package/latticeai/brain/write_master.py +13 -221
- package/latticeai/core/agent.py +1 -1
- package/latticeai/core/agent_registry.py +2 -2
- package/latticeai/core/builtin_hooks.py +2 -2
- package/latticeai/core/graph_curator.py +6 -468
- package/latticeai/core/hooks.py +6 -749
- package/latticeai/core/marketplace.py +1 -1
- package/latticeai/core/multi_agent.py +6 -790
- package/latticeai/core/workflow_engine.py +6 -456
- package/latticeai/core/workspace_os.py +1 -1
- package/latticeai/services/agent_runtime.py +6 -564
- package/latticeai/services/ingestion.py +6 -313
- package/latticeai/services/kg_portability.py +6 -426
- package/latticeai/services/platform_runtime.py +3 -3
- package/latticeai/services/run_executor.py +1 -1
- package/latticeai/services/upload_service.py +1 -1
- package/p_reinforce.py +1 -1
- package/package.json +1 -1
- package/scripts/bump_version.py +1 -1
- package/scripts/wheel_smoke.py +7 -0
- package/src-tauri/Cargo.lock +1 -1
- package/src-tauri/Cargo.toml +1 -1
- package/src-tauri/tauri.conf.json +1 -1
- package/static/app/asset-manifest.json +1 -1
|
@@ -1,1341 +1,17 @@
|
|
|
1
|
-
|
|
1
|
+
"""Deprecated shim: physically moved to lattice_brain.graph.retrieval.
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Kept only for the compatibility window. The module aliases itself to the
|
|
4
|
+
physical module so identity, singletons, and monkeypatching are preserved.
|
|
5
|
+
"""
|
|
4
6
|
|
|
5
|
-
|
|
7
|
+
import sys
|
|
8
|
+
import warnings
|
|
6
9
|
|
|
10
|
+
import lattice_brain.graph.retrieval as _impl
|
|
7
11
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
"Chat", # 대화 세션
|
|
15
|
-
"Document", # 파일 (PDF·PPT·Word·Excel·이미지)
|
|
16
|
-
"CodeFile", # 코드 파일
|
|
17
|
-
"Spreadsheet", # 엑셀/CSV
|
|
18
|
-
"SlideDeck", # 프레젠테이션
|
|
19
|
-
"Image", # 이미지
|
|
20
|
-
"ImageText", # OCR 텍스트
|
|
21
|
-
"Concept", # 개념 / 아이디어 / 기술 용어
|
|
22
|
-
"Person", # 사람
|
|
23
|
-
"Error", # 오류 / 버그
|
|
24
|
-
"Code", # 코드 / 함수
|
|
25
|
-
"Feature", # 소프트웨어 기능
|
|
26
|
-
"Task", # 할 일
|
|
27
|
-
"Decision", # 결정 사항
|
|
28
|
-
# v3.6.0 Knowledge Graph First — 1급 엔티티를 그래프에 노출
|
|
29
|
-
"Source", # 수집 출처 (파일/URL/브라우저 탭/git)
|
|
30
|
-
"Repository", # git 저장소
|
|
31
|
-
"Meeting", # 회의
|
|
32
|
-
"Organization", # 조직
|
|
33
|
-
"Workflow", # 워크플로우
|
|
34
|
-
"Agent", # 에이전트
|
|
35
|
-
)
|
|
36
|
-
|
|
37
|
-
def list_documents(self, limit: int = 200) -> Dict[str, Any]:
|
|
38
|
-
"""List ingested ``Document`` nodes with their ingest + index state.
|
|
39
|
-
|
|
40
|
-
Powers the Files view: every accepted upload and every indexed local
|
|
41
|
-
document becomes a ``Document`` node. A document is reported ``indexed``
|
|
42
|
-
once its retrieval chunks exist (searchable in Chat / Hybrid Search).
|
|
43
|
-
"""
|
|
44
|
-
limit = max(1, min(int(limit or 200), 1000))
|
|
45
|
-
nt, _ = self._read_tables()
|
|
46
|
-
documents: List[Dict[str, Any]] = []
|
|
47
|
-
with self._connect() as conn:
|
|
48
|
-
rows = conn.execute(
|
|
49
|
-
f"SELECT id, title, summary, metadata_json, created_at, updated_at "
|
|
50
|
-
f"FROM {nt} WHERE type='Document' ORDER BY updated_at DESC, id ASC LIMIT ?",
|
|
51
|
-
(limit,),
|
|
52
|
-
).fetchall()
|
|
53
|
-
for row in rows:
|
|
54
|
-
meta = _safe_loads(row["metadata_json"]) or {}
|
|
55
|
-
extracted = meta.get("extracted") or {}
|
|
56
|
-
node_id = row["id"]
|
|
57
|
-
chunk_count = conn.execute(
|
|
58
|
-
f"SELECT COUNT(*) AS c FROM {nt} WHERE type='Chunk' AND metadata_json LIKE ?",
|
|
59
|
-
(f"%{node_id}%",),
|
|
60
|
-
).fetchone()["c"]
|
|
61
|
-
documents.append(
|
|
62
|
-
{
|
|
63
|
-
"id": node_id,
|
|
64
|
-
"filename": meta.get("filename") or row["title"],
|
|
65
|
-
"ext": meta.get("ext"),
|
|
66
|
-
"mime_type": meta.get("mime_type"),
|
|
67
|
-
"bytes": meta.get("bytes"),
|
|
68
|
-
"sha256": meta.get("sha256"),
|
|
69
|
-
"uploader": meta.get("uploader"),
|
|
70
|
-
"chars": extracted.get("chars"),
|
|
71
|
-
"chunks": int(chunk_count or 0),
|
|
72
|
-
"indexed": int(chunk_count or 0) > 0,
|
|
73
|
-
"ingest_state": "indexed"
|
|
74
|
-
if int(chunk_count or 0) > 0
|
|
75
|
-
else "ingested",
|
|
76
|
-
"created_at": row["created_at"],
|
|
77
|
-
"updated_at": row["updated_at"],
|
|
78
|
-
}
|
|
79
|
-
)
|
|
80
|
-
return {
|
|
81
|
-
"documents": documents,
|
|
82
|
-
"total": len(documents),
|
|
83
|
-
"generated_at": datetime.now().isoformat(timespec="seconds"),
|
|
84
|
-
}
|
|
85
|
-
|
|
86
|
-
def workspaces_of(self, node_ids) -> Dict[str, Optional[str]]:
|
|
87
|
-
"""Map node ids to their workspace scope (None = legacy-global)."""
|
|
88
|
-
ids = [str(i) for i in node_ids if i]
|
|
89
|
-
if not ids:
|
|
90
|
-
return {}
|
|
91
|
-
placeholders = ",".join("?" for _ in ids)
|
|
92
|
-
with self._connect() as conn:
|
|
93
|
-
try:
|
|
94
|
-
return {
|
|
95
|
-
row["id"]: row["workspace_id"]
|
|
96
|
-
for row in conn.execute(
|
|
97
|
-
f"SELECT id, workspace_id FROM nodes_v2 WHERE id IN ({placeholders})",
|
|
98
|
-
ids,
|
|
99
|
-
).fetchall()
|
|
100
|
-
}
|
|
101
|
-
except Exception:
|
|
102
|
-
return {}
|
|
103
|
-
|
|
104
|
-
def filter_scoped_nodes(self, items, allowed_workspaces, *, id_key: str = "id"):
|
|
105
|
-
"""Drop items scoped to a workspace the caller is not a member of.
|
|
106
|
-
|
|
107
|
-
``allowed_workspaces=None`` means no scoping (single-user / no-auth
|
|
108
|
-
mode). Legacy-global rows (no workspace) stay visible to everyone on
|
|
109
|
-
the machine — the documented pre-v4 compatibility behavior.
|
|
110
|
-
"""
|
|
111
|
-
if allowed_workspaces is None:
|
|
112
|
-
return list(items)
|
|
113
|
-
allowed = set(allowed_workspaces)
|
|
114
|
-
scopes = self.workspaces_of([item.get(id_key) for item in items])
|
|
115
|
-
return [
|
|
116
|
-
item
|
|
117
|
-
for item in items
|
|
118
|
-
if scopes.get(item.get(id_key)) is None
|
|
119
|
-
or scopes.get(item.get(id_key)) in allowed
|
|
120
|
-
]
|
|
121
|
-
|
|
122
|
-
def graph(self, limit: int = 300, *, allowed_workspaces=None) -> Dict[str, Any]:
|
|
123
|
-
limit = max(1, min(int(limit or 300), 2000))
|
|
124
|
-
visible = ",".join(f"'{t}'" for t in self._GRAPH_VISIBLE_TYPES)
|
|
125
|
-
nt, et = self._read_tables()
|
|
126
|
-
with self._connect() as conn:
|
|
127
|
-
nodes = [
|
|
128
|
-
{
|
|
129
|
-
"id": row["id"],
|
|
130
|
-
"type": row["type"],
|
|
131
|
-
"title": row["title"],
|
|
132
|
-
"summary": row["summary"],
|
|
133
|
-
"metadata": _safe_loads(row["metadata_json"]),
|
|
134
|
-
"updated_at": row["updated_at"],
|
|
135
|
-
}
|
|
136
|
-
for row in conn.execute(
|
|
137
|
-
f"SELECT id, type, title, summary, metadata_json, updated_at FROM {nt} WHERE type IN ({visible}) ORDER BY updated_at DESC, id ASC LIMIT ?",
|
|
138
|
-
(limit,),
|
|
139
|
-
)
|
|
140
|
-
]
|
|
141
|
-
node_ids = {node["id"] for node in nodes}
|
|
142
|
-
edges: List[Dict[str, Any]] = []
|
|
143
|
-
if node_ids:
|
|
144
|
-
edge_rows = conn.execute(
|
|
145
|
-
f"""
|
|
146
|
-
SELECT id, from_node, to_node, type, weight, metadata_json
|
|
147
|
-
FROM {et}
|
|
148
|
-
WHERE from_node IN (
|
|
149
|
-
SELECT id FROM {nt} WHERE type IN ({visible})
|
|
150
|
-
ORDER BY updated_at DESC, id ASC LIMIT ?
|
|
151
|
-
)
|
|
152
|
-
AND to_node IN (
|
|
153
|
-
SELECT id FROM {nt} WHERE type IN ({visible})
|
|
154
|
-
ORDER BY updated_at DESC, id ASC LIMIT ?
|
|
155
|
-
)
|
|
156
|
-
ORDER BY weight DESC, created_at DESC, id ASC
|
|
157
|
-
""",
|
|
158
|
-
(limit, limit),
|
|
159
|
-
).fetchall()
|
|
160
|
-
edges = [
|
|
161
|
-
{
|
|
162
|
-
"id": row["id"],
|
|
163
|
-
"from": row["from_node"],
|
|
164
|
-
"to": row["to_node"],
|
|
165
|
-
"type": row["type"],
|
|
166
|
-
"weight": row["weight"],
|
|
167
|
-
"metadata": _safe_loads(row["metadata_json"]),
|
|
168
|
-
}
|
|
169
|
-
for row in edge_rows
|
|
170
|
-
]
|
|
171
|
-
|
|
172
|
-
if allowed_workspaces is not None:
|
|
173
|
-
nodes = self.filter_scoped_nodes(nodes, allowed_workspaces)
|
|
174
|
-
kept_ids = {node["id"] for node in nodes}
|
|
175
|
-
edges = [e for e in edges if e["from"] in kept_ids and e["to"] in kept_ids]
|
|
176
|
-
|
|
177
|
-
degree_map: Dict[str, int] = {}
|
|
178
|
-
now = datetime.now()
|
|
179
|
-
node_by_id = {node["id"]: node for node in nodes}
|
|
180
|
-
topic_metrics: Dict[str, Dict[str, Any]] = {}
|
|
181
|
-
|
|
182
|
-
for edge in edges:
|
|
183
|
-
degree_map[edge["from"]] = degree_map.get(edge["from"], 0) + 1
|
|
184
|
-
degree_map[edge["to"]] = degree_map.get(edge["to"], 0) + 1
|
|
185
|
-
from_node = node_by_id.get(edge["from"])
|
|
186
|
-
to_node = node_by_id.get(edge["to"])
|
|
187
|
-
if not from_node or not to_node:
|
|
188
|
-
continue
|
|
189
|
-
for topic_node, other_node in ((from_node, to_node), (to_node, from_node)):
|
|
190
|
-
if topic_node["type"] != "Topic":
|
|
191
|
-
continue
|
|
192
|
-
metrics = topic_metrics.setdefault(
|
|
193
|
-
topic_node["id"],
|
|
194
|
-
{
|
|
195
|
-
"mention_count": 0.0,
|
|
196
|
-
"conversation_ids": set(),
|
|
197
|
-
},
|
|
198
|
-
)
|
|
199
|
-
if edge["type"] in {"mentions", "discusses"}:
|
|
200
|
-
metrics["mention_count"] += max(
|
|
201
|
-
0.5, float(edge.get("weight") or 1.0)
|
|
202
|
-
)
|
|
203
|
-
other_meta = other_node.get("metadata") or {}
|
|
204
|
-
conversation_id = other_meta.get("conversation_id")
|
|
205
|
-
if other_node["type"] == "Conversation":
|
|
206
|
-
conversation_id = other_node["id"]
|
|
207
|
-
if conversation_id:
|
|
208
|
-
metrics["conversation_ids"].add(str(conversation_id))
|
|
209
|
-
|
|
210
|
-
type_max_raw: Dict[str, float] = {}
|
|
211
|
-
for node in nodes:
|
|
212
|
-
degree = degree_map.get(node["id"], 0)
|
|
213
|
-
recency = _recency_score(node.get("updated_at"), now=now)
|
|
214
|
-
metrics = {
|
|
215
|
-
"degree": degree,
|
|
216
|
-
"recency_score": round(recency, 4),
|
|
217
|
-
}
|
|
218
|
-
if node["type"] == "Topic":
|
|
219
|
-
topic_stat = topic_metrics.get(node["id"], {})
|
|
220
|
-
mention_count = float(topic_stat.get("mention_count") or 0.0)
|
|
221
|
-
conversation_count = len(topic_stat.get("conversation_ids") or ())
|
|
222
|
-
raw_importance = (
|
|
223
|
-
math.log1p(mention_count) * 2.8
|
|
224
|
-
+ math.log1p(conversation_count) * 2.2
|
|
225
|
-
+ recency * 1.4
|
|
226
|
-
+ math.sqrt(max(0, degree)) * 0.45
|
|
227
|
-
)
|
|
228
|
-
metrics.update(
|
|
229
|
-
{
|
|
230
|
-
"mention_count": round(mention_count, 2),
|
|
231
|
-
"conversation_count": conversation_count,
|
|
232
|
-
}
|
|
233
|
-
)
|
|
234
|
-
else:
|
|
235
|
-
raw_importance = math.log1p(max(0, degree)) * 1.4 + recency * 0.9
|
|
236
|
-
|
|
237
|
-
metrics["importance_raw"] = round(raw_importance, 4)
|
|
238
|
-
node["importance"] = round(raw_importance, 4)
|
|
239
|
-
node["_raw_importance"] = raw_importance
|
|
240
|
-
node["metadata"] = {
|
|
241
|
-
**(node.get("metadata") or {}),
|
|
242
|
-
"graph_metrics": metrics,
|
|
243
|
-
}
|
|
244
|
-
type_max_raw[node["type"]] = max(
|
|
245
|
-
type_max_raw.get(node["type"], 0.0), raw_importance
|
|
246
|
-
)
|
|
247
|
-
|
|
248
|
-
for node in nodes:
|
|
249
|
-
max_raw = max(type_max_raw.get(node["type"], 0.0), 0.0001)
|
|
250
|
-
importance_norm = min(1.0, (node.get("_raw_importance") or 0.0) / max_raw)
|
|
251
|
-
node["importance_norm"] = round(importance_norm, 4)
|
|
252
|
-
node["metadata"]["graph_metrics"]["importance_norm"] = node[
|
|
253
|
-
"importance_norm"
|
|
254
|
-
]
|
|
255
|
-
node.pop("_raw_importance", None)
|
|
256
|
-
return {"nodes": nodes, "edges": edges}
|
|
257
|
-
|
|
258
|
-
def search(self, query: str, limit: int = 30) -> Dict[str, Any]:
|
|
259
|
-
query = str(query or "").strip()
|
|
260
|
-
q = f"%{query}%"
|
|
261
|
-
limit = max(1, min(int(limit or 30), 100))
|
|
262
|
-
nt, et = self._read_tables()
|
|
263
|
-
with self._connect() as conn:
|
|
264
|
-
rows = []
|
|
265
|
-
if query:
|
|
266
|
-
fts_ids = self._fts_match_ids(conn, query, limit)
|
|
267
|
-
if fts_ids:
|
|
268
|
-
placeholders = ",".join("?" for _ in fts_ids)
|
|
269
|
-
by_id = {
|
|
270
|
-
row["id"]: row
|
|
271
|
-
for row in conn.execute(
|
|
272
|
-
f"""
|
|
273
|
-
SELECT id, type, title, summary, metadata_json, updated_at
|
|
274
|
-
FROM {nt} WHERE id IN ({placeholders})
|
|
275
|
-
""",
|
|
276
|
-
fts_ids,
|
|
277
|
-
).fetchall()
|
|
278
|
-
}
|
|
279
|
-
# Preserve FTS bm25 rank order.
|
|
280
|
-
rows = [by_id[i] for i in fts_ids if i in by_id]
|
|
281
|
-
else:
|
|
282
|
-
rows = conn.execute(
|
|
283
|
-
f"""
|
|
284
|
-
SELECT id, type, title, summary, metadata_json, updated_at
|
|
285
|
-
FROM {nt}
|
|
286
|
-
WHERE title LIKE ? OR summary LIKE ? OR metadata_json LIKE ?
|
|
287
|
-
ORDER BY updated_at DESC, id ASC
|
|
288
|
-
LIMIT ?
|
|
289
|
-
""",
|
|
290
|
-
(q, q, q, limit),
|
|
291
|
-
).fetchall()
|
|
292
|
-
|
|
293
|
-
if len(rows) < limit:
|
|
294
|
-
terms = _topic_candidates(query, limit=8)
|
|
295
|
-
if terms:
|
|
296
|
-
clauses = []
|
|
297
|
-
params: List[str] = []
|
|
298
|
-
for term in terms:
|
|
299
|
-
clauses.append(
|
|
300
|
-
"(title LIKE ? OR summary LIKE ? OR metadata_json LIKE ?)"
|
|
301
|
-
)
|
|
302
|
-
params.extend([f"%{term}%", f"%{term}%", f"%{term}%"])
|
|
303
|
-
extra = conn.execute(
|
|
304
|
-
f"""
|
|
305
|
-
SELECT id, type, title, summary, metadata_json, updated_at
|
|
306
|
-
FROM {nt}
|
|
307
|
-
WHERE {" OR ".join(clauses)}
|
|
308
|
-
ORDER BY updated_at DESC, id ASC
|
|
309
|
-
LIMIT ?
|
|
310
|
-
""",
|
|
311
|
-
(*params, limit * 3),
|
|
312
|
-
).fetchall()
|
|
313
|
-
by_id = {row["id"]: row for row in rows}
|
|
314
|
-
for row in extra:
|
|
315
|
-
by_id.setdefault(row["id"], row)
|
|
316
|
-
rows = list(by_id.values())
|
|
317
|
-
|
|
318
|
-
terms_for_score = set(_topic_candidates(query, limit=12))
|
|
319
|
-
|
|
320
|
-
def score(row: sqlite3.Row) -> tuple:
|
|
321
|
-
haystack = (
|
|
322
|
-
f"{row['title']} {row['summary']} {row['metadata_json']}".lower()
|
|
323
|
-
)
|
|
324
|
-
hits = sum(1 for term in terms_for_score if term.lower() in haystack)
|
|
325
|
-
type_boost = (
|
|
326
|
-
1
|
|
327
|
-
if row["type"]
|
|
328
|
-
in {
|
|
329
|
-
"Decision",
|
|
330
|
-
"Task",
|
|
331
|
-
"File",
|
|
332
|
-
"Document",
|
|
333
|
-
"CodeFile",
|
|
334
|
-
"Spreadsheet",
|
|
335
|
-
"SlideDeck",
|
|
336
|
-
"Image",
|
|
337
|
-
"ImageText",
|
|
338
|
-
"Page",
|
|
339
|
-
"Slide",
|
|
340
|
-
}
|
|
341
|
-
else 0
|
|
342
|
-
)
|
|
343
|
-
return (hits, type_boost, row["updated_at"] or "")
|
|
344
|
-
|
|
345
|
-
# Deterministic contract: rows with equal relevance order by id ASC
|
|
346
|
-
# (stable sort preserves the pre-sort under reverse=True), matching
|
|
347
|
-
# the legacy LIKE path regardless of FTS bm25 tie ordering.
|
|
348
|
-
rows = sorted(rows, key=lambda r: r["id"])
|
|
349
|
-
rows = sorted(rows, key=score, reverse=True)[:limit]
|
|
350
|
-
return {
|
|
351
|
-
"query": query,
|
|
352
|
-
"matches": [
|
|
353
|
-
{
|
|
354
|
-
"id": row["id"],
|
|
355
|
-
"type": row["type"],
|
|
356
|
-
"title": row["title"],
|
|
357
|
-
"summary": row["summary"],
|
|
358
|
-
"metadata": _safe_loads(row["metadata_json"]),
|
|
359
|
-
"updated_at": row["updated_at"],
|
|
360
|
-
}
|
|
361
|
-
for row in rows
|
|
362
|
-
],
|
|
363
|
-
}
|
|
364
|
-
|
|
365
|
-
def context_for_query(self, query: str, limit: int = 6) -> str:
|
|
366
|
-
"""Return compact graph-backed RAG context for chat generation."""
|
|
367
|
-
query = str(query or "").strip()
|
|
368
|
-
if not query:
|
|
369
|
-
return ""
|
|
370
|
-
matches = self.search(query, limit).get("matches", [])
|
|
371
|
-
if not matches:
|
|
372
|
-
topics = _topic_candidates(query, limit=4)
|
|
373
|
-
if topics:
|
|
374
|
-
nt, et = self._read_tables()
|
|
375
|
-
with self._connect() as conn:
|
|
376
|
-
rows = []
|
|
377
|
-
for topic in topics:
|
|
378
|
-
rows.extend(
|
|
379
|
-
conn.execute(
|
|
380
|
-
f"""
|
|
381
|
-
SELECT id, type, title, summary, metadata_json
|
|
382
|
-
FROM {nt}
|
|
383
|
-
WHERE title LIKE ? OR metadata_json LIKE ?
|
|
384
|
-
ORDER BY updated_at DESC, id ASC
|
|
385
|
-
LIMIT 3
|
|
386
|
-
""",
|
|
387
|
-
(f"%{topic}%", f"%{topic}%"),
|
|
388
|
-
).fetchall()
|
|
389
|
-
)
|
|
390
|
-
seen = set()
|
|
391
|
-
matches = []
|
|
392
|
-
for row in rows:
|
|
393
|
-
if row["id"] in seen:
|
|
394
|
-
continue
|
|
395
|
-
seen.add(row["id"])
|
|
396
|
-
matches.append(
|
|
397
|
-
{
|
|
398
|
-
"id": row["id"],
|
|
399
|
-
"type": row["type"],
|
|
400
|
-
"title": row["title"],
|
|
401
|
-
"summary": row["summary"],
|
|
402
|
-
"metadata": _safe_loads(row["metadata_json"]),
|
|
403
|
-
}
|
|
404
|
-
)
|
|
405
|
-
if len(matches) >= limit:
|
|
406
|
-
break
|
|
407
|
-
lines = []
|
|
408
|
-
for match in matches[:limit]:
|
|
409
|
-
meta = match.get("metadata") or {}
|
|
410
|
-
source = (
|
|
411
|
-
meta.get("relative_path")
|
|
412
|
-
or meta.get("filename")
|
|
413
|
-
or meta.get("conversation_id")
|
|
414
|
-
or meta.get("source")
|
|
415
|
-
or match["id"]
|
|
416
|
-
)
|
|
417
|
-
summary = _clean_text(match.get("summary") or "")[:700]
|
|
418
|
-
lines.append(
|
|
419
|
-
f"- [{match['type']}] {match['title']} | source={source} | {summary}"
|
|
420
|
-
)
|
|
421
|
-
return "\n".join(lines)
|
|
422
|
-
|
|
423
|
-
def neighbors(self, node_id: str) -> Dict[str, Any]:
|
|
424
|
-
"""Return direct neighbors (1-hop) of a node."""
|
|
425
|
-
nt, et = self._read_tables()
|
|
426
|
-
with self._connect() as conn:
|
|
427
|
-
edge_rows = conn.execute(
|
|
428
|
-
f"SELECT from_node, to_node, type, weight FROM {et} WHERE from_node=? OR to_node=? ORDER BY id ASC",
|
|
429
|
-
(node_id, node_id),
|
|
430
|
-
).fetchall()
|
|
431
|
-
neighbor_ids: set = set()
|
|
432
|
-
edges = []
|
|
433
|
-
for row in edge_rows:
|
|
434
|
-
neighbor_ids.add(row["from_node"])
|
|
435
|
-
neighbor_ids.add(row["to_node"])
|
|
436
|
-
edges.append(
|
|
437
|
-
{
|
|
438
|
-
"from": row["from_node"],
|
|
439
|
-
"to": row["to_node"],
|
|
440
|
-
"type": row["type"],
|
|
441
|
-
"weight": row["weight"],
|
|
442
|
-
}
|
|
443
|
-
)
|
|
444
|
-
neighbor_ids.discard(node_id)
|
|
445
|
-
nodes = []
|
|
446
|
-
if neighbor_ids:
|
|
447
|
-
placeholders = ",".join("?" * len(neighbor_ids))
|
|
448
|
-
nodes = [
|
|
449
|
-
{
|
|
450
|
-
"id": row["id"],
|
|
451
|
-
"type": row["type"],
|
|
452
|
-
"title": row["title"],
|
|
453
|
-
"summary": row["summary"],
|
|
454
|
-
"metadata": _safe_loads(row["metadata_json"]),
|
|
455
|
-
}
|
|
456
|
-
for row in conn.execute(
|
|
457
|
-
f"SELECT id, type, title, summary, metadata_json FROM {nt} WHERE id IN ({placeholders}) ORDER BY id ASC",
|
|
458
|
-
list(neighbor_ids),
|
|
459
|
-
)
|
|
460
|
-
]
|
|
461
|
-
return {"node_id": node_id, "neighbors": nodes, "edges": edges}
|
|
462
|
-
|
|
463
|
-
def get_node(self, node_id: str) -> Dict[str, Any]:
|
|
464
|
-
node_id = str(node_id or "").strip()
|
|
465
|
-
if not node_id:
|
|
466
|
-
raise ValueError("node_id required")
|
|
467
|
-
nt, et = self._read_tables()
|
|
468
|
-
with self._connect() as conn:
|
|
469
|
-
row = conn.execute(
|
|
470
|
-
f"""
|
|
471
|
-
SELECT id, type, title, summary, metadata_json, updated_at
|
|
472
|
-
FROM {nt}
|
|
473
|
-
WHERE id=?
|
|
474
|
-
""",
|
|
475
|
-
(node_id,),
|
|
476
|
-
).fetchone()
|
|
477
|
-
if not row:
|
|
478
|
-
raise ValueError(f"graph node not found: {node_id}")
|
|
479
|
-
degree = conn.execute(
|
|
480
|
-
f"SELECT COUNT(*) AS c FROM {et} WHERE from_node=? OR to_node=?",
|
|
481
|
-
(node_id, node_id),
|
|
482
|
-
).fetchone()["c"]
|
|
483
|
-
return {
|
|
484
|
-
"id": row["id"],
|
|
485
|
-
"type": row["type"],
|
|
486
|
-
"title": row["title"],
|
|
487
|
-
"summary": row["summary"],
|
|
488
|
-
"metadata": _safe_loads(row["metadata_json"]),
|
|
489
|
-
"updated_at": row["updated_at"],
|
|
490
|
-
"degree": degree,
|
|
491
|
-
}
|
|
492
|
-
|
|
493
|
-
def relationship_search(
|
|
494
|
-
self,
|
|
495
|
-
*,
|
|
496
|
-
query: str = "",
|
|
497
|
-
node_id: str = "",
|
|
498
|
-
relationship_type: str = "",
|
|
499
|
-
limit: int = 30,
|
|
500
|
-
) -> Dict[str, Any]:
|
|
501
|
-
query = str(query or "").strip()
|
|
502
|
-
node_id = str(node_id or "").strip()
|
|
503
|
-
relationship_type = str(relationship_type or "").strip()
|
|
504
|
-
limit = max(1, min(int(limit or 30), 200))
|
|
505
|
-
nt, et = self._read_tables()
|
|
506
|
-
where = []
|
|
507
|
-
params: List[Any] = []
|
|
508
|
-
if node_id:
|
|
509
|
-
where.append("(e.from_node=? OR e.to_node=?)")
|
|
510
|
-
params.extend([node_id, node_id])
|
|
511
|
-
if relationship_type:
|
|
512
|
-
where.append("e.type LIKE ?")
|
|
513
|
-
params.append(f"%{relationship_type}%")
|
|
514
|
-
if query:
|
|
515
|
-
where.append(
|
|
516
|
-
"(e.type LIKE ? OR e.metadata_json LIKE ? OR src.title LIKE ? OR dst.title LIKE ? OR src.summary LIKE ? OR dst.summary LIKE ?)"
|
|
517
|
-
)
|
|
518
|
-
params.extend([f"%{query}%"] * 6)
|
|
519
|
-
where_sql = "WHERE " + " AND ".join(where) if where else ""
|
|
520
|
-
with self._connect() as conn:
|
|
521
|
-
rows = conn.execute(
|
|
522
|
-
f"""
|
|
523
|
-
SELECT
|
|
524
|
-
e.id, e.from_node, e.to_node, e.type, e.weight, e.metadata_json, e.created_at,
|
|
525
|
-
src.type AS source_type, src.title AS source_title, src.summary AS source_summary,
|
|
526
|
-
src.metadata_json AS source_metadata,
|
|
527
|
-
dst.type AS target_type, dst.title AS target_title, dst.summary AS target_summary,
|
|
528
|
-
dst.metadata_json AS target_metadata
|
|
529
|
-
FROM {et} e
|
|
530
|
-
JOIN {nt} src ON src.id=e.from_node
|
|
531
|
-
JOIN {nt} dst ON dst.id=e.to_node
|
|
532
|
-
{where_sql}
|
|
533
|
-
ORDER BY e.weight DESC, e.created_at DESC, e.id ASC
|
|
534
|
-
LIMIT ?
|
|
535
|
-
""",
|
|
536
|
-
(*params, limit),
|
|
537
|
-
).fetchall()
|
|
538
|
-
return {
|
|
539
|
-
"query": query,
|
|
540
|
-
"node_id": node_id,
|
|
541
|
-
"relationship_type": relationship_type,
|
|
542
|
-
"relationships": [
|
|
543
|
-
{
|
|
544
|
-
"id": row["id"],
|
|
545
|
-
"type": row["type"],
|
|
546
|
-
"weight": row["weight"],
|
|
547
|
-
"metadata": _safe_loads(row["metadata_json"]),
|
|
548
|
-
"created_at": row["created_at"],
|
|
549
|
-
"source": {
|
|
550
|
-
"id": row["from_node"],
|
|
551
|
-
"type": row["source_type"],
|
|
552
|
-
"title": row["source_title"],
|
|
553
|
-
"summary": row["source_summary"],
|
|
554
|
-
"metadata": _safe_loads(row["source_metadata"]),
|
|
555
|
-
},
|
|
556
|
-
"target": {
|
|
557
|
-
"id": row["to_node"],
|
|
558
|
-
"type": row["target_type"],
|
|
559
|
-
"title": row["target_title"],
|
|
560
|
-
"summary": row["target_summary"],
|
|
561
|
-
"metadata": _safe_loads(row["target_metadata"]),
|
|
562
|
-
},
|
|
563
|
-
}
|
|
564
|
-
for row in rows
|
|
565
|
-
],
|
|
566
|
-
}
|
|
567
|
-
|
|
568
|
-
def traverse(
|
|
569
|
-
self, node_id: str, *, depth: int = 1, limit: int = 100
|
|
570
|
-
) -> Dict[str, Any]:
|
|
571
|
-
node_id = str(node_id or "").strip()
|
|
572
|
-
if not node_id:
|
|
573
|
-
raise ValueError("node_id required")
|
|
574
|
-
depth = max(0, min(int(depth or 1), 4))
|
|
575
|
-
limit = max(1, min(int(limit or 100), 500))
|
|
576
|
-
nt, et = self._read_tables()
|
|
577
|
-
visited = {node_id}
|
|
578
|
-
frontier = {node_id}
|
|
579
|
-
edges_by_id: Dict[str, Dict[str, Any]] = {}
|
|
580
|
-
with self._connect() as conn:
|
|
581
|
-
for _ in range(depth):
|
|
582
|
-
if not frontier or len(visited) >= limit:
|
|
583
|
-
break
|
|
584
|
-
placeholders = ",".join("?" * len(frontier))
|
|
585
|
-
rows = conn.execute(
|
|
586
|
-
f"""
|
|
587
|
-
SELECT id, from_node, to_node, type, weight, metadata_json
|
|
588
|
-
FROM {et}
|
|
589
|
-
WHERE from_node IN ({placeholders}) OR to_node IN ({placeholders})
|
|
590
|
-
ORDER BY weight DESC, id ASC
|
|
591
|
-
LIMIT ?
|
|
592
|
-
""",
|
|
593
|
-
(*frontier, *frontier, limit * 3),
|
|
594
|
-
).fetchall()
|
|
595
|
-
next_frontier = set()
|
|
596
|
-
for row in rows:
|
|
597
|
-
edges_by_id[row["id"]] = {
|
|
598
|
-
"id": row["id"],
|
|
599
|
-
"from": row["from_node"],
|
|
600
|
-
"to": row["to_node"],
|
|
601
|
-
"type": row["type"],
|
|
602
|
-
"weight": row["weight"],
|
|
603
|
-
"metadata": _safe_loads(row["metadata_json"]),
|
|
604
|
-
}
|
|
605
|
-
for candidate in (row["from_node"], row["to_node"]):
|
|
606
|
-
if candidate not in visited and len(visited) < limit:
|
|
607
|
-
visited.add(candidate)
|
|
608
|
-
next_frontier.add(candidate)
|
|
609
|
-
frontier = next_frontier
|
|
610
|
-
placeholders = ",".join("?" * len(visited))
|
|
611
|
-
node_rows = conn.execute(
|
|
612
|
-
f"""
|
|
613
|
-
SELECT id, type, title, summary, metadata_json, updated_at
|
|
614
|
-
FROM {nt}
|
|
615
|
-
WHERE id IN ({placeholders})
|
|
616
|
-
ORDER BY updated_at DESC, id ASC
|
|
617
|
-
""",
|
|
618
|
-
list(visited),
|
|
619
|
-
).fetchall()
|
|
620
|
-
return {
|
|
621
|
-
"root": node_id,
|
|
622
|
-
"depth": depth,
|
|
623
|
-
"nodes": [
|
|
624
|
-
{
|
|
625
|
-
"id": row["id"],
|
|
626
|
-
"type": row["type"],
|
|
627
|
-
"title": row["title"],
|
|
628
|
-
"summary": row["summary"],
|
|
629
|
-
"metadata": _safe_loads(row["metadata_json"]),
|
|
630
|
-
"updated_at": row["updated_at"],
|
|
631
|
-
}
|
|
632
|
-
for row in node_rows
|
|
633
|
-
],
|
|
634
|
-
"edges": list(edges_by_id.values()),
|
|
635
|
-
}
|
|
636
|
-
|
|
637
|
-
def _iter_vector_source_items(
|
|
638
|
-
self,
|
|
639
|
-
conn: sqlite3.Connection,
|
|
640
|
-
*,
|
|
641
|
-
include_nodes: bool = True,
|
|
642
|
-
include_chunks: bool = True,
|
|
643
|
-
) -> List[Dict[str, Any]]:
|
|
644
|
-
items: List[Dict[str, Any]] = []
|
|
645
|
-
if include_nodes:
|
|
646
|
-
for row in conn.execute(
|
|
647
|
-
"""
|
|
648
|
-
SELECT id, type, title, summary, metadata_json
|
|
649
|
-
FROM nodes
|
|
650
|
-
WHERE type <> 'Chunk'
|
|
651
|
-
ORDER BY updated_at DESC, id ASC
|
|
652
|
-
"""
|
|
653
|
-
).fetchall():
|
|
654
|
-
metadata = _safe_loads(row["metadata_json"])
|
|
655
|
-
text = self._vector_text_for_node(
|
|
656
|
-
title=row["title"],
|
|
657
|
-
summary=row["summary"] or "",
|
|
658
|
-
metadata=metadata,
|
|
659
|
-
)
|
|
660
|
-
if text:
|
|
661
|
-
items.append(
|
|
662
|
-
{
|
|
663
|
-
"item_id": row["id"],
|
|
664
|
-
"item_type": "node",
|
|
665
|
-
"source_node": row["id"],
|
|
666
|
-
"text": text,
|
|
667
|
-
"metadata": {"node_type": row["type"], **metadata},
|
|
668
|
-
}
|
|
669
|
-
)
|
|
670
|
-
if include_chunks:
|
|
671
|
-
for row in conn.execute(
|
|
672
|
-
"""
|
|
673
|
-
SELECT c.id, c.source_node AS parent_source_node, c.text, c.metadata_json
|
|
674
|
-
FROM chunks c
|
|
675
|
-
JOIN nodes n ON n.id=c.id
|
|
676
|
-
ORDER BY c.created_at DESC, c.id ASC
|
|
677
|
-
"""
|
|
678
|
-
).fetchall():
|
|
679
|
-
metadata = _safe_loads(row["metadata_json"])
|
|
680
|
-
text = _clean_text(row["text"] or "")
|
|
681
|
-
if text:
|
|
682
|
-
items.append(
|
|
683
|
-
{
|
|
684
|
-
"item_id": row["id"],
|
|
685
|
-
"item_type": "chunk",
|
|
686
|
-
"source_node": row["id"],
|
|
687
|
-
"text": text,
|
|
688
|
-
"metadata": {
|
|
689
|
-
**metadata,
|
|
690
|
-
"parent_source_node": row["parent_source_node"],
|
|
691
|
-
},
|
|
692
|
-
}
|
|
693
|
-
)
|
|
694
|
-
return items
|
|
695
|
-
|
|
696
|
-
def rebuild_vector_index(
    self,
    *,
    full: bool = False,
    include_nodes: bool = True,
    include_chunks: bool = True,
) -> Dict[str, Any]:
    """Rebuild the derived vector index without mutating graph content.

    The run is journaled in ``vector_index_operations``: a ``running`` row is
    written first and switched to ``completed`` (with item counts and timing)
    once every source item has been handed to ``_upsert_vector_item``.

    Args:
        full: When true, embeddings of the selected item types are deleted
            before re-indexing and the operation is recorded as
            ``rebuild_full``; otherwise it is ``rebuild_incremental``.
        include_nodes: Re-index node items.
        include_chunks: Re-index chunk items.

    Returns:
        Summary dict with ``status``, ``operation_id``, ``full``,
        ``items_total``, ``items_indexed``, ``items_skipped``,
        ``duration_ms``, ``embedding_model`` and ``embedding_dim``.

    Raises:
        Exception: Whatever the rebuild raised, re-raised after an attempt to
            record a ``failed`` operation row on a fresh connection.
    """
    entropy = f"{time.time()}:{os.getpid()}"
    op_id = f"vector-op:{_sha256_text(entropy)[:24]}"
    requested_at = _now()
    started = time.perf_counter()
    operation = "rebuild_full" if full else "rebuild_incremental"
    selection = {
        "include_nodes": include_nodes,
        "include_chunks": include_chunks,
    }

    try:
        with self._connect() as conn:
            conn.execute(
                """
                INSERT INTO vector_index_operations(
                    id, operation, status, requested_at, started_at, metadata_json
                )
                VALUES (?, ?, 'running', ?, ?, ?)
                """,
                (op_id, operation, requested_at, requested_at, _json(selection)),
            )

            if full:
                # Only fixed literals are interpolated below, never caller text.
                wanted_types = [
                    literal
                    for enabled, literal in (
                        (include_nodes, "'node'"),
                        (include_chunks, "'chunk'"),
                    )
                    if enabled
                ]
                if wanted_types:
                    conn.execute(
                        f"DELETE FROM vector_embeddings WHERE item_type IN ({','.join(wanted_types)})"
                    )

            items = self._iter_vector_source_items(
                conn,
                include_nodes=include_nodes,
                include_chunks=include_chunks,
            )

            indexed = 0
            skipped = 0
            for item in items:
                # A truthy result means the embedding row was written or
                # refreshed; a falsy one is counted as skipped.
                if self._upsert_vector_item(conn, **item):
                    indexed += 1
                else:
                    skipped += 1

            duration_ms = round((time.perf_counter() - started) * 1000, 2)
            model_id = self._embedding_model.model_id
            model_dim = self._embedding_model.dim
            conn.execute(
                """
                UPDATE vector_index_operations
                SET status='completed', completed_at=?, items_total=?,
                    items_indexed=?, items_skipped=?, metadata_json=?
                WHERE id=?
                """,
                (
                    _now(),
                    len(items),
                    indexed,
                    skipped,
                    _json(
                        {
                            **selection,
                            "duration_ms": duration_ms,
                            "embedding_model": model_id,
                            "embedding_dim": model_dim,
                        }
                    ),
                    op_id,
                ),
            )
            return {
                "status": "completed",
                "operation_id": op_id,
                "full": bool(full),
                "items_total": len(items),
                "items_indexed": indexed,
                "items_skipped": skipped,
                "duration_ms": duration_ms,
                "embedding_model": model_id,
                "embedding_dim": model_dim,
            }
    except Exception as exc:
        duration_ms = round((time.perf_counter() - started) * 1000, 2)
        # Record the failure on a separate connection. The upsert form covers
        # both the case where the 'running' row still exists and the case
        # where it does not.
        with self._connect() as conn:
            conn.execute(
                """
                INSERT INTO vector_index_operations(
                    id, operation, status, requested_at, started_at, completed_at,
                    error_message, metadata_json
                )
                VALUES (?, ?, 'failed', ?, ?, ?, ?, ?)
                ON CONFLICT(id) DO UPDATE SET
                    status='failed',
                    completed_at=excluded.completed_at,
                    error_message=excluded.error_message,
                    metadata_json=excluded.metadata_json
                """,
                (
                    op_id,
                    operation,
                    requested_at,
                    requested_at,
                    _now(),
                    str(exc),
                    _json({"duration_ms": duration_ms}),
                ),
            )
        raise
|
|
814
|
-
|
|
815
|
-
def index_status(self) -> Dict[str, Any]:
    """Report how complete the vector index is relative to the graph.

    Every source item from ``_iter_vector_source_items`` is classified as
    *missing* (no embedding row), *stale* (stored text hash, embedding
    dimension or embedding model differs from the current one) or *ready*.

    Returns:
        A dict with an overall ``status`` (``"ready"`` when nothing is
        missing or stale, otherwise ``"needs_reindex"``), a ``storage``
        section, the item counters, embedding counts per item type and the
        five most recently requested index operations.
    """
    try:
        storage_capabilities = self.storage_engine.capabilities().as_dict()
    except Exception as exc:
        # Capability probing is best-effort: surface the failure in the
        # report instead of failing the whole status call.
        storage_capabilities = {
            "engine": "sqlite",
            "available": False,
            "reason": str(exc),
        }

    with self._connect() as conn:
        vector_counts = {}
        for count_row in conn.execute(
            "SELECT item_type, COUNT(*) AS count FROM vector_embeddings GROUP BY item_type"
        ):
            vector_counts[count_row["item_type"]] = count_row["count"]

        source_items = self._iter_vector_source_items(conn)

        embedding_rows = conn.execute(
            """
            SELECT item_id, text_hash, embedding_dim, embedding_model, indexed_at
            FROM vector_embeddings
            """
        ).fetchall()
        latest_rows = conn.execute(
            """
            SELECT id, operation, status, requested_at, started_at, completed_at,
                   items_total, items_indexed, items_skipped, error_message, metadata_json
            FROM vector_index_operations
            ORDER BY requested_at DESC, id DESC
            LIMIT 5
            """
        ).fetchall()

    vector_rows = {stored["item_id"]: stored for stored in embedding_rows}
    model_id = self._embedding_model.model_id
    model_dim = self._embedding_model.dim

    missing = 0
    stale = 0
    ready = 0
    for item in source_items:
        vector_row = vector_rows.get(item["item_id"])
        expected_hash = _sha256_text(_clean_text(item["text"]))
        if not vector_row:
            missing += 1
            continue
        is_current = (
            vector_row["text_hash"] == expected_hash
            and vector_row["embedding_dim"] == model_dim
            and vector_row["embedding_model"] == model_id
        )
        if is_current:
            ready += 1
        else:
            stale += 1
    pending = missing + stale

    # Derive the search-backend fields from the capability report when it is
    # a dict; otherwise fall back to the fixed defaults.
    if isinstance(storage_capabilities, dict):
        capability_meta = storage_capabilities.get("metadata") or {}
        search_backend = storage_capabilities.get("vector_backend")
        search_mode = capability_meta.get("vector_mode")
        ann_available = bool(capability_meta.get("sqlite_vec_ann_available"))
    else:
        search_backend = "bruteforce-cosine"
        search_mode = "fallback"
        ann_available = False

    operations = []
    for op_row in latest_rows:
        operations.append(
            {
                "id": op_row["id"],
                "operation": op_row["operation"],
                "status": op_row["status"],
                "requested_at": op_row["requested_at"],
                "started_at": op_row["started_at"],
                "completed_at": op_row["completed_at"],
                "items_total": op_row["items_total"],
                "items_indexed": op_row["items_indexed"],
                "items_skipped": op_row["items_skipped"],
                "error_message": op_row["error_message"],
                "metadata": _safe_loads(op_row["metadata_json"]),
            }
        )

    return {
        "status": "ready" if pending == 0 else "needs_reindex",
        "storage": {
            "db_path": str(self.db_path),
            "backend": "sqlite",
            "embedding_model": model_id,
            "embedding_dim": model_dim,
            # Honest capability report: trigram FTS5 keyword index, or
            # LIKE-scan fallback when this SQLite build lacks it.
            "fts_enabled": bool(getattr(self, "_fts_enabled", False)),
            "engine": storage_capabilities,
            "vector_search_backend": search_backend,
            "vector_search_mode": search_mode,
            "sqlite_vec_ann_available": ann_available,
        },
        "source_items": len(source_items),
        "indexed_items": sum(vector_counts.values()),
        "ready_items": ready,
        "missing_items": missing,
        "stale_items": stale,
        "pending_items": pending,
        "by_item_type": vector_counts,
        "operations": operations,
    }
|
|
917
|
-
|
|
918
|
-
def vector_search(
    self,
    query: str,
    *,
    limit: int = 30,
    min_score: float = 0.0,
    max_candidates: int = 10_000,
) -> Dict[str, Any]:
    """Score stored embeddings against ``query`` and return the best matches.

    Candidates are the most recently indexed embeddings produced by the
    current embedding model and dimension. Each one is decoded and compared
    with the query vector in Python.

    Args:
        query: Free text; blank input yields an empty result without
            touching the embedding model or the database.
        limit: Number of matches to return, clamped to 1..100.
        min_score: Matches scoring below this value are dropped.
        max_candidates: Upper bound on embeddings loaded for scoring,
            capped at 50 000 and never lower than ``limit``.

    Returns:
        ``{"query", "matches"}`` for blank input, otherwise
        ``{"query", "embedding_model", "embedding_dim", "matches"}`` with
        matches ordered by score, then ``updated_at``, both descending.
    """
    query = str(query or "").strip()
    limit = max(1, min(int(limit or 30), 100))
    min_score = float(min_score or 0.0)
    if not query:
        return {"query": query, "matches": []}

    model = self._embedding_model
    query_vector = model.embed(query)
    max_candidates = max(limit, min(int(max_candidates or 10_000), 50_000))

    with self._connect() as conn:
        candidates = conn.execute(
            """
            SELECT
                ve.item_id, ve.item_type, ve.source_node, ve.embedding,
                ve.embedding_dim, ve.embedding_model, ve.metadata_json AS vector_metadata,
                n.type AS node_type, n.title AS node_title, n.summary AS node_summary,
                n.metadata_json AS node_metadata, n.updated_at AS node_updated_at,
                c.text AS chunk_text, c.source_node AS parent_node_id,
                pn.type AS parent_type, pn.title AS parent_title,
                pn.summary AS parent_summary, pn.metadata_json AS parent_metadata,
                pn.updated_at AS parent_updated_at
            FROM vector_embeddings ve
            LEFT JOIN nodes n ON n.id=ve.source_node
            LEFT JOIN chunks c ON c.id=ve.item_id
            LEFT JOIN nodes pn ON pn.id=c.source_node
            WHERE ve.embedding_model=? AND ve.embedding_dim=?
            ORDER BY ve.indexed_at DESC
            LIMIT ?
            """,
            (model.model_id, model.dim, max_candidates),
        ).fetchall()

    matches = []
    for cand in candidates:
        stored_vector = model.decode(cand["embedding"], cand["embedding_dim"])
        score = model.similarity(query_vector, stored_vector)
        if score < min_score:
            continue

        if cand["item_type"] == "chunk":
            # Chunks are presented through their parent node wherever the
            # parent value is available, falling back to the chunk's own row.
            node_id = cand["parent_node_id"] or cand["source_node"]
            shown_type = "Chunk"
            title = cand["parent_title"] or cand["node_title"]
            summary = cand["chunk_text"] or cand["node_summary"]
            base_metadata = _safe_loads(cand["parent_metadata"])
            updated_at = cand["parent_updated_at"] or cand["node_updated_at"]
        else:
            node_id = cand["source_node"]
            shown_type = cand["node_type"]
            title = cand["node_title"]
            summary = cand["node_summary"]
            base_metadata = _safe_loads(cand["node_metadata"])
            updated_at = cand["node_updated_at"]

        matches.append(
            {
                "id": cand["item_id"],
                "node_id": node_id,
                "item_type": cand["item_type"],
                "type": shown_type,
                "title": title,
                "summary": _clean_text(summary or "")[:1000],
                "score": round(float(score), 6),
                "metadata": {
                    **base_metadata,
                    "vector": _safe_loads(cand["vector_metadata"]),
                    "parent_node_id": cand["parent_node_id"],
                    "parent_type": cand["parent_type"],
                },
                "updated_at": updated_at,
            }
        )

    def ranking(match):
        # Higher score first; ties broken by the more recent update stamp.
        return (match["score"], match.get("updated_at") or "")

    matches.sort(key=ranking, reverse=True)
    return {
        "query": query,
        "embedding_model": model.model_id,
        "embedding_dim": model.dim,
        "matches": matches[:limit],
    }
|
|
1008
|
-
|
|
1009
|
-
def delete_conversation(self, conversation_id: str) -> Dict[str, Any]:
    """Delete a conversation node and the nodes attached beneath it.

    The removal set consists of the conversation node, every node it
    ``contains``, and the targets of the structural child edges (chunks,
    pages, slides, sheets, images, signals, implications) leaving those
    contained nodes. Afterwards ``Topic`` nodes that no edge references any
    more are deleted as well.

    Args:
        conversation_id: External conversation identifier; blank input is a
            no-op.

    Returns:
        ``{"status": "skipped", "removed_nodes": 0}`` for blank input,
        otherwise ``{"status": "ok", "conversation_id", "removed_nodes"}``
        where ``removed_nodes`` is the size of the removal set.
    """
    conversation_id = str(conversation_id or "").strip()
    if not conversation_id:
        return {"status": "skipped", "removed_nodes": 0}

    conv_id = f"conversation:{_slug(conversation_id)}"

    lowercase_labels = (
        "has_chunk",
        "implies",
        "contains_signal",
        "has_page",
        "has_slide",
        "has_sheet",
        "contains_image",
    )
    # Edge rows may carry the legacy lowercase label (pre-v4) or the
    # canonical EdgeType value (v4 write door) — match both.
    child_types = [*lowercase_labels, *(label.upper() for label in lowercase_labels)]
    placeholders = ",".join("?" for _ in child_types)
    child_query = (
        f"SELECT to_node FROM edges WHERE from_node=? AND type IN ({placeholders})"
    )

    with self._connect() as conn:
        contained = conn.execute(
            "SELECT to_node FROM edges WHERE from_node=? AND type IN ('contains', 'CONTAINS')",
            (conv_id,),
        )
        direct_ids = [edge["to_node"] for edge in contained]

        remove_ids = set(direct_ids)
        for source_id in direct_ids:
            remove_ids.update(
                edge["to_node"]
                for edge in conn.execute(child_query, (source_id, *child_types))
            )
        remove_ids.add(conv_id)

        for node_id in remove_ids:
            conn.execute("DELETE FROM nodes WHERE id=?", (node_id,))
            if KGStoreV2 is not None:
                # edges_v2 cascade
                conn.execute("DELETE FROM nodes_v2 WHERE id=?", (node_id,))

        # Drop topics that are no longer referenced by any edge.
        conn.execute(
            """
            DELETE FROM nodes
            WHERE type='Topic'
              AND id NOT IN (SELECT to_node FROM edges)
              AND id NOT IN (SELECT from_node FROM edges)
            """
        )
        if KGStoreV2 is not None:
            conn.execute(
                """
                DELETE FROM nodes_v2
                WHERE legacy_type='Topic'
                  AND id NOT IN (SELECT target FROM edges_v2)
                  AND id NOT IN (SELECT source FROM edges_v2)
                """
            )

    return {
        "status": "ok",
        "conversation_id": conversation_id,
        "removed_nodes": len(remove_ids),
    }
|
|
1071
|
-
|
|
1072
|
-
def clear_all(self) -> Dict[str, Any]:
    """Delete all graph content and reset the blob directory.

    Row counts of the legacy tables are captured before deletion so the
    caller can see what was removed. The v2 tables are emptied too when
    ``KGStoreV2`` is available.

    Returns:
        ``{"status": "ok", "removed": {...}}`` where ``removed`` maps
        ``nodes``, ``edges``, ``chunks``, ``knowledge_sources`` and
        ``local_file_index`` to their row counts before deletion.
    """
    counted_tables = (
        "nodes",
        "edges",
        "chunks",
        "knowledge_sources",
        "local_file_index",
    )

    with self._connect() as conn:
        counts = {}
        for table in counted_tables:
            tally = conn.execute(f"SELECT COUNT(*) AS c FROM {table}").fetchone()
            counts[table] = tally["c"]

        # Delete in the reverse of the counting order:
        # local_file_index, knowledge_sources, chunks, edges, nodes.
        for table in reversed(counted_tables):
            conn.execute(f"DELETE FROM {table}")

        if KGStoreV2 is not None:
            for v2_table in ("edges_v2", "nodes_v2"):
                conn.execute(f"DELETE FROM {v2_table}")

    blob_root = self.blob_dir
    if blob_root.exists():
        # Best-effort removal; the directory is recreated empty right after.
        shutil.rmtree(blob_root, ignore_errors=True)
    blob_root.mkdir(parents=True, exist_ok=True)

    return {"status": "ok", "removed": counts}
|
|
1103
|
-
|
|
1104
|
-
def stats(self) -> Dict[str, Any]:
    """Summarise the graph store: counts per node/edge type and file status.

    Node and edge counts are read from the table names returned by
    ``_read_tables``. When ``KGStoreV2`` is available its own statistics are
    attached under ``v2``; a failure there is reported in place rather than
    raised.

    Returns:
        Dict with ``db_path``, ``schema_version``, ``v2_schema_available``,
        ``nodes``, ``edges``, ``local_sources``, ``local_file_status`` and
        ``v2``.
    """
    nt, et = self._read_tables()

    def grouped_counts(cursor, key):
        # Fold "<key>, count" rows into a {key: count} mapping.
        return {record[key]: record["count"] for record in cursor}

    with self._connect() as conn:
        node_counts = grouped_counts(
            conn.execute(f"SELECT type, COUNT(*) AS count FROM {nt} GROUP BY type"),
            "type",
        )
        edge_counts = grouped_counts(
            conn.execute(f"SELECT type, COUNT(*) AS count FROM {et} GROUP BY type"),
            "type",
        )
        source_total = conn.execute(
            "SELECT COUNT(*) AS c FROM knowledge_sources"
        ).fetchone()
        local_sources = source_total["c"]
        local_file_status = grouped_counts(
            conn.execute(
                "SELECT status, COUNT(*) AS count FROM local_file_index GROUP BY status"
            ),
            "status",
        )

    v2_available = KGStoreV2 is not None
    v2 = None
    if v2_available:
        try:
            v2 = KGStoreV2(self.db_path).stats()
        except Exception as e:
            v2 = {"available": False, "error": str(e)}

    return {
        "db_path": str(self.db_path),
        "schema_version": GRAPH_SCHEMA_VERSION,
        "v2_schema_available": v2_available,
        "nodes": node_counts,
        "edges": edge_counts,
        "local_sources": local_sources,
        "local_file_status": local_file_status,
        "v2": v2,
    }
|
|
1144
|
-
|
|
1145
|
-
def search_for_document_generation(
    self, query: str, limit: int = 10
) -> List[Dict[str, Any]]:
    """Hybrid retrieval optimized for document generation.

    Scoring: (0.5*text_relevance + 0.3*graph_relationship + 0.2*recency),
    multiplied by 1.2 for Document, File, SlideDeck and Decision nodes.
    Returns nodes with rich context for document generation prompts.

    Candidates are gathered with substring ``LIKE`` searches over title,
    summary and metadata: first for the whole query, then for each topic
    term derived from it. ``%``, ``_`` and ``\\`` in the search text are
    escaped so they match literally, in line with the literal substring test
    used for the text score.

    Args:
        query: Free-text request; blank input returns an empty list.
        limit: Maximum number of results, clamped to 1..50.

    Returns:
        Result dicts ordered by ``hybrid_score`` descending. Each carries
        ``id``, ``type``, ``title``, ``summary``, ``metadata``,
        ``updated_at``, ``hybrid_score``, the component ``scores`` and up to
        eight ``related_concepts`` taken from neighbouring nodes.
    """
    query = str(query or "").strip()
    if not query:
        return []
    limit = max(1, min(int(limit or 10), 50))
    terms = _topic_candidates(query, limit=12)
    now = datetime.now()
    nt, et = self._read_tables()

    def like_pattern(needle):
        # Neutralise LIKE metacharacters so user text is matched verbatim.
        escaped = (
            str(needle)
            .replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )
        return f"%{escaped}%"

    candidate_sql = f"""
        SELECT id, type, title, summary, metadata_json, updated_at
        FROM {nt}
        WHERE (title LIKE ? ESCAPE '\\'
               OR summary LIKE ? ESCAPE '\\'
               OR metadata_json LIKE ? ESCAPE '\\')
          AND type IN ('Document', 'File', 'CodeFile', 'SlideDeck',
                       'Spreadsheet', 'Image', 'ImageText', 'Chat',
                       'Decision', 'Task', 'Concept', 'Feature',
                       'Page', 'Slide')
        ORDER BY updated_at DESC, id ASC
        LIMIT ?
    """
    # The full query gets a larger candidate budget than each derived term.
    searches = [(query, limit * 5)]
    searches.extend((term, limit * 3) for term in terms)
    lowered_terms = [str(term).lower() for term in terms]
    boosted_types = ("Document", "File", "SlideDeck", "Decision")

    scored_results = []
    with self._connect() as conn:
        # Insertion-ordered and keyed by id: the first search that surfaces
        # a node wins, later duplicates are ignored.
        candidates = {}
        for needle, row_cap in searches:
            pattern = like_pattern(needle)
            for row in conn.execute(
                candidate_sql, (pattern, pattern, pattern, row_cap)
            ).fetchall():
                candidates.setdefault(row["id"], row)

        for row in candidates.values():
            # NULL columns contribute nothing; formatting them directly
            # would inject the literal text "None" into the haystack.
            haystack = " ".join(
                str(row[column] or "")
                for column in ("title", "summary", "metadata_json")
            ).lower()

            text_hits = sum(1 for term in lowered_terms if term in haystack)
            text_score = min(1.0, text_hits / max(len(lowered_terms), 1))

            edge_count = conn.execute(
                f"SELECT COUNT(*) AS c FROM {et} WHERE from_node=? OR to_node=?",
                (row["id"], row["id"]),
            ).fetchone()["c"]
            # Log-damped connectivity, saturating at 1.0.
            graph_score = min(1.0, math.log1p(edge_count) / 4.0)

            recency = _recency_score(
                row["updated_at"], now=now, half_life_days=14.0
            )

            doc_type_boost = 1.2 if row["type"] in boosted_types else 1.0
            hybrid_score = (
                0.5 * text_score + 0.3 * graph_score + 0.2 * recency
            ) * doc_type_boost

            neighbor_rows = conn.execute(
                f"""
                SELECT n.title, n.type FROM {et} e
                JOIN {nt} n ON n.id = CASE WHEN e.from_node = ? THEN e.to_node ELSE e.from_node END
                WHERE (e.from_node = ? OR e.to_node = ?)
                  AND n.type IN ('Concept', 'Feature', 'Decision', 'Task')
                LIMIT 8
                """,
                (row["id"], row["id"], row["id"]),
            ).fetchall()
            neighbor_concepts = [
                {"title": nr["title"], "type": nr["type"]} for nr in neighbor_rows
            ]

            scored_results.append(
                {
                    "id": row["id"],
                    "type": row["type"],
                    "title": row["title"],
                    "summary": row["summary"],
                    "metadata": _safe_loads(row["metadata_json"]),
                    "updated_at": row["updated_at"],
                    "hybrid_score": round(hybrid_score, 4),
                    "scores": {
                        "text": round(text_score, 4),
                        "graph": round(graph_score, 4),
                        "recency": round(recency, 4),
                    },
                    "related_concepts": neighbor_concepts,
                }
            )

    scored_results.sort(key=lambda x: x["hybrid_score"], reverse=True)
    return scored_results[:limit]
|
|
1277
|
-
|
|
1278
|
-
def multi_hop_context(
    self, node_ids: List[str], max_hops: int = 2
) -> Dict[str, Any]:
    """Multi-hop graph traversal from seed nodes for richer context.

    Expands breadth-first from ``node_ids``. Each hop loads the frontier
    nodes, records every edge touching them once, and queues the nodes at
    the far end of those edges for the next hop. Seeds and discovered nodes
    are processed in a stable order (caller order, then discovery order), so
    the output is reproducible between runs.

    Args:
        node_ids: Seed node ids; duplicates are ignored and ``None`` is
            treated as no seeds.
        max_hops: Number of expansion rounds to run.

    Returns:
        ``{"nodes": [...], "edges": [...]}``. Each node dict carries ``id``,
        ``type``, ``title``, ``summary``, ``metadata`` and the ``hop`` at
        which it was reached; each edge dict carries ``from``, ``to``,
        ``type`` and ``weight``. Edges recorded during the final hop may
        point at nodes that are not present in ``nodes``.
    """
    visited_nodes = set()
    visited_edges = set()
    all_nodes = []
    all_edges = []
    # dict.fromkeys removes duplicates while keeping first-seen order; a
    # plain set would make the traversal order depend on string hashing.
    frontier = list(dict.fromkeys(node_ids or []))
    nt, et = self._read_tables()

    with self._connect() as conn:
        for hop in range(max_hops):
            if not frontier:
                break
            next_frontier = {}
            for nid in frontier:
                if nid in visited_nodes:
                    continue
                visited_nodes.add(nid)

                row = conn.execute(
                    f"SELECT id, type, title, summary, metadata_json, updated_at FROM {nt} WHERE id=?",
                    (nid,),
                ).fetchone()
                if row:
                    all_nodes.append(
                        {
                            "id": row["id"],
                            "type": row["type"],
                            "title": row["title"],
                            "summary": row["summary"],
                            "metadata": _safe_loads(row["metadata_json"]),
                            "hop": hop,
                        }
                    )

                edge_rows = conn.execute(
                    f"""
                    SELECT id, from_node, to_node, type, weight
                    FROM {et} WHERE from_node=? OR to_node=?
                    ORDER BY id ASC
                    """,
                    (nid, nid),
                ).fetchall()
                for er in edge_rows:
                    if er["id"] not in visited_edges:
                        visited_edges.add(er["id"])
                        all_edges.append(
                            {
                                "from": er["from_node"],
                                "to": er["to_node"],
                                "type": er["type"],
                                "weight": er["weight"],
                            }
                        )
                    # The endpoint that is not the current node.
                    other = (
                        er["to_node"]
                        if er["from_node"] == nid
                        else er["from_node"]
                    )
                    if other not in visited_nodes:
                        next_frontier.setdefault(other, None)
            frontier = list(next_frontier)

    return {"nodes": all_nodes, "edges": all_edges}
|
|
12
|
+
# Warn at import time that this module path is deprecated, then replace this
# module's entry in sys.modules with _impl so later lookups of this name get
# that module object instead.
warnings.warn(
    message="latticeai.brain.retrieval is deprecated; import lattice_brain.graph.retrieval instead",
    category=DeprecationWarning,
    stacklevel=2,
)
sys.modules[__name__] = _impl
|