ltcai 4.3.1 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/README.md +191 -278
  2. package/docs/CHANGELOG.md +128 -0
  3. package/docs/V4_3_2_DEADCODE_AUDIT_REPORT.md +174 -0
  4. package/docs/V4_3_2_DOCUMENTATION_CLEANUP_REPORT.md +81 -0
  5. package/docs/V4_3_2_GITHUB_VERCEL_CHECK_REPORT.md +75 -0
  6. package/docs/V4_3_2_GRAPH_UX_REPORT.md +48 -0
  7. package/docs/V4_3_2_INDEPENDENT_AUDIT_PACKAGE.md +209 -0
  8. package/docs/V4_3_2_PRODUCT_POLISH_REPORT.md +57 -0
  9. package/docs/V4_3_2_SELF_AUDIT_REPORT.md +63 -0
  10. package/docs/V4_3_2_VALIDATION_REPORT.md +97 -0
  11. package/docs/V4_3_3_VALIDATION_REPORT.md +46 -0
  12. package/docs/V4_4_0_EXTRACTION_REPORT.md +239 -0
  13. package/docs/V4_DIGITAL_BRAIN_RECOVERY.md +18 -19
  14. package/frontend/openapi.json +1 -1
  15. package/frontend/src/components/primitives.tsx +92 -10
  16. package/frontend/src/pages/Act.tsx +11 -9
  17. package/frontend/src/pages/Ask.tsx +2 -2
  18. package/frontend/src/pages/Brain.tsx +607 -65
  19. package/frontend/src/pages/Capture.tsx +11 -7
  20. package/frontend/src/pages/Library.tsx +3 -3
  21. package/frontend/src/pages/System.tsx +186 -23
  22. package/lattice_brain/__init__.py +38 -23
  23. package/lattice_brain/_kg_common.py +11 -1
  24. package/lattice_brain/context.py +212 -2
  25. package/lattice_brain/conversations.py +234 -1
  26. package/lattice_brain/discovery.py +11 -1
  27. package/lattice_brain/documents.py +11 -1
  28. package/lattice_brain/graph/__init__.py +28 -0
  29. package/lattice_brain/graph/_kg_common.py +1123 -0
  30. package/lattice_brain/graph/curator.py +473 -0
  31. package/lattice_brain/graph/discovery.py +1455 -0
  32. package/lattice_brain/graph/documents.py +218 -0
  33. package/lattice_brain/graph/identity.py +175 -0
  34. package/lattice_brain/graph/ingest.py +644 -0
  35. package/lattice_brain/graph/network.py +205 -0
  36. package/lattice_brain/graph/projection.py +571 -0
  37. package/lattice_brain/graph/provenance.py +401 -0
  38. package/lattice_brain/graph/retrieval.py +1341 -0
  39. package/lattice_brain/graph/schema.py +640 -0
  40. package/lattice_brain/graph/store.py +237 -0
  41. package/lattice_brain/graph/write_master.py +225 -0
  42. package/lattice_brain/identity.py +11 -13
  43. package/lattice_brain/ingest.py +11 -1
  44. package/lattice_brain/ingestion.py +318 -0
  45. package/lattice_brain/memory.py +100 -1
  46. package/lattice_brain/network.py +11 -1
  47. package/lattice_brain/portability.py +431 -0
  48. package/lattice_brain/projection.py +11 -1
  49. package/lattice_brain/provenance.py +11 -1
  50. package/lattice_brain/retrieval.py +11 -1
  51. package/lattice_brain/runtime/__init__.py +32 -0
  52. package/lattice_brain/runtime/agent_runtime.py +569 -0
  53. package/lattice_brain/runtime/hooks.py +754 -0
  54. package/lattice_brain/runtime/multi_agent.py +795 -0
  55. package/lattice_brain/schema.py +11 -1
  56. package/lattice_brain/store.py +10 -2
  57. package/lattice_brain/workflow.py +461 -0
  58. package/lattice_brain/write_master.py +11 -1
  59. package/latticeai/__init__.py +1 -1
  60. package/latticeai/api/agents.py +2 -2
  61. package/latticeai/api/browser.py +1 -1
  62. package/latticeai/api/chat.py +1 -1
  63. package/latticeai/api/computer_use.py +1 -1
  64. package/latticeai/api/hooks.py +2 -2
  65. package/latticeai/api/mcp.py +1 -1
  66. package/latticeai/api/tools.py +1 -1
  67. package/latticeai/api/workflow_designer.py +2 -2
  68. package/latticeai/app_factory.py +4 -4
  69. package/latticeai/brain/__init__.py +24 -6
  70. package/latticeai/brain/_kg_common.py +11 -1117
  71. package/latticeai/brain/context.py +12 -208
  72. package/latticeai/brain/conversations.py +12 -231
  73. package/latticeai/brain/discovery.py +13 -1451
  74. package/latticeai/brain/documents.py +13 -214
  75. package/latticeai/brain/identity.py +11 -169
  76. package/latticeai/brain/ingest.py +13 -640
  77. package/latticeai/brain/memory.py +12 -97
  78. package/latticeai/brain/network.py +12 -200
  79. package/latticeai/brain/projection.py +13 -567
  80. package/latticeai/brain/provenance.py +13 -397
  81. package/latticeai/brain/retrieval.py +13 -1337
  82. package/latticeai/brain/schema.py +12 -635
  83. package/latticeai/brain/store.py +13 -233
  84. package/latticeai/brain/write_master.py +13 -221
  85. package/latticeai/core/agent.py +1 -1
  86. package/latticeai/core/agent_registry.py +2 -2
  87. package/latticeai/core/builtin_hooks.py +2 -2
  88. package/latticeai/core/graph_curator.py +6 -468
  89. package/latticeai/core/hooks.py +6 -749
  90. package/latticeai/core/marketplace.py +1 -1
  91. package/latticeai/core/multi_agent.py +6 -790
  92. package/latticeai/core/workflow_engine.py +6 -456
  93. package/latticeai/core/workspace_os.py +1 -1
  94. package/latticeai/services/agent_runtime.py +6 -564
  95. package/latticeai/services/ingestion.py +6 -313
  96. package/latticeai/services/kg_portability.py +6 -426
  97. package/latticeai/services/platform_runtime.py +3 -3
  98. package/latticeai/services/run_executor.py +1 -1
  99. package/latticeai/services/upload_service.py +1 -1
  100. package/p_reinforce.py +1 -1
  101. package/package.json +3 -6
  102. package/scripts/build_vercel_static.mjs +77 -0
  103. package/scripts/bump_version.py +1 -1
  104. package/scripts/check_markdown_links.mjs +75 -0
  105. package/scripts/wheel_smoke.py +7 -0
  106. package/src-tauri/Cargo.lock +1 -1
  107. package/src-tauri/Cargo.toml +1 -1
  108. package/src-tauri/src/main.rs +12 -2
  109. package/src-tauri/tauri.conf.json +1 -1
  110. package/static/app/asset-manifest.json +5 -5
  111. package/static/app/assets/index-CHHal8Zl.css +2 -0
  112. package/static/app/assets/index-pdzil9ac.js +333 -0
  113. package/static/app/assets/index-pdzil9ac.js.map +1 -0
  114. package/static/app/index.html +2 -2
  115. package/latticeai/api/deps.py +0 -15
  116. package/scripts/capture/README.md +0 -28
  117. package/scripts/capture/capture_enterprise.js +0 -8
  118. package/scripts/capture/capture_graph.js +0 -8
  119. package/scripts/capture/capture_onboarding.js +0 -8
  120. package/scripts/capture/capture_page.js +0 -43
  121. package/scripts/capture/capture_release_media.js +0 -125
  122. package/scripts/capture/capture_skills.js +0 -8
  123. package/scripts/capture/capture_v340.js +0 -88
  124. package/scripts/capture/capture_workspace.js +0 -8
  125. package/scripts/generate_diagrams.py +0 -512
  126. package/scripts/release-0.3.1.sh +0 -105
  127. package/scripts/take_screenshots.js +0 -69
  128. package/static/app/assets/index-BhPuj8rT.js +0 -333
  129. package/static/app/assets/index-BhPuj8rT.js.map +0 -1
  130. package/static/app/assets/index-yZswHE3d.css +0 -2
  131. package/static/css/tokens.3ba22e37.css +0 -260
@@ -1,1341 +1,17 @@
1
- from __future__ import annotations
1
+ """Deprecated shim: physically moved to lattice_brain.graph.retrieval.
2
2
 
3
- # ruff: noqa: F403,F405
3
+ Kept only for the compatibility window. The module aliases itself to the
4
+ physical module so identity, singletons, and monkeypatching are preserved.
5
+ """
4
6
 
5
- from ._kg_common import * # noqa: F403,F401
7
+ import sys
8
+ import warnings
6
9
 
10
+ import lattice_brain.graph.retrieval as _impl
7
11
 
8
- class KnowledgeGraphRetrievalMixin:
9
- _GRAPH_VISIBLE_TYPES = (
10
- "Computer", # 내 컴퓨터
11
- "Drive", # 드라이브 / 볼륨
12
- "Folder", # 폴더
13
- "File", # 일반 파일
14
- "Chat", # 대화 세션
15
- "Document", # 파일 (PDF·PPT·Word·Excel·이미지)
16
- "CodeFile", # 코드 파일
17
- "Spreadsheet", # 엑셀/CSV
18
- "SlideDeck", # 프레젠테이션
19
- "Image", # 이미지
20
- "ImageText", # OCR 텍스트
21
- "Concept", # 개념 / 아이디어 / 기술 용어
22
- "Person", # 사람
23
- "Error", # 오류 / 버그
24
- "Code", # 코드 / 함수
25
- "Feature", # 소프트웨어 기능
26
- "Task", # 할 일
27
- "Decision", # 결정 사항
28
- # v3.6.0 Knowledge Graph First — 1급 엔티티를 그래프에 노출
29
- "Source", # 수집 출처 (파일/URL/브라우저 탭/git)
30
- "Repository", # git 저장소
31
- "Meeting", # 회의
32
- "Organization", # 조직
33
- "Workflow", # 워크플로우
34
- "Agent", # 에이전트
35
- )
36
-
37
- def list_documents(self, limit: int = 200) -> Dict[str, Any]:
38
- """List ingested ``Document`` nodes with their ingest + index state.
39
-
40
- Powers the Files view: every accepted upload and every indexed local
41
- document becomes a ``Document`` node. A document is reported ``indexed``
42
- once its retrieval chunks exist (searchable in Chat / Hybrid Search).
43
- """
44
- limit = max(1, min(int(limit or 200), 1000))
45
- nt, _ = self._read_tables()
46
- documents: List[Dict[str, Any]] = []
47
- with self._connect() as conn:
48
- rows = conn.execute(
49
- f"SELECT id, title, summary, metadata_json, created_at, updated_at "
50
- f"FROM {nt} WHERE type='Document' ORDER BY updated_at DESC, id ASC LIMIT ?",
51
- (limit,),
52
- ).fetchall()
53
- for row in rows:
54
- meta = _safe_loads(row["metadata_json"]) or {}
55
- extracted = meta.get("extracted") or {}
56
- node_id = row["id"]
57
- chunk_count = conn.execute(
58
- f"SELECT COUNT(*) AS c FROM {nt} WHERE type='Chunk' AND metadata_json LIKE ?",
59
- (f"%{node_id}%",),
60
- ).fetchone()["c"]
61
- documents.append(
62
- {
63
- "id": node_id,
64
- "filename": meta.get("filename") or row["title"],
65
- "ext": meta.get("ext"),
66
- "mime_type": meta.get("mime_type"),
67
- "bytes": meta.get("bytes"),
68
- "sha256": meta.get("sha256"),
69
- "uploader": meta.get("uploader"),
70
- "chars": extracted.get("chars"),
71
- "chunks": int(chunk_count or 0),
72
- "indexed": int(chunk_count or 0) > 0,
73
- "ingest_state": "indexed"
74
- if int(chunk_count or 0) > 0
75
- else "ingested",
76
- "created_at": row["created_at"],
77
- "updated_at": row["updated_at"],
78
- }
79
- )
80
- return {
81
- "documents": documents,
82
- "total": len(documents),
83
- "generated_at": datetime.now().isoformat(timespec="seconds"),
84
- }
85
-
86
- def workspaces_of(self, node_ids) -> Dict[str, Optional[str]]:
87
- """Map node ids to their workspace scope (None = legacy-global)."""
88
- ids = [str(i) for i in node_ids if i]
89
- if not ids:
90
- return {}
91
- placeholders = ",".join("?" for _ in ids)
92
- with self._connect() as conn:
93
- try:
94
- return {
95
- row["id"]: row["workspace_id"]
96
- for row in conn.execute(
97
- f"SELECT id, workspace_id FROM nodes_v2 WHERE id IN ({placeholders})",
98
- ids,
99
- ).fetchall()
100
- }
101
- except Exception:
102
- return {}
103
-
104
- def filter_scoped_nodes(self, items, allowed_workspaces, *, id_key: str = "id"):
105
- """Drop items scoped to a workspace the caller is not a member of.
106
-
107
- ``allowed_workspaces=None`` means no scoping (single-user / no-auth
108
- mode). Legacy-global rows (no workspace) stay visible to everyone on
109
- the machine — the documented pre-v4 compatibility behavior.
110
- """
111
- if allowed_workspaces is None:
112
- return list(items)
113
- allowed = set(allowed_workspaces)
114
- scopes = self.workspaces_of([item.get(id_key) for item in items])
115
- return [
116
- item
117
- for item in items
118
- if scopes.get(item.get(id_key)) is None
119
- or scopes.get(item.get(id_key)) in allowed
120
- ]
121
-
122
- def graph(self, limit: int = 300, *, allowed_workspaces=None) -> Dict[str, Any]:
123
- limit = max(1, min(int(limit or 300), 2000))
124
- visible = ",".join(f"'{t}'" for t in self._GRAPH_VISIBLE_TYPES)
125
- nt, et = self._read_tables()
126
- with self._connect() as conn:
127
- nodes = [
128
- {
129
- "id": row["id"],
130
- "type": row["type"],
131
- "title": row["title"],
132
- "summary": row["summary"],
133
- "metadata": _safe_loads(row["metadata_json"]),
134
- "updated_at": row["updated_at"],
135
- }
136
- for row in conn.execute(
137
- f"SELECT id, type, title, summary, metadata_json, updated_at FROM {nt} WHERE type IN ({visible}) ORDER BY updated_at DESC, id ASC LIMIT ?",
138
- (limit,),
139
- )
140
- ]
141
- node_ids = {node["id"] for node in nodes}
142
- edges: List[Dict[str, Any]] = []
143
- if node_ids:
144
- edge_rows = conn.execute(
145
- f"""
146
- SELECT id, from_node, to_node, type, weight, metadata_json
147
- FROM {et}
148
- WHERE from_node IN (
149
- SELECT id FROM {nt} WHERE type IN ({visible})
150
- ORDER BY updated_at DESC, id ASC LIMIT ?
151
- )
152
- AND to_node IN (
153
- SELECT id FROM {nt} WHERE type IN ({visible})
154
- ORDER BY updated_at DESC, id ASC LIMIT ?
155
- )
156
- ORDER BY weight DESC, created_at DESC, id ASC
157
- """,
158
- (limit, limit),
159
- ).fetchall()
160
- edges = [
161
- {
162
- "id": row["id"],
163
- "from": row["from_node"],
164
- "to": row["to_node"],
165
- "type": row["type"],
166
- "weight": row["weight"],
167
- "metadata": _safe_loads(row["metadata_json"]),
168
- }
169
- for row in edge_rows
170
- ]
171
-
172
- if allowed_workspaces is not None:
173
- nodes = self.filter_scoped_nodes(nodes, allowed_workspaces)
174
- kept_ids = {node["id"] for node in nodes}
175
- edges = [e for e in edges if e["from"] in kept_ids and e["to"] in kept_ids]
176
-
177
- degree_map: Dict[str, int] = {}
178
- now = datetime.now()
179
- node_by_id = {node["id"]: node for node in nodes}
180
- topic_metrics: Dict[str, Dict[str, Any]] = {}
181
-
182
- for edge in edges:
183
- degree_map[edge["from"]] = degree_map.get(edge["from"], 0) + 1
184
- degree_map[edge["to"]] = degree_map.get(edge["to"], 0) + 1
185
- from_node = node_by_id.get(edge["from"])
186
- to_node = node_by_id.get(edge["to"])
187
- if not from_node or not to_node:
188
- continue
189
- for topic_node, other_node in ((from_node, to_node), (to_node, from_node)):
190
- if topic_node["type"] != "Topic":
191
- continue
192
- metrics = topic_metrics.setdefault(
193
- topic_node["id"],
194
- {
195
- "mention_count": 0.0,
196
- "conversation_ids": set(),
197
- },
198
- )
199
- if edge["type"] in {"mentions", "discusses"}:
200
- metrics["mention_count"] += max(
201
- 0.5, float(edge.get("weight") or 1.0)
202
- )
203
- other_meta = other_node.get("metadata") or {}
204
- conversation_id = other_meta.get("conversation_id")
205
- if other_node["type"] == "Conversation":
206
- conversation_id = other_node["id"]
207
- if conversation_id:
208
- metrics["conversation_ids"].add(str(conversation_id))
209
-
210
- type_max_raw: Dict[str, float] = {}
211
- for node in nodes:
212
- degree = degree_map.get(node["id"], 0)
213
- recency = _recency_score(node.get("updated_at"), now=now)
214
- metrics = {
215
- "degree": degree,
216
- "recency_score": round(recency, 4),
217
- }
218
- if node["type"] == "Topic":
219
- topic_stat = topic_metrics.get(node["id"], {})
220
- mention_count = float(topic_stat.get("mention_count") or 0.0)
221
- conversation_count = len(topic_stat.get("conversation_ids") or ())
222
- raw_importance = (
223
- math.log1p(mention_count) * 2.8
224
- + math.log1p(conversation_count) * 2.2
225
- + recency * 1.4
226
- + math.sqrt(max(0, degree)) * 0.45
227
- )
228
- metrics.update(
229
- {
230
- "mention_count": round(mention_count, 2),
231
- "conversation_count": conversation_count,
232
- }
233
- )
234
- else:
235
- raw_importance = math.log1p(max(0, degree)) * 1.4 + recency * 0.9
236
-
237
- metrics["importance_raw"] = round(raw_importance, 4)
238
- node["importance"] = round(raw_importance, 4)
239
- node["_raw_importance"] = raw_importance
240
- node["metadata"] = {
241
- **(node.get("metadata") or {}),
242
- "graph_metrics": metrics,
243
- }
244
- type_max_raw[node["type"]] = max(
245
- type_max_raw.get(node["type"], 0.0), raw_importance
246
- )
247
-
248
- for node in nodes:
249
- max_raw = max(type_max_raw.get(node["type"], 0.0), 0.0001)
250
- importance_norm = min(1.0, (node.get("_raw_importance") or 0.0) / max_raw)
251
- node["importance_norm"] = round(importance_norm, 4)
252
- node["metadata"]["graph_metrics"]["importance_norm"] = node[
253
- "importance_norm"
254
- ]
255
- node.pop("_raw_importance", None)
256
- return {"nodes": nodes, "edges": edges}
257
-
258
- def search(self, query: str, limit: int = 30) -> Dict[str, Any]:
259
- query = str(query or "").strip()
260
- q = f"%{query}%"
261
- limit = max(1, min(int(limit or 30), 100))
262
- nt, et = self._read_tables()
263
- with self._connect() as conn:
264
- rows = []
265
- if query:
266
- fts_ids = self._fts_match_ids(conn, query, limit)
267
- if fts_ids:
268
- placeholders = ",".join("?" for _ in fts_ids)
269
- by_id = {
270
- row["id"]: row
271
- for row in conn.execute(
272
- f"""
273
- SELECT id, type, title, summary, metadata_json, updated_at
274
- FROM {nt} WHERE id IN ({placeholders})
275
- """,
276
- fts_ids,
277
- ).fetchall()
278
- }
279
- # Preserve FTS bm25 rank order.
280
- rows = [by_id[i] for i in fts_ids if i in by_id]
281
- else:
282
- rows = conn.execute(
283
- f"""
284
- SELECT id, type, title, summary, metadata_json, updated_at
285
- FROM {nt}
286
- WHERE title LIKE ? OR summary LIKE ? OR metadata_json LIKE ?
287
- ORDER BY updated_at DESC, id ASC
288
- LIMIT ?
289
- """,
290
- (q, q, q, limit),
291
- ).fetchall()
292
-
293
- if len(rows) < limit:
294
- terms = _topic_candidates(query, limit=8)
295
- if terms:
296
- clauses = []
297
- params: List[str] = []
298
- for term in terms:
299
- clauses.append(
300
- "(title LIKE ? OR summary LIKE ? OR metadata_json LIKE ?)"
301
- )
302
- params.extend([f"%{term}%", f"%{term}%", f"%{term}%"])
303
- extra = conn.execute(
304
- f"""
305
- SELECT id, type, title, summary, metadata_json, updated_at
306
- FROM {nt}
307
- WHERE {" OR ".join(clauses)}
308
- ORDER BY updated_at DESC, id ASC
309
- LIMIT ?
310
- """,
311
- (*params, limit * 3),
312
- ).fetchall()
313
- by_id = {row["id"]: row for row in rows}
314
- for row in extra:
315
- by_id.setdefault(row["id"], row)
316
- rows = list(by_id.values())
317
-
318
- terms_for_score = set(_topic_candidates(query, limit=12))
319
-
320
- def score(row: sqlite3.Row) -> tuple:
321
- haystack = (
322
- f"{row['title']} {row['summary']} {row['metadata_json']}".lower()
323
- )
324
- hits = sum(1 for term in terms_for_score if term.lower() in haystack)
325
- type_boost = (
326
- 1
327
- if row["type"]
328
- in {
329
- "Decision",
330
- "Task",
331
- "File",
332
- "Document",
333
- "CodeFile",
334
- "Spreadsheet",
335
- "SlideDeck",
336
- "Image",
337
- "ImageText",
338
- "Page",
339
- "Slide",
340
- }
341
- else 0
342
- )
343
- return (hits, type_boost, row["updated_at"] or "")
344
-
345
- # Deterministic contract: rows with equal relevance order by id ASC
346
- # (stable sort preserves the pre-sort under reverse=True), matching
347
- # the legacy LIKE path regardless of FTS bm25 tie ordering.
348
- rows = sorted(rows, key=lambda r: r["id"])
349
- rows = sorted(rows, key=score, reverse=True)[:limit]
350
- return {
351
- "query": query,
352
- "matches": [
353
- {
354
- "id": row["id"],
355
- "type": row["type"],
356
- "title": row["title"],
357
- "summary": row["summary"],
358
- "metadata": _safe_loads(row["metadata_json"]),
359
- "updated_at": row["updated_at"],
360
- }
361
- for row in rows
362
- ],
363
- }
364
-
365
- def context_for_query(self, query: str, limit: int = 6) -> str:
366
- """Return compact graph-backed RAG context for chat generation."""
367
- query = str(query or "").strip()
368
- if not query:
369
- return ""
370
- matches = self.search(query, limit).get("matches", [])
371
- if not matches:
372
- topics = _topic_candidates(query, limit=4)
373
- if topics:
374
- nt, et = self._read_tables()
375
- with self._connect() as conn:
376
- rows = []
377
- for topic in topics:
378
- rows.extend(
379
- conn.execute(
380
- f"""
381
- SELECT id, type, title, summary, metadata_json
382
- FROM {nt}
383
- WHERE title LIKE ? OR metadata_json LIKE ?
384
- ORDER BY updated_at DESC, id ASC
385
- LIMIT 3
386
- """,
387
- (f"%{topic}%", f"%{topic}%"),
388
- ).fetchall()
389
- )
390
- seen = set()
391
- matches = []
392
- for row in rows:
393
- if row["id"] in seen:
394
- continue
395
- seen.add(row["id"])
396
- matches.append(
397
- {
398
- "id": row["id"],
399
- "type": row["type"],
400
- "title": row["title"],
401
- "summary": row["summary"],
402
- "metadata": _safe_loads(row["metadata_json"]),
403
- }
404
- )
405
- if len(matches) >= limit:
406
- break
407
- lines = []
408
- for match in matches[:limit]:
409
- meta = match.get("metadata") or {}
410
- source = (
411
- meta.get("relative_path")
412
- or meta.get("filename")
413
- or meta.get("conversation_id")
414
- or meta.get("source")
415
- or match["id"]
416
- )
417
- summary = _clean_text(match.get("summary") or "")[:700]
418
- lines.append(
419
- f"- [{match['type']}] {match['title']} | source={source} | {summary}"
420
- )
421
- return "\n".join(lines)
422
-
423
- def neighbors(self, node_id: str) -> Dict[str, Any]:
424
- """Return direct neighbors (1-hop) of a node."""
425
- nt, et = self._read_tables()
426
- with self._connect() as conn:
427
- edge_rows = conn.execute(
428
- f"SELECT from_node, to_node, type, weight FROM {et} WHERE from_node=? OR to_node=? ORDER BY id ASC",
429
- (node_id, node_id),
430
- ).fetchall()
431
- neighbor_ids: set = set()
432
- edges = []
433
- for row in edge_rows:
434
- neighbor_ids.add(row["from_node"])
435
- neighbor_ids.add(row["to_node"])
436
- edges.append(
437
- {
438
- "from": row["from_node"],
439
- "to": row["to_node"],
440
- "type": row["type"],
441
- "weight": row["weight"],
442
- }
443
- )
444
- neighbor_ids.discard(node_id)
445
- nodes = []
446
- if neighbor_ids:
447
- placeholders = ",".join("?" * len(neighbor_ids))
448
- nodes = [
449
- {
450
- "id": row["id"],
451
- "type": row["type"],
452
- "title": row["title"],
453
- "summary": row["summary"],
454
- "metadata": _safe_loads(row["metadata_json"]),
455
- }
456
- for row in conn.execute(
457
- f"SELECT id, type, title, summary, metadata_json FROM {nt} WHERE id IN ({placeholders}) ORDER BY id ASC",
458
- list(neighbor_ids),
459
- )
460
- ]
461
- return {"node_id": node_id, "neighbors": nodes, "edges": edges}
462
-
463
- def get_node(self, node_id: str) -> Dict[str, Any]:
464
- node_id = str(node_id or "").strip()
465
- if not node_id:
466
- raise ValueError("node_id required")
467
- nt, et = self._read_tables()
468
- with self._connect() as conn:
469
- row = conn.execute(
470
- f"""
471
- SELECT id, type, title, summary, metadata_json, updated_at
472
- FROM {nt}
473
- WHERE id=?
474
- """,
475
- (node_id,),
476
- ).fetchone()
477
- if not row:
478
- raise ValueError(f"graph node not found: {node_id}")
479
- degree = conn.execute(
480
- f"SELECT COUNT(*) AS c FROM {et} WHERE from_node=? OR to_node=?",
481
- (node_id, node_id),
482
- ).fetchone()["c"]
483
- return {
484
- "id": row["id"],
485
- "type": row["type"],
486
- "title": row["title"],
487
- "summary": row["summary"],
488
- "metadata": _safe_loads(row["metadata_json"]),
489
- "updated_at": row["updated_at"],
490
- "degree": degree,
491
- }
492
-
493
- def relationship_search(
494
- self,
495
- *,
496
- query: str = "",
497
- node_id: str = "",
498
- relationship_type: str = "",
499
- limit: int = 30,
500
- ) -> Dict[str, Any]:
501
- query = str(query or "").strip()
502
- node_id = str(node_id or "").strip()
503
- relationship_type = str(relationship_type or "").strip()
504
- limit = max(1, min(int(limit or 30), 200))
505
- nt, et = self._read_tables()
506
- where = []
507
- params: List[Any] = []
508
- if node_id:
509
- where.append("(e.from_node=? OR e.to_node=?)")
510
- params.extend([node_id, node_id])
511
- if relationship_type:
512
- where.append("e.type LIKE ?")
513
- params.append(f"%{relationship_type}%")
514
- if query:
515
- where.append(
516
- "(e.type LIKE ? OR e.metadata_json LIKE ? OR src.title LIKE ? OR dst.title LIKE ? OR src.summary LIKE ? OR dst.summary LIKE ?)"
517
- )
518
- params.extend([f"%{query}%"] * 6)
519
- where_sql = "WHERE " + " AND ".join(where) if where else ""
520
- with self._connect() as conn:
521
- rows = conn.execute(
522
- f"""
523
- SELECT
524
- e.id, e.from_node, e.to_node, e.type, e.weight, e.metadata_json, e.created_at,
525
- src.type AS source_type, src.title AS source_title, src.summary AS source_summary,
526
- src.metadata_json AS source_metadata,
527
- dst.type AS target_type, dst.title AS target_title, dst.summary AS target_summary,
528
- dst.metadata_json AS target_metadata
529
- FROM {et} e
530
- JOIN {nt} src ON src.id=e.from_node
531
- JOIN {nt} dst ON dst.id=e.to_node
532
- {where_sql}
533
- ORDER BY e.weight DESC, e.created_at DESC, e.id ASC
534
- LIMIT ?
535
- """,
536
- (*params, limit),
537
- ).fetchall()
538
- return {
539
- "query": query,
540
- "node_id": node_id,
541
- "relationship_type": relationship_type,
542
- "relationships": [
543
- {
544
- "id": row["id"],
545
- "type": row["type"],
546
- "weight": row["weight"],
547
- "metadata": _safe_loads(row["metadata_json"]),
548
- "created_at": row["created_at"],
549
- "source": {
550
- "id": row["from_node"],
551
- "type": row["source_type"],
552
- "title": row["source_title"],
553
- "summary": row["source_summary"],
554
- "metadata": _safe_loads(row["source_metadata"]),
555
- },
556
- "target": {
557
- "id": row["to_node"],
558
- "type": row["target_type"],
559
- "title": row["target_title"],
560
- "summary": row["target_summary"],
561
- "metadata": _safe_loads(row["target_metadata"]),
562
- },
563
- }
564
- for row in rows
565
- ],
566
- }
567
-
568
- def traverse(
569
- self, node_id: str, *, depth: int = 1, limit: int = 100
570
- ) -> Dict[str, Any]:
571
- node_id = str(node_id or "").strip()
572
- if not node_id:
573
- raise ValueError("node_id required")
574
- depth = max(0, min(int(depth or 1), 4))
575
- limit = max(1, min(int(limit or 100), 500))
576
- nt, et = self._read_tables()
577
- visited = {node_id}
578
- frontier = {node_id}
579
- edges_by_id: Dict[str, Dict[str, Any]] = {}
580
- with self._connect() as conn:
581
- for _ in range(depth):
582
- if not frontier or len(visited) >= limit:
583
- break
584
- placeholders = ",".join("?" * len(frontier))
585
- rows = conn.execute(
586
- f"""
587
- SELECT id, from_node, to_node, type, weight, metadata_json
588
- FROM {et}
589
- WHERE from_node IN ({placeholders}) OR to_node IN ({placeholders})
590
- ORDER BY weight DESC, id ASC
591
- LIMIT ?
592
- """,
593
- (*frontier, *frontier, limit * 3),
594
- ).fetchall()
595
- next_frontier = set()
596
- for row in rows:
597
- edges_by_id[row["id"]] = {
598
- "id": row["id"],
599
- "from": row["from_node"],
600
- "to": row["to_node"],
601
- "type": row["type"],
602
- "weight": row["weight"],
603
- "metadata": _safe_loads(row["metadata_json"]),
604
- }
605
- for candidate in (row["from_node"], row["to_node"]):
606
- if candidate not in visited and len(visited) < limit:
607
- visited.add(candidate)
608
- next_frontier.add(candidate)
609
- frontier = next_frontier
610
- placeholders = ",".join("?" * len(visited))
611
- node_rows = conn.execute(
612
- f"""
613
- SELECT id, type, title, summary, metadata_json, updated_at
614
- FROM {nt}
615
- WHERE id IN ({placeholders})
616
- ORDER BY updated_at DESC, id ASC
617
- """,
618
- list(visited),
619
- ).fetchall()
620
- return {
621
- "root": node_id,
622
- "depth": depth,
623
- "nodes": [
624
- {
625
- "id": row["id"],
626
- "type": row["type"],
627
- "title": row["title"],
628
- "summary": row["summary"],
629
- "metadata": _safe_loads(row["metadata_json"]),
630
- "updated_at": row["updated_at"],
631
- }
632
- for row in node_rows
633
- ],
634
- "edges": list(edges_by_id.values()),
635
- }
636
-
637
- def _iter_vector_source_items(
638
- self,
639
- conn: sqlite3.Connection,
640
- *,
641
- include_nodes: bool = True,
642
- include_chunks: bool = True,
643
- ) -> List[Dict[str, Any]]:
644
- items: List[Dict[str, Any]] = []
645
- if include_nodes:
646
- for row in conn.execute(
647
- """
648
- SELECT id, type, title, summary, metadata_json
649
- FROM nodes
650
- WHERE type <> 'Chunk'
651
- ORDER BY updated_at DESC, id ASC
652
- """
653
- ).fetchall():
654
- metadata = _safe_loads(row["metadata_json"])
655
- text = self._vector_text_for_node(
656
- title=row["title"],
657
- summary=row["summary"] or "",
658
- metadata=metadata,
659
- )
660
- if text:
661
- items.append(
662
- {
663
- "item_id": row["id"],
664
- "item_type": "node",
665
- "source_node": row["id"],
666
- "text": text,
667
- "metadata": {"node_type": row["type"], **metadata},
668
- }
669
- )
670
- if include_chunks:
671
- for row in conn.execute(
672
- """
673
- SELECT c.id, c.source_node AS parent_source_node, c.text, c.metadata_json
674
- FROM chunks c
675
- JOIN nodes n ON n.id=c.id
676
- ORDER BY c.created_at DESC, c.id ASC
677
- """
678
- ).fetchall():
679
- metadata = _safe_loads(row["metadata_json"])
680
- text = _clean_text(row["text"] or "")
681
- if text:
682
- items.append(
683
- {
684
- "item_id": row["id"],
685
- "item_type": "chunk",
686
- "source_node": row["id"],
687
- "text": text,
688
- "metadata": {
689
- **metadata,
690
- "parent_source_node": row["parent_source_node"],
691
- },
692
- }
693
- )
694
- return items
695
-
696
- def rebuild_vector_index(
697
- self,
698
- *,
699
- full: bool = False,
700
- include_nodes: bool = True,
701
- include_chunks: bool = True,
702
- ) -> Dict[str, Any]:
703
- """Rebuild the derived vector index without mutating graph content."""
704
- op_id = f"vector-op:{_sha256_text(f'{time.time()}:{os.getpid()}')[:24]}"
705
- requested_at = _now()
706
- started = time.perf_counter()
707
- try:
708
- with self._connect() as conn:
709
- conn.execute(
710
- """
711
- INSERT INTO vector_index_operations(
712
- id, operation, status, requested_at, started_at, metadata_json
713
- )
714
- VALUES (?, ?, 'running', ?, ?, ?)
715
- """,
716
- (
717
- op_id,
718
- "rebuild_full" if full else "rebuild_incremental",
719
- requested_at,
720
- requested_at,
721
- _json(
722
- {
723
- "include_nodes": include_nodes,
724
- "include_chunks": include_chunks,
725
- }
726
- ),
727
- ),
728
- )
729
- if full:
730
- filters = []
731
- if include_nodes:
732
- filters.append("'node'")
733
- if include_chunks:
734
- filters.append("'chunk'")
735
- if filters:
736
- conn.execute(
737
- f"DELETE FROM vector_embeddings WHERE item_type IN ({','.join(filters)})"
738
- )
739
- items = self._iter_vector_source_items(
740
- conn,
741
- include_nodes=include_nodes,
742
- include_chunks=include_chunks,
743
- )
744
- indexed = skipped = 0
745
- for item in items:
746
- changed = self._upsert_vector_item(conn, **item)
747
- if changed:
748
- indexed += 1
749
- else:
750
- skipped += 1
751
- duration_ms = round((time.perf_counter() - started) * 1000, 2)
752
- conn.execute(
753
- """
754
- UPDATE vector_index_operations
755
- SET status='completed', completed_at=?, items_total=?,
756
- items_indexed=?, items_skipped=?, metadata_json=?
757
- WHERE id=?
758
- """,
759
- (
760
- _now(),
761
- len(items),
762
- indexed,
763
- skipped,
764
- _json(
765
- {
766
- "include_nodes": include_nodes,
767
- "include_chunks": include_chunks,
768
- "duration_ms": duration_ms,
769
- "embedding_model": self._embedding_model.model_id,
770
- "embedding_dim": self._embedding_model.dim,
771
- }
772
- ),
773
- op_id,
774
- ),
775
- )
776
- return {
777
- "status": "completed",
778
- "operation_id": op_id,
779
- "full": bool(full),
780
- "items_total": len(items),
781
- "items_indexed": indexed,
782
- "items_skipped": skipped,
783
- "duration_ms": duration_ms,
784
- "embedding_model": self._embedding_model.model_id,
785
- "embedding_dim": self._embedding_model.dim,
786
- }
787
- except Exception as exc:
788
- duration_ms = round((time.perf_counter() - started) * 1000, 2)
789
- with self._connect() as conn:
790
- conn.execute(
791
- """
792
- INSERT INTO vector_index_operations(
793
- id, operation, status, requested_at, started_at, completed_at,
794
- error_message, metadata_json
795
- )
796
- VALUES (?, ?, 'failed', ?, ?, ?, ?, ?)
797
- ON CONFLICT(id) DO UPDATE SET
798
- status='failed',
799
- completed_at=excluded.completed_at,
800
- error_message=excluded.error_message,
801
- metadata_json=excluded.metadata_json
802
- """,
803
- (
804
- op_id,
805
- "rebuild_full" if full else "rebuild_incremental",
806
- requested_at,
807
- requested_at,
808
- _now(),
809
- str(exc),
810
- _json({"duration_ms": duration_ms}),
811
- ),
812
- )
813
- raise
814
-
815
- def index_status(self) -> Dict[str, Any]:
816
- storage_capabilities = None
817
- try:
818
- storage_capabilities = self.storage_engine.capabilities().as_dict()
819
- except Exception as exc:
820
- storage_capabilities = {
821
- "engine": "sqlite",
822
- "available": False,
823
- "reason": str(exc),
824
- }
825
- with self._connect() as conn:
826
- vector_counts = {
827
- row["item_type"]: row["count"]
828
- for row in conn.execute(
829
- "SELECT item_type, COUNT(*) AS count FROM vector_embeddings GROUP BY item_type"
830
- )
831
- }
832
- source_items = self._iter_vector_source_items(conn)
833
- vector_rows = {
834
- row["item_id"]: row
835
- for row in conn.execute(
836
- """
837
- SELECT item_id, text_hash, embedding_dim, embedding_model, indexed_at
838
- FROM vector_embeddings
839
- """
840
- ).fetchall()
841
- }
842
- latest_rows = conn.execute(
843
- """
844
- SELECT id, operation, status, requested_at, started_at, completed_at,
845
- items_total, items_indexed, items_skipped, error_message, metadata_json
846
- FROM vector_index_operations
847
- ORDER BY requested_at DESC, id DESC
848
- LIMIT 5
849
- """
850
- ).fetchall()
851
- missing = stale = ready = 0
852
- for item in source_items:
853
- vector_row = vector_rows.get(item["item_id"])
854
- expected_hash = _sha256_text(_clean_text(item["text"]))
855
- if not vector_row:
856
- missing += 1
857
- elif (
858
- vector_row["text_hash"] != expected_hash
859
- or vector_row["embedding_dim"] != self._embedding_model.dim
860
- or vector_row["embedding_model"] != self._embedding_model.model_id
861
- ):
862
- stale += 1
863
- else:
864
- ready += 1
865
- pending = missing + stale
866
- return {
867
- "status": "ready" if pending == 0 else "needs_reindex",
868
- "storage": {
869
- "db_path": str(self.db_path),
870
- "backend": "sqlite",
871
- "embedding_model": self._embedding_model.model_id,
872
- "embedding_dim": self._embedding_model.dim,
873
- # Honest capability report: trigram FTS5 keyword index, or
874
- # LIKE-scan fallback when this SQLite build lacks it.
875
- "fts_enabled": bool(getattr(self, "_fts_enabled", False)),
876
- "engine": storage_capabilities,
877
- "vector_search_backend": (
878
- storage_capabilities.get("vector_backend")
879
- if isinstance(storage_capabilities, dict)
880
- else "bruteforce-cosine"
881
- ),
882
- "vector_search_mode": (
883
- (storage_capabilities.get("metadata") or {}).get("vector_mode")
884
- if isinstance(storage_capabilities, dict)
885
- else "fallback"
886
- ),
887
- "sqlite_vec_ann_available": (
888
- bool((storage_capabilities.get("metadata") or {}).get("sqlite_vec_ann_available"))
889
- if isinstance(storage_capabilities, dict)
890
- else False
891
- ),
892
- },
893
- "source_items": len(source_items),
894
- "indexed_items": sum(vector_counts.values()),
895
- "ready_items": ready,
896
- "missing_items": missing,
897
- "stale_items": stale,
898
- "pending_items": pending,
899
- "by_item_type": vector_counts,
900
- "operations": [
901
- {
902
- "id": row["id"],
903
- "operation": row["operation"],
904
- "status": row["status"],
905
- "requested_at": row["requested_at"],
906
- "started_at": row["started_at"],
907
- "completed_at": row["completed_at"],
908
- "items_total": row["items_total"],
909
- "items_indexed": row["items_indexed"],
910
- "items_skipped": row["items_skipped"],
911
- "error_message": row["error_message"],
912
- "metadata": _safe_loads(row["metadata_json"]),
913
- }
914
- for row in latest_rows
915
- ],
916
- }
917
-
918
- def vector_search(
919
- self,
920
- query: str,
921
- *,
922
- limit: int = 30,
923
- min_score: float = 0.0,
924
- max_candidates: int = 10_000,
925
- ) -> Dict[str, Any]:
926
- query = str(query or "").strip()
927
- limit = max(1, min(int(limit or 30), 100))
928
- min_score = float(min_score or 0.0)
929
- if not query:
930
- return {"query": query, "matches": []}
931
- query_vector = self._embedding_model.embed(query)
932
- max_candidates = max(limit, min(int(max_candidates or 10_000), 50_000))
933
- with self._connect() as conn:
934
- rows = conn.execute(
935
- """
936
- SELECT
937
- ve.item_id, ve.item_type, ve.source_node, ve.embedding,
938
- ve.embedding_dim, ve.embedding_model, ve.metadata_json AS vector_metadata,
939
- n.type AS node_type, n.title AS node_title, n.summary AS node_summary,
940
- n.metadata_json AS node_metadata, n.updated_at AS node_updated_at,
941
- c.text AS chunk_text, c.source_node AS parent_node_id,
942
- pn.type AS parent_type, pn.title AS parent_title,
943
- pn.summary AS parent_summary, pn.metadata_json AS parent_metadata,
944
- pn.updated_at AS parent_updated_at
945
- FROM vector_embeddings ve
946
- LEFT JOIN nodes n ON n.id=ve.source_node
947
- LEFT JOIN chunks c ON c.id=ve.item_id
948
- LEFT JOIN nodes pn ON pn.id=c.source_node
949
- WHERE ve.embedding_model=? AND ve.embedding_dim=?
950
- ORDER BY ve.indexed_at DESC
951
- LIMIT ?
952
- """,
953
- (
954
- self._embedding_model.model_id,
955
- self._embedding_model.dim,
956
- max_candidates,
957
- ),
958
- ).fetchall()
959
- scored = []
960
- for row in rows:
961
- vector = self._embedding_model.decode(
962
- row["embedding"], row["embedding_dim"]
963
- )
964
- score = self._embedding_model.similarity(query_vector, vector)
965
- if score < min_score:
966
- continue
967
- is_chunk = row["item_type"] == "chunk"
968
- summary = (
969
- row["chunk_text"]
970
- if is_chunk and row["chunk_text"]
971
- else row["node_summary"]
972
- )
973
- parent_metadata = _safe_loads(row["parent_metadata"])
974
- node_metadata = _safe_loads(row["node_metadata"])
975
- scored.append(
976
- {
977
- "id": row["item_id"],
978
- "node_id": row["parent_node_id"]
979
- if is_chunk and row["parent_node_id"]
980
- else row["source_node"],
981
- "item_type": row["item_type"],
982
- "type": "Chunk" if is_chunk else row["node_type"],
983
- "title": row["parent_title"]
984
- if is_chunk and row["parent_title"]
985
- else row["node_title"],
986
- "summary": _clean_text(summary or "")[:1000],
987
- "score": round(float(score), 6),
988
- "metadata": {
989
- **(parent_metadata if is_chunk else node_metadata),
990
- "vector": _safe_loads(row["vector_metadata"]),
991
- "parent_node_id": row["parent_node_id"],
992
- "parent_type": row["parent_type"],
993
- },
994
- "updated_at": row["parent_updated_at"]
995
- if is_chunk and row["parent_updated_at"]
996
- else row["node_updated_at"],
997
- }
998
- )
999
- scored.sort(
1000
- key=lambda item: (item["score"], item.get("updated_at") or ""), reverse=True
1001
- )
1002
- return {
1003
- "query": query,
1004
- "embedding_model": self._embedding_model.model_id,
1005
- "embedding_dim": self._embedding_model.dim,
1006
- "matches": scored[:limit],
1007
- }
1008
-
1009
- def delete_conversation(self, conversation_id: str) -> Dict[str, Any]:
1010
- conversation_id = str(conversation_id or "").strip()
1011
- if not conversation_id:
1012
- return {"status": "skipped", "removed_nodes": 0}
1013
- conv_id = f"conversation:{_slug(conversation_id)}"
1014
- with self._connect() as conn:
1015
- # Edge rows may carry the legacy lowercase label (pre-v4) or the
1016
- # canonical EdgeType value (v4 write door) — match both.
1017
- direct_ids = [
1018
- row["to_node"]
1019
- for row in conn.execute(
1020
- "SELECT to_node FROM edges WHERE from_node=? AND type IN ('contains', 'CONTAINS')",
1021
- (conv_id,),
1022
- )
1023
- ]
1024
- remove_ids = set(direct_ids)
1025
- child_types = [
1026
- "has_chunk",
1027
- "implies",
1028
- "contains_signal",
1029
- "has_page",
1030
- "has_slide",
1031
- "has_sheet",
1032
- "contains_image",
1033
- ]
1034
- child_types += [t.upper() for t in child_types]
1035
- placeholders = ",".join("?" for _ in child_types)
1036
- for source_id in list(direct_ids):
1037
- for row in conn.execute(
1038
- f"SELECT to_node FROM edges WHERE from_node=? AND type IN ({placeholders})",
1039
- (source_id, *child_types),
1040
- ):
1041
- remove_ids.add(row["to_node"])
1042
- remove_ids.add(conv_id)
1043
- for node_id in remove_ids:
1044
- conn.execute("DELETE FROM nodes WHERE id=?", (node_id,))
1045
- if KGStoreV2 is not None:
1046
- conn.execute(
1047
- "DELETE FROM nodes_v2 WHERE id=?", (node_id,)
1048
- ) # edges_v2 cascade
1049
- conn.execute(
1050
- """
1051
- DELETE FROM nodes
1052
- WHERE type='Topic'
1053
- AND id NOT IN (SELECT to_node FROM edges)
1054
- AND id NOT IN (SELECT from_node FROM edges)
1055
- """
1056
- )
1057
- if KGStoreV2 is not None:
1058
- conn.execute(
1059
- """
1060
- DELETE FROM nodes_v2
1061
- WHERE legacy_type='Topic'
1062
- AND id NOT IN (SELECT target FROM edges_v2)
1063
- AND id NOT IN (SELECT source FROM edges_v2)
1064
- """
1065
- )
1066
- return {
1067
- "status": "ok",
1068
- "conversation_id": conversation_id,
1069
- "removed_nodes": len(remove_ids),
1070
- }
1071
-
1072
- def clear_all(self) -> Dict[str, Any]:
1073
- with self._connect() as conn:
1074
- counts = {
1075
- "nodes": conn.execute("SELECT COUNT(*) AS c FROM nodes").fetchone()[
1076
- "c"
1077
- ],
1078
- "edges": conn.execute("SELECT COUNT(*) AS c FROM edges").fetchone()[
1079
- "c"
1080
- ],
1081
- "chunks": conn.execute("SELECT COUNT(*) AS c FROM chunks").fetchone()[
1082
- "c"
1083
- ],
1084
- "knowledge_sources": conn.execute(
1085
- "SELECT COUNT(*) AS c FROM knowledge_sources"
1086
- ).fetchone()["c"],
1087
- "local_file_index": conn.execute(
1088
- "SELECT COUNT(*) AS c FROM local_file_index"
1089
- ).fetchone()["c"],
1090
- }
1091
- conn.execute("DELETE FROM local_file_index")
1092
- conn.execute("DELETE FROM knowledge_sources")
1093
- conn.execute("DELETE FROM chunks")
1094
- conn.execute("DELETE FROM edges")
1095
- conn.execute("DELETE FROM nodes")
1096
- if KGStoreV2 is not None:
1097
- conn.execute("DELETE FROM edges_v2")
1098
- conn.execute("DELETE FROM nodes_v2")
1099
- if self.blob_dir.exists():
1100
- shutil.rmtree(self.blob_dir, ignore_errors=True)
1101
- self.blob_dir.mkdir(parents=True, exist_ok=True)
1102
- return {"status": "ok", "removed": counts}
1103
-
1104
- def stats(self) -> Dict[str, Any]:
1105
- nt, et = self._read_tables()
1106
- with self._connect() as conn:
1107
- node_counts = {
1108
- row["type"]: row["count"]
1109
- for row in conn.execute(
1110
- f"SELECT type, COUNT(*) AS count FROM {nt} GROUP BY type"
1111
- )
1112
- }
1113
- edge_counts = {
1114
- row["type"]: row["count"]
1115
- for row in conn.execute(
1116
- f"SELECT type, COUNT(*) AS count FROM {et} GROUP BY type"
1117
- )
1118
- }
1119
- local_sources = conn.execute(
1120
- "SELECT COUNT(*) AS c FROM knowledge_sources"
1121
- ).fetchone()["c"]
1122
- local_file_status = {
1123
- row["status"]: row["count"]
1124
- for row in conn.execute(
1125
- "SELECT status, COUNT(*) AS count FROM local_file_index GROUP BY status"
1126
- )
1127
- }
1128
- v2 = None
1129
- if KGStoreV2 is not None:
1130
- try:
1131
- v2 = KGStoreV2(self.db_path).stats()
1132
- except Exception as e:
1133
- v2 = {"available": False, "error": str(e)}
1134
- return {
1135
- "db_path": str(self.db_path),
1136
- "schema_version": GRAPH_SCHEMA_VERSION,
1137
- "v2_schema_available": KGStoreV2 is not None,
1138
- "nodes": node_counts,
1139
- "edges": edge_counts,
1140
- "local_sources": local_sources,
1141
- "local_file_status": local_file_status,
1142
- "v2": v2,
1143
- }
1144
-
1145
- def search_for_document_generation(
1146
- self, query: str, limit: int = 10
1147
- ) -> List[Dict[str, Any]]:
1148
- """Hybrid retrieval optimized for document generation.
1149
-
1150
- Scoring: 0.5*text_relevance + 0.3*graph_relationship + 0.2*recency
1151
- Returns nodes with rich context for document generation prompts.
1152
- """
1153
- query = str(query or "").strip()
1154
- if not query:
1155
- return []
1156
- limit = max(1, min(int(limit or 10), 50))
1157
- terms = _topic_candidates(query, limit=12)
1158
- now = datetime.now()
1159
- nt, et = self._read_tables()
1160
-
1161
- with self._connect() as conn:
1162
- candidate_rows = []
1163
- seen_ids = set()
1164
-
1165
- if query:
1166
- q = f"%{query}%"
1167
- rows = conn.execute(
1168
- f"""
1169
- SELECT id, type, title, summary, metadata_json, updated_at
1170
- FROM {nt}
1171
- WHERE (title LIKE ? OR summary LIKE ? OR metadata_json LIKE ?)
1172
- AND type IN ('Document', 'File', 'CodeFile', 'SlideDeck',
1173
- 'Spreadsheet', 'Image', 'ImageText', 'Chat',
1174
- 'Decision', 'Task', 'Concept', 'Feature',
1175
- 'Page', 'Slide')
1176
- ORDER BY updated_at DESC, id ASC
1177
- LIMIT ?
1178
- """,
1179
- (q, q, q, limit * 5),
1180
- ).fetchall()
1181
- for row in rows:
1182
- if row["id"] not in seen_ids:
1183
- seen_ids.add(row["id"])
1184
- candidate_rows.append(row)
1185
-
1186
- for term in terms:
1187
- t = f"%{term}%"
1188
- rows = conn.execute(
1189
- f"""
1190
- SELECT id, type, title, summary, metadata_json, updated_at
1191
- FROM {nt}
1192
- WHERE (title LIKE ? OR summary LIKE ? OR metadata_json LIKE ?)
1193
- AND type IN ('Document', 'File', 'CodeFile', 'SlideDeck',
1194
- 'Spreadsheet', 'Image', 'ImageText', 'Chat',
1195
- 'Decision', 'Task', 'Concept', 'Feature',
1196
- 'Page', 'Slide')
1197
- ORDER BY updated_at DESC, id ASC
1198
- LIMIT ?
1199
- """,
1200
- (t, t, t, limit * 3),
1201
- ).fetchall()
1202
- for row in rows:
1203
- if row["id"] not in seen_ids:
1204
- seen_ids.add(row["id"])
1205
- candidate_rows.append(row)
1206
-
1207
- scored_results = []
1208
- for row in candidate_rows:
1209
- haystack = (
1210
- f"{row['title']} {row['summary']} {row['metadata_json']}".lower()
1211
- )
1212
-
1213
- text_hits = sum(1 for term in terms if term.lower() in haystack)
1214
- text_score = min(1.0, text_hits / max(len(terms), 1))
1215
-
1216
- edge_count = conn.execute(
1217
- f"SELECT COUNT(*) AS c FROM {et} WHERE from_node=? OR to_node=?",
1218
- (row["id"], row["id"]),
1219
- ).fetchone()["c"]
1220
- graph_score = min(1.0, math.log1p(edge_count) / 4.0)
1221
-
1222
- recency = _recency_score(
1223
- row["updated_at"], now=now, half_life_days=14.0
1224
- )
1225
-
1226
- doc_type_boost = (
1227
- 1.2
1228
- if row["type"]
1229
- in (
1230
- "Document",
1231
- "File",
1232
- "SlideDeck",
1233
- "Decision",
1234
- )
1235
- else 1.0
1236
- )
1237
-
1238
- hybrid_score = (
1239
- 0.5 * text_score + 0.3 * graph_score + 0.2 * recency
1240
- ) * doc_type_boost
1241
-
1242
- meta = _safe_loads(row["metadata_json"])
1243
- neighbor_concepts = []
1244
- neighbor_rows = conn.execute(
1245
- f"""
1246
- SELECT n.title, n.type FROM {et} e
1247
- JOIN {nt} n ON n.id = CASE WHEN e.from_node = ? THEN e.to_node ELSE e.from_node END
1248
- WHERE (e.from_node = ? OR e.to_node = ?)
1249
- AND n.type IN ('Concept', 'Feature', 'Decision', 'Task')
1250
- LIMIT 8
1251
- """,
1252
- (row["id"], row["id"], row["id"]),
1253
- ).fetchall()
1254
- for nr in neighbor_rows:
1255
- neighbor_concepts.append({"title": nr["title"], "type": nr["type"]})
1256
-
1257
- scored_results.append(
1258
- {
1259
- "id": row["id"],
1260
- "type": row["type"],
1261
- "title": row["title"],
1262
- "summary": row["summary"],
1263
- "metadata": meta,
1264
- "updated_at": row["updated_at"],
1265
- "hybrid_score": round(hybrid_score, 4),
1266
- "scores": {
1267
- "text": round(text_score, 4),
1268
- "graph": round(graph_score, 4),
1269
- "recency": round(recency, 4),
1270
- },
1271
- "related_concepts": neighbor_concepts,
1272
- }
1273
- )
1274
-
1275
- scored_results.sort(key=lambda x: x["hybrid_score"], reverse=True)
1276
- return scored_results[:limit]
1277
-
1278
- def multi_hop_context(
1279
- self, node_ids: List[str], max_hops: int = 2
1280
- ) -> Dict[str, Any]:
1281
- """Multi-hop graph traversal from seed nodes for richer context."""
1282
- visited_nodes = set()
1283
- visited_edges = set()
1284
- all_nodes = []
1285
- all_edges = []
1286
- frontier = set(node_ids)
1287
- nt, et = self._read_tables()
1288
-
1289
- with self._connect() as conn:
1290
- for hop in range(max_hops):
1291
- if not frontier:
1292
- break
1293
- next_frontier = set()
1294
- for nid in frontier:
1295
- if nid in visited_nodes:
1296
- continue
1297
- visited_nodes.add(nid)
1298
- row = conn.execute(
1299
- f"SELECT id, type, title, summary, metadata_json, updated_at FROM {nt} WHERE id=?",
1300
- (nid,),
1301
- ).fetchone()
1302
- if row:
1303
- all_nodes.append(
1304
- {
1305
- "id": row["id"],
1306
- "type": row["type"],
1307
- "title": row["title"],
1308
- "summary": row["summary"],
1309
- "metadata": _safe_loads(row["metadata_json"]),
1310
- "hop": hop,
1311
- }
1312
- )
1313
- edge_rows = conn.execute(
1314
- f"""
1315
- SELECT id, from_node, to_node, type, weight
1316
- FROM {et} WHERE from_node=? OR to_node=?
1317
- ORDER BY id ASC
1318
- """,
1319
- (nid, nid),
1320
- ).fetchall()
1321
- for er in edge_rows:
1322
- if er["id"] not in visited_edges:
1323
- visited_edges.add(er["id"])
1324
- all_edges.append(
1325
- {
1326
- "from": er["from_node"],
1327
- "to": er["to_node"],
1328
- "type": er["type"],
1329
- "weight": er["weight"],
1330
- }
1331
- )
1332
- other = (
1333
- er["to_node"]
1334
- if er["from_node"] == nid
1335
- else er["from_node"]
1336
- )
1337
- if other not in visited_nodes:
1338
- next_frontier.add(other)
1339
- frontier = next_frontier
1340
-
1341
- return {"nodes": all_nodes, "edges": all_edges}
12
+ warnings.warn(
13
+ "latticeai.brain.retrieval is deprecated; import lattice_brain.graph.retrieval instead",
14
+ DeprecationWarning,
15
+ stacklevel=2,
16
+ )
17
+ sys.modules[__name__] = _impl