ltcai 4.0.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. package/README.md +42 -33
  2. package/desktop/electron/main.cjs +44 -0
  3. package/docs/CHANGELOG.md +106 -0
  4. package/docs/REALTIME_COLLABORATION.md +3 -3
  5. package/docs/V3_FRONTEND.md +9 -8
  6. package/docs/V4_1_FRONTEND_ARCHITECTURE_REVIEW.md +65 -0
  7. package/docs/V4_1_FRONTEND_MIGRATION_REPORT.md +70 -0
  8. package/docs/V4_1_VALIDATION_REPORT.md +47 -0
  9. package/docs/V4_DIGITAL_BRAIN_RECOVERY.md +95 -45
  10. package/docs/kg-schema.md +6 -2
  11. package/docs/spec-vs-impl.md +10 -10
  12. package/frontend/index.html +24 -0
  13. package/frontend/openapi.json +14190 -0
  14. package/frontend/src/App.tsx +184 -0
  15. package/frontend/src/api/client.ts +317 -0
  16. package/frontend/src/api/openapi.ts +16637 -0
  17. package/frontend/src/components/primitives.tsx +204 -0
  18. package/frontend/src/components/ui/badge.tsx +27 -0
  19. package/frontend/src/components/ui/button.tsx +37 -0
  20. package/frontend/src/components/ui/card.tsx +22 -0
  21. package/frontend/src/components/ui/input.tsx +16 -0
  22. package/frontend/src/components/ui/textarea.tsx +16 -0
  23. package/frontend/src/lib/utils.ts +33 -0
  24. package/frontend/src/main.tsx +23 -0
  25. package/frontend/src/pages/Act.tsx +245 -0
  26. package/frontend/src/pages/Ask.tsx +200 -0
  27. package/frontend/src/pages/Brain.tsx +267 -0
  28. package/frontend/src/pages/Capture.tsx +158 -0
  29. package/frontend/src/pages/Library.tsx +187 -0
  30. package/frontend/src/pages/System.tsx +344 -0
  31. package/frontend/src/routes.ts +85 -0
  32. package/frontend/src/store/appStore.ts +54 -0
  33. package/frontend/src/styles.css +107 -0
  34. package/kg_schema.py +2 -603
  35. package/knowledge_graph.py +37 -4958
  36. package/latticeai/__init__.py +1 -1
  37. package/latticeai/api/admin.py +15 -16
  38. package/latticeai/api/agents.py +13 -6
  39. package/latticeai/api/auth.py +19 -11
  40. package/latticeai/api/invitations.py +100 -0
  41. package/latticeai/api/knowledge_graph.py +4 -11
  42. package/latticeai/api/plugins.py +3 -6
  43. package/latticeai/api/realtime.py +4 -7
  44. package/latticeai/api/setup.py +5 -4
  45. package/latticeai/api/static_routes.py +13 -16
  46. package/latticeai/api/ui_redirects.py +26 -0
  47. package/latticeai/api/workflow_designer.py +39 -6
  48. package/latticeai/api/workspace.py +24 -10
  49. package/latticeai/app_factory.py +88 -17
  50. package/latticeai/brain/_kg_common.py +1123 -0
  51. package/latticeai/brain/discovery.py +1455 -0
  52. package/latticeai/brain/documents.py +218 -0
  53. package/latticeai/brain/ingest.py +644 -0
  54. package/latticeai/brain/projection.py +561 -0
  55. package/latticeai/brain/provenance.py +401 -0
  56. package/latticeai/brain/retrieval.py +1316 -0
  57. package/latticeai/brain/schema.py +640 -0
  58. package/latticeai/brain/store.py +216 -0
  59. package/latticeai/brain/write_master.py +225 -0
  60. package/latticeai/core/invitations.py +131 -0
  61. package/latticeai/core/marketplace.py +1 -1
  62. package/latticeai/core/multi_agent.py +1 -1
  63. package/latticeai/core/policy.py +54 -0
  64. package/latticeai/core/realtime.py +65 -44
  65. package/latticeai/core/sessions.py +31 -5
  66. package/latticeai/core/users.py +147 -0
  67. package/latticeai/core/workspace_os.py +420 -20
  68. package/latticeai/services/agent_runtime.py +242 -4
  69. package/latticeai/services/run_executor.py +328 -0
  70. package/latticeai/services/workspace_service.py +27 -19
  71. package/package.json +54 -27
  72. package/scripts/build_frontend_assets.mjs +38 -0
  73. package/scripts/bump_version.py +1 -1
  74. package/scripts/export_openapi.py +31 -0
  75. package/scripts/lint_frontend.mjs +86 -0
  76. package/scripts/run_python.mjs +47 -0
  77. package/src-tauri/Cargo.lock +4833 -0
  78. package/src-tauri/Cargo.toml +19 -0
  79. package/src-tauri/build.rs +3 -0
  80. package/src-tauri/capabilities/default.json +7 -0
  81. package/src-tauri/src/main.rs +78 -0
  82. package/src-tauri/tauri.conf.json +36 -0
  83. package/static/app/asset-manifest.json +32 -0
  84. package/static/app/assets/core-CwxXejkd.js +2 -0
  85. package/static/app/assets/core-CwxXejkd.js.map +1 -0
  86. package/static/app/assets/index-CJRAzNnf.js +333 -0
  87. package/static/app/assets/index-CJRAzNnf.js.map +1 -0
  88. package/static/app/assets/index-CSwBBgf4.css +2 -0
  89. package/static/app/index.html +25 -0
  90. package/static/manifest.json +2 -2
  91. package/static/sw.js +4 -4
  92. package/scripts/build_v3_assets.mjs +0 -170
  93. package/scripts/lint_v3.mjs +0 -97
  94. package/static/account.html +0 -113
  95. package/static/activity.html +0 -73
  96. package/static/admin.html +0 -486
  97. package/static/agents.html +0 -139
  98. package/static/chat.html +0 -841
  99. package/static/css/reference/account.css +0 -439
  100. package/static/css/reference/admin.css +0 -610
  101. package/static/css/reference/base.css +0 -1661
  102. package/static/css/reference/chat.css +0 -4623
  103. package/static/css/reference/graph.css +0 -1016
  104. package/static/css/responsive.css +0 -861
  105. package/static/graph.html +0 -122
  106. package/static/platform.css +0 -104
  107. package/static/plugins.html +0 -136
  108. package/static/scripts/account.js +0 -238
  109. package/static/scripts/admin.js +0 -1614
  110. package/static/scripts/chat.js +0 -5081
  111. package/static/scripts/graph.js +0 -1804
  112. package/static/scripts/platform.js +0 -64
  113. package/static/scripts/ux.js +0 -167
  114. package/static/scripts/workspace.js +0 -948
  115. package/static/v3/asset-manifest.json +0 -56
  116. package/static/v3/css/lattice.base.49deefb5.css +0 -128
  117. package/static/v3/css/lattice.base.css +0 -128
  118. package/static/v3/css/lattice.components.cde18231.css +0 -472
  119. package/static/v3/css/lattice.components.css +0 -472
  120. package/static/v3/css/lattice.shell.29d36d85.css +0 -452
  121. package/static/v3/css/lattice.shell.css +0 -452
  122. package/static/v3/css/lattice.tokens.304cbc40.css +0 -135
  123. package/static/v3/css/lattice.tokens.css +0 -135
  124. package/static/v3/css/lattice.views.0a18b6c5.css +0 -360
  125. package/static/v3/css/lattice.views.css +0 -360
  126. package/static/v3/index.html +0 -68
  127. package/static/v3/js/app.356e6452.js +0 -26
  128. package/static/v3/js/app.js +0 -26
  129. package/static/v3/js/core/api.7a308b89.js +0 -568
  130. package/static/v3/js/core/api.js +0 -568
  131. package/static/v3/js/core/components.f25b3b93.js +0 -230
  132. package/static/v3/js/core/components.js +0 -230
  133. package/static/v3/js/core/dom.a2773eb0.js +0 -148
  134. package/static/v3/js/core/dom.js +0 -148
  135. package/static/v3/js/core/router.584570f2.js +0 -37
  136. package/static/v3/js/core/router.js +0 -37
  137. package/static/v3/js/core/routes.7222343d.js +0 -93
  138. package/static/v3/js/core/routes.js +0 -93
  139. package/static/v3/js/core/shell.a1657f20.js +0 -391
  140. package/static/v3/js/core/shell.js +0 -391
  141. package/static/v3/js/core/store.204a08b2.js +0 -113
  142. package/static/v3/js/core/store.js +0 -113
  143. package/static/v3/js/views/admin-audit.660a1fb1.js +0 -185
  144. package/static/v3/js/views/admin-audit.js +0 -185
  145. package/static/v3/js/views/admin-permissions.a7ae5f09.js +0 -177
  146. package/static/v3/js/views/admin-permissions.js +0 -177
  147. package/static/v3/js/views/admin-policies.3658fd86.js +0 -102
  148. package/static/v3/js/views/admin-policies.js +0 -102
  149. package/static/v3/js/views/admin-private-vpc.7d342d36.js +0 -135
  150. package/static/v3/js/views/admin-private-vpc.js +0 -135
  151. package/static/v3/js/views/admin-security.07c66b72.js +0 -180
  152. package/static/v3/js/views/admin-security.js +0 -180
  153. package/static/v3/js/views/admin-users.03bac88c.js +0 -168
  154. package/static/v3/js/views/admin-users.js +0 -168
  155. package/static/v3/js/views/agents.014d0b74.js +0 -541
  156. package/static/v3/js/views/agents.js +0 -541
  157. package/static/v3/js/views/chat.e6dd7dd0.js +0 -601
  158. package/static/v3/js/views/chat.js +0 -601
  159. package/static/v3/js/views/files.adad14c1.js +0 -365
  160. package/static/v3/js/views/files.js +0 -365
  161. package/static/v3/js/views/graph-canvas.17c15d65.js +0 -509
  162. package/static/v3/js/views/graph-canvas.js +0 -509
  163. package/static/v3/js/views/home.24f8b8ae.js +0 -200
  164. package/static/v3/js/views/home.js +0 -200
  165. package/static/v3/js/views/hooks.37895880.js +0 -220
  166. package/static/v3/js/views/hooks.js +0 -220
  167. package/static/v3/js/views/hybrid-search.2fb63ed9.js +0 -194
  168. package/static/v3/js/views/hybrid-search.js +0 -194
  169. package/static/v3/js/views/knowledge-graph.5e40cbeb.js +0 -509
  170. package/static/v3/js/views/knowledge-graph.js +0 -509
  171. package/static/v3/js/views/marketplace.ab0583d4.js +0 -141
  172. package/static/v3/js/views/marketplace.js +0 -141
  173. package/static/v3/js/views/mcp.99b5c6a7.js +0 -114
  174. package/static/v3/js/views/mcp.js +0 -114
  175. package/static/v3/js/views/memory.4ebdf474.js +0 -147
  176. package/static/v3/js/views/memory.js +0 -147
  177. package/static/v3/js/views/models.a1ffa147.js +0 -256
  178. package/static/v3/js/views/models.js +0 -256
  179. package/static/v3/js/views/my-computer.d9d9ae1c.js +0 -463
  180. package/static/v3/js/views/my-computer.js +0 -463
  181. package/static/v3/js/views/pipeline.c522f1ce.js +0 -157
  182. package/static/v3/js/views/pipeline.js +0 -157
  183. package/static/v3/js/views/planning.9ac3e313.js +0 -153
  184. package/static/v3/js/views/planning.js +0 -153
  185. package/static/v3/js/views/settings.8631fa5e.js +0 -318
  186. package/static/v3/js/views/settings.js +0 -318
  187. package/static/v3/js/views/skills.c6c2f965.js +0 -109
  188. package/static/v3/js/views/skills.js +0 -109
  189. package/static/v3/js/views/tools.e4f11276.js +0 -108
  190. package/static/v3/js/views/tools.js +0 -108
  191. package/static/v3/js/views/workflows.26c57290.js +0 -128
  192. package/static/v3/js/views/workflows.js +0 -128
  193. package/static/workflows.html +0 -146
  194. package/static/workspace.css +0 -1121
  195. package/static/workspace.html +0 -357
@@ -0,0 +1,1316 @@
1
+ from __future__ import annotations
2
+
3
+ # ruff: noqa: F403,F405
4
+
5
+ from ._kg_common import * # noqa: F403,F401
6
+
7
+
8
+ class KnowledgeGraphRetrievalMixin:
9
# Node types that graph() selects: its node query and both edge sub-selects
# filter on ``type IN (<these values>)``.
_GRAPH_VISIBLE_TYPES = (
    "Computer",  # my computer
    "Drive",  # drive / volume
    "Folder",  # folder
    "File",  # regular file
    "Chat",  # chat session
    "Document",  # file (PDF, PPT, Word, Excel, image)
    "CodeFile",  # code file
    "Spreadsheet",  # Excel / CSV
    "SlideDeck",  # presentation
    "Image",  # image
    "ImageText",  # OCR text
    "Concept",  # concept / idea / technical term
    "Person",  # person
    "Error",  # error / bug
    "Code",  # code / function
    "Feature",  # software feature
    "Task",  # to-do item
    "Decision",  # decision
    # v3.6.0 Knowledge Graph First — expose first-class entities in the graph
    "Source",  # ingestion source (file / URL / browser tab / git)
    "Repository",  # git repository
    "Meeting",  # meeting
    "Organization",  # organization
    "Workflow",  # workflow
    "Agent",  # agent
)
36
+
37
def list_documents(self, limit: int = 200) -> Dict[str, Any]:
    """Return recent ``Document`` nodes together with their index state.

    Backs the Files view: every accepted upload and every indexed local
    document is stored as a ``Document`` node. A document is reported as
    ``indexed`` when at least one ``Chunk`` row's ``metadata_json`` contains
    the document id (substring match).

    Args:
        limit: Maximum number of documents. Falsy values fall back to 200;
            the value is clamped to the range 1..1000.

    Returns:
        A dict with ``documents`` (newest ``updated_at`` first, ties by id),
        ``total`` (length of that list) and ``generated_at`` (local ISO
        timestamp with second precision).
    """
    limit = max(1, min(int(limit or 200), 1000))
    nt, _ = self._read_tables()
    document_sql = (
        f"SELECT id, title, summary, metadata_json, created_at, updated_at "
        f"FROM {nt} WHERE type='Document' ORDER BY updated_at DESC, id ASC LIMIT ?"
    )
    chunk_sql = (
        f"SELECT COUNT(*) AS c FROM {nt} WHERE type='Chunk' AND metadata_json LIKE ?"
    )
    documents: List[Dict[str, Any]] = []
    with self._connect() as conn:
        for row in conn.execute(document_sql, (limit,)).fetchall():
            meta = _safe_loads(row["metadata_json"]) or {}
            extracted = meta.get("extracted") or {}
            node_id = row["id"]
            counted = conn.execute(chunk_sql, (f"%{node_id}%",)).fetchone()["c"]
            # Coerce once; the same number feeds three output fields.
            chunks = int(counted or 0)
            is_indexed = chunks > 0
            if is_indexed:
                ingest_state = "indexed"
            else:
                ingest_state = "ingested"
            documents.append(
                {
                    "id": node_id,
                    "filename": meta.get("filename") or row["title"],
                    "ext": meta.get("ext"),
                    "mime_type": meta.get("mime_type"),
                    "bytes": meta.get("bytes"),
                    "sha256": meta.get("sha256"),
                    "uploader": meta.get("uploader"),
                    "chars": extracted.get("chars"),
                    "chunks": chunks,
                    "indexed": is_indexed,
                    "ingest_state": ingest_state,
                    "created_at": row["created_at"],
                    "updated_at": row["updated_at"],
                }
            )
    total = len(documents)
    generated_at = datetime.now().isoformat(timespec="seconds")
    return {
        "documents": documents,
        "total": total,
        "generated_at": generated_at,
    }
85
+
86
def workspaces_of(self, node_ids) -> Dict[str, Optional[str]]:
    """Map node ids to their workspace scope (None = legacy-global).

    Ids are looked up in ``nodes_v2`` in bounded batches. A single
    ``IN (?, ?, ...)`` list with one parameter per id can exceed SQLite's
    host-parameter limit; the resulting error used to be swallowed and
    turned into ``{}``, which ``filter_scoped_nodes`` treats as "every item
    is legacy-global" — i.e. scoping silently failed open for large inputs.

    Args:
        node_ids: Iterable of node ids. Falsy entries are dropped, the rest
            are stringified and de-duplicated (first occurrence wins).

    Returns:
        ``{id: workspace_id}`` for every id found in ``nodes_v2``. Ids with
        no row are absent. Returns ``{}`` for empty input or when the lookup
        raises a SQLite error (for example when ``nodes_v2`` does not exist).
    """
    import sqlite3  # function-scope import keeps this block self-contained

    ids = list(dict.fromkeys(str(i) for i in node_ids if i))
    if not ids:
        return {}
    batch_size = 500  # stays below SQLite's historical 999-parameter default
    scopes: Dict[str, Optional[str]] = {}
    with self._connect() as conn:
        try:
            for start in range(0, len(ids), batch_size):
                batch = ids[start : start + batch_size]
                placeholders = ",".join("?" for _ in batch)
                for row in conn.execute(
                    f"SELECT id, workspace_id FROM nodes_v2 WHERE id IN ({placeholders})",
                    batch,
                ).fetchall():
                    scopes[row["id"]] = row["workspace_id"]
        except sqlite3.Error:
            # Best-effort by design: a store without nodes_v2 has no scoping
            # data, so report "nothing known" rather than failing the caller.
            return {}
    return scopes
103
+
104
def filter_scoped_nodes(self, items, allowed_workspaces, *, id_key: str = "id"):
    """Drop items scoped to a workspace the caller is not a member of.

    ``allowed_workspaces=None`` means no scoping (single-user / no-auth
    mode). Legacy-global rows (no workspace) stay visible to everyone on
    the machine — the documented pre-v4 compatibility behavior.

    Args:
        items: Iterable of dict-like items. It is materialized up front
            because it is walked twice (once to collect ids, once to
            filter); a generator would otherwise be exhausted by the first
            pass and yield an empty result.
        allowed_workspaces: Iterable of workspace ids the caller may see,
            or ``None`` to disable filtering.
        id_key: Key under which each item stores its node id.

    Returns:
        A new list with the visible items in their original order.
    """
    items = list(items)
    if allowed_workspaces is None:
        return items
    allowed = set(allowed_workspaces)
    scopes = self.workspaces_of([item.get(id_key) for item in items])
    visible = []
    for item in items:
        scope = scopes.get(item.get(id_key))
        # No recorded workspace means legacy-global, which everyone may see.
        if scope is None or scope in allowed:
            visible.append(item)
    return visible
121
+
122
def graph(self, limit: int = 300, *, allowed_workspaces=None) -> Dict[str, Any]:
    """Return the visible graph slice annotated with importance metrics.

    Selects up to ``limit`` nodes whose type is in ``_GRAPH_VISIBLE_TYPES``
    (newest ``updated_at`` first) plus the edges whose two endpoints both
    fall inside that same selection. When ``allowed_workspaces`` is given,
    nodes are passed through ``filter_scoped_nodes`` and edges touching a
    dropped node are removed.

    Each node gains ``importance``, ``importance_norm`` (raw importance
    divided by the maximum raw importance of its type, capped at 1.0) and a
    ``graph_metrics`` entry inside its metadata.

    Args:
        limit: Node cap. Falsy values fall back to 300; clamped to 1..2000.
        allowed_workspaces: Workspace ids the caller may see, or ``None``
            for no scoping.

    Returns:
        ``{"nodes": [...], "edges": [...]}``.
    """
    limit = max(1, min(int(limit or 300), 2000))
    visible = ",".join(f"'{t}'" for t in self._GRAPH_VISIBLE_TYPES)
    nt, et = self._read_tables()
    with self._connect() as conn:
        node_cursor = conn.execute(
            f"SELECT id, type, title, summary, metadata_json, updated_at FROM {nt} WHERE type IN ({visible}) ORDER BY updated_at DESC, id ASC LIMIT ?",
            (limit,),
        )
        nodes = []
        for row in node_cursor:
            nodes.append(
                {
                    "id": row["id"],
                    "type": row["type"],
                    "title": row["title"],
                    "summary": row["summary"],
                    "metadata": _safe_loads(row["metadata_json"]),
                    "updated_at": row["updated_at"],
                }
            )
        edges: List[Dict[str, Any]] = []
        if nodes:
            # Both sub-selects repeat the node query so that only edges
            # between selected nodes are returned.
            edge_rows = conn.execute(
                f"""
                SELECT id, from_node, to_node, type, weight, metadata_json
                FROM {et}
                WHERE from_node IN (
                    SELECT id FROM {nt} WHERE type IN ({visible})
                    ORDER BY updated_at DESC, id ASC LIMIT ?
                )
                AND to_node IN (
                    SELECT id FROM {nt} WHERE type IN ({visible})
                    ORDER BY updated_at DESC, id ASC LIMIT ?
                )
                ORDER BY weight DESC, created_at DESC, id ASC
                """,
                (limit, limit),
            ).fetchall()
            for row in edge_rows:
                edges.append(
                    {
                        "id": row["id"],
                        "from": row["from_node"],
                        "to": row["to_node"],
                        "type": row["type"],
                        "weight": row["weight"],
                        "metadata": _safe_loads(row["metadata_json"]),
                    }
                )

    if allowed_workspaces is not None:
        nodes = self.filter_scoped_nodes(nodes, allowed_workspaces)
        kept_ids = {node["id"] for node in nodes}
        edges = [
            edge
            for edge in edges
            if edge["from"] in kept_ids and edge["to"] in kept_ids
        ]

    now = datetime.now()
    node_by_id = {node["id"]: node for node in nodes}
    degree_map: Dict[str, int] = {}
    topic_metrics: Dict[str, Dict[str, Any]] = {}

    for edge in edges:
        src_id = edge["from"]
        dst_id = edge["to"]
        degree_map[src_id] = degree_map.get(src_id, 0) + 1
        degree_map[dst_id] = degree_map.get(dst_id, 0) + 1
        src = node_by_id.get(src_id)
        dst = node_by_id.get(dst_id)
        if not (src and dst):
            continue
        # NOTE(review): "Topic" and "Conversation" are not among the types
        # listed in this class's _GRAPH_VISIBLE_TYPES, so this branch looks
        # reachable only if that tuple is widened elsewhere — confirm.
        for topic_node, other_node in ((src, dst), (dst, src)):
            if topic_node["type"] != "Topic":
                continue
            stats = topic_metrics.setdefault(
                topic_node["id"],
                {
                    "mention_count": 0.0,
                    "conversation_ids": set(),
                },
            )
            if edge["type"] in {"mentions", "discusses"}:
                stats["mention_count"] += max(0.5, float(edge.get("weight") or 1.0))
            other_meta = other_node.get("metadata") or {}
            conversation_id = other_meta.get("conversation_id")
            if other_node["type"] == "Conversation":
                conversation_id = other_node["id"]
            if conversation_id:
                stats["conversation_ids"].add(str(conversation_id))

    type_max_raw: Dict[str, float] = {}
    raw_scores: List[float] = []
    for node in nodes:
        node_type = node["type"]
        degree = degree_map.get(node["id"], 0)
        recency = _recency_score(node.get("updated_at"), now=now)
        metrics = {
            "degree": degree,
            "recency_score": round(recency, 4),
        }
        if node_type == "Topic":
            topic_stat = topic_metrics.get(node["id"], {})
            mention_count = float(topic_stat.get("mention_count") or 0.0)
            conversation_count = len(topic_stat.get("conversation_ids") or ())
            raw_importance = (
                math.log1p(mention_count) * 2.8
                + math.log1p(conversation_count) * 2.2
                + recency * 1.4
                + math.sqrt(max(0, degree)) * 0.45
            )
            metrics["mention_count"] = round(mention_count, 2)
            metrics["conversation_count"] = conversation_count
        else:
            raw_importance = math.log1p(max(0, degree)) * 1.4 + recency * 0.9

        metrics["importance_raw"] = round(raw_importance, 4)
        node["importance"] = round(raw_importance, 4)
        node["metadata"] = {
            **(node.get("metadata") or {}),
            "graph_metrics": metrics,
        }
        raw_scores.append(raw_importance)
        type_max_raw[node_type] = max(
            type_max_raw.get(node_type, 0.0), raw_importance
        )

    # Second pass: normalize against the per-type maximum found above.
    for node, raw_importance in zip(nodes, raw_scores):
        max_raw = max(type_max_raw.get(node["type"], 0.0), 0.0001)
        importance_norm = round(min(1.0, (raw_importance or 0.0) / max_raw), 4)
        node["importance_norm"] = importance_norm
        node["metadata"]["graph_metrics"]["importance_norm"] = importance_norm
    return {"nodes": nodes, "edges": edges}
257
+
258
def search(self, query: str, limit: int = 30) -> Dict[str, Any]:
    """Search graph nodes by full-text match with a LIKE-based fallback.

    Steps: (1) for a non-empty query, use ``_fts_match_ids`` and load those
    rows in FTS rank order, or fall back to a LIKE scan over title, summary
    and metadata when FTS yields nothing; (2) if fewer than ``limit`` rows
    were found, widen with LIKE matches on the query's topic candidates;
    (3) rank by number of matching candidate terms, then a boost for
    file/document/task-like types, then ``updated_at``, with ties ordered
    by id ascending.

    Args:
        query: Free-text query; ``None`` and surrounding whitespace are
            tolerated.
        limit: Result cap. Falsy values fall back to 30; clamped to 1..100.

    Returns:
        ``{"query": <normalized query>, "matches": [node dicts]}``.
    """
    query = str(query or "").strip()
    like_query = f"%{query}%"
    limit = max(1, min(int(limit or 30), 100))
    nt, et = self._read_tables()
    with self._connect() as conn:
        rows = []
        if query:
            fts_ids = self._fts_match_ids(conn, query, limit)
            if not fts_ids:
                rows = conn.execute(
                    f"""
                    SELECT id, type, title, summary, metadata_json, updated_at
                    FROM {nt}
                    WHERE title LIKE ? OR summary LIKE ? OR metadata_json LIKE ?
                    ORDER BY updated_at DESC, id ASC
                    LIMIT ?
                    """,
                    (like_query, like_query, like_query, limit),
                ).fetchall()
            else:
                placeholders = ",".join("?" for _ in fts_ids)
                fetched = conn.execute(
                    f"""
                    SELECT id, type, title, summary, metadata_json, updated_at
                    FROM {nt} WHERE id IN ({placeholders})
                    """,
                    fts_ids,
                ).fetchall()
                row_for = {row["id"]: row for row in fetched}
                # Preserve FTS bm25 rank order.
                rows = [row_for[i] for i in fts_ids if i in row_for]

        if len(rows) < limit:
            terms = _topic_candidates(query, limit=8)
            if terms:
                clauses = []
                params: List[str] = []
                for term in terms:
                    pattern = f"%{term}%"
                    clauses.append(
                        "(title LIKE ? OR summary LIKE ? OR metadata_json LIKE ?)"
                    )
                    params.extend([pattern, pattern, pattern])
                extra = conn.execute(
                    f"""
                    SELECT id, type, title, summary, metadata_json, updated_at
                    FROM {nt}
                    WHERE {" OR ".join(clauses)}
                    ORDER BY updated_at DESC, id ASC
                    LIMIT ?
                    """,
                    (*params, limit * 3),
                ).fetchall()
                # Rows already found keep their position; new ids are appended.
                merged = {row["id"]: row for row in rows}
                for row in extra:
                    if row["id"] not in merged:
                        merged[row["id"]] = row
                rows = list(merged.values())

    terms_for_score = set(_topic_candidates(query, limit=12))
    boosted_types = {
        "Decision",
        "Task",
        "File",
        "Document",
        "CodeFile",
        "Spreadsheet",
        "SlideDeck",
        "Image",
        "ImageText",
        "Page",
        "Slide",
    }

    def score(row) -> tuple:
        haystack = f"{row['title']} {row['summary']} {row['metadata_json']}".lower()
        hits = len([term for term in terms_for_score if term.lower() in haystack])
        if row["type"] in boosted_types:
            type_boost = 1
        else:
            type_boost = 0
        return (hits, type_boost, row["updated_at"] or "")

    # Deterministic contract: rows with equal relevance order by id ASC
    # (stable sort preserves the pre-sort under reverse=True), matching
    # the legacy LIKE path regardless of FTS bm25 tie ordering.
    by_id_order = sorted(rows, key=lambda r: r["id"])
    ranked = sorted(by_id_order, key=score, reverse=True)[:limit]
    matches = []
    for row in ranked:
        matches.append(
            {
                "id": row["id"],
                "type": row["type"],
                "title": row["title"],
                "summary": row["summary"],
                "metadata": _safe_loads(row["metadata_json"]),
                "updated_at": row["updated_at"],
            }
        )
    return {"query": query, "matches": matches}
364
+
365
def context_for_query(self, query: str, limit: int = 6) -> str:
    """Return compact graph-backed RAG context for chat generation.

    Uses ``search`` first. When that yields nothing, falls back to a LIKE
    lookup (title / metadata, three newest rows per term) for up to four
    topic candidates of the query, de-duplicated by node id.

    Args:
        query: Free-text query; blank or ``None`` yields ``""``.
        limit: Maximum number of context lines.

    Returns:
        Newline-joined lines of the form
        ``- [type] title | source=... | summary`` (summary cut at 700
        characters), or ``""`` when nothing matched.
    """
    query = str(query or "").strip()
    if not query:
        return ""

    matches = self.search(query, limit).get("matches", [])
    if not matches:
        topics = _topic_candidates(query, limit=4)
        if topics:
            nt, et = self._read_tables()
            rows = []
            with self._connect() as conn:
                for topic in topics:
                    pattern = f"%{topic}%"
                    found = conn.execute(
                        f"""
                        SELECT id, type, title, summary, metadata_json
                        FROM {nt}
                        WHERE title LIKE ? OR metadata_json LIKE ?
                        ORDER BY updated_at DESC, id ASC
                        LIMIT 3
                        """,
                        (pattern, pattern),
                    ).fetchall()
                    rows.extend(found)
            seen = set()
            matches = []
            for row in rows:
                row_id = row["id"]
                if row_id in seen:
                    continue
                seen.add(row_id)
                matches.append(
                    {
                        "id": row_id,
                        "type": row["type"],
                        "title": row["title"],
                        "summary": row["summary"],
                        "metadata": _safe_loads(row["metadata_json"]),
                    }
                )
                if len(matches) >= limit:
                    break

    lines = []
    for match in matches[:limit]:
        meta = match.get("metadata") or {}
        # First available provenance hint wins; the node id is the last resort.
        source = match["id"]
        for key in ("relative_path", "filename", "conversation_id", "source"):
            if meta.get(key):
                source = meta.get(key)
                break
        summary = _clean_text(match.get("summary") or "")[:700]
        lines.append(
            f"- [{match['type']}] {match['title']} | source={source} | {summary}"
        )
    return "\n".join(lines)
422
+
423
def neighbors(self, node_id: str) -> Dict[str, Any]:
    """Return direct neighbors (1-hop) of a node.

    The neighbor rows are selected through a sub-select over the edge table
    instead of binding one SQL parameter per neighbor id. A hub node with
    more neighbors than SQLite's host-parameter limit previously made the
    lookup raise "too many SQL variables"; the sub-select always binds two
    parameters. The id set collected in Python stays the final filter, so
    the returned neighbors are the same as before.

    Args:
        node_id: Id of the node whose incident edges are inspected.

    Returns:
        ``{"node_id": node_id, "neighbors": [...], "edges": [...]}`` where
        ``edges`` lists every edge touching the node (ordered by edge id)
        and ``neighbors`` lists the other endpoints that exist in the node
        table (ordered by node id).
    """
    nt, et = self._read_tables()
    with self._connect() as conn:
        edge_rows = conn.execute(
            f"SELECT from_node, to_node, type, weight FROM {et} WHERE from_node=? OR to_node=? ORDER BY id ASC",
            (node_id, node_id),
        ).fetchall()
        neighbor_ids: set = set()
        edges = []
        for row in edge_rows:
            neighbor_ids.add(row["from_node"])
            neighbor_ids.add(row["to_node"])
            edges.append(
                {
                    "from": row["from_node"],
                    "to": row["to_node"],
                    "type": row["type"],
                    "weight": row["weight"],
                }
            )
        # The node itself is an endpoint of every incident edge.
        neighbor_ids.discard(node_id)
        nodes = []
        if neighbor_ids:
            node_rows = conn.execute(
                f"""
                SELECT id, type, title, summary, metadata_json
                FROM {nt}
                WHERE id IN (
                    SELECT from_node FROM {et} WHERE to_node=?
                    UNION
                    SELECT to_node FROM {et} WHERE from_node=?
                )
                ORDER BY id ASC
                """,
                (node_id, node_id),
            ).fetchall()
            nodes = [
                {
                    "id": row["id"],
                    "type": row["type"],
                    "title": row["title"],
                    "summary": row["summary"],
                    "metadata": _safe_loads(row["metadata_json"]),
                }
                for row in node_rows
                # Drops the node itself when it has a self-loop edge.
                if row["id"] in neighbor_ids
            ]
    return {"node_id": node_id, "neighbors": nodes, "edges": edges}
462
+
463
def get_node(self, node_id: str) -> Dict[str, Any]:
    """Load a single node together with its edge degree.

    Args:
        node_id: Node id; ``None`` and surrounding whitespace are tolerated.

    Returns:
        Dict with ``id``, ``type``, ``title``, ``summary``, ``metadata``
        (decoded ``metadata_json``), ``updated_at`` and ``degree`` (number
        of edges with the node as either endpoint).

    Raises:
        ValueError: If ``node_id`` is blank or no such node exists.
    """
    node_id = str(node_id or "").strip()
    if not node_id:
        raise ValueError("node_id required")
    nt, et = self._read_tables()
    node_sql = f"""
        SELECT id, type, title, summary, metadata_json, updated_at
        FROM {nt}
        WHERE id=?
        """
    degree_sql = f"SELECT COUNT(*) AS c FROM {et} WHERE from_node=? OR to_node=?"
    with self._connect() as conn:
        row = conn.execute(node_sql, (node_id,)).fetchone()
        if not row:
            raise ValueError(f"graph node not found: {node_id}")
        degree_row = conn.execute(degree_sql, (node_id, node_id)).fetchone()
        node = {key: row[key] for key in ("id", "type", "title", "summary")}
        node["metadata"] = _safe_loads(row["metadata_json"])
        node["updated_at"] = row["updated_at"]
        node["degree"] = degree_row["c"]
        return node
492
+
493
def relationship_search(
    self,
    *,
    query: str = "",
    node_id: str = "",
    relationship_type: str = "",
    limit: int = 30,
) -> Dict[str, Any]:
    """Find edges, with both endpoint nodes, matching the given filters.

    All supplied filters are AND-ed together; with no filter every edge
    qualifies. Results are ordered by weight (descending), then creation
    time (descending), then edge id.

    Args:
        query: Substring matched against the edge type, edge metadata and
            the title / summary of either endpoint.
        node_id: Restrict to edges that have this node as an endpoint.
        relationship_type: Substring matched against the edge type.
        limit: Result cap. Falsy values fall back to 30; clamped to 1..200.

    Returns:
        Dict echoing the normalized ``query``, ``node_id`` and
        ``relationship_type`` plus ``relationships``, a list of edge dicts
        each carrying ``source`` and ``target`` node summaries.
    """
    query = str(query or "").strip()
    node_id = str(node_id or "").strip()
    relationship_type = str(relationship_type or "").strip()
    limit = max(1, min(int(limit or 30), 200))
    nt, et = self._read_tables()

    conditions = []
    params: List[Any] = []
    if node_id:
        conditions.append("(e.from_node=? OR e.to_node=?)")
        params += [node_id, node_id]
    if relationship_type:
        conditions.append("e.type LIKE ?")
        params.append(f"%{relationship_type}%")
    if query:
        pattern = f"%{query}%"
        conditions.append(
            "(e.type LIKE ? OR e.metadata_json LIKE ? OR src.title LIKE ? OR dst.title LIKE ? OR src.summary LIKE ? OR dst.summary LIKE ?)"
        )
        params += [pattern] * 6
    where_sql = ""
    if conditions:
        where_sql = "WHERE " + " AND ".join(conditions)

    with self._connect() as conn:
        rows = conn.execute(
            f"""
            SELECT
                e.id, e.from_node, e.to_node, e.type, e.weight, e.metadata_json, e.created_at,
                src.type AS source_type, src.title AS source_title, src.summary AS source_summary,
                src.metadata_json AS source_metadata,
                dst.type AS target_type, dst.title AS target_title, dst.summary AS target_summary,
                dst.metadata_json AS target_metadata
            FROM {et} e
            JOIN {nt} src ON src.id=e.from_node
            JOIN {nt} dst ON dst.id=e.to_node
            {where_sql}
            ORDER BY e.weight DESC, e.created_at DESC, e.id ASC
            LIMIT ?
            """,
            (*params, limit),
        ).fetchall()

    relationships = []
    for row in rows:
        source = {
            "id": row["from_node"],
            "type": row["source_type"],
            "title": row["source_title"],
            "summary": row["source_summary"],
            "metadata": _safe_loads(row["source_metadata"]),
        }
        target = {
            "id": row["to_node"],
            "type": row["target_type"],
            "title": row["target_title"],
            "summary": row["target_summary"],
            "metadata": _safe_loads(row["target_metadata"]),
        }
        relationships.append(
            {
                "id": row["id"],
                "type": row["type"],
                "weight": row["weight"],
                "metadata": _safe_loads(row["metadata_json"]),
                "created_at": row["created_at"],
                "source": source,
                "target": target,
            }
        )
    return {
        "query": query,
        "node_id": node_id,
        "relationship_type": relationship_type,
        "relationships": relationships,
    }
567
+
568
def traverse(
    self, node_id: str, *, depth: int = 1, limit: int = 100
) -> Dict[str, Any]:
    """Breadth-first expansion of the graph around ``node_id``.

    Each round fetches the edges touching the current frontier (heaviest
    first, at most ``limit * 3`` per round) and adds unseen endpoints to
    the visited set until ``depth`` rounds have run or ``limit`` nodes have
    been visited.

    Args:
        node_id: Root node id; ``None`` and surrounding whitespace are
            tolerated.
        depth: Number of hops, clamped to 0..4. ``None`` or ``""`` means
            the default of 1. An explicit ``0`` is honored and returns only
            the root; it was previously coerced to 1 by ``depth or 1`` even
            though the clamp allows 0.
        limit: Cap on visited nodes. Falsy values fall back to 100;
            clamped to 1..500.

    Returns:
        ``{"root", "depth", "nodes", "edges"}`` where ``depth`` is the
        effective clamped depth, ``nodes`` are the visited nodes present in
        the node table (newest first) and ``edges`` are the edges collected
        during expansion.

    Raises:
        ValueError: If ``node_id`` is blank.
    """
    node_id = str(node_id or "").strip()
    if not node_id:
        raise ValueError("node_id required")
    # Only "not provided" falls back to the default; 0 is a valid depth.
    requested_depth = 1 if depth is None or depth == "" else int(depth)
    depth = max(0, min(requested_depth, 4))
    limit = max(1, min(int(limit or 100), 500))
    nt, et = self._read_tables()
    visited = {node_id}
    frontier = {node_id}
    edges_by_id: Dict[str, Dict[str, Any]] = {}
    with self._connect() as conn:
        for _ in range(depth):
            if not frontier or len(visited) >= limit:
                break
            placeholders = ",".join("?" * len(frontier))
            rows = conn.execute(
                f"""
                SELECT id, from_node, to_node, type, weight, metadata_json
                FROM {et}
                WHERE from_node IN ({placeholders}) OR to_node IN ({placeholders})
                ORDER BY weight DESC, id ASC
                LIMIT ?
                """,
                (*frontier, *frontier, limit * 3),
            ).fetchall()
            next_frontier = set()
            for row in rows:
                # Keyed by edge id so an edge reached from both ends is kept once.
                edges_by_id[row["id"]] = {
                    "id": row["id"],
                    "from": row["from_node"],
                    "to": row["to_node"],
                    "type": row["type"],
                    "weight": row["weight"],
                    "metadata": _safe_loads(row["metadata_json"]),
                }
                for candidate in (row["from_node"], row["to_node"]):
                    if candidate not in visited and len(visited) < limit:
                        visited.add(candidate)
                        next_frontier.add(candidate)
            frontier = next_frontier
        placeholders = ",".join("?" * len(visited))
        node_rows = conn.execute(
            f"""
            SELECT id, type, title, summary, metadata_json, updated_at
            FROM {nt}
            WHERE id IN ({placeholders})
            ORDER BY updated_at DESC, id ASC
            """,
            list(visited),
        ).fetchall()
    return {
        "root": node_id,
        "depth": depth,
        "nodes": [
            {
                "id": row["id"],
                "type": row["type"],
                "title": row["title"],
                "summary": row["summary"],
                "metadata": _safe_loads(row["metadata_json"]),
                "updated_at": row["updated_at"],
            }
            for row in node_rows
        ],
        "edges": list(edges_by_id.values()),
    }
636
+
637
def _iter_vector_source_items(
    self,
    conn: sqlite3.Connection,
    *,
    include_nodes: bool = True,
    include_chunks: bool = True,
) -> List[Dict[str, Any]]:
    """Gather every graph item that should have a vector embedding.

    Args:
        conn: Open SQLite connection whose rows support access by column name.
        include_nodes: Emit one item per ``nodes`` row whose type is not
            ``'Chunk'``; the text comes from ``self._vector_text_for_node``.
        include_chunks: Emit one item per ``chunks`` row that also has a
            ``nodes`` row with the same id; the text is the cleaned chunk text.

    Returns:
        A list of dicts with the keys ``item_id``, ``item_type`` (``"node"``
        or ``"chunk"``), ``source_node``, ``text`` and ``metadata``. Node items
        precede chunk items, and rows that yield empty text are left out.
    """
    collected: List[Dict[str, Any]] = []

    if include_nodes:
        node_rows = conn.execute(
            """
            SELECT id, type, title, summary, metadata_json
            FROM nodes
            WHERE type <> 'Chunk'
            ORDER BY updated_at DESC, id ASC
            """
        ).fetchall()
        for node in node_rows:
            node_meta = _safe_loads(node["metadata_json"])
            node_text = self._vector_text_for_node(
                title=node["title"],
                summary=node["summary"] or "",
                metadata=node_meta,
            )
            if not node_text:
                continue
            collected.append(
                {
                    "item_id": node["id"],
                    "item_type": "node",
                    "source_node": node["id"],
                    "text": node_text,
                    # Stored metadata wins if it carries its own "node_type".
                    "metadata": {"node_type": node["type"], **node_meta},
                }
            )

    if include_chunks:
        chunk_rows = conn.execute(
            """
            SELECT c.id, c.source_node AS parent_source_node, c.text, c.metadata_json
            FROM chunks c
            JOIN nodes n ON n.id=c.id
            ORDER BY c.created_at DESC, c.id ASC
            """
        ).fetchall()
        for chunk in chunk_rows:
            chunk_meta = _safe_loads(chunk["metadata_json"])
            chunk_text = _clean_text(chunk["text"] or "")
            if not chunk_text:
                continue
            collected.append(
                {
                    "item_id": chunk["id"],
                    "item_type": "chunk",
                    # A chunk is its own source node; the owning node id is
                    # carried in the metadata instead.
                    "source_node": chunk["id"],
                    "text": chunk_text,
                    "metadata": {
                        **chunk_meta,
                        "parent_source_node": chunk["parent_source_node"],
                    },
                }
            )

    return collected
695
+
696
def rebuild_vector_index(
    self,
    *,
    full: bool = False,
    include_nodes: bool = True,
    include_chunks: bool = True,
) -> Dict[str, Any]:
    """Rebuild the derived vector index without mutating graph content.

    A row describing the run is written to ``vector_index_operations`` with
    status ``running``, then updated to ``completed`` on success. On any
    exception a ``failed`` row is upserted under the same id and the
    exception is re-raised.

    Args:
        full: When true, delete the existing ``vector_embeddings`` rows of the
            selected item types before re-indexing. Otherwise every source
            item is passed to ``self._upsert_vector_item``, which reports
            whether the item changed.
        include_nodes: Include node items.
        include_chunks: Include chunk items.

    Returns:
        A summary dict with ``status``, ``operation_id``, ``full``,
        ``items_total``, ``items_indexed``, ``items_skipped``,
        ``duration_ms``, ``embedding_model`` and ``embedding_dim``.

    Raises:
        Exception: Whatever the rebuild raised, after the failure is recorded.
    """
    seed = f"{time.time()}:{os.getpid()}"
    op_id = f"vector-op:{_sha256_text(seed)[:24]}"
    operation = "rebuild_full" if full else "rebuild_incremental"
    # Recorded on every row for this run (running, completed and failed) so
    # the operation log always shows what was requested.
    request_meta = {
        "include_nodes": include_nodes,
        "include_chunks": include_chunks,
    }
    requested_at = _now()
    started = time.perf_counter()
    try:
        with self._connect() as conn:
            conn.execute(
                """
                INSERT INTO vector_index_operations(
                    id, operation, status, requested_at, started_at, metadata_json
                )
                VALUES (?, ?, 'running', ?, ?, ?)
                """,
                (op_id, operation, requested_at, requested_at, _json(request_meta)),
            )
            if full:
                # Only fixed literals are interpolated into the SQL below.
                filters = []
                if include_nodes:
                    filters.append("'node'")
                if include_chunks:
                    filters.append("'chunk'")
                if filters:
                    conn.execute(
                        f"DELETE FROM vector_embeddings WHERE item_type IN ({','.join(filters)})"
                    )
            items = self._iter_vector_source_items(
                conn,
                include_nodes=include_nodes,
                include_chunks=include_chunks,
            )
            indexed = skipped = 0
            for item in items:
                if self._upsert_vector_item(conn, **item):
                    indexed += 1
                else:
                    skipped += 1
            duration_ms = round((time.perf_counter() - started) * 1000, 2)
            conn.execute(
                """
                UPDATE vector_index_operations
                SET status='completed', completed_at=?, items_total=?,
                    items_indexed=?, items_skipped=?, metadata_json=?
                WHERE id=?
                """,
                (
                    _now(),
                    len(items),
                    indexed,
                    skipped,
                    _json(
                        {
                            **request_meta,
                            "duration_ms": duration_ms,
                            "embedding_model": self._embedding_model.model_id,
                            "embedding_dim": self._embedding_model.dim,
                        }
                    ),
                    op_id,
                ),
            )
        return {
            "status": "completed",
            "operation_id": op_id,
            "full": bool(full),
            "items_total": len(items),
            "items_indexed": indexed,
            "items_skipped": skipped,
            "duration_ms": duration_ms,
            "embedding_model": self._embedding_model.model_id,
            "embedding_dim": self._embedding_model.dim,
        }
    except Exception as exc:
        duration_ms = round((time.perf_counter() - started) * 1000, 2)
        # Upsert so the failure is recorded whether or not the 'running' row
        # from the failed attempt is still present.
        with self._connect() as conn:
            conn.execute(
                """
                INSERT INTO vector_index_operations(
                    id, operation, status, requested_at, started_at, completed_at,
                    error_message, metadata_json
                )
                VALUES (?, ?, 'failed', ?, ?, ?, ?, ?)
                ON CONFLICT(id) DO UPDATE SET
                    status='failed',
                    completed_at=excluded.completed_at,
                    error_message=excluded.error_message,
                    metadata_json=excluded.metadata_json
                """,
                (
                    op_id,
                    operation,
                    requested_at,
                    requested_at,
                    _now(),
                    str(exc),
                    # Keep the request flags: the original failure row dropped
                    # them and stored only the duration.
                    _json({**request_meta, "duration_ms": duration_ms}),
                ),
            )
        raise
814
+
815
def index_status(self) -> Dict[str, Any]:
    """Report how well the stored vector index covers the current graph.

    Each source item from ``self._iter_vector_source_items`` is classified as:

    * ``missing`` - no ``vector_embeddings`` row with that item id;
    * ``stale``   - a row exists but its text hash, embedding dimension or
      embedding model differs from what the active model would produce;
    * ``ready``   - a row exists and all three match.

    Returns:
        A dict with an overall ``status`` (``"ready"`` when nothing is missing
        or stale, else ``"needs_reindex"``), a ``storage`` description, item
        counters, per-type embedding counts and the five most recently
        requested index operations.
    """
    with self._connect() as conn:
        vector_counts = {}
        for counted in conn.execute(
            "SELECT item_type, COUNT(*) AS count FROM vector_embeddings GROUP BY item_type"
        ):
            vector_counts[counted["item_type"]] = counted["count"]

        source_items = self._iter_vector_source_items(conn)

        stored_rows = conn.execute(
            """
            SELECT item_id, text_hash, embedding_dim, embedding_model, indexed_at
            FROM vector_embeddings
            """
        ).fetchall()
        vector_rows = {stored["item_id"]: stored for stored in stored_rows}

        latest_rows = conn.execute(
            """
            SELECT id, operation, status, requested_at, started_at, completed_at,
                   items_total, items_indexed, items_skipped, error_message, metadata_json
            FROM vector_index_operations
            ORDER BY requested_at DESC, id DESC
            LIMIT 5
            """
        ).fetchall()

    model = self._embedding_model
    tally = {"missing": 0, "stale": 0, "ready": 0}
    for item in source_items:
        stored = vector_rows.get(item["item_id"])
        expected_hash = _sha256_text(_clean_text(item["text"]))
        if not stored:
            bucket = "missing"
        elif (
            stored["text_hash"] == expected_hash
            and stored["embedding_dim"] == model.dim
            and stored["embedding_model"] == model.model_id
        ):
            bucket = "ready"
        else:
            bucket = "stale"
        tally[bucket] += 1
    pending = tally["missing"] + tally["stale"]

    copied_fields = (
        "id",
        "operation",
        "status",
        "requested_at",
        "started_at",
        "completed_at",
        "items_total",
        "items_indexed",
        "items_skipped",
        "error_message",
    )
    operations = []
    for op_row in latest_rows:
        entry = {field: op_row[field] for field in copied_fields}
        entry["metadata"] = _safe_loads(op_row["metadata_json"])
        operations.append(entry)

    return {
        "status": "ready" if pending == 0 else "needs_reindex",
        "storage": {
            "db_path": str(self.db_path),
            "backend": "sqlite",
            "embedding_model": model.model_id,
            "embedding_dim": model.dim,
            # Honest capability report: trigram FTS5 keyword index, or a
            # LIKE-scan fallback when this SQLite build lacks it.
            "fts_enabled": bool(getattr(self, "_fts_enabled", False)),
        },
        "source_items": len(source_items),
        "indexed_items": sum(vector_counts.values()),
        "ready_items": tally["ready"],
        "missing_items": tally["missing"],
        "stale_items": tally["stale"],
        "pending_items": pending,
        "by_item_type": vector_counts,
        "operations": operations,
    }
892
+
893
def vector_search(
    self,
    query: str,
    *,
    limit: int = 30,
    min_score: float = 0.0,
    max_candidates: int = 10_000,
) -> Dict[str, Any]:
    """Rank stored embeddings by similarity to ``query``.

    Args:
        query: Free text; surrounding whitespace is stripped. A blank query
            short-circuits to an empty match list.
        limit: Maximum matches returned, clamped to the range 1..100.
        min_score: Candidates scoring below this value are dropped.
        max_candidates: How many stored embeddings to score, capped at
            50_000 and never lower than ``limit``. Candidates are taken most
            recently indexed first and only for the active model id and
            dimension.

    Returns:
        ``{"query", "matches"}`` for a blank query. Otherwise a dict that also
        carries ``embedding_model`` and ``embedding_dim``, with ``matches``
        sorted by score and then ``updated_at``, both descending.
    """
    query = str(query or "").strip()
    limit = max(1, min(int(limit or 30), 100))
    min_score = float(min_score or 0.0)
    if not query:
        return {"query": query, "matches": []}

    model = self._embedding_model
    query_vector = model.embed(query)
    max_candidates = max(limit, min(int(max_candidates or 10_000), 50_000))

    with self._connect() as conn:
        candidates = conn.execute(
            """
            SELECT
                ve.item_id, ve.item_type, ve.source_node, ve.embedding,
                ve.embedding_dim, ve.embedding_model, ve.metadata_json AS vector_metadata,
                n.type AS node_type, n.title AS node_title, n.summary AS node_summary,
                n.metadata_json AS node_metadata, n.updated_at AS node_updated_at,
                c.text AS chunk_text, c.source_node AS parent_node_id,
                pn.type AS parent_type, pn.title AS parent_title,
                pn.summary AS parent_summary, pn.metadata_json AS parent_metadata,
                pn.updated_at AS parent_updated_at
            FROM vector_embeddings ve
            LEFT JOIN nodes n ON n.id=ve.source_node
            LEFT JOIN chunks c ON c.id=ve.item_id
            LEFT JOIN nodes pn ON pn.id=c.source_node
            WHERE ve.embedding_model=? AND ve.embedding_dim=?
            ORDER BY ve.indexed_at DESC
            LIMIT ?
            """,
            (model.model_id, model.dim, max_candidates),
        ).fetchall()

    matches = []
    for hit in candidates:
        stored_vector = model.decode(hit["embedding"], hit["embedding_dim"])
        score = model.similarity(query_vector, stored_vector)
        if score < min_score:
            continue

        parent_meta = _safe_loads(hit["parent_metadata"])
        own_meta = _safe_loads(hit["node_metadata"])

        if hit["item_type"] == "chunk":
            # Chunks present themselves through their parent node where the
            # parent's fields are available, else fall back to their own.
            shown_type = "Chunk"
            body = hit["chunk_text"] or hit["node_summary"]
            owner_id = hit["parent_node_id"] or hit["source_node"]
            heading = hit["parent_title"] or hit["node_title"]
            stamp = hit["parent_updated_at"] or hit["node_updated_at"]
            base_meta = parent_meta
        else:
            shown_type = hit["node_type"]
            body = hit["node_summary"]
            owner_id = hit["source_node"]
            heading = hit["node_title"]
            stamp = hit["node_updated_at"]
            base_meta = own_meta

        matches.append(
            {
                "id": hit["item_id"],
                "node_id": owner_id,
                "item_type": hit["item_type"],
                "type": shown_type,
                "title": heading,
                "summary": _clean_text(body or "")[:1000],
                "score": round(float(score), 6),
                "metadata": {
                    **base_meta,
                    "vector": _safe_loads(hit["vector_metadata"]),
                    "parent_node_id": hit["parent_node_id"],
                    "parent_type": hit["parent_type"],
                },
                "updated_at": stamp,
            }
        )

    def rank_key(match):
        return (match["score"], match.get("updated_at") or "")

    matches.sort(key=rank_key, reverse=True)
    return {
        "query": query,
        "embedding_model": model.model_id,
        "embedding_dim": model.dim,
        "matches": matches[:limit],
    }
983
+
984
def delete_conversation(self, conversation_id: str) -> Dict[str, Any]:
    """Delete a conversation node, its members and their derived children.

    The removal set consists of:

    * every node the conversation points at through a ``contains`` edge;
    * every node those members point at through a chunk, implication,
      signal, page, slide, sheet or image edge;
    * the conversation node itself.

    ``Topic`` nodes that no edge references afterwards are deleted too. When
    ``KGStoreV2`` is available the same deletions are applied to the v2 tables.

    Args:
        conversation_id: Conversation identifier; blank values are skipped.

    Returns:
        ``{"status": "skipped", "removed_nodes": 0}`` for a blank id, else a
        dict with ``status`` ``"ok"``, the stripped ``conversation_id`` and
        the size of the removal set as ``removed_nodes``.
    """
    conversation_id = str(conversation_id or "").strip()
    if not conversation_id:
        return {"status": "skipped", "removed_nodes": 0}

    root_id = f"conversation:{_slug(conversation_id)}"

    legacy_child_types = [
        "has_chunk",
        "implies",
        "contains_signal",
        "has_page",
        "has_slide",
        "has_sheet",
        "contains_image",
    ]
    # Edge rows may carry the legacy lowercase label (pre-v4) or the
    # canonical EdgeType value (v4 write door), so match both spellings.
    child_types = legacy_child_types + [name.upper() for name in legacy_child_types]
    marks = ",".join("?" * len(child_types))
    child_sql = f"SELECT to_node FROM edges WHERE from_node=? AND type IN ({marks})"

    with self._connect() as conn:
        members = [
            edge["to_node"]
            for edge in conn.execute(
                "SELECT to_node FROM edges WHERE from_node=? AND type IN ('contains', 'CONTAINS')",
                (root_id,),
            )
        ]
        doomed = set(members)
        for member_id in members:
            doomed.update(
                edge["to_node"]
                for edge in conn.execute(child_sql, (member_id, *child_types))
            )
        doomed.add(root_id)

        for node_id in doomed:
            conn.execute("DELETE FROM nodes WHERE id=?", (node_id,))
            if KGStoreV2 is not None:
                # edges_v2 rows go with the node via cascade.
                conn.execute("DELETE FROM nodes_v2 WHERE id=?", (node_id,))

        # Sweep topics that no edge touches any more.
        conn.execute(
            """
            DELETE FROM nodes
            WHERE type='Topic'
              AND id NOT IN (SELECT to_node FROM edges)
              AND id NOT IN (SELECT from_node FROM edges)
            """
        )
        if KGStoreV2 is not None:
            conn.execute(
                """
                DELETE FROM nodes_v2
                WHERE legacy_type='Topic'
                  AND id NOT IN (SELECT target FROM edges_v2)
                  AND id NOT IN (SELECT source FROM edges_v2)
                """
            )

    return {
        "status": "ok",
        "conversation_id": conversation_id,
        "removed_nodes": len(doomed),
    }
1046
+
1047
def clear_all(self) -> Dict[str, Any]:
    """Wipe the graph tables and the blob directory.

    Row counts are taken first. Then ``local_file_index``,
    ``knowledge_sources``, ``chunks``, ``edges`` and ``nodes`` are emptied, in
    that order, followed by ``edges_v2`` and ``nodes_v2`` when ``KGStoreV2`` is
    available. The blob directory is removed if present and recreated empty.

    NOTE(review): ``vector_embeddings`` is not deleted here - confirm that a
    cascade or a later index rebuild is expected to clear it.

    Returns:
        ``{"status": "ok", "removed": counts}``, where ``counts`` maps each
        legacy table name to the number of rows it held before the wipe.
    """
    with self._connect() as conn:

        def row_count(sql: str) -> int:
            return conn.execute(sql).fetchone()["c"]

        counts = {
            "nodes": row_count("SELECT COUNT(*) AS c FROM nodes"),
            "edges": row_count("SELECT COUNT(*) AS c FROM edges"),
            "chunks": row_count("SELECT COUNT(*) AS c FROM chunks"),
            "knowledge_sources": row_count(
                "SELECT COUNT(*) AS c FROM knowledge_sources"
            ),
            "local_file_index": row_count(
                "SELECT COUNT(*) AS c FROM local_file_index"
            ),
        }

        wipe_statements = [
            "DELETE FROM local_file_index",
            "DELETE FROM knowledge_sources",
            "DELETE FROM chunks",
            "DELETE FROM edges",
            "DELETE FROM nodes",
        ]
        if KGStoreV2 is not None:
            wipe_statements.append("DELETE FROM edges_v2")
            wipe_statements.append("DELETE FROM nodes_v2")
        for statement in wipe_statements:
            conn.execute(statement)

    blob_root = self.blob_dir
    if blob_root.exists():
        # Best effort: files that cannot be removed are left behind.
        shutil.rmtree(blob_root, ignore_errors=True)
    blob_root.mkdir(parents=True, exist_ok=True)
    return {"status": "ok", "removed": counts}
1078
+
1079
def stats(self) -> Dict[str, Any]:
    """Summarise the store: per-type node and edge counts plus source totals.

    Node and edge counts come from the tables named by
    ``self._read_tables()``. When ``KGStoreV2`` is available its own
    ``stats()`` is attached under ``"v2"``; if that call raises, the error is
    reported in the payload instead of propagating.

    Returns:
        A dict with ``db_path``, ``schema_version``, ``v2_schema_available``,
        ``nodes``, ``edges``, ``local_sources``, ``local_file_status`` and
        ``v2``.
    """
    node_table, edge_table = self._read_tables()

    with self._connect() as conn:

        def grouped(sql: str, key: str) -> Dict[Any, Any]:
            return {entry[key]: entry["count"] for entry in conn.execute(sql)}

        node_counts = grouped(
            f"SELECT type, COUNT(*) AS count FROM {node_table} GROUP BY type", "type"
        )
        edge_counts = grouped(
            f"SELECT type, COUNT(*) AS count FROM {edge_table} GROUP BY type", "type"
        )
        source_total = conn.execute(
            "SELECT COUNT(*) AS c FROM knowledge_sources"
        ).fetchone()["c"]
        file_status = grouped(
            "SELECT status, COUNT(*) AS count FROM local_file_index GROUP BY status",
            "status",
        )

    if KGStoreV2 is None:
        v2_stats = None
    else:
        try:
            v2_stats = KGStoreV2(self.db_path).stats()
        except Exception as err:
            # Deliberate best effort: a broken v2 store must not hide the
            # legacy statistics.
            v2_stats = {"available": False, "error": str(err)}

    return {
        "db_path": str(self.db_path),
        "schema_version": GRAPH_SCHEMA_VERSION,
        "v2_schema_available": KGStoreV2 is not None,
        "nodes": node_counts,
        "edges": edge_counts,
        "local_sources": source_total,
        "local_file_status": file_status,
        "v2": v2_stats,
    }
1119
+
1120
def search_for_document_generation(
    self, query: str, limit: int = 10
) -> List[Dict[str, Any]]:
    """Hybrid retrieval optimized for document generation.

    Candidates are nodes of document-like types whose title, summary or
    metadata contain the full query or one of its topic terms (``LIKE``
    substring match). Each candidate is scored as::

        (0.5*text_relevance + 0.3*graph_relationship + 0.2*recency) * boost

    where ``boost`` is 1.2 for ``Document``, ``File``, ``SlideDeck`` and
    ``Decision`` nodes and 1.0 otherwise, so ``hybrid_score`` can exceed 1.0.

    Args:
        query: Free-text query; blank values return an empty list.
        limit: Maximum results, clamped to the range 1..50.

    Returns:
        Result dicts sorted by ``hybrid_score`` descending. Each carries the
        node fields, the component ``scores`` and up to eight neighbouring
        Concept/Feature/Decision/Task nodes under ``related_concepts``.
    """
    query = str(query or "").strip()
    if not query:
        return []
    limit = max(1, min(int(limit or 10), 50))
    terms = _topic_candidates(query, limit=12)
    lowered_terms = [term.lower() for term in terms]
    now = datetime.now()
    nt, et = self._read_tables()

    candidate_sql = f"""
        SELECT id, type, title, summary, metadata_json, updated_at
        FROM {nt}
        WHERE (title LIKE ? OR summary LIKE ? OR metadata_json LIKE ?)
          AND type IN ('Document', 'File', 'CodeFile', 'SlideDeck',
                       'Spreadsheet', 'Image', 'ImageText', 'Chat',
                       'Decision', 'Task', 'Concept', 'Feature',
                       'Page', 'Slide')
        ORDER BY updated_at DESC, id ASC
        LIMIT ?
    """
    # The whole query is probed first with a wider cap, then each topic term.
    probes = [(f"%{query}%", limit * 5)]
    probes.extend((f"%{term}%", limit * 3) for term in terms)

    with self._connect() as conn:
        candidate_rows = []
        seen_ids = set()
        for pattern, cap in probes:
            rows = conn.execute(
                candidate_sql, (pattern, pattern, pattern, cap)
            ).fetchall()
            for row in rows:
                if row["id"] not in seen_ids:
                    seen_ids.add(row["id"])
                    candidate_rows.append(row)

        scored_results = []
        for row in candidate_rows:
            # NULL columns must contribute nothing; formatting them directly
            # would inject the literal text "None" and let a term such as
            # "none" match falsely.
            haystack = " ".join(
                str(row[column] or "")
                for column in ("title", "summary", "metadata_json")
            ).lower()

            text_hits = sum(1 for term in lowered_terms if term in haystack)
            text_score = min(1.0, text_hits / max(len(terms), 1))

            edge_count = conn.execute(
                f"SELECT COUNT(*) AS c FROM {et} WHERE from_node=? OR to_node=?",
                (row["id"], row["id"]),
            ).fetchone()["c"]
            # Log-damped degree: roughly 54 edges saturate the score.
            graph_score = min(1.0, math.log1p(edge_count) / 4.0)

            recency = _recency_score(
                row["updated_at"], now=now, half_life_days=14.0
            )

            doc_type_boost = (
                1.2
                if row["type"] in ("Document", "File", "SlideDeck", "Decision")
                else 1.0
            )

            hybrid_score = (
                0.5 * text_score + 0.3 * graph_score + 0.2 * recency
            ) * doc_type_boost

            meta = _safe_loads(row["metadata_json"])
            neighbor_rows = conn.execute(
                f"""
                SELECT n.title, n.type FROM {et} e
                JOIN {nt} n ON n.id = CASE WHEN e.from_node = ? THEN e.to_node ELSE e.from_node END
                WHERE (e.from_node = ? OR e.to_node = ?)
                  AND n.type IN ('Concept', 'Feature', 'Decision', 'Task')
                LIMIT 8
                """,
                (row["id"], row["id"], row["id"]),
            ).fetchall()
            neighbor_concepts = [
                {"title": nr["title"], "type": nr["type"]} for nr in neighbor_rows
            ]

            scored_results.append(
                {
                    "id": row["id"],
                    "type": row["type"],
                    "title": row["title"],
                    "summary": row["summary"],
                    "metadata": meta,
                    "updated_at": row["updated_at"],
                    "hybrid_score": round(hybrid_score, 4),
                    "scores": {
                        "text": round(text_score, 4),
                        "graph": round(graph_score, 4),
                        "recency": round(recency, 4),
                    },
                    "related_concepts": neighbor_concepts,
                }
            )

    scored_results.sort(key=lambda x: x["hybrid_score"], reverse=True)
    return scored_results[:limit]
1252
+
1253
def multi_hop_context(
    self, node_ids: List[str], max_hops: int = 2
) -> Dict[str, Any]:
    """Multi-hop graph traversal from seed nodes for richer context.

    Seeds are expanded at hop 0, their neighbours at hop 1, and so on for
    ``max_hops`` layers. Every edge touching an expanded node is reported
    once; nodes first reached from the last layer therefore appear only as
    edge endpoints, not in ``nodes``.

    Traversal order is deterministic: seeds are visited in the order given
    (duplicates dropped) and neighbours in the order their edges are read
    (``ORDER BY id``).

    Args:
        node_ids: Seed node ids. ``None`` or an empty list yields an empty
            result.
        max_hops: Number of layers to expand.

    Returns:
        ``{"nodes": [...], "edges": [...]}``. Each node dict records the
        ``hop`` at which it was expanded; seed ids with no matching row are
        still traversed but produce no node entry.
    """
    visited_nodes = set()
    visited_edges = set()
    all_nodes = []
    all_edges = []
    # dict.fromkeys de-duplicates while keeping the caller's order; a plain
    # set would make the output order depend on string hash randomisation.
    frontier = list(dict.fromkeys(node_ids or []))
    nt, et = self._read_tables()

    with self._connect() as conn:
        for hop in range(max_hops):
            if not frontier:
                break
            next_frontier = {}  # insertion-ordered set of node ids
            for nid in frontier:
                if nid in visited_nodes:
                    continue
                visited_nodes.add(nid)
                row = conn.execute(
                    f"SELECT id, type, title, summary, metadata_json, updated_at FROM {nt} WHERE id=?",
                    (nid,),
                ).fetchone()
                if row:
                    all_nodes.append(
                        {
                            "id": row["id"],
                            "type": row["type"],
                            "title": row["title"],
                            "summary": row["summary"],
                            "metadata": _safe_loads(row["metadata_json"]),
                            "hop": hop,
                        }
                    )
                edge_rows = conn.execute(
                    f"""
                    SELECT id, from_node, to_node, type, weight
                    FROM {et} WHERE from_node=? OR to_node=?
                    ORDER BY id ASC
                    """,
                    (nid, nid),
                ).fetchall()
                for er in edge_rows:
                    if er["id"] not in visited_edges:
                        visited_edges.add(er["id"])
                        all_edges.append(
                            {
                                "from": er["from_node"],
                                "to": er["to_node"],
                                "type": er["type"],
                                "weight": er["weight"],
                            }
                        )
                    other = (
                        er["to_node"]
                        if er["from_node"] == nid
                        else er["from_node"]
                    )
                    if other not in visited_nodes:
                        next_frontier[other] = None
            frontier = list(next_frontier)

    return {"nodes": all_nodes, "edges": all_edges}