ltcai 4.0.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (195) hide show
  1. package/README.md +42 -33
  2. package/desktop/electron/main.cjs +44 -0
  3. package/docs/CHANGELOG.md +106 -0
  4. package/docs/REALTIME_COLLABORATION.md +3 -3
  5. package/docs/V3_FRONTEND.md +9 -8
  6. package/docs/V4_1_FRONTEND_ARCHITECTURE_REVIEW.md +65 -0
  7. package/docs/V4_1_FRONTEND_MIGRATION_REPORT.md +70 -0
  8. package/docs/V4_1_VALIDATION_REPORT.md +47 -0
  9. package/docs/V4_DIGITAL_BRAIN_RECOVERY.md +95 -45
  10. package/docs/kg-schema.md +6 -2
  11. package/docs/spec-vs-impl.md +10 -10
  12. package/frontend/index.html +24 -0
  13. package/frontend/openapi.json +14190 -0
  14. package/frontend/src/App.tsx +184 -0
  15. package/frontend/src/api/client.ts +317 -0
  16. package/frontend/src/api/openapi.ts +16637 -0
  17. package/frontend/src/components/primitives.tsx +204 -0
  18. package/frontend/src/components/ui/badge.tsx +27 -0
  19. package/frontend/src/components/ui/button.tsx +37 -0
  20. package/frontend/src/components/ui/card.tsx +22 -0
  21. package/frontend/src/components/ui/input.tsx +16 -0
  22. package/frontend/src/components/ui/textarea.tsx +16 -0
  23. package/frontend/src/lib/utils.ts +33 -0
  24. package/frontend/src/main.tsx +23 -0
  25. package/frontend/src/pages/Act.tsx +245 -0
  26. package/frontend/src/pages/Ask.tsx +200 -0
  27. package/frontend/src/pages/Brain.tsx +267 -0
  28. package/frontend/src/pages/Capture.tsx +158 -0
  29. package/frontend/src/pages/Library.tsx +187 -0
  30. package/frontend/src/pages/System.tsx +344 -0
  31. package/frontend/src/routes.ts +85 -0
  32. package/frontend/src/store/appStore.ts +54 -0
  33. package/frontend/src/styles.css +107 -0
  34. package/kg_schema.py +2 -603
  35. package/knowledge_graph.py +37 -4958
  36. package/latticeai/__init__.py +1 -1
  37. package/latticeai/api/admin.py +15 -16
  38. package/latticeai/api/agents.py +13 -6
  39. package/latticeai/api/auth.py +19 -11
  40. package/latticeai/api/invitations.py +100 -0
  41. package/latticeai/api/knowledge_graph.py +4 -11
  42. package/latticeai/api/plugins.py +3 -6
  43. package/latticeai/api/realtime.py +4 -7
  44. package/latticeai/api/setup.py +5 -4
  45. package/latticeai/api/static_routes.py +13 -16
  46. package/latticeai/api/ui_redirects.py +26 -0
  47. package/latticeai/api/workflow_designer.py +39 -6
  48. package/latticeai/api/workspace.py +24 -10
  49. package/latticeai/app_factory.py +88 -17
  50. package/latticeai/brain/_kg_common.py +1123 -0
  51. package/latticeai/brain/discovery.py +1455 -0
  52. package/latticeai/brain/documents.py +218 -0
  53. package/latticeai/brain/ingest.py +644 -0
  54. package/latticeai/brain/projection.py +561 -0
  55. package/latticeai/brain/provenance.py +401 -0
  56. package/latticeai/brain/retrieval.py +1316 -0
  57. package/latticeai/brain/schema.py +640 -0
  58. package/latticeai/brain/store.py +216 -0
  59. package/latticeai/brain/write_master.py +225 -0
  60. package/latticeai/core/invitations.py +131 -0
  61. package/latticeai/core/marketplace.py +1 -1
  62. package/latticeai/core/multi_agent.py +1 -1
  63. package/latticeai/core/policy.py +54 -0
  64. package/latticeai/core/realtime.py +65 -44
  65. package/latticeai/core/sessions.py +31 -5
  66. package/latticeai/core/users.py +147 -0
  67. package/latticeai/core/workspace_os.py +420 -20
  68. package/latticeai/services/agent_runtime.py +242 -4
  69. package/latticeai/services/run_executor.py +328 -0
  70. package/latticeai/services/workspace_service.py +27 -19
  71. package/package.json +54 -27
  72. package/scripts/build_frontend_assets.mjs +38 -0
  73. package/scripts/bump_version.py +1 -1
  74. package/scripts/export_openapi.py +31 -0
  75. package/scripts/lint_frontend.mjs +86 -0
  76. package/scripts/run_python.mjs +47 -0
  77. package/src-tauri/Cargo.lock +4833 -0
  78. package/src-tauri/Cargo.toml +19 -0
  79. package/src-tauri/build.rs +3 -0
  80. package/src-tauri/capabilities/default.json +7 -0
  81. package/src-tauri/src/main.rs +78 -0
  82. package/src-tauri/tauri.conf.json +36 -0
  83. package/static/app/asset-manifest.json +32 -0
  84. package/static/app/assets/core-CwxXejkd.js +2 -0
  85. package/static/app/assets/core-CwxXejkd.js.map +1 -0
  86. package/static/app/assets/index-CJRAzNnf.js +333 -0
  87. package/static/app/assets/index-CJRAzNnf.js.map +1 -0
  88. package/static/app/assets/index-CSwBBgf4.css +2 -0
  89. package/static/app/index.html +25 -0
  90. package/static/manifest.json +2 -2
  91. package/static/sw.js +4 -4
  92. package/scripts/build_v3_assets.mjs +0 -170
  93. package/scripts/lint_v3.mjs +0 -97
  94. package/static/account.html +0 -113
  95. package/static/activity.html +0 -73
  96. package/static/admin.html +0 -486
  97. package/static/agents.html +0 -139
  98. package/static/chat.html +0 -841
  99. package/static/css/reference/account.css +0 -439
  100. package/static/css/reference/admin.css +0 -610
  101. package/static/css/reference/base.css +0 -1661
  102. package/static/css/reference/chat.css +0 -4623
  103. package/static/css/reference/graph.css +0 -1016
  104. package/static/css/responsive.css +0 -861
  105. package/static/graph.html +0 -122
  106. package/static/platform.css +0 -104
  107. package/static/plugins.html +0 -136
  108. package/static/scripts/account.js +0 -238
  109. package/static/scripts/admin.js +0 -1614
  110. package/static/scripts/chat.js +0 -5081
  111. package/static/scripts/graph.js +0 -1804
  112. package/static/scripts/platform.js +0 -64
  113. package/static/scripts/ux.js +0 -167
  114. package/static/scripts/workspace.js +0 -948
  115. package/static/v3/asset-manifest.json +0 -56
  116. package/static/v3/css/lattice.base.49deefb5.css +0 -128
  117. package/static/v3/css/lattice.base.css +0 -128
  118. package/static/v3/css/lattice.components.cde18231.css +0 -472
  119. package/static/v3/css/lattice.components.css +0 -472
  120. package/static/v3/css/lattice.shell.29d36d85.css +0 -452
  121. package/static/v3/css/lattice.shell.css +0 -452
  122. package/static/v3/css/lattice.tokens.304cbc40.css +0 -135
  123. package/static/v3/css/lattice.tokens.css +0 -135
  124. package/static/v3/css/lattice.views.0a18b6c5.css +0 -360
  125. package/static/v3/css/lattice.views.css +0 -360
  126. package/static/v3/index.html +0 -68
  127. package/static/v3/js/app.356e6452.js +0 -26
  128. package/static/v3/js/app.js +0 -26
  129. package/static/v3/js/core/api.7a308b89.js +0 -568
  130. package/static/v3/js/core/api.js +0 -568
  131. package/static/v3/js/core/components.f25b3b93.js +0 -230
  132. package/static/v3/js/core/components.js +0 -230
  133. package/static/v3/js/core/dom.a2773eb0.js +0 -148
  134. package/static/v3/js/core/dom.js +0 -148
  135. package/static/v3/js/core/router.584570f2.js +0 -37
  136. package/static/v3/js/core/router.js +0 -37
  137. package/static/v3/js/core/routes.7222343d.js +0 -93
  138. package/static/v3/js/core/routes.js +0 -93
  139. package/static/v3/js/core/shell.a1657f20.js +0 -391
  140. package/static/v3/js/core/shell.js +0 -391
  141. package/static/v3/js/core/store.204a08b2.js +0 -113
  142. package/static/v3/js/core/store.js +0 -113
  143. package/static/v3/js/views/admin-audit.660a1fb1.js +0 -185
  144. package/static/v3/js/views/admin-audit.js +0 -185
  145. package/static/v3/js/views/admin-permissions.a7ae5f09.js +0 -177
  146. package/static/v3/js/views/admin-permissions.js +0 -177
  147. package/static/v3/js/views/admin-policies.3658fd86.js +0 -102
  148. package/static/v3/js/views/admin-policies.js +0 -102
  149. package/static/v3/js/views/admin-private-vpc.7d342d36.js +0 -135
  150. package/static/v3/js/views/admin-private-vpc.js +0 -135
  151. package/static/v3/js/views/admin-security.07c66b72.js +0 -180
  152. package/static/v3/js/views/admin-security.js +0 -180
  153. package/static/v3/js/views/admin-users.03bac88c.js +0 -168
  154. package/static/v3/js/views/admin-users.js +0 -168
  155. package/static/v3/js/views/agents.014d0b74.js +0 -541
  156. package/static/v3/js/views/agents.js +0 -541
  157. package/static/v3/js/views/chat.e6dd7dd0.js +0 -601
  158. package/static/v3/js/views/chat.js +0 -601
  159. package/static/v3/js/views/files.adad14c1.js +0 -365
  160. package/static/v3/js/views/files.js +0 -365
  161. package/static/v3/js/views/graph-canvas.17c15d65.js +0 -509
  162. package/static/v3/js/views/graph-canvas.js +0 -509
  163. package/static/v3/js/views/home.24f8b8ae.js +0 -200
  164. package/static/v3/js/views/home.js +0 -200
  165. package/static/v3/js/views/hooks.37895880.js +0 -220
  166. package/static/v3/js/views/hooks.js +0 -220
  167. package/static/v3/js/views/hybrid-search.2fb63ed9.js +0 -194
  168. package/static/v3/js/views/hybrid-search.js +0 -194
  169. package/static/v3/js/views/knowledge-graph.5e40cbeb.js +0 -509
  170. package/static/v3/js/views/knowledge-graph.js +0 -509
  171. package/static/v3/js/views/marketplace.ab0583d4.js +0 -141
  172. package/static/v3/js/views/marketplace.js +0 -141
  173. package/static/v3/js/views/mcp.99b5c6a7.js +0 -114
  174. package/static/v3/js/views/mcp.js +0 -114
  175. package/static/v3/js/views/memory.4ebdf474.js +0 -147
  176. package/static/v3/js/views/memory.js +0 -147
  177. package/static/v3/js/views/models.a1ffa147.js +0 -256
  178. package/static/v3/js/views/models.js +0 -256
  179. package/static/v3/js/views/my-computer.d9d9ae1c.js +0 -463
  180. package/static/v3/js/views/my-computer.js +0 -463
  181. package/static/v3/js/views/pipeline.c522f1ce.js +0 -157
  182. package/static/v3/js/views/pipeline.js +0 -157
  183. package/static/v3/js/views/planning.9ac3e313.js +0 -153
  184. package/static/v3/js/views/planning.js +0 -153
  185. package/static/v3/js/views/settings.8631fa5e.js +0 -318
  186. package/static/v3/js/views/settings.js +0 -318
  187. package/static/v3/js/views/skills.c6c2f965.js +0 -109
  188. package/static/v3/js/views/skills.js +0 -109
  189. package/static/v3/js/views/tools.e4f11276.js +0 -108
  190. package/static/v3/js/views/tools.js +0 -108
  191. package/static/v3/js/views/workflows.26c57290.js +0 -128
  192. package/static/v3/js/views/workflows.js +0 -128
  193. package/static/workflows.html +0 -146
  194. package/static/workspace.css +0 -1121
  195. package/static/workspace.html +0 -357
@@ -0,0 +1,401 @@
1
+ from __future__ import annotations
2
+
3
+ # ruff: noqa: F403,F405
4
+
5
+ from ._kg_common import * # noqa: F403,F401
6
+
7
+
8
+ class KnowledgeGraphProvenanceMixin:
9
def record_provenance(
    self,
    *,
    node_id: str,
    source_type: str,
    pipeline: str = "unified-ingestion",
    source_uri: Optional[str] = None,
    content_hash: Optional[str] = None,
    title: Optional[str] = None,
    owner: Optional[str] = None,
    workspace_id: Optional[str] = None,
    captured_at: Optional[str] = None,
    modified_at: Optional[str] = None,
    embedded: bool = False,
    linked: bool = False,
    duplicate: bool = False,
    agent_used: Optional[str] = None,
    chunk_count: int = 0,
    permissions: Optional[Dict[str, Any]] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Store one ingestion-provenance row for ``node_id`` (audit trail).

    The row id is ``prov:`` followed by the first 24 characters of
    ``_sha256_text`` applied to ``node_id|content_hash|timestamp``, where the
    timestamp comes from ``_now()``. The statement is ``INSERT OR REPLACE``,
    so a row that hashes to an already-stored id overwrites it.

    Boolean flags are stored as 0/1, ``chunk_count`` is coerced to ``int``
    (falsy values become 0), and ``permissions`` / ``metadata`` are
    serialised with ``_json`` (``None`` is stored as an empty mapping).

    Returns:
        A dict with the new row's ``id``, the ``node_id`` and ``created_at``.
    """
    created_at = _now()
    # The timestamp is part of the hashed basis, so the id depends on when
    # the record was written as well as on the node and its content hash.
    basis = "{}|{}|{}".format(node_id, content_hash or "", created_at)
    record_id = "prov:" + _sha256_text(basis)[:24]

    # Positional values, in the same order as the column list below.
    values = (
        record_id,
        node_id,
        source_type,
        source_uri,
        content_hash,
        title,
        pipeline,
        owner,
        workspace_id,
        captured_at,
        modified_at,
        int(bool(embedded)),
        int(bool(linked)),
        int(bool(duplicate)),
        agent_used,
        int(chunk_count or 0),
        _json(permissions or {}),
        _json(metadata or {}),
        created_at,
    )
    with self._connect() as conn:
        conn.execute(
            """
            INSERT OR REPLACE INTO ingestion_provenance(
                id, node_id, source_type, source_uri, content_hash, title, pipeline,
                owner, workspace_id, captured_at, modified_at, embedded, linked,
                duplicate, agent_used, chunk_count, permissions_json, metadata_json, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            values,
        )
    return {"id": record_id, "node_id": node_id, "created_at": created_at}
66
+
67
@staticmethod
def _provenance_row(row: sqlite3.Row) -> Dict[str, Any]:
    """Convert an ``ingestion_provenance`` row into a plain dict.

    Text columns are copied unchanged, the ``embedded`` / ``linked`` /
    ``duplicate`` columns become ``bool``, and the ``permissions_json`` /
    ``metadata_json`` columns are decoded with ``_safe_loads`` and exposed
    as ``permissions`` / ``metadata``.
    """
    record: Dict[str, Any] = {}
    # Columns copied through verbatim.
    for column in (
        "id",
        "node_id",
        "source_type",
        "source_uri",
        "content_hash",
        "title",
        "pipeline",
        "owner",
        "workspace_id",
        "captured_at",
        "modified_at",
    ):
        record[column] = row[column]
    # Flags are stored as integers; surface them as booleans.
    for flag in ("embedded", "linked", "duplicate"):
        record[flag] = bool(row[flag])
    record["agent_used"] = row["agent_used"]
    record["chunk_count"] = row["chunk_count"]
    record["permissions"] = _safe_loads(row["permissions_json"])
    record["metadata"] = _safe_loads(row["metadata_json"])
    record["created_at"] = row["created_at"]
    return record
90
+
91
def get_provenance(self, node_id: str) -> Optional[Dict[str, Any]]:
    """Fetch the newest provenance record stored for ``node_id``.

    Rows are ordered by ``created_at`` descending, with ``rowid`` descending
    as the tie-breaker, and only the first one is read.

    Returns:
        The row converted by ``_provenance_row``, or ``None`` when the node
        has no provenance rows.
    """
    query = (
        "SELECT * FROM ingestion_provenance WHERE node_id = ? "
        "ORDER BY created_at DESC, rowid DESC LIMIT 1"
    )
    with self._connect() as conn:
        latest = conn.execute(query, (node_id,)).fetchone()
    if not latest:
        return None
    return self._provenance_row(latest)
100
+
101
def list_provenance(
    self, *, limit: int = 100, source_type: Optional[str] = None
) -> Dict[str, Any]:
    """List recent provenance records, newest first.

    Args:
        limit: Maximum number of rows. A falsy value means 100; the result
            is clamped to the range 1..1000.
        source_type: When truthy, only rows with this ``source_type`` are
            returned.

    Returns:
        ``{"items": [...], "count": n}`` where each item is a row converted
        by ``_provenance_row`` and ``count`` is the number of rows fetched.
    """
    capped = max(1, min(int(limit or 100), 1000))

    # Pick the statement and its bound arguments up front so there is a
    # single execute call below.
    if source_type:
        statement = (
            "SELECT * FROM ingestion_provenance WHERE source_type = ? "
            "ORDER BY created_at DESC, rowid DESC LIMIT ?"
        )
        arguments = (source_type, capped)
    else:
        statement = (
            "SELECT * FROM ingestion_provenance "
            "ORDER BY created_at DESC, rowid DESC LIMIT ?"
        )
        arguments = (capped,)

    with self._connect() as conn:
        fetched = conn.execute(statement, arguments).fetchall()

    items = [self._provenance_row(record) for record in fetched]
    return {"items": items, "count": len(fetched)}
123
+
124
def provenance_coverage(self) -> Dict[str, Any]:
    """How much of the brain is explainable: nodes with vs without
    provenance, per node type — the honesty metric for 'every source goes
    through the pipeline'. Pre-v4 nodes ingested before provenance existed
    legitimately count as uncovered.

    The node table name is the first element returned by
    ``self._read_tables()``.

    Returns:
        A dict with:
        ``total_nodes`` — row count of the node table;
        ``nodes_with_provenance`` — nodes that have at least one
        ``ingestion_provenance`` row;
        ``coverage_ratio`` — covered / total rounded to 4 places, or
        ``None`` for an empty node table;
        ``uncovered_by_type`` — up to 20 node types with the most uncovered
        nodes, mapped to their counts;
        ``provenance_by_source_type`` — provenance row counts per
        ``source_type``.
    """
    nt, _ = self._read_tables()
    # Correlated EXISTS instead of ``id [NOT] IN (SELECT node_id ...)``:
    # with NOT IN, a single provenance row whose node_id is NULL makes the
    # predicate NULL for every node, which silently empties the uncovered
    # breakdown. EXISTS / NOT EXISTS are unaffected by NULLs.
    has_provenance = (
        "EXISTS (SELECT 1 FROM ingestion_provenance AS p WHERE p.node_id = n.id)"
    )
    with self._connect() as conn:
        total = conn.execute(f"SELECT COUNT(*) FROM {nt}").fetchone()[0]
        covered = conn.execute(
            f"SELECT COUNT(*) FROM {nt} AS n WHERE {has_provenance}"
        ).fetchone()[0]
        uncovered_by_type = {
            row["type"]: row["c"]
            for row in conn.execute(
                f"""
                SELECT n.type AS type, COUNT(*) AS c FROM {nt} AS n
                WHERE NOT {has_provenance}
                GROUP BY n.type ORDER BY c DESC LIMIT 20
                """
            ).fetchall()
        }
        by_source = {
            row["source_type"]: row["c"]
            for row in conn.execute(
                "SELECT source_type, COUNT(*) AS c FROM ingestion_provenance GROUP BY source_type"
            ).fetchall()
        }
    return {
        "total_nodes": total,
        "nodes_with_provenance": covered,
        # An empty graph has no meaningful ratio; report None, not 0.
        "coverage_ratio": round(covered / total, 4) if total else None,
        "uncovered_by_type": uncovered_by_type,
        "provenance_by_source_type": by_source,
    }
158
+
159
def provenance_stats(self) -> Dict[str, Any]:
    """Summarise the ``ingestion_provenance`` table for status displays.

    Returns:
        A dict with the total row count (``total``), row counts per
        ``source_type`` (``by_source_type``), the number of rows flagged
        ``embedded = 1`` (``embedded``) and ``duplicate = 1``
        (``duplicates``), and the greatest ``created_at`` value
        (``last_ingested_at``, ``None`` when the table is empty).
    """
    count_sql = "SELECT COUNT(*) AS c FROM ingestion_provenance"
    with self._connect() as conn:

        def count(suffix: str = ""):
            # Every counter is the same COUNT(*) with an optional WHERE.
            return conn.execute(count_sql + suffix).fetchone()["c"]

        total = count()
        grouped = conn.execute(
            "SELECT source_type, COUNT(*) AS c FROM ingestion_provenance GROUP BY source_type"
        ).fetchall()
        embedded = count(" WHERE embedded = 1")
        duplicates = count(" WHERE duplicate = 1")
        newest = conn.execute(
            "SELECT created_at FROM ingestion_provenance ORDER BY created_at DESC LIMIT 1"
        ).fetchone()

    stats: Dict[str, Any] = {"total": total}
    stats["by_source_type"] = {rec["source_type"]: rec["c"] for rec in grouped}
    stats["embedded"] = embedded
    stats["duplicates"] = duplicates
    stats["last_ingested_at"] = newest["created_at"] if newest else None
    return stats
187
+
188
def schema_versions(self) -> Dict[str, Any]:
    """Return the version stamps an exporter writes and an importer checks.

    ``embed_dim`` and ``kg_v2_schema_version`` are read from the
    ``kg_schema`` module; if that import fails for any reason they fall
    back to 1024 and 2. The remaining values come from module-level
    constants.
    """
    try:
        from kg_schema import EMBED_DIM as embed_dim, KG_SCHEMA_V2_VERSION as kg_v2
    except Exception:  # pragma: no cover - kg_schema always importable in practice
        embed_dim = 1024
        kg_v2 = 2

    versions: Dict[str, Any] = {}
    versions["graph_schema_version"] = GRAPH_SCHEMA_VERSION
    versions["db_format_version"] = _KG_DB_FORMAT_VERSION
    versions["kg_v2_schema_version"] = kg_v2
    versions["projection_version"] = _PROJECTION_VERSION
    versions["embed_dim"] = embed_dim
    return versions
201
+
202
def export_graph_data(
    self, *, workspace_id: Optional[str] = None
) -> Dict[str, Any]:
    """Dump the graph's logical tables as plain lists of dicts.

    The artifact holds ``nodes``, ``edges``, ``chunks``,
    ``knowledge_sources`` and ``provenance`` (from ``ingestion_provenance``)
    plus a ``counts`` mapping with the length of each list. Vector
    embeddings are not part of this export; per the original notes they are
    re-derived on import, and :meth:`backup_database` is the way to get a
    binary copy that includes them.

    When ``workspace_id`` is truthy the export is filtered: the kept node
    ids are those in ``nodes_v2`` whose ``workspace_id`` matches or is NULL
    (legacy-global rows). Nodes, chunks (by ``source_node``) and provenance
    (by ``node_id``) are limited to kept ids, and an edge survives only if
    both endpoints are kept. ``knowledge_sources`` is exported unfiltered in
    both modes.
    """
    with self._connect() as conn:

        def dump(table: str):
            cursor = conn.execute(f"SELECT * FROM {table}")
            return [dict(record) for record in cursor.fetchall()]

        if not workspace_id:
            data = {
                "nodes": dump("nodes"),
                "edges": dump("edges"),
                "chunks": dump("chunks"),
                "knowledge_sources": dump("knowledge_sources"),
                "provenance": dump("ingestion_provenance"),
            }
        else:
            scoped = conn.execute(
                "SELECT id FROM nodes_v2 WHERE workspace_id = ? OR workspace_id IS NULL",
                (workspace_id,),
            ).fetchall()
            keep_ids = {record["id"] for record in scoped}

            data = {}
            data["nodes"] = [
                node for node in dump("nodes") if node["id"] in keep_ids
            ]
            # Drop edges that would dangle: both endpoints must survive.
            data["edges"] = [
                edge
                for edge in dump("edges")
                if edge["from_node"] in keep_ids and edge["to_node"] in keep_ids
            ]
            data["chunks"] = [
                chunk for chunk in dump("chunks") if chunk["source_node"] in keep_ids
            ]
            data["knowledge_sources"] = dump("knowledge_sources")
            data["provenance"] = [
                entry
                for entry in dump("ingestion_provenance")
                if entry["node_id"] in keep_ids
            ]

        # Computed before assignment, so "counts" does not count itself.
        sizes = {section: len(items) for section, items in data.items()}
        data["counts"] = sizes
    return data
258
+
259
def import_graph_data(
    self, data: Dict[str, Any], *, mode: str = "merge", dry_run: bool = False
) -> Dict[str, Any]:
    """Import a logical export back into the store.

    ``mode='merge'`` writes on top of existing data: nodes, chunks and edges
    go through the ``_upsert_*`` helpers, knowledge sources and provenance
    use ``INSERT OR REPLACE``. ``mode='replace'`` calls ``clear_all()``
    first. Any other ``mode`` value is not rejected and behaves like merge.
    ``dry_run=True`` reports the plan without writing.

    Edge weights are imported as given, including an explicit ``0``; only a
    missing or empty weight falls back to ``1.0``.

    Args:
        data: Artifact with optional ``nodes``, ``edges``, ``chunks``,
            ``knowledge_sources``, ``provenance`` lists and an optional
            ``header`` dict carrying ``graph_schema_version``.
        mode: ``"merge"`` or ``"replace"``.
        dry_run: When true, nothing is written.

    Returns:
        The plan dict: ``mode`` plus the number of rows per section, and
        either ``dry_run: True`` or ``imported: True``.

    Raises:
        ValueError: If the header's ``graph_schema_version`` is an int
            greater than this build's ``GRAPH_SCHEMA_VERSION``.
    """
    nodes = data.get("nodes") or []
    edges = data.get("edges") or []
    chunks = data.get("chunks") or []
    sources = data.get("knowledge_sources") or []
    provenance = data.get("provenance") or []

    # Refuse artifacts written by a newer schema than this build understands.
    header = data.get("header") or {}
    incoming_schema = header.get("graph_schema_version")
    if isinstance(incoming_schema, int) and incoming_schema > GRAPH_SCHEMA_VERSION:
        raise ValueError(
            f"Artifact graph_schema_version {incoming_schema} is newer than this "
            f"build ({GRAPH_SCHEMA_VERSION}); refusing to import."
        )

    plan = {
        "mode": mode,
        "nodes": len(nodes),
        "edges": len(edges),
        "chunks": len(chunks),
        "knowledge_sources": len(sources),
        "provenance": len(provenance),
    }
    if dry_run:
        plan["dry_run"] = True
        return plan

    # NOTE(review): clear_all() runs before the import connection is opened,
    # so a failure during the import below presumably leaves the store
    # cleared — confirm whether that is acceptable for 'replace'.
    if mode == "replace":
        self.clear_all()

    with self._connect() as conn:
        for n in nodes:
            self._upsert_node(
                conn,
                n["id"],
                n["type"],
                n.get("title") or "",
                summary=n.get("summary") or "",
                metadata=_safe_loads(n.get("metadata_json")),
                raw=_safe_loads(n.get("raw_json")),
            )
        for c in chunks:
            self._upsert_chunk(
                conn,
                chunk_id=c["id"],
                source_node=c["source_node"],
                text=c.get("text") or "",
                metadata=_safe_loads(c.get("metadata_json")),
            )
        for e in edges:
            # ``value or 1.0`` would rewrite a legitimate weight of 0 to 1.0;
            # default only when the weight is absent or empty.
            raw_weight = e.get("weight")
            weight = 1.0 if raw_weight is None or raw_weight == "" else float(raw_weight)
            self._upsert_edge(
                conn,
                e["from_node"],
                e["to_node"],
                e["type"],
                weight=weight,
                metadata=_safe_loads(e.get("metadata_json")),
            )
        for s in sources:
            conn.execute(
                """
                INSERT OR REPLACE INTO knowledge_sources(
                    id, root_path, os_type, drive_id, label, status, include_ocr,
                    watch_enabled, consent_json, created_at, updated_at, last_scanned_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    s["id"],
                    s["root_path"],
                    s["os_type"],
                    s.get("drive_id"),
                    s.get("label"),
                    s.get("status") or "active",
                    int(s.get("include_ocr") or 0),
                    int(s.get("watch_enabled") or 0),
                    s.get("consent_json") or "{}",
                    s.get("created_at") or _now(),
                    s.get("updated_at") or _now(),
                    s.get("last_scanned_at"),
                ),
            )
        for p in provenance:
            conn.execute(
                """
                INSERT OR REPLACE INTO ingestion_provenance(
                    id, node_id, source_type, source_uri, content_hash, title, pipeline,
                    owner, workspace_id, captured_at, modified_at, embedded, linked,
                    duplicate, agent_used, chunk_count, permissions_json, metadata_json, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    p["id"],
                    p["node_id"],
                    p["source_type"],
                    p.get("source_uri"),
                    p.get("content_hash"),
                    p.get("title"),
                    # Rows that carry no pipeline name are labelled "import".
                    p.get("pipeline") or "import",
                    p.get("owner"),
                    p.get("workspace_id"),
                    p.get("captured_at"),
                    p.get("modified_at"),
                    int(p.get("embedded") or 0),
                    int(p.get("linked") or 0),
                    int(p.get("duplicate") or 0),
                    p.get("agent_used"),
                    int(p.get("chunk_count") or 0),
                    p.get("permissions_json") or "{}",
                    p.get("metadata_json") or "{}",
                    p.get("created_at") or _now(),
                ),
            )
    plan["imported"] = True
    return plan
382
+
383
def backup_database(self, dest_path) -> Path:
    """Write a clean, standalone snapshot of the live DB to ``dest_path``.

    Uses ``VACUUM INTO`` (after a full WAL checkpoint) so the snapshot is a
    defragmented, rollback-journal-mode database with no companion -wal/-shm
    — which restores cleanly by a plain file copy. Captures all data incl.
    the vector_embeddings BLOBs.

    The snapshot is first written to a sibling ``<name>.tmp`` file and only
    moved over ``dest_path`` once ``VACUUM INTO`` has succeeded, so a failed
    backup never destroys a previous snapshot at the same path.

    Args:
        dest_path: Target file path (``str`` or path-like). Missing parent
            directories are created.

    Returns:
        ``dest_path`` as a :class:`~pathlib.Path`.
    """
    dest = Path(dest_path)
    dest.parent.mkdir(parents=True, exist_ok=True)

    staging = dest.with_name(dest.name + ".tmp")
    if staging.exists():
        staging.unlink()  # VACUUM INTO requires the target to not exist

    conn = self._connect()
    try:
        conn.execute("PRAGMA wal_checkpoint(FULL)")
        conn.execute("VACUUM INTO ?", (str(staging),))
    except Exception:
        # Leave no partial snapshot behind; any existing backup is untouched.
        if staging.exists():
            staging.unlink()
        raise
    finally:
        conn.close()

    # Path.replace overwrites an existing destination in one step.
    staging.replace(dest)
    return dest