ltcai 3.6.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. package/README.md +39 -31
  2. package/docs/CHANGELOG.md +64 -0
  3. package/docs/REALTIME_COLLABORATION.md +3 -3
  4. package/docs/V3_FRONTEND.md +9 -8
  5. package/docs/V4_BRAIN_ARCHITECTURE.md +322 -0
  6. package/docs/V4_DIGITAL_BRAIN_RECOVERY.md +552 -0
  7. package/docs/V4_IMPLEMENTATION_PLAN.md +470 -0
  8. package/docs/kg-schema.md +51 -53
  9. package/docs/spec-vs-impl.md +10 -10
  10. package/kg_schema.py +2 -520
  11. package/knowledge_graph.py +37 -4629
  12. package/knowledge_graph_api.py +11 -127
  13. package/latticeai/__init__.py +1 -1
  14. package/latticeai/api/admin.py +16 -17
  15. package/latticeai/api/agents.py +20 -7
  16. package/latticeai/api/auth.py +46 -15
  17. package/latticeai/api/chat.py +112 -76
  18. package/latticeai/api/health.py +1 -1
  19. package/latticeai/api/hooks.py +1 -1
  20. package/latticeai/api/invitations.py +100 -0
  21. package/latticeai/api/knowledge_graph.py +139 -0
  22. package/latticeai/api/local_files.py +1 -1
  23. package/latticeai/api/mcp.py +23 -11
  24. package/latticeai/api/memory.py +1 -1
  25. package/latticeai/api/models.py +1 -1
  26. package/latticeai/api/network.py +81 -0
  27. package/latticeai/api/plugins.py +3 -6
  28. package/latticeai/api/realtime.py +5 -8
  29. package/latticeai/api/search.py +26 -2
  30. package/latticeai/api/security_dashboard.py +2 -3
  31. package/latticeai/api/setup.py +2 -2
  32. package/latticeai/api/static_routes.py +11 -16
  33. package/latticeai/api/tools.py +3 -0
  34. package/latticeai/api/ui_redirects.py +26 -0
  35. package/latticeai/api/workflow_designer.py +85 -6
  36. package/latticeai/api/workspace.py +93 -57
  37. package/latticeai/app_factory.py +1781 -0
  38. package/latticeai/brain/__init__.py +18 -0
  39. package/latticeai/brain/_kg_common.py +1123 -0
  40. package/latticeai/brain/context.py +213 -0
  41. package/latticeai/brain/conversations.py +236 -0
  42. package/latticeai/brain/discovery.py +1455 -0
  43. package/latticeai/brain/documents.py +218 -0
  44. package/latticeai/brain/identity.py +175 -0
  45. package/latticeai/brain/ingest.py +644 -0
  46. package/latticeai/brain/memory.py +102 -0
  47. package/latticeai/brain/network.py +205 -0
  48. package/latticeai/brain/projection.py +561 -0
  49. package/latticeai/brain/provenance.py +401 -0
  50. package/latticeai/brain/retrieval.py +1316 -0
  51. package/latticeai/brain/schema.py +640 -0
  52. package/latticeai/brain/store.py +216 -0
  53. package/latticeai/brain/write_master.py +225 -0
  54. package/latticeai/core/agent.py +31 -7
  55. package/latticeai/core/audit.py +0 -7
  56. package/latticeai/core/config.py +1 -1
  57. package/latticeai/core/context_builder.py +1 -2
  58. package/latticeai/core/enterprise.py +1 -1
  59. package/latticeai/core/graph_curator.py +2 -2
  60. package/latticeai/core/invitations.py +131 -0
  61. package/latticeai/core/marketplace.py +1 -1
  62. package/latticeai/core/mcp_registry.py +791 -0
  63. package/latticeai/core/model_compat.py +1 -1
  64. package/latticeai/core/model_resolution.py +0 -1
  65. package/latticeai/core/multi_agent.py +238 -4
  66. package/latticeai/core/policy.py +54 -0
  67. package/latticeai/core/realtime.py +65 -44
  68. package/latticeai/core/security.py +1 -1
  69. package/latticeai/core/sessions.py +66 -10
  70. package/latticeai/core/users.py +147 -0
  71. package/latticeai/core/workflow_engine.py +114 -2
  72. package/latticeai/core/workspace_os.py +477 -29
  73. package/latticeai/models/__init__.py +7 -0
  74. package/latticeai/models/router.py +779 -0
  75. package/latticeai/server_app.py +29 -1536
  76. package/latticeai/services/agent_runtime.py +243 -4
  77. package/latticeai/services/app_context.py +75 -14
  78. package/latticeai/services/ingestion.py +47 -0
  79. package/latticeai/services/kg_portability.py +33 -3
  80. package/latticeai/services/memory_service.py +39 -11
  81. package/latticeai/services/model_runtime.py +2 -5
  82. package/latticeai/services/platform_runtime.py +100 -23
  83. package/latticeai/services/run_executor.py +328 -0
  84. package/latticeai/services/search_service.py +17 -8
  85. package/latticeai/services/tool_dispatch.py +12 -2
  86. package/latticeai/services/triggers.py +241 -0
  87. package/latticeai/services/upload_service.py +37 -12
  88. package/latticeai/services/workspace_service.py +55 -16
  89. package/llm_router.py +29 -772
  90. package/ltcai_cli.py +1 -2
  91. package/mcp_registry.py +25 -788
  92. package/p_reinforce.py +124 -14
  93. package/package.json +10 -20
  94. package/scripts/bump_version.py +99 -0
  95. package/scripts/generate_diagrams.py +0 -1
  96. package/scripts/lint_v3.mjs +105 -18
  97. package/scripts/validate_release_artifacts.py +0 -1
  98. package/scripts/wheel_smoke.py +142 -0
  99. package/server.py +11 -7
  100. package/setup_wizard.py +1142 -0
  101. package/static/sw.js +81 -52
  102. package/static/v3/asset-manifest.json +33 -25
  103. package/static/v3/css/{lattice.base.e4cdd05d.css → lattice.base.49deefb5.css} +1 -1
  104. package/static/v3/css/lattice.base.css +1 -1
  105. package/static/v3/css/{lattice.components.9b49d614.css → lattice.components.cde18231.css} +1 -1
  106. package/static/v3/css/lattice.components.css +1 -1
  107. package/static/v3/css/{lattice.shell.8fcc9d33.css → lattice.shell.29d36d85.css} +1 -1
  108. package/static/v3/css/lattice.shell.css +1 -1
  109. package/static/v3/css/{lattice.tokens.e7018963.css → lattice.tokens.304cbc40.css} +3 -0
  110. package/static/v3/css/lattice.tokens.css +3 -0
  111. package/static/v3/css/{lattice.views.22f69117.css → lattice.views.0a18b6c5.css} +2 -2
  112. package/static/v3/css/lattice.views.css +2 -2
  113. package/static/v3/index.html +3 -4
  114. package/static/v3/js/{app.c541f955.js → app.c5c80c46.js} +1 -1
  115. package/static/v3/js/core/{api.33d6320e.js → api.ba0fbf14.js} +58 -1
  116. package/static/v3/js/core/api.js +57 -0
  117. package/static/v3/js/core/i18n.880e1fec.js +575 -0
  118. package/static/v3/js/core/i18n.js +575 -0
  119. package/static/v3/js/core/routes.37522821.js +101 -0
  120. package/static/v3/js/core/routes.js +71 -63
  121. package/static/v3/js/core/{shell.8c163e0e.js → shell.e3f6bbfa.js} +68 -39
  122. package/static/v3/js/core/shell.js +66 -37
  123. package/static/v3/js/core/{store.34ebd5e6.js → store.7b2aa044.js} +11 -1
  124. package/static/v3/js/core/store.js +11 -1
  125. package/static/v3/js/views/account.eff40715.js +143 -0
  126. package/static/v3/js/views/account.js +143 -0
  127. package/static/v3/js/views/activity.0d271ef9.js +67 -0
  128. package/static/v3/js/views/activity.js +67 -0
  129. package/static/v3/js/views/{admin-users.03bac88c.js → admin-users.f7ac7b43.js} +4 -6
  130. package/static/v3/js/views/admin-users.js +4 -6
  131. package/static/v3/js/views/{agents.014d0b74.js → agents.17c5288d.js} +35 -12
  132. package/static/v3/js/views/agents.js +35 -12
  133. package/static/v3/js/views/{chat.e6dd7dd0.js → chat.e250e2cc.js} +23 -0
  134. package/static/v3/js/views/chat.js +23 -0
  135. package/static/v3/js/views/graph-canvas.17c15d65.js +509 -0
  136. package/static/v3/js/views/graph-canvas.js +509 -0
  137. package/static/v3/js/views/{hybrid-search.b22b97e0.js → hybrid-search.2fb63ed9.js} +1 -2
  138. package/static/v3/js/views/hybrid-search.js +1 -2
  139. package/static/v3/js/views/{knowledge-graph.a96040a5.js → knowledge-graph.4d09c537.js} +60 -44
  140. package/static/v3/js/views/knowledge-graph.js +60 -44
  141. package/static/v3/js/views/network.52a4f181.js +97 -0
  142. package/static/v3/js/views/network.js +97 -0
  143. package/static/v3/js/views/{planning.9ac3e313.js → planning.4876fd77.js} +26 -5
  144. package/static/v3/js/views/planning.js +26 -5
  145. package/static/v3/js/views/runs.b63b2afa.js +144 -0
  146. package/static/v3/js/views/runs.js +144 -0
  147. package/static/v3/js/views/{settings.8631fa5e.js → settings.b7140634.js} +7 -8
  148. package/static/v3/js/views/settings.js +7 -8
  149. package/static/v3/js/views/snapshots.6f5db095.js +135 -0
  150. package/static/v3/js/views/snapshots.js +135 -0
  151. package/static/v3/js/views/{workflows.26c57290.js → workflows.7752225a.js} +87 -2
  152. package/static/v3/js/views/workflows.js +87 -2
  153. package/static/v3/js/views/workspace-admin.c466029b.js +156 -0
  154. package/static/v3/js/views/workspace-admin.js +156 -0
  155. package/static/vendor/chart.umd.min.js +20 -0
  156. package/static/vendor/fonts/inter-latin-300-normal.woff2 +0 -0
  157. package/static/vendor/fonts/inter-latin-400-normal.woff2 +0 -0
  158. package/static/vendor/fonts/inter-latin-500-normal.woff2 +0 -0
  159. package/static/vendor/fonts/inter-latin-600-normal.woff2 +0 -0
  160. package/static/vendor/fonts/inter-latin-700-normal.woff2 +0 -0
  161. package/static/vendor/fonts/inter-latin-800-normal.woff2 +0 -0
  162. package/static/vendor/fonts/inter.css +44 -0
  163. package/static/vendor/icons/tabler-icons.min.css +4 -0
  164. package/static/vendor/icons/tabler-icons.woff2 +0 -0
  165. package/static/vendor/marked.min.js +69 -0
  166. package/telegram_bot.py +1 -2
  167. package/tools/commands.py +4 -2
  168. package/tools/computer.py +1 -1
  169. package/tools/documents.py +1 -3
  170. package/tools/filesystem.py +0 -4
  171. package/tools/knowledge.py +1 -3
  172. package/tools/network.py +1 -3
  173. package/codex_telegram_bot.py +0 -195
  174. package/docs/assets/v3.4.0/agent-run.png +0 -0
  175. package/docs/assets/v3.4.0/agents.png +0 -0
  176. package/docs/assets/v3.4.0/before/chat-before.png +0 -0
  177. package/docs/assets/v3.4.0/before/files-before.png +0 -0
  178. package/docs/assets/v3.4.0/chat.png +0 -0
  179. package/docs/assets/v3.4.0/connect-folder.png +0 -0
  180. package/docs/assets/v3.4.0/files.png +0 -0
  181. package/docs/assets/v3.4.0/home.png +0 -0
  182. package/docs/assets/v3.4.0/hooks-dispatch.png +0 -0
  183. package/docs/assets/v3.4.0/knowledge-graph.png +0 -0
  184. package/docs/assets/v3.4.0/local-agent.png +0 -0
  185. package/docs/assets/v3.4.0/memory.png +0 -0
  186. package/docs/assets/v3.4.0/settings.png +0 -0
  187. package/docs/assets/v3.4.0/vision-input.png +0 -0
  188. package/docs/assets/v3.4.0/workflows.png +0 -0
  189. package/docs/assets/v3.4.1/e2e_runtime_log.txt +0 -42
  190. package/docs/assets/v3.4.1/hooks-dispatch.png +0 -0
  191. package/docs/assets/v3.4.1/local-agent.png +0 -0
  192. package/docs/images/admin-dashboard.png +0 -0
  193. package/docs/images/architecture.png +0 -0
  194. package/docs/images/enterprise.png +0 -0
  195. package/docs/images/graph.png +0 -0
  196. package/docs/images/hero.gif +0 -0
  197. package/docs/images/knowledge-graph.png +0 -0
  198. package/docs/images/lattice-ai-demo.gif +0 -0
  199. package/docs/images/lattice-ai-hero.png +0 -0
  200. package/docs/images/logo.svg +0 -33
  201. package/docs/images/mobile-responsive.png +0 -0
  202. package/docs/images/model-recommendation.png +0 -0
  203. package/docs/images/onboarding.png +0 -0
  204. package/docs/images/organization.png +0 -0
  205. package/docs/images/pipeline.png +0 -0
  206. package/docs/images/screenshot-admin.png +0 -0
  207. package/docs/images/screenshot-chat.png +0 -0
  208. package/docs/images/screenshot-graph.png +0 -0
  209. package/docs/images/skills.png +0 -0
  210. package/docs/images/workspace-dark.png +0 -0
  211. package/docs/images/workspace-light.png +0 -0
  212. package/docs/images/workspace.png +0 -0
  213. package/requirements.txt +0 -16
  214. package/static/account.html +0 -115
  215. package/static/activity.html +0 -73
  216. package/static/admin.html +0 -488
  217. package/static/agents.html +0 -139
  218. package/static/chat.html +0 -844
  219. package/static/css/reference/account.css +0 -439
  220. package/static/css/reference/admin.css +0 -610
  221. package/static/css/reference/base.css +0 -1661
  222. package/static/css/reference/chat.css +0 -4623
  223. package/static/css/reference/graph.css +0 -1016
  224. package/static/css/responsive.css +0 -861
  225. package/static/graph.html +0 -124
  226. package/static/platform.css +0 -104
  227. package/static/plugins.html +0 -136
  228. package/static/scripts/account.js +0 -238
  229. package/static/scripts/admin.js +0 -1614
  230. package/static/scripts/chat.js +0 -5081
  231. package/static/scripts/graph.js +0 -1804
  232. package/static/scripts/platform.js +0 -64
  233. package/static/scripts/ux.js +0 -167
  234. package/static/scripts/workspace.js +0 -948
  235. package/static/v3/js/core/routes.2ce3815a.js +0 -93
  236. package/static/workflows.html +0 -146
  237. package/static/workspace.css +0 -1121
  238. package/static/workspace.html +0 -357
@@ -0,0 +1,401 @@
1
+ from __future__ import annotations
2
+
3
+ # ruff: noqa: F403,F405
4
+
5
+ from ._kg_common import * # noqa: F403,F401
6
+
7
+
8
class KnowledgeGraphProvenanceMixin:
    """Provenance audit trail, logical export/import and binary backup.

    The mixin relies on the host class for ``_connect()``, ``_read_tables()``,
    ``_upsert_node()``, ``_upsert_chunk()``, ``_upsert_edge()`` and
    ``clear_all()``; none of those is defined here. Row access by column name
    (``row["id"]``, ``dict(row)``) means the connections handed out by
    ``_connect()`` must use a mapping-style row factory such as
    ``sqlite3.Row``.
    """

    # Shared by record_provenance() and import_graph_data() so the column list
    # and its 19 placeholders cannot drift apart.
    _PROVENANCE_UPSERT_SQL = """
        INSERT OR REPLACE INTO ingestion_provenance(
            id, node_id, source_type, source_uri, content_hash, title, pipeline,
            owner, workspace_id, captured_at, modified_at, embedded, linked,
            duplicate, agent_used, chunk_count, permissions_json, metadata_json, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """

    def record_provenance(
        self,
        *,
        node_id: str,
        source_type: str,
        pipeline: str = "unified-ingestion",
        source_uri: Optional[str] = None,
        content_hash: Optional[str] = None,
        title: Optional[str] = None,
        owner: Optional[str] = None,
        workspace_id: Optional[str] = None,
        captured_at: Optional[str] = None,
        modified_at: Optional[str] = None,
        embedded: bool = False,
        linked: bool = False,
        duplicate: bool = False,
        agent_used: Optional[str] = None,
        chunk_count: int = 0,
        permissions: Optional[Dict[str, Any]] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Append a provenance record for an ingested node (audit trail).

        The record id is ``prov:`` plus the first 24 hex characters of a hash
        over ``node_id``, ``content_hash`` and the current timestamp. Because
        the statement is ``INSERT OR REPLACE``, two calls that share all three
        values overwrite each other instead of producing two rows.

        Returns:
            ``{"id", "node_id", "created_at"}`` for the stored record.
        """
        now = _now()
        prov_basis = f"{node_id}|{content_hash or ''}|{now}"
        prov_id = f"prov:{_sha256_text(prov_basis)[:24]}"
        values = (
            prov_id,
            node_id,
            source_type,
            source_uri,
            content_hash,
            title,
            pipeline,
            owner,
            workspace_id,
            captured_at,
            modified_at,
            # Booleans are stored as 0/1 integers.
            1 if embedded else 0,
            1 if linked else 0,
            1 if duplicate else 0,
            agent_used,
            int(chunk_count or 0),
            _json(permissions or {}),
            _json(metadata or {}),
            now,
        )
        with self._connect() as conn:
            conn.execute(self._PROVENANCE_UPSERT_SQL, values)
        return {"id": prov_id, "node_id": node_id, "created_at": now}

    @staticmethod
    def _provenance_row(row: sqlite3.Row) -> Dict[str, Any]:
        """Convert one ``ingestion_provenance`` row into a plain dict.

        Integer flags become booleans and the two ``*_json`` columns are
        decoded with ``_safe_loads`` and exposed without the suffix.
        """
        return {
            "id": row["id"],
            "node_id": row["node_id"],
            "source_type": row["source_type"],
            "source_uri": row["source_uri"],
            "content_hash": row["content_hash"],
            "title": row["title"],
            "pipeline": row["pipeline"],
            "owner": row["owner"],
            "workspace_id": row["workspace_id"],
            "captured_at": row["captured_at"],
            "modified_at": row["modified_at"],
            "embedded": bool(row["embedded"]),
            "linked": bool(row["linked"]),
            "duplicate": bool(row["duplicate"]),
            "agent_used": row["agent_used"],
            "chunk_count": row["chunk_count"],
            "permissions": _safe_loads(row["permissions_json"]),
            "metadata": _safe_loads(row["metadata_json"]),
            "created_at": row["created_at"],
        }

    @staticmethod
    def _provenance_counts_by_source(conn) -> Dict[str, int]:
        """Return ``{source_type: record count}`` over all provenance rows."""
        cursor = conn.execute(
            "SELECT source_type, COUNT(*) AS c FROM ingestion_provenance GROUP BY source_type"
        )
        return {row["source_type"]: row["c"] for row in cursor.fetchall()}

    def get_provenance(self, node_id: str) -> Optional[Dict[str, Any]]:
        """Return the most recent provenance record for a node, or None.

        "Most recent" is the greatest ``created_at``; ``rowid`` breaks ties.
        """
        with self._connect() as conn:
            row = conn.execute(
                "SELECT * FROM ingestion_provenance WHERE node_id = ? "
                "ORDER BY created_at DESC, rowid DESC LIMIT 1",
                (node_id,),
            ).fetchone()
        if row is None:
            return None
        return self._provenance_row(row)

    def list_provenance(
        self, *, limit: int = 100, source_type: Optional[str] = None
    ) -> Dict[str, Any]:
        """Recent provenance records (newest first), optionally by source_type.

        ``limit`` is clamped to 1..1000; a falsy value falls back to 100.

        Returns:
            ``{"items": [...], "count": len(items)}``.
        """
        limit = max(1, min(int(limit or 100), 1000))
        where_clause = ""
        params = []
        if source_type:
            where_clause = "WHERE source_type = ? "
            params.append(source_type)
        params.append(limit)
        sql = (
            "SELECT * FROM ingestion_provenance "
            + where_clause
            + "ORDER BY created_at DESC, rowid DESC LIMIT ?"
        )
        with self._connect() as conn:
            rows = conn.execute(sql, tuple(params)).fetchall()
        return {
            "items": [self._provenance_row(r) for r in rows],
            "count": len(rows),
        }

    def provenance_coverage(self) -> Dict[str, Any]:
        """How much of the brain is explainable: nodes with vs without
        provenance, per node type — the honesty metric for 'every source goes
        through the pipeline'. Pre-v4 nodes ingested before provenance existed
        legitimately count as uncovered.

        Returns:
            ``total_nodes``, ``nodes_with_provenance``, ``coverage_ratio``
            (rounded to 4 places, ``None`` when there are no nodes),
            ``uncovered_by_type`` (at most the 20 largest types) and
            ``provenance_by_source_type``.
        """
        nt, _ = self._read_tables()
        # A NULL node_id inside a NOT IN subquery makes the predicate UNKNOWN
        # for every row, which would report every type as fully covered.
        # Excluding NULLs keeps the count honest should such a row exist
        # (whether the column is declared NOT NULL is not visible here).
        covered_ids = (
            "SELECT node_id FROM ingestion_provenance WHERE node_id IS NOT NULL"
        )
        with self._connect() as conn:
            total = conn.execute(f"SELECT COUNT(*) FROM {nt}").fetchone()[0]
            covered = conn.execute(
                f"SELECT COUNT(*) FROM {nt} WHERE id IN ({covered_ids})"
            ).fetchone()[0]
            uncovered_by_type = {
                row["type"]: row["c"]
                for row in conn.execute(
                    f"""
                    SELECT type, COUNT(*) AS c FROM {nt}
                    WHERE id NOT IN ({covered_ids})
                    GROUP BY type ORDER BY c DESC LIMIT 20
                    """
                ).fetchall()
            }
            by_source = self._provenance_counts_by_source(conn)
        return {
            "total_nodes": total,
            "nodes_with_provenance": covered,
            "coverage_ratio": round(covered / total, 4) if total else None,
            "uncovered_by_type": uncovered_by_type,
            "provenance_by_source_type": by_source,
        }

    def provenance_stats(self) -> Dict[str, Any]:
        """Aggregate provenance counts for the Knowledge Graph status surface.

        Returns:
            ``total``, ``by_source_type``, ``embedded``, ``duplicates`` and
            ``last_ingested_at`` (``None`` when the table is empty).
        """

        with self._connect() as conn:

            def count(where: str = "") -> int:
                return conn.execute(
                    "SELECT COUNT(*) AS c FROM ingestion_provenance" + where
                ).fetchone()["c"]

            total = count()
            by_source = self._provenance_counts_by_source(conn)
            embedded = count(" WHERE embedded = 1")
            duplicates = count(" WHERE duplicate = 1")
            last = conn.execute(
                "SELECT created_at FROM ingestion_provenance ORDER BY created_at DESC LIMIT 1"
            ).fetchone()
        return {
            "total": total,
            "by_source_type": by_source,
            "embedded": embedded,
            "duplicates": duplicates,
            "last_ingested_at": last["created_at"] if last else None,
        }

    def schema_versions(self) -> Dict[str, Any]:
        """Versions an exporter stamps and an importer validates against."""
        try:
            from kg_schema import EMBED_DIM as _EMBED_DIM, KG_SCHEMA_V2_VERSION as _V2
        except Exception:  # pragma: no cover - kg_schema always importable in practice
            # Deliberately broad best-effort fallback: report hard-coded
            # defaults rather than fail when kg_schema cannot be loaded.
            _EMBED_DIM, _V2 = 1024, 2
        return {
            "graph_schema_version": GRAPH_SCHEMA_VERSION,
            "db_format_version": _KG_DB_FORMAT_VERSION,
            "kg_v2_schema_version": _V2,
            "projection_version": _PROJECTION_VERSION,
            "embed_dim": _EMBED_DIM,
        }

    def export_graph_data(
        self, *, workspace_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """Raw, lossless logical export of the graph (nodes/edges/chunks/sources/
        provenance). Vector embeddings are intentionally omitted — they are
        re-derived on import — so the artifact stays portable and small. Use
        :meth:`backup_database` for a faithful binary copy incl. embeddings.

        ``workspace_id`` REALLY filters (v4): the artifact contains only nodes
        scoped to that workspace plus legacy-global rows (NULL scope, readable
        machine-wide by definition), with edges/chunks/provenance restricted to
        the surviving nodes. Pre-v4 this parameter was stamped into the header
        while the data exported everything — a header that lied.

        Returns:
            A dict with ``nodes``, ``edges``, ``chunks``, ``knowledge_sources``
            and ``provenance`` row lists plus ``counts`` (length of each list).
        """
        with self._connect() as conn:

            def rows(table: str):
                cursor = conn.execute(f"SELECT * FROM {table}")
                return [dict(r) for r in cursor.fetchall()]

            # None means "no workspace filter"; otherwise the set of node ids
            # that survive. Scope lives on nodes_v2, the data on the base tables.
            keep_ids = None
            if workspace_id:
                keep_ids = {
                    row["id"]
                    for row in conn.execute(
                        "SELECT id FROM nodes_v2 WHERE workspace_id = ? OR workspace_id IS NULL",
                        (workspace_id,),
                    ).fetchall()
                }

            def scoped(table: str, *id_columns: str):
                # Keep a row only when every referenced node id survives.
                records = rows(table)
                if keep_ids is None:
                    return records
                return [
                    r
                    for r in records
                    if all(r[col] in keep_ids for col in id_columns)
                ]

            data = {
                "nodes": scoped("nodes", "id"),
                "edges": scoped("edges", "from_node", "to_node"),
                "chunks": scoped("chunks", "source_node"),
                # NOTE(review): knowledge_sources is exported unfiltered even
                # when workspace_id is given — confirm that is intended.
                "knowledge_sources": rows("knowledge_sources"),
                "provenance": scoped("ingestion_provenance", "node_id"),
            }
        data["counts"] = {k: len(v) for k, v in data.items()}
        return data

    @staticmethod
    def _require_keys(records, keys) -> None:
        """Raise ``KeyError`` (or ``TypeError`` for a non-mapping record) if any
        record lacks one of ``keys`` — the same errors the import loop would
        raise, only earlier."""
        for record in records:
            for key in keys:
                _ = record[key]

    @staticmethod
    def _edge_weight(value) -> float:
        """Edge weight as a float; only a missing/empty value defaults to 1.0.

        A truthiness test (``value or 1.0``) would silently turn a legitimate
        stored weight of 0 into 1.0.
        """
        if value is None or value == "":
            return 1.0
        return float(value)

    def import_graph_data(
        self, data: Dict[str, Any], *, mode: str = "merge", dry_run: bool = False
    ) -> Dict[str, Any]:
        """Import a logical export back into the store.

        ``mode='merge'`` upserts on top of existing data (id collisions update);
        ``mode='replace'`` clears the graph first. ``dry_run=True`` reports the
        plan without writing. Refuses artifacts from a NEWER graph schema than
        this build.

        Required keys of every record are checked before anything is written,
        so a malformed artifact can no longer wipe the graph in ``replace``
        mode and then fail halfway through.

        Returns:
            The plan (``mode`` plus per-section record counts) with either
            ``dry_run: True`` or ``imported: True`` added.

        Raises:
            ValueError: the artifact's ``graph_schema_version`` is newer than
                ``GRAPH_SCHEMA_VERSION``.
            KeyError: a record is missing a required key.
        """
        nodes = data.get("nodes") or []
        edges = data.get("edges") or []
        chunks = data.get("chunks") or []
        sources = data.get("knowledge_sources") or []
        provenance = data.get("provenance") or []

        header = data.get("header") or {}
        incoming_schema = header.get("graph_schema_version")
        if isinstance(incoming_schema, int) and incoming_schema > GRAPH_SCHEMA_VERSION:
            raise ValueError(
                f"Artifact graph_schema_version {incoming_schema} is newer than this "
                f"build ({GRAPH_SCHEMA_VERSION}); refusing to import."
            )

        plan = {
            "mode": mode,
            "nodes": len(nodes),
            "edges": len(edges),
            "chunks": len(chunks),
            "knowledge_sources": len(sources),
            "provenance": len(provenance),
        }
        if dry_run:
            plan["dry_run"] = True
            return plan

        # Fail before the destructive clear_all(), in the same section order
        # the write loop below uses.
        self._require_keys(nodes, ("id", "type"))
        self._require_keys(chunks, ("id", "source_node"))
        self._require_keys(edges, ("from_node", "to_node", "type"))
        self._require_keys(sources, ("id", "root_path", "os_type"))
        self._require_keys(provenance, ("id", "node_id", "source_type"))

        if mode == "replace":
            self.clear_all()

        with self._connect() as conn:
            for n in nodes:
                self._upsert_node(
                    conn,
                    n["id"],
                    n["type"],
                    n.get("title") or "",
                    summary=n.get("summary") or "",
                    metadata=_safe_loads(n.get("metadata_json")),
                    raw=_safe_loads(n.get("raw_json")),
                )
            for c in chunks:
                self._upsert_chunk(
                    conn,
                    chunk_id=c["id"],
                    source_node=c["source_node"],
                    text=c.get("text") or "",
                    metadata=_safe_loads(c.get("metadata_json")),
                )
            for e in edges:
                self._upsert_edge(
                    conn,
                    e["from_node"],
                    e["to_node"],
                    e["type"],
                    weight=self._edge_weight(e.get("weight")),
                    metadata=_safe_loads(e.get("metadata_json")),
                )
            for s in sources:
                conn.execute(
                    """
                    INSERT OR REPLACE INTO knowledge_sources(
                        id, root_path, os_type, drive_id, label, status, include_ocr,
                        watch_enabled, consent_json, created_at, updated_at, last_scanned_at)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                    (
                        s["id"],
                        s["root_path"],
                        s["os_type"],
                        s.get("drive_id"),
                        s.get("label"),
                        s.get("status") or "active",
                        int(s.get("include_ocr") or 0),
                        int(s.get("watch_enabled") or 0),
                        s.get("consent_json") or "{}",
                        s.get("created_at") or _now(),
                        s.get("updated_at") or _now(),
                        s.get("last_scanned_at"),
                    ),
                )
            for p in provenance:
                conn.execute(
                    self._PROVENANCE_UPSERT_SQL,
                    (
                        p["id"],
                        p["node_id"],
                        p["source_type"],
                        p.get("source_uri"),
                        p.get("content_hash"),
                        p.get("title"),
                        p.get("pipeline") or "import",
                        p.get("owner"),
                        p.get("workspace_id"),
                        p.get("captured_at"),
                        p.get("modified_at"),
                        int(p.get("embedded") or 0),
                        int(p.get("linked") or 0),
                        int(p.get("duplicate") or 0),
                        p.get("agent_used"),
                        int(p.get("chunk_count") or 0),
                        p.get("permissions_json") or "{}",
                        p.get("metadata_json") or "{}",
                        p.get("created_at") or _now(),
                    ),
                )
        plan["imported"] = True
        return plan

    def backup_database(self, dest_path) -> Path:
        """Write a clean, standalone snapshot of the live DB to ``dest_path``.

        Uses ``VACUUM INTO`` (after a full WAL checkpoint) so the snapshot is a
        defragmented, rollback-journal-mode database with no companion -wal/-shm
        — which restores cleanly by a plain file copy. Captures all data incl.
        the vector_embeddings BLOBs.

        The snapshot is first written next to the target under a ``.partial``
        name and swapped in only on success, so a failed backup leaves any
        previous file at ``dest_path`` untouched.

        Returns:
            ``dest_path`` as a :class:`~pathlib.Path`.
        """
        dest = Path(dest_path)
        dest.parent.mkdir(parents=True, exist_ok=True)
        staging = dest.with_name(dest.name + ".partial")
        if staging.exists():
            staging.unlink()  # VACUUM INTO requires the target to not exist
        conn = self._connect()
        try:
            conn.execute("PRAGMA wal_checkpoint(FULL)")
            conn.execute("VACUUM INTO ?", (str(staging),))
        except Exception:
            # Drop a half-written snapshot; the previous backup stays in place.
            if staging.exists():
                staging.unlink()
            raise
        finally:
            conn.close()
        # Path.replace overwrites an existing destination.
        staging.replace(dest)
        return dest