ltcai 4.0.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -33
- package/docs/CHANGELOG.md +64 -0
- package/docs/REALTIME_COLLABORATION.md +3 -3
- package/docs/V3_FRONTEND.md +9 -8
- package/docs/V4_DIGITAL_BRAIN_RECOVERY.md +86 -43
- package/docs/kg-schema.md +6 -2
- package/docs/spec-vs-impl.md +10 -10
- package/kg_schema.py +2 -603
- package/knowledge_graph.py +37 -4958
- package/latticeai/__init__.py +1 -1
- package/latticeai/api/admin.py +15 -16
- package/latticeai/api/agents.py +13 -6
- package/latticeai/api/auth.py +19 -11
- package/latticeai/api/invitations.py +100 -0
- package/latticeai/api/knowledge_graph.py +4 -11
- package/latticeai/api/plugins.py +3 -6
- package/latticeai/api/realtime.py +4 -7
- package/latticeai/api/static_routes.py +9 -12
- package/latticeai/api/ui_redirects.py +26 -0
- package/latticeai/api/workflow_designer.py +39 -6
- package/latticeai/api/workspace.py +24 -10
- package/latticeai/app_factory.py +88 -17
- package/latticeai/brain/_kg_common.py +1123 -0
- package/latticeai/brain/discovery.py +1455 -0
- package/latticeai/brain/documents.py +218 -0
- package/latticeai/brain/ingest.py +644 -0
- package/latticeai/brain/projection.py +561 -0
- package/latticeai/brain/provenance.py +401 -0
- package/latticeai/brain/retrieval.py +1316 -0
- package/latticeai/brain/schema.py +640 -0
- package/latticeai/brain/store.py +216 -0
- package/latticeai/brain/write_master.py +225 -0
- package/latticeai/core/invitations.py +131 -0
- package/latticeai/core/marketplace.py +1 -1
- package/latticeai/core/multi_agent.py +1 -1
- package/latticeai/core/policy.py +54 -0
- package/latticeai/core/realtime.py +65 -44
- package/latticeai/core/sessions.py +31 -5
- package/latticeai/core/users.py +147 -0
- package/latticeai/core/workspace_os.py +420 -20
- package/latticeai/services/agent_runtime.py +242 -4
- package/latticeai/services/run_executor.py +328 -0
- package/latticeai/services/workspace_service.py +27 -19
- package/package.json +2 -14
- package/scripts/lint_v3.mjs +23 -0
- package/static/v3/asset-manifest.json +21 -14
- package/static/v3/js/{app.356e6452.js → app.c5c80c46.js} +1 -1
- package/static/v3/js/core/{api.7a308b89.js → api.ba0fbf14.js} +58 -1
- package/static/v3/js/core/api.js +57 -0
- package/static/v3/js/core/i18n.880e1fec.js +575 -0
- package/static/v3/js/core/i18n.js +575 -0
- package/static/v3/js/core/routes.37522821.js +101 -0
- package/static/v3/js/core/routes.js +71 -63
- package/static/v3/js/core/{shell.a1657f20.js → shell.e3f6bbfa.js} +67 -38
- package/static/v3/js/core/shell.js +65 -36
- package/static/v3/js/core/{store.204a08b2.js → store.7b2aa044.js} +10 -0
- package/static/v3/js/core/store.js +10 -0
- package/static/v3/js/views/account.eff40715.js +143 -0
- package/static/v3/js/views/account.js +143 -0
- package/static/v3/js/views/activity.0d271ef9.js +67 -0
- package/static/v3/js/views/activity.js +67 -0
- package/static/v3/js/views/{admin-users.03bac88c.js → admin-users.f7ac7b43.js} +4 -6
- package/static/v3/js/views/admin-users.js +4 -6
- package/static/v3/js/views/{agents.014d0b74.js → agents.17c5288d.js} +35 -12
- package/static/v3/js/views/agents.js +35 -12
- package/static/v3/js/views/{chat.e6dd7dd0.js → chat.e250e2cc.js} +23 -0
- package/static/v3/js/views/chat.js +23 -0
- package/static/v3/js/views/{knowledge-graph.5e40cbeb.js → knowledge-graph.4d09c537.js} +27 -7
- package/static/v3/js/views/knowledge-graph.js +27 -7
- package/static/v3/js/views/network.52a4f181.js +97 -0
- package/static/v3/js/views/network.js +97 -0
- package/static/v3/js/views/{planning.9ac3e313.js → planning.4876fd77.js} +26 -5
- package/static/v3/js/views/planning.js +26 -5
- package/static/v3/js/views/runs.b63b2afa.js +144 -0
- package/static/v3/js/views/runs.js +144 -0
- package/static/v3/js/views/{settings.8631fa5e.js → settings.b7140634.js} +7 -8
- package/static/v3/js/views/settings.js +7 -8
- package/static/v3/js/views/snapshots.6f5db095.js +135 -0
- package/static/v3/js/views/snapshots.js +135 -0
- package/static/v3/js/views/{workflows.26c57290.js → workflows.7752225a.js} +87 -2
- package/static/v3/js/views/workflows.js +87 -2
- package/static/v3/js/views/workspace-admin.c466029b.js +156 -0
- package/static/v3/js/views/workspace-admin.js +156 -0
- package/static/account.html +0 -113
- package/static/activity.html +0 -73
- package/static/admin.html +0 -486
- package/static/agents.html +0 -139
- package/static/chat.html +0 -841
- package/static/css/reference/account.css +0 -439
- package/static/css/reference/admin.css +0 -610
- package/static/css/reference/base.css +0 -1661
- package/static/css/reference/chat.css +0 -4623
- package/static/css/reference/graph.css +0 -1016
- package/static/css/responsive.css +0 -861
- package/static/graph.html +0 -122
- package/static/platform.css +0 -104
- package/static/plugins.html +0 -136
- package/static/scripts/account.js +0 -238
- package/static/scripts/admin.js +0 -1614
- package/static/scripts/chat.js +0 -5081
- package/static/scripts/graph.js +0 -1804
- package/static/scripts/platform.js +0 -64
- package/static/scripts/ux.js +0 -167
- package/static/scripts/workspace.js +0 -948
- package/static/v3/js/core/routes.7222343d.js +0 -93
- package/static/workflows.html +0 -146
- package/static/workspace.css +0 -1121
- package/static/workspace.html +0 -357
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
# ruff: noqa: F403,F405
|
|
4
|
+
|
|
5
|
+
from ._kg_common import * # noqa: F403,F401
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class KnowledgeGraphProvenanceMixin:
|
|
9
|
+
def record_provenance(
    self,
    *,
    node_id: str,
    source_type: str,
    pipeline: str = "unified-ingestion",
    source_uri: Optional[str] = None,
    content_hash: Optional[str] = None,
    title: Optional[str] = None,
    owner: Optional[str] = None,
    workspace_id: Optional[str] = None,
    captured_at: Optional[str] = None,
    modified_at: Optional[str] = None,
    embedded: bool = False,
    linked: bool = False,
    duplicate: bool = False,
    agent_used: Optional[str] = None,
    chunk_count: int = 0,
    permissions: Optional[Dict[str, Any]] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Store one ``ingestion_provenance`` row for an ingested node.

    The row id is ``prov:`` followed by the first 24 characters of
    ``_sha256_text`` applied to ``node_id|content_hash|timestamp``, so the
    same node/hash pair recorded at a different timestamp gets a new id.
    The three boolean flags are persisted as 0/1, and ``permissions`` /
    ``metadata`` are serialised with ``_json`` (``None`` becomes ``{}``).

    Returns:
        A dict with the new record's ``id``, its ``node_id`` and the
        ``created_at`` timestamp that was written.
    """
    timestamp = _now()
    digest = _sha256_text(f"{node_id}|{content_hash or ''}|{timestamp}")
    record_id = f"prov:{digest[:24]}"
    statement = """
        INSERT OR REPLACE INTO ingestion_provenance(
            id, node_id, source_type, source_uri, content_hash, title, pipeline,
            owner, workspace_id, captured_at, modified_at, embedded, linked,
            duplicate, agent_used, chunk_count, permissions_json, metadata_json, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """
    with self._connect() as conn:
        # Values are listed in exactly the column order of the statement above.
        values = (
            record_id,
            node_id,
            source_type,
            source_uri,
            content_hash,
            title,
            pipeline,
            owner,
            workspace_id,
            captured_at,
            modified_at,
            int(bool(embedded)),
            int(bool(linked)),
            int(bool(duplicate)),
            agent_used,
            int(chunk_count or 0),
            _json(permissions or {}),
            _json(metadata or {}),
            timestamp,
        )
        conn.execute(statement, values)
    return {"id": record_id, "node_id": node_id, "created_at": timestamp}
|
|
66
|
+
|
|
67
|
+
@staticmethod
def _provenance_row(row: sqlite3.Row) -> Dict[str, Any]:
    """Convert an ``ingestion_provenance`` row into a plain dict.

    Most columns are copied as-is. ``embedded``, ``linked`` and
    ``duplicate`` are coerced to ``bool``, and the ``permissions_json`` /
    ``metadata_json`` columns are decoded with ``_safe_loads`` and exposed
    as ``permissions`` / ``metadata``.
    """
    # Key insertion order matches the column order used when writing rows.
    record: Dict[str, Any] = {
        column: row[column]
        for column in (
            "id",
            "node_id",
            "source_type",
            "source_uri",
            "content_hash",
            "title",
            "pipeline",
            "owner",
            "workspace_id",
            "captured_at",
            "modified_at",
        )
    }
    for flag in ("embedded", "linked", "duplicate"):
        record[flag] = bool(row[flag])
    record["agent_used"] = row["agent_used"]
    record["chunk_count"] = row["chunk_count"]
    record["permissions"] = _safe_loads(row["permissions_json"])
    record["metadata"] = _safe_loads(row["metadata_json"])
    record["created_at"] = row["created_at"]
    return record
|
|
90
|
+
|
|
91
|
+
def get_provenance(self, node_id: str) -> Optional[Dict[str, Any]]:
    """Fetch the newest provenance record for ``node_id``.

    "Newest" means highest ``created_at``, with ``rowid`` breaking ties.
    Returns ``None`` when the node has no provenance rows.
    """
    query = (
        "SELECT * FROM ingestion_provenance WHERE node_id = ? "
        "ORDER BY created_at DESC, rowid DESC LIMIT 1"
    )
    with self._connect() as conn:
        latest = conn.execute(query, (node_id,)).fetchone()
        if not latest:
            return None
        return self._provenance_row(latest)
|
|
100
|
+
|
|
101
|
+
def list_provenance(
    self, *, limit: int = 100, source_type: Optional[str] = None
) -> Dict[str, Any]:
    """List provenance records, newest first.

    ``limit`` is clamped to the range 1..1000 (a falsy value falls back to
    100). When ``source_type`` is truthy, only rows with that source type
    are returned.

    Returns:
        ``{"items": [...], "count": n}`` where each item is produced by
        ``_provenance_row``.
    """
    capped = int(limit or 100)
    capped = min(capped, 1000)
    capped = max(capped, 1)

    # Assemble the statement once; the WHERE clause is the only optional part.
    sql = "SELECT * FROM ingestion_provenance "
    params = []
    if source_type:
        sql += "WHERE source_type = ? "
        params.append(source_type)
    sql += "ORDER BY created_at DESC, rowid DESC LIMIT ?"
    params.append(capped)

    with self._connect() as conn:
        fetched = conn.execute(sql, tuple(params)).fetchall()
        items = [self._provenance_row(record) for record in fetched]
        return {"items": items, "count": len(fetched)}
|
|
123
|
+
|
|
124
|
+
def provenance_coverage(self) -> Dict[str, Any]:
    """Report how much of the brain is explainable.

    Counts nodes with vs without provenance, per node type — the honesty
    metric for 'every source goes through the pipeline'. Pre-v4 nodes
    ingested before provenance existed legitimately count as uncovered.

    Returns:
        A dict with ``total_nodes``, ``nodes_with_provenance``,
        ``coverage_ratio`` (rounded to 4 places, ``None`` when there are no
        nodes), ``uncovered_by_type`` (top 20 node types by uncovered
        count) and ``provenance_by_source_type``.
    """
    # The node table name comes from the store itself, not from user input.
    nt, _ = self._read_tables()
    # Correlated EXISTS / NOT EXISTS instead of IN / NOT IN: with NOT IN, a
    # single provenance row whose node_id is NULL makes the predicate never
    # true, which would silently report zero uncovered nodes.
    has_provenance = (
        "EXISTS (SELECT 1 FROM ingestion_provenance AS p WHERE p.node_id = n.id)"
    )
    with self._connect() as conn:
        total = conn.execute(f"SELECT COUNT(*) FROM {nt}").fetchone()[0]
        covered = conn.execute(
            f"SELECT COUNT(*) FROM {nt} AS n WHERE {has_provenance}"
        ).fetchone()[0]
        uncovered_by_type = {
            row["type"]: row["c"]
            for row in conn.execute(
                f"""
                SELECT n.type AS type, COUNT(*) AS c FROM {nt} AS n
                WHERE NOT {has_provenance}
                GROUP BY n.type ORDER BY c DESC LIMIT 20
                """
            ).fetchall()
        }
        by_source = {
            row["source_type"]: row["c"]
            for row in conn.execute(
                "SELECT source_type, COUNT(*) AS c FROM ingestion_provenance GROUP BY source_type"
            ).fetchall()
        }
        return {
            "total_nodes": total,
            "nodes_with_provenance": covered,
            "coverage_ratio": round(covered / total, 4) if total else None,
            "uncovered_by_type": uncovered_by_type,
            "provenance_by_source_type": by_source,
        }
|
|
158
|
+
|
|
159
|
+
def provenance_stats(self) -> Dict[str, Any]:
    """Summarise the ``ingestion_provenance`` table for the status surface.

    Returns:
        A dict with the ``total`` row count, a ``by_source_type`` histogram,
        the number of rows flagged ``embedded`` and ``duplicate`` (reported
        as ``duplicates``), and ``last_ingested_at`` — the greatest
        ``created_at`` value, or ``None`` when the table is empty.
    """
    with self._connect() as conn:

        def count(sql: str):
            # Every counting query aliases its single column as "c".
            return conn.execute(sql).fetchone()["c"]

        total = count("SELECT COUNT(*) AS c FROM ingestion_provenance")

        by_source = {}
        histogram = conn.execute(
            "SELECT source_type, COUNT(*) AS c FROM ingestion_provenance GROUP BY source_type"
        ).fetchall()
        for entry in histogram:
            by_source[entry["source_type"]] = entry["c"]

        embedded = count(
            "SELECT COUNT(*) AS c FROM ingestion_provenance WHERE embedded = 1"
        )
        duplicates = count(
            "SELECT COUNT(*) AS c FROM ingestion_provenance WHERE duplicate = 1"
        )
        newest = conn.execute(
            "SELECT created_at FROM ingestion_provenance ORDER BY created_at DESC LIMIT 1"
        ).fetchone()

        last_ingested_at = None
        if newest:
            last_ingested_at = newest["created_at"]
        return {
            "total": total,
            "by_source_type": by_source,
            "embedded": embedded,
            "duplicates": duplicates,
            "last_ingested_at": last_ingested_at,
        }
|
|
187
|
+
|
|
188
|
+
def schema_versions(self) -> Dict[str, Any]:
    """Return the version stamps an exporter writes and an importer checks.

    ``embed_dim`` and ``kg_v2_schema_version`` are read from ``kg_schema``;
    if that import fails for any reason the fallbacks 1024 and 2 are used.
    """
    try:
        from kg_schema import EMBED_DIM as embed_dim, KG_SCHEMA_V2_VERSION as v2_version
    except Exception:  # pragma: no cover - kg_schema always importable in practice
        embed_dim = 1024
        v2_version = 2

    versions: Dict[str, Any] = {}
    versions["graph_schema_version"] = GRAPH_SCHEMA_VERSION
    versions["db_format_version"] = _KG_DB_FORMAT_VERSION
    versions["kg_v2_schema_version"] = v2_version
    versions["projection_version"] = _PROJECTION_VERSION
    versions["embed_dim"] = embed_dim
    return versions
|
|
201
|
+
|
|
202
|
+
def export_graph_data(
    self, *, workspace_id: Optional[str] = None
) -> Dict[str, Any]:
    """Produce a raw logical export of the graph tables.

    The result holds ``nodes``, ``edges``, ``chunks``, ``knowledge_sources``
    and ``provenance`` as lists of row dicts, plus a ``counts`` mapping with
    the length of each list. Vector embeddings are intentionally left out;
    use :meth:`backup_database` for a binary copy that includes them.

    When ``workspace_id`` is truthy the export is really filtered: the set
    of visible node ids is taken from ``nodes_v2`` rows whose
    ``workspace_id`` matches or is NULL (legacy-global rows). Nodes, chunks
    and provenance are kept only when they reference a visible id, and
    edges only when both endpoints are visible. ``knowledge_sources`` is
    always exported in full.
    """
    with self._connect() as conn:

        def dump(table: str):
            cursor = conn.execute(f"SELECT * FROM {table}")
            return [dict(record) for record in cursor.fetchall()]

        if not workspace_id:
            data = {
                "nodes": dump("nodes"),
                "edges": dump("edges"),
                "chunks": dump("chunks"),
                "knowledge_sources": dump("knowledge_sources"),
                "provenance": dump("ingestion_provenance"),
            }
        else:
            visible = set()
            scoped_ids = conn.execute(
                "SELECT id FROM nodes_v2 WHERE workspace_id = ? OR workspace_id IS NULL",
                (workspace_id,),
            ).fetchall()
            for record in scoped_ids:
                visible.add(record["id"])

            def scoped(table: str, *columns: str):
                # Keep a row only if every referencing column points at a visible node.
                return [
                    entry
                    for entry in dump(table)
                    if all(entry[column] in visible for column in columns)
                ]

            data = {
                "nodes": scoped("nodes", "id"),
                "edges": scoped("edges", "from_node", "to_node"),
                "chunks": scoped("chunks", "source_node"),
                "knowledge_sources": dump("knowledge_sources"),
                "provenance": scoped("ingestion_provenance", "node_id"),
            }

        # Computed before "counts" is inserted, so it covers the five tables only.
        data["counts"] = {name: len(items) for name, items in data.items()}
        return data
|
|
258
|
+
|
|
259
|
+
def import_graph_data(
    self, data: Dict[str, Any], *, mode: str = "merge", dry_run: bool = False
) -> Dict[str, Any]:
    """Import a logical export back into the store.

    ``mode='merge'`` upserts on top of existing data (id collisions update);
    ``mode='replace'`` clears the graph first via ``clear_all()``. Any other
    ``mode`` value is not rejected and behaves like a merge.
    ``dry_run=True`` reports the plan without writing.

    Args:
        data: Artifact with optional ``nodes``, ``edges``, ``chunks``,
            ``knowledge_sources``, ``provenance`` lists and an optional
            ``header`` dict. Missing or falsy sections are treated as empty.
        mode: ``"merge"`` or ``"replace"``.
        dry_run: When true, return the plan and touch nothing.

    Returns:
        The plan dict (``mode`` plus per-section row counts), with
        ``dry_run: True`` for a dry run or ``imported: True`` after writing.

    Raises:
        ValueError: If ``header['graph_schema_version']`` is an int greater
            than this build's ``GRAPH_SCHEMA_VERSION``.
    """
    nodes = data.get("nodes") or []
    edges = data.get("edges") or []
    chunks = data.get("chunks") or []
    sources = data.get("knowledge_sources") or []
    provenance = data.get("provenance") or []

    header = data.get("header") or {}
    incoming_schema = header.get("graph_schema_version")
    if isinstance(incoming_schema, int) and incoming_schema > GRAPH_SCHEMA_VERSION:
        raise ValueError(
            f"Artifact graph_schema_version {incoming_schema} is newer than this "
            f"build ({GRAPH_SCHEMA_VERSION}); refusing to import."
        )

    plan = {
        "mode": mode,
        "nodes": len(nodes),
        "edges": len(edges),
        "chunks": len(chunks),
        "knowledge_sources": len(sources),
        "provenance": len(provenance),
    }
    if dry_run:
        plan["dry_run"] = True
        return plan

    if mode == "replace":
        # NOTE(review): clear_all() runs before the import connection is
        # opened, so a failure further down leaves the graph cleared.
        self.clear_all()

    with self._connect() as conn:
        # Nodes first, then chunks and edges, which refer to node ids.
        for n in nodes:
            self._upsert_node(
                conn,
                n["id"],
                n["type"],
                n.get("title") or "",
                summary=n.get("summary") or "",
                metadata=_safe_loads(n.get("metadata_json")),
                raw=_safe_loads(n.get("raw_json")),
            )
        for c in chunks:
            self._upsert_chunk(
                conn,
                chunk_id=c["id"],
                source_node=c["source_node"],
                text=c.get("text") or "",
                metadata=_safe_loads(c.get("metadata_json")),
            )
        for e in edges:
            # Default to 1.0 only when the weight is genuinely absent. The
            # previous `weight or 1.0` also rewrote a stored weight of 0 to
            # 1.0, so an export/import round trip changed the graph.
            raw_weight = e.get("weight")
            weight_missing = raw_weight is None or raw_weight == ""
            self._upsert_edge(
                conn,
                e["from_node"],
                e["to_node"],
                e["type"],
                weight=1.0 if weight_missing else float(raw_weight),
                metadata=_safe_loads(e.get("metadata_json")),
            )
        for s in sources:
            conn.execute(
                """
                INSERT OR REPLACE INTO knowledge_sources(
                    id, root_path, os_type, drive_id, label, status, include_ocr,
                    watch_enabled, consent_json, created_at, updated_at, last_scanned_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    s["id"],
                    s["root_path"],
                    s["os_type"],
                    s.get("drive_id"),
                    s.get("label"),
                    s.get("status") or "active",
                    int(s.get("include_ocr") or 0),
                    int(s.get("watch_enabled") or 0),
                    s.get("consent_json") or "{}",
                    s.get("created_at") or _now(),
                    s.get("updated_at") or _now(),
                    s.get("last_scanned_at"),
                ),
            )
        for p in provenance:
            # Provenance rows are stored with their exported id and JSON text
            # as-is; only missing fields receive defaults.
            conn.execute(
                """
                INSERT OR REPLACE INTO ingestion_provenance(
                    id, node_id, source_type, source_uri, content_hash, title, pipeline,
                    owner, workspace_id, captured_at, modified_at, embedded, linked,
                    duplicate, agent_used, chunk_count, permissions_json, metadata_json, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    p["id"],
                    p["node_id"],
                    p["source_type"],
                    p.get("source_uri"),
                    p.get("content_hash"),
                    p.get("title"),
                    p.get("pipeline") or "import",
                    p.get("owner"),
                    p.get("workspace_id"),
                    p.get("captured_at"),
                    p.get("modified_at"),
                    int(p.get("embedded") or 0),
                    int(p.get("linked") or 0),
                    int(p.get("duplicate") or 0),
                    p.get("agent_used"),
                    int(p.get("chunk_count") or 0),
                    p.get("permissions_json") or "{}",
                    p.get("metadata_json") or "{}",
                    p.get("created_at") or _now(),
                ),
            )
    plan["imported"] = True
    return plan
|
|
382
|
+
|
|
383
|
+
def backup_database(self, dest_path) -> Path:
    """Snapshot the live database into a standalone file at ``dest_path``.

    Missing parent directories are created and an existing file at the
    destination is removed first, because ``VACUUM INTO`` refuses to write
    over an existing target. A full WAL checkpoint is issued and then
    ``VACUUM INTO`` writes the copy, so everything stored in the database,
    including the vector_embeddings BLOBs, ends up in the snapshot.

    Returns:
        The destination as a ``Path``.
    """
    target = Path(dest_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    if target.exists():
        target.unlink()  # VACUUM INTO requires the target to not exist

    connection = self._connect()
    try:
        for statement, arguments in (
            ("PRAGMA wal_checkpoint(FULL)", ()),
            ("VACUUM INTO ?", (str(target),)),
        ):
            connection.execute(statement, arguments)
    finally:
        # Always release the connection, even if the checkpoint or vacuum fails.
        connection.close()
    return target
|