ltcai 4.3.3 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. package/README.md +21 -16
  2. package/docs/CHANGELOG.md +37 -0
  3. package/docs/V4_4_0_EXTRACTION_REPORT.md +239 -0
  4. package/lattice_brain/__init__.py +38 -23
  5. package/lattice_brain/_kg_common.py +11 -1
  6. package/lattice_brain/context.py +212 -2
  7. package/lattice_brain/conversations.py +234 -1
  8. package/lattice_brain/discovery.py +11 -1
  9. package/lattice_brain/documents.py +11 -1
  10. package/lattice_brain/graph/__init__.py +28 -0
  11. package/lattice_brain/graph/_kg_common.py +1123 -0
  12. package/lattice_brain/graph/curator.py +473 -0
  13. package/lattice_brain/graph/discovery.py +1455 -0
  14. package/lattice_brain/graph/documents.py +218 -0
  15. package/lattice_brain/graph/identity.py +175 -0
  16. package/lattice_brain/graph/ingest.py +644 -0
  17. package/lattice_brain/graph/network.py +205 -0
  18. package/lattice_brain/graph/projection.py +571 -0
  19. package/lattice_brain/graph/provenance.py +401 -0
  20. package/lattice_brain/graph/retrieval.py +1341 -0
  21. package/lattice_brain/graph/schema.py +640 -0
  22. package/lattice_brain/graph/store.py +237 -0
  23. package/lattice_brain/graph/write_master.py +225 -0
  24. package/lattice_brain/identity.py +11 -13
  25. package/lattice_brain/ingest.py +11 -1
  26. package/lattice_brain/ingestion.py +318 -0
  27. package/lattice_brain/memory.py +100 -1
  28. package/lattice_brain/network.py +11 -1
  29. package/lattice_brain/portability.py +431 -0
  30. package/lattice_brain/projection.py +11 -1
  31. package/lattice_brain/provenance.py +11 -1
  32. package/lattice_brain/retrieval.py +11 -1
  33. package/lattice_brain/runtime/__init__.py +32 -0
  34. package/lattice_brain/runtime/agent_runtime.py +569 -0
  35. package/lattice_brain/runtime/hooks.py +754 -0
  36. package/lattice_brain/runtime/multi_agent.py +795 -0
  37. package/lattice_brain/schema.py +11 -1
  38. package/lattice_brain/store.py +10 -2
  39. package/lattice_brain/workflow.py +461 -0
  40. package/lattice_brain/write_master.py +11 -1
  41. package/latticeai/__init__.py +1 -1
  42. package/latticeai/api/agents.py +2 -2
  43. package/latticeai/api/browser.py +1 -1
  44. package/latticeai/api/chat.py +1 -1
  45. package/latticeai/api/computer_use.py +1 -1
  46. package/latticeai/api/hooks.py +2 -2
  47. package/latticeai/api/mcp.py +1 -1
  48. package/latticeai/api/tools.py +1 -1
  49. package/latticeai/api/workflow_designer.py +2 -2
  50. package/latticeai/app_factory.py +4 -4
  51. package/latticeai/brain/__init__.py +24 -6
  52. package/latticeai/brain/_kg_common.py +11 -1117
  53. package/latticeai/brain/context.py +12 -208
  54. package/latticeai/brain/conversations.py +12 -231
  55. package/latticeai/brain/discovery.py +13 -1451
  56. package/latticeai/brain/documents.py +13 -214
  57. package/latticeai/brain/identity.py +11 -169
  58. package/latticeai/brain/ingest.py +13 -640
  59. package/latticeai/brain/memory.py +12 -97
  60. package/latticeai/brain/network.py +12 -200
  61. package/latticeai/brain/projection.py +13 -567
  62. package/latticeai/brain/provenance.py +13 -397
  63. package/latticeai/brain/retrieval.py +13 -1337
  64. package/latticeai/brain/schema.py +12 -635
  65. package/latticeai/brain/store.py +13 -233
  66. package/latticeai/brain/write_master.py +13 -221
  67. package/latticeai/core/agent.py +1 -1
  68. package/latticeai/core/agent_registry.py +2 -2
  69. package/latticeai/core/builtin_hooks.py +2 -2
  70. package/latticeai/core/graph_curator.py +6 -468
  71. package/latticeai/core/hooks.py +6 -749
  72. package/latticeai/core/marketplace.py +1 -1
  73. package/latticeai/core/multi_agent.py +6 -790
  74. package/latticeai/core/workflow_engine.py +6 -456
  75. package/latticeai/core/workspace_os.py +1 -1
  76. package/latticeai/services/agent_runtime.py +6 -564
  77. package/latticeai/services/ingestion.py +6 -313
  78. package/latticeai/services/kg_portability.py +6 -426
  79. package/latticeai/services/platform_runtime.py +3 -3
  80. package/latticeai/services/run_executor.py +1 -1
  81. package/latticeai/services/upload_service.py +1 -1
  82. package/p_reinforce.py +1 -1
  83. package/package.json +1 -1
  84. package/scripts/bump_version.py +1 -1
  85. package/scripts/wheel_smoke.py +7 -0
  86. package/src-tauri/Cargo.lock +1 -1
  87. package/src-tauri/Cargo.toml +1 -1
  88. package/src-tauri/tauri.conf.json +1 -1
  89. package/static/app/asset-manifest.json +1 -1
@@ -0,0 +1,571 @@
1
+ from __future__ import annotations
2
+
3
+ # ruff: noqa: F403,F405
4
+
5
+ from ._kg_common import * # noqa: F403,F401
6
+
7
+
8
class KnowledgeGraphProjectionMixin:
    """FTS index and v2-projection helpers for the knowledge-graph store.

    This is a mixin: it calls ``self.db_path``, ``self._connect()``,
    ``self._read_tables()``, ``self._upsert_node()`` and
    ``self._upsert_edge()``, none of which are defined here, so the host
    class must provide them.  Rows are read by column name, so connections
    returned by ``_connect()`` are expected to use a name-addressable row
    factory (e.g. ``sqlite3.Row``).
    """

    # Max ids bound into one mirrored ``DELETE ... IN (...)``.  Kept well under
    # SQLite's bound-parameter limit (999 on builds older than 3.32) so large
    # deletions are not silently dropped by the best-effort error handling.
    _V2_DELETE_BATCH = 500

    # Trigram FTS5 index over ``nodes`` plus the triggers that keep it in sync
    # on INSERT / UPDATE / DELETE.
    _FTS_SQL = """
    CREATE VIRTUAL TABLE IF NOT EXISTS node_fts USING fts5(
        node_id UNINDEXED, title, summary, metadata, tokenize='trigram'
    );
    CREATE TRIGGER IF NOT EXISTS node_fts_ai AFTER INSERT ON nodes BEGIN
        INSERT INTO node_fts(node_id, title, summary, metadata)
        VALUES (new.id, new.title, COALESCE(new.summary, ''), new.metadata_json);
    END;
    CREATE TRIGGER IF NOT EXISTS node_fts_au AFTER UPDATE ON nodes BEGIN
        DELETE FROM node_fts WHERE node_id = old.id;
        INSERT INTO node_fts(node_id, title, summary, metadata)
        VALUES (new.id, new.title, COALESCE(new.summary, ''), new.metadata_json);
    END;
    CREATE TRIGGER IF NOT EXISTS node_fts_ad AFTER DELETE ON nodes BEGIN
        DELETE FROM node_fts WHERE node_id = old.id;
    END;
    """

    # Views that expose the v2 tables under the legacy column names.
    _V2_VIEWS_SQL = """
    CREATE VIEW IF NOT EXISTS kgv2_nodes AS
        SELECT id,
               COALESCE(legacy_type, type) AS type,
               label AS title,
               summary,
               attrs AS metadata_json,
               created_at, updated_at
        FROM nodes_v2;
    CREATE VIEW IF NOT EXISTS kgv2_edges AS
        SELECT id, source AS from_node, target AS to_node,
               COALESCE(legacy_type, type) AS type,
               weight,
               metadata AS metadata_json,
               created_at
        FROM edges_v2;
    """

    def _init_fts(self) -> None:
        """Create the trigram FTS index and seed it when it is empty.

        ``self._fts_enabled`` ends up True only if the virtual table, its
        triggers and the optional seed all succeeded; on
        ``sqlite3.OperationalError`` it stays False and the reason is logged.
        """
        self._fts_enabled = False
        try:
            with self._connect() as conn:
                conn.executescript(self._FTS_SQL)
                fts_count = conn.execute(
                    "SELECT count(*) AS c FROM node_fts"
                ).fetchone()["c"]
                if fts_count == 0:
                    # First run against an existing DB: the triggers only see
                    # future writes, so copy the current nodes in once.
                    conn.execute(
                        "INSERT INTO node_fts(node_id, title, summary, metadata) "
                        "SELECT id, title, COALESCE(summary, ''), metadata_json FROM nodes"
                    )
            self._fts_enabled = True
        except sqlite3.OperationalError as exc:
            # FTS5/trigram not compiled into this SQLite build. LIKE search
            # stays authoritative; the capability is reported, never faked.
            logging.info(
                "FTS5 trigram index unavailable (%s); keyword search uses LIKE scans.",
                exc,
            )

    def _fts_match_ids(
        self, conn: sqlite3.Connection, query: str, limit: int
    ) -> List[str]:
        """Return ranked node ids for a trigram FTS phrase query.

        Returns an empty list when FTS is not enabled, when ``query`` is
        shorter than three characters, or when the MATCH query raises
        ``sqlite3.OperationalError``.
        """
        if not getattr(self, "_fts_enabled", False) or len(query) < 3:
            return []
        # Double embedded quotes so the whole query is one quoted FTS5 phrase.
        escaped = query.replace('"', '""')
        try:
            rows = conn.execute(
                "SELECT node_id FROM node_fts WHERE node_fts MATCH ? ORDER BY rank LIMIT ?",
                (f'"{escaped}"', limit),
            ).fetchall()
        except sqlite3.OperationalError:
            return []
        return [row["node_id"] for row in rows]

    def _init_v2_schema(self) -> None:
        """Initialize the normalized v2 tables + reconstruction views, migrating
        the projection layout when it is stale — **atomically**.

        The entire DROP → CREATE → VIEWS → BACKFILL → version-stamp sequence runs
        in a single transaction on one connection: on any failure it rolls back,
        leaving the prior projection untouched and the version unchanged, so the
        next startup simply retries. The migration only ever touches the v2
        tables/views and the ``projection_version`` key — never the authoritative
        legacy ``nodes``/``edges`` — so legacy data cannot be corrupted even if
        the rebuild fails midway.

        ``self._v2_projection_available`` is always assigned: False up front
        (including when the v2 schema module is unavailable) and True only
        after the whole sequence completed.  Any exception is logged as a
        warning and otherwise swallowed.
        """
        # Set the flag before the early return so readers never hit an
        # AttributeError when the v2 schema is not importable.
        self._v2_projection_available = False
        if KGStoreV2 is None or _exec_script is None:
            return
        try:
            self._backup_before_v2_flip()
            with self._connect() as conn:
                conn.execute("BEGIN")
                stale = self._projection_version(conn) != _PROJECTION_VERSION
                # Reconstruction views are non-authoritative. Recreate them on
                # every startup so older SQLite rename migrations cannot strand
                # a view against a temporary table such as edges_v2_old.
                for stmt in (
                    "DROP VIEW IF EXISTS kgv2_edges",
                    "DROP VIEW IF EXISTS kgv2_nodes",
                ):
                    conn.execute(stmt)
                if stale:
                    # The projection is non-authoritative; drop it so init_schema
                    # recreates the tables with the current normalized columns.
                    for stmt in (
                        "DROP TABLE IF EXISTS edges_v2",
                        "DROP TABLE IF EXISTS nodes_v2",
                    ):
                        conn.execute(stmt)
                # init_schema(conn=...) joins this transaction (no implicit commit)
                KGStoreV2(self.db_path).init_schema(conn=conn)
                _exec_script(conn, self._V2_VIEWS_SQL)
                self._backfill_v2_on(conn, force=stale)
                # version stamp commits together with the backfill — never stranded
                conn.execute(
                    "INSERT OR REPLACE INTO kg_meta(key, value) VALUES ('projection_version', ?)",
                    (str(_PROJECTION_VERSION),),
                )
                mastered_at = _now()
                conn.execute(
                    "INSERT OR REPLACE INTO kg_meta(key, value) VALUES (?, ?)",
                    (_KG_DB_FORMAT_KEY, str(_KG_DB_FORMAT_VERSION)),
                )
                # Keep the first write-master timestamp if one is already stored.
                conn.execute(
                    "INSERT OR REPLACE INTO kg_meta(key, value) VALUES (?, COALESCE((SELECT value FROM kg_meta WHERE key=?), ?))",
                    (_V2_WRITE_MASTER_KEY, _V2_WRITE_MASTER_KEY, mastered_at),
                )
                conn.execute(f"PRAGMA user_version={_KG_DB_FORMAT_VERSION}")
                # Smoke-test both views before declaring the projection usable.
                conn.execute("SELECT 1 FROM kgv2_nodes LIMIT 1").fetchone()
                conn.execute("SELECT 1 FROM kgv2_edges LIMIT 1").fetchone()
            self._v2_projection_available = True
        except Exception as e:
            logging.warning("knowledge_graph: v2 schema init/backfill skipped: %s", e)

    def _backup_before_v2_flip(self) -> Optional[str]:
        """Create one local SQLite backup before the v2 write-master flip.

        Returns the backup path, or ``None`` when no backup is taken: the DB
        file is missing or empty, the write-master key is already stamped in
        ``kg_meta``, or the legacy ``nodes`` table is empty or unreadable.
        """
        if not self.db_path.exists() or self.db_path.stat().st_size == 0:
            return None
        with self._connect() as conn:
            try:
                stamped = conn.execute(
                    "SELECT value FROM kg_meta WHERE key=?", (_V2_WRITE_MASTER_KEY,)
                ).fetchone()
            except sqlite3.Error:
                stamped = None
            if stamped:
                return None
            try:
                rows = int(
                    conn.execute("SELECT COUNT(*) FROM nodes").fetchone()[0] or 0
                )
            except sqlite3.Error:
                rows = 0
            if rows == 0:
                return None
            conn.execute("PRAGMA wal_checkpoint(FULL)")
            backup_dir = self.db_path.parent / "backups"
            backup_dir.mkdir(parents=True, exist_ok=True)
            stamp = datetime.now().strftime("%Y%m%dT%H%M%S")
            dest = (
                backup_dir / f"{self.db_path.stem}.pre-v2-write-master.{stamp}.sqlite"
            )
            conn.execute("VACUUM INTO ?", (str(dest),))
            return str(dest)

    def _projection_version(self, conn: sqlite3.Connection) -> int:
        """Return the stored v2 projection layout version (0 if unknown).

        A fresh DB (kg_meta absent) raises ``sqlite3.OperationalError`` here and
        is correctly treated as version 0 → rebuild.  A stored value that is
        not an integer is likewise reported as 0, so a corrupted stamp forces
        a rebuild instead of blocking the migration on every startup.  Other
        exceptions are not swallowed, so a real bug doesn't masquerade as a
        stale projection.
        """
        try:
            row = conn.execute(
                "SELECT value FROM kg_meta WHERE key='projection_version'"
            ).fetchone()
        except sqlite3.Error:
            return 0
        if not row or row["value"] is None:
            return 0
        try:
            return int(row["value"])
        except ValueError:
            # Unparseable stamp: unknown layout, same as "never projected".
            return 0

    def _backfill_v2_if_needed(self, *, force: bool = False) -> None:
        """Project legacy nodes/edges into v2 on a fresh transaction.

        Thin wrapper around :meth:`_backfill_v2_on` for callers (tests, ad-hoc
        re-sync) that aren't already inside the migration transaction.  Any
        exception is logged as a warning and swallowed.
        """
        try:
            with self._connect() as conn:
                self._backfill_v2_on(conn, force=force)
        except Exception as ex:
            logging.warning("knowledge_graph: v2 backfill skipped: %s", ex)

    def _backfill_v2_on(self, conn: sqlite3.Connection, *, force: bool = False) -> None:
        """Project legacy nodes/edges into the normalized v2 tables on ``conn``.

        Non-destructive to legacy. ``force`` rebuilds unconditionally (used after
        a layout migration); otherwise it only projects when v2 is empty. The v2
        graph is a derived projection, so clearing + rebuilding it is always safe.
        Does nothing when the legacy ``nodes`` table is empty.  Legacy column
        values are passed through unchanged; only a NULL edge weight is
        replaced, by 1.0 (a stored weight of 0.0 is preserved).

        The logged counts are rows *attempted*: the per-row projection helpers
        run in their default non-strict mode and log, rather than raise, on
        failure.
        """
        legacy_nodes = conn.execute("SELECT COUNT(*) FROM nodes").fetchone()[0]
        if legacy_nodes == 0:
            return
        v2_nodes = conn.execute("SELECT COUNT(*) FROM nodes_v2").fetchone()[0]
        if v2_nodes > 0 and not force:
            return  # current projection already present
        # (re)project: clear v2 graph (not authoritative) and rebuild
        conn.execute("DELETE FROM edges_v2")
        conn.execute("DELETE FROM nodes_v2")
        n = e = 0
        for r in conn.execute(
            "SELECT id, type, title, summary, metadata_json, created_at, updated_at FROM nodes"
        ).fetchall():
            self._v2_project_node(
                conn,
                r["id"],
                r["type"],
                r["title"],
                r["summary"],
                r["metadata_json"],
                created_at=r["created_at"],
                updated_at=r["updated_at"],
            )
            n += 1
        for r in conn.execute(
            "SELECT id, from_node, to_node, type, weight, metadata_json, created_at FROM edges"
        ).fetchall():
            raw_weight = r["weight"]
            self._v2_project_edge(
                conn,
                r["from_node"],
                r["to_node"],
                r["type"],
                # Only NULL falls back to 1.0; ``or`` would also rewrite 0.0.
                1.0 if raw_weight is None else float(raw_weight),
                r["metadata_json"],
                edge_id=r["id"],
                created_at=r["created_at"],
            )
            e += 1
        logging.info(
            "knowledge_graph: projected legacy → v2 (%d nodes, %d edges)", n, e
        )

    def _v2_project_node(
        self,
        conn: sqlite3.Connection,
        node_id: str,
        node_type: str,
        title: str,
        summary: Optional[str],
        metadata_json: Optional[str],
        *,
        created_at: Optional[str] = None,
        updated_at: Optional[str] = None,
        owner: Optional[str] = None,
        workspace_id: Optional[str] = None,
        visibility: Optional[str] = None,
        strict: bool = False,
    ) -> None:
        """Upsert one node into ``nodes_v2``.

        ``node_type`` is stored verbatim as ``legacy_type`` and, when the
        ``NodeType`` enum is available, normalized for ``type``.  Owner and
        workspace fall back to hints in the metadata JSON; visibility falls
        back to ``'legacy'`` without a workspace and ``'workspace'`` with one.

        With ``strict=False`` (default) a failed write is logged at debug
        level and swallowed.  With ``strict=True`` it is re-raised, and
        ``RuntimeError`` is raised when the v2 schema is unavailable.
        """
        if KGStoreV2 is None:
            if strict:
                raise RuntimeError("Knowledge Graph v2 schema is unavailable")
            return
        ts = updated_at or _now()
        norm_type = (
            NodeType.from_legacy(node_type).value if NodeType is not None else node_type
        )
        # Scope resolution: explicit param > metadata hints > legacy-global.
        # 'legacy' (not 'private') marks unscoped rows — the column default
        # must never silently privatize previously machine-shared data.
        meta = _safe_loads(metadata_json) if metadata_json else {}
        if not isinstance(meta, dict):
            # Valid JSON that is not an object carries no scope hints.
            meta = {}
        owner = owner or meta.get("user_email") or meta.get("owner") or None
        workspace_id = workspace_id or meta.get("workspace_id") or None
        visibility = visibility or ("legacy" if workspace_id is None else "workspace")
        try:
            conn.execute(
                """
                INSERT INTO nodes_v2(id, type, legacy_type, label, summary, attrs,
                                     owner_id, workspace_id, visibility,
                                     created_at, updated_at, importance_score)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0.0)
                ON CONFLICT(id) DO UPDATE SET
                    type=excluded.type, legacy_type=excluded.legacy_type,
                    label=excluded.label, summary=excluded.summary,
                    attrs=excluded.attrs, updated_at=excluded.updated_at,
                    owner_id=COALESCE(excluded.owner_id, nodes_v2.owner_id),
                    workspace_id=COALESCE(excluded.workspace_id, nodes_v2.workspace_id),
                    visibility=CASE WHEN excluded.visibility != 'legacy'
                                    THEN excluded.visibility
                                    ELSE nodes_v2.visibility END
                """,
                (
                    node_id,
                    norm_type,
                    node_type,
                    title,
                    summary,
                    metadata_json if metadata_json is not None else "{}",
                    owner,
                    workspace_id,
                    visibility,
                    created_at or ts,
                    ts,
                ),
            )
        except Exception as ex:
            if strict:
                raise
            logging.debug(
                "knowledge_graph: v2 node projection skipped (%s): %s", node_id, ex
            )

    def _v2_project_edge(
        self,
        conn: sqlite3.Connection,
        from_node: str,
        to_node: str,
        edge_type: str,
        weight: float,
        metadata_json: Optional[str],
        *,
        edge_id: Optional[str] = None,
        created_at: Optional[str] = None,
        strict: bool = False,
    ) -> None:
        """Upsert one edge into ``edges_v2`` and record the observation.

        The edge id defaults to ``edge:`` plus the first 24 characters of
        ``_sha256_text`` over ``from|type|to``.  On a conflict over
        ``(source, target, type, legacy_type)`` the larger weight is kept,
        and a row is appended to ``edge_occurrences`` for the edge.
        ``confidence`` comes from the metadata JSON and falls back to 1.0
        when it is missing, null or not numeric.

        With ``strict=False`` (default) a failed write is logged at debug
        level and swallowed.  With ``strict=True`` it is re-raised, and
        ``RuntimeError`` is raised when the v2 schema is unavailable.
        """
        if KGStoreV2 is None:
            if strict:
                raise RuntimeError("Knowledge Graph v2 schema is unavailable")
            return
        eid = (
            edge_id or f"edge:{_sha256_text(f'{from_node}|{edge_type}|{to_node}')[:24]}"
        )
        norm_type = (
            EdgeType.from_legacy(edge_type).value if EdgeType is not None else edge_type
        )
        meta_str = metadata_json if metadata_json is not None else "{}"
        # Parse once; tolerate metadata that is not a JSON object.
        meta = _safe_loads(meta_str)
        if not isinstance(meta, dict):
            meta = {}
        try:
            confidence = float(meta.get("confidence", 1.0))
        except (TypeError, ValueError):
            # Null / non-numeric confidence must not break a best-effort write.
            confidence = 1.0
        try:
            conn.execute(
                """
                INSERT INTO edges_v2(id, source, target, type, legacy_type, weight,
                                     confidence, evidence, metadata, created_by, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, '[]', ?, 'legacy', ?)
                ON CONFLICT(source, target, type, legacy_type) DO UPDATE SET
                    weight=max(edges_v2.weight, excluded.weight),
                    confidence=excluded.confidence,
                    metadata=excluded.metadata
                """,
                (
                    eid,
                    from_node,
                    to_node,
                    norm_type,
                    edge_type,
                    float(weight),
                    confidence,
                    meta_str,
                    created_at or _now(),
                ),
            )
            # Temporal record: every observation of this relationship is kept
            # (the UNIQUE upsert + weight=max alone would erase recurrence).
            # Look the id up again: on conflict the stored id may not be ``eid``.
            row = conn.execute(
                "SELECT id FROM edges_v2 WHERE source=? AND target=? AND type=? AND legacy_type=?",
                (from_node, to_node, norm_type, edge_type),
            ).fetchone()
            if row is not None:
                conn.execute(
                    "INSERT INTO edge_occurrences(edge_id, observed_at, weight, source) VALUES (?, ?, ?, ?)",
                    (
                        row["id"],
                        created_at or _now(),
                        float(weight),
                        meta.get("source"),
                    ),
                )
        except Exception as ex:
            if strict:
                raise
            logging.debug(
                "knowledge_graph: v2 edge projection skipped (%s->%s): %s",
                from_node,
                to_node,
                ex,
            )

    def curate(
        self, *, max_documents: int = 200, max_new_nodes: int = 8
    ) -> Dict[str, Any]:
        """On-demand graph curation (T4.4 — graph_curator goes live).

        Runs the curator's gated topic-promotion pipeline over recent content
        nodes: candidates are clustered, secret-bearing labels are refused,
        and only multi-source topics above the importance threshold become
        Topic nodes (with MENTIONS edges back to their sources and a real
        importance_score in nodes_v2). Explicit and observable — the result
        reports everything promoted AND everything skipped, with reasons.

        ``max_documents`` is clamped to 1..2000 and ``max_new_nodes`` to
        1..50.  At most the first 10 sources of each promotion are linked,
        and only those that were part of the scanned rows.  The returned
        ``skipped`` list is truncated to 50 entries; ``skipped_total`` holds
        the full count.
        """
        from .curator import auto_build_graph_overlay

        content_types = (
            "Document",
            "File",
            "CodeFile",
            "Message",
            "AIResponse",
            "Chat",
            "Page",
            "Slide",
            "Spreadsheet",
        )
        file_like_types = {"Document", "File", "CodeFile", "Spreadsheet"}
        nt, _ = self._read_tables()
        with self._connect() as conn:
            placeholders = ",".join("?" for _ in content_types)
            rows = conn.execute(
                f"""
                SELECT id, type, title, summary FROM {nt}
                WHERE type IN ({placeholders})
                ORDER BY updated_at DESC, id ASC LIMIT ?
                """,
                (*content_types, max(1, min(int(max_documents), 2000))),
            ).fetchall()
            existing_labels = {
                str(row["title"] or "").strip().lower()
                for row in conn.execute(
                    f"SELECT title FROM {nt} WHERE type IN ('Topic', 'Concept')"
                ).fetchall()
            }
        documents = [
            {
                "id": row["id"],
                # ``or ''`` keeps a NULL title from becoming the text "None".
                "text": f"{row['title'] or ''} {row['summary'] or ''}",
                "kind": "file" if row["type"] in file_like_types else "chat",
            }
            for row in rows
        ]
        overlay = auto_build_graph_overlay(
            documents,
            existing_node_labels=existing_labels,
            max_new_nodes=max(1, min(int(max_new_nodes), 50)),
        )
        promoted: List[Dict[str, Any]] = []
        with self._connect() as conn:
            valid_ids = {row["id"] for row in rows}
            for promo in overlay["promotions"]:
                topic_id = f"topic:{_slug(promo['label'])}"
                self._upsert_node(
                    conn,
                    topic_id,
                    "Topic",
                    promo["label"],
                    metadata={
                        "curated": True,
                        "importance": promo["importance"],
                        "aliases": promo["aliases"],
                        "source": "graph_curator",
                    },
                )
                conn.execute(
                    "UPDATE nodes_v2 SET importance_score=? WHERE id=?",
                    (float(promo["importance"]), topic_id),
                )
                linked = 0
                for source_id in promo["sources"][:10]:
                    if source_id in valid_ids:
                        self._upsert_edge(
                            conn,
                            source_id,
                            topic_id,
                            "MENTIONS",
                            weight=0.6,
                            metadata={"source": "graph_curator"},
                        )
                        linked += 1
                promoted.append(
                    {
                        "node_id": topic_id,
                        "label": promo["label"],
                        "importance": promo["importance"],
                        "linked_sources": linked,
                    }
                )
        return {
            "status": "ok",
            "documents_scanned": len(documents),
            "candidates_total": overlay["candidates_total"],
            "promoted": promoted,
            "skipped": overlay["skipped"][:50],
            "skipped_total": len(overlay["skipped"]),
        }

    def mark_superseded(self, old_node_id: str, new_node_id: str) -> Dict[str, Any]:
        """Record that ``old_node_id`` was replaced by ``new_node_id``.

        The old node stays queryable (knowledge is durable); readers can follow
        the revision chain via ``nodes_v2.superseded_by``.

        Raises:
            FileNotFoundError: if either id is absent from ``nodes_v2``; the
                missing id is the exception argument.
        """
        with self._connect() as conn:
            for node_id in (old_node_id, new_node_id):
                exists = conn.execute(
                    "SELECT 1 FROM nodes_v2 WHERE id=?", (node_id,)
                ).fetchone()
                if not exists:
                    raise FileNotFoundError(node_id)
            conn.execute(
                "UPDATE nodes_v2 SET superseded_by=?, updated_at=? WHERE id=?",
                (new_node_id, _now(), old_node_id),
            )
        return {"status": "ok", "node_id": old_node_id, "superseded_by": new_node_id}

    def _v2_delete_nodes(self, conn: sqlite3.Connection, ids) -> None:
        """Mirror legacy node deletions into v2 (edges_v2 cascade on the FK).

        ``ids`` may be any iterable of node ids.  Deletes are issued in
        batches of ``_V2_DELETE_BATCH`` so one statement never exceeds
        SQLite's bound-parameter limit.  Best-effort: failures are logged at
        debug level and swallowed.
        """
        if KGStoreV2 is None:
            return
        ids = list(ids)
        if not ids:
            return
        batch = self._V2_DELETE_BATCH
        try:
            for start in range(0, len(ids), batch):
                chunk = ids[start : start + batch]
                ph = ",".join("?" * len(chunk))
                conn.execute(f"DELETE FROM nodes_v2 WHERE id IN ({ph})", chunk)
        except Exception as ex:
            logging.debug("knowledge_graph: v2 node delete mirror skipped: %s", ex)

    def _v2_delete_edges_from(self, conn: sqlite3.Connection, node_id: str) -> None:
        """Mirror a legacy ``DELETE FROM edges WHERE from_node=?`` into v2.

        Best-effort: failures are logged at debug level and swallowed.
        """
        if KGStoreV2 is None:
            return
        try:
            conn.execute("DELETE FROM edges_v2 WHERE source=?", (node_id,))
        except Exception as ex:
            logging.debug("knowledge_graph: v2 edge delete mirror skipped: %s", ex)

    def _v2_sync_report(self) -> Dict[str, Any]:
        """Diagnose the dual-write invariant: legacy node/edge id sets must equal
        the v2 projection's. Returns counts + any drift (ids missing from / extra
        in v2). ``in_sync`` is True only when both id sets match exactly.

        All legacy writes go through _upsert_node/_upsert_edge (which dual-write)
        and every legacy delete is mirrored, so a non-empty drift signals a
        bypassed write path — this is the runtime guard for that invariant.

        When the v2 schema is unavailable the report is just
        ``{"available": False, "in_sync": True}``.
        """
        if KGStoreV2 is None:
            return {"available": False, "in_sync": True}
        with self._connect() as conn:
            legacy_nodes = {r[0] for r in conn.execute("SELECT id FROM nodes")}
            v2_nodes = {r[0] for r in conn.execute("SELECT id FROM nodes_v2")}
            legacy_edges = {r[0] for r in conn.execute("SELECT id FROM edges")}
            v2_edges = {r[0] for r in conn.execute("SELECT id FROM edges_v2")}
        return {
            "available": True,
            "in_sync": legacy_nodes == v2_nodes and legacy_edges == v2_edges,
            "nodes_legacy": len(legacy_nodes),
            "nodes_v2": len(v2_nodes),
            "edges_legacy": len(legacy_edges),
            "edges_v2": len(v2_edges),
            "nodes_missing_from_v2": sorted(legacy_nodes - v2_nodes),
            "nodes_extra_in_v2": sorted(v2_nodes - legacy_nodes),
            "edges_missing_from_v2": sorted(legacy_edges - v2_edges),
            "edges_extra_in_v2": sorted(v2_edges - legacy_edges),
        }