ltcai 2.2.7 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. package/README.md +72 -34
  2. package/docs/CHANGELOG.md +119 -0
  3. package/docs/V3_BACKEND_ARCHITECTURE.md +138 -0
  4. package/docs/V3_FRONTEND.md +139 -0
  5. package/knowledge_graph.py +649 -21
  6. package/latticeai/__init__.py +1 -1
  7. package/latticeai/api/admin.py +47 -0
  8. package/latticeai/api/agents.py +54 -31
  9. package/latticeai/api/auth.py +5 -2
  10. package/latticeai/api/chat.py +10 -2
  11. package/latticeai/api/search.py +240 -0
  12. package/latticeai/api/static_routes.py +11 -2
  13. package/latticeai/core/config.py +18 -0
  14. package/latticeai/core/embedding_providers.py +625 -0
  15. package/latticeai/core/local_embeddings.py +86 -0
  16. package/latticeai/core/workspace_os.py +1 -1
  17. package/latticeai/server_app.py +65 -1
  18. package/latticeai/services/agent_runtime.py +245 -0
  19. package/latticeai/services/search_service.py +346 -0
  20. package/package.json +13 -6
  21. package/scripts/build_v3_assets.mjs +164 -0
  22. package/scripts/capture/README.md +28 -0
  23. package/scripts/capture/capture_enterprise.js +8 -0
  24. package/scripts/capture/capture_graph.js +8 -0
  25. package/scripts/capture/capture_onboarding.js +8 -0
  26. package/scripts/capture/capture_page.js +43 -0
  27. package/scripts/capture/capture_release_media.js +125 -0
  28. package/scripts/capture/capture_skills.js +8 -0
  29. package/scripts/capture/capture_workspace.js +8 -0
  30. package/scripts/generate_diagrams.py +513 -0
  31. package/scripts/lint_v3.mjs +33 -0
  32. package/scripts/release-0.3.1.sh +105 -0
  33. package/scripts/take_screenshots.js +69 -0
  34. package/scripts/validate_release_artifacts.py +167 -0
  35. package/static/account.html +9 -9
  36. package/static/activity.html +4 -4
  37. package/static/admin.html +8 -8
  38. package/static/agents.html +4 -4
  39. package/static/chat.html +10 -10
  40. package/static/css/reference/account.css +137 -1
  41. package/static/css/reference/chat.css +31 -37
  42. package/static/css/responsive.css +42 -0
  43. package/static/css/tokens.5a595671.css +260 -0
  44. package/static/css/tokens.css +125 -130
  45. package/static/graph.html +9 -9
  46. package/static/manifest.json +3 -3
  47. package/static/plugins.html +4 -4
  48. package/static/scripts/account.js +4 -4
  49. package/static/scripts/chat.js +40 -8
  50. package/static/scripts/workspace.js +78 -0
  51. package/static/sw.js +3 -1
  52. package/static/v3/asset-manifest.json +47 -0
  53. package/static/v3/css/lattice.base.css +128 -0
  54. package/static/v3/css/lattice.base.e4cdd05d.css +128 -0
  55. package/static/v3/css/lattice.components.011e988b.css +447 -0
  56. package/static/v3/css/lattice.components.css +447 -0
  57. package/static/v3/css/lattice.shell.4920f42d.css +407 -0
  58. package/static/v3/css/lattice.shell.css +407 -0
  59. package/static/v3/css/lattice.tokens.c597ff81.css +132 -0
  60. package/static/v3/css/lattice.tokens.css +132 -0
  61. package/static/v3/css/lattice.views.3ee19d4e.css +277 -0
  62. package/static/v3/css/lattice.views.css +277 -0
  63. package/static/v3/index.html +69 -0
  64. package/static/v3/js/app.46fb61d9.js +26 -0
  65. package/static/v3/js/app.js +26 -0
  66. package/static/v3/js/core/api.22a41d42.js +344 -0
  67. package/static/v3/js/core/api.js +344 -0
  68. package/static/v3/js/core/components.4c83e0a9.js +222 -0
  69. package/static/v3/js/core/components.js +222 -0
  70. package/static/v3/js/core/dom.a2773eb0.js +148 -0
  71. package/static/v3/js/core/dom.js +148 -0
  72. package/static/v3/js/core/router.584570f2.js +37 -0
  73. package/static/v3/js/core/router.js +37 -0
  74. package/static/v3/js/core/routes.f935dd50.js +78 -0
  75. package/static/v3/js/core/routes.js +78 -0
  76. package/static/v3/js/core/shell.1b6199d6.js +363 -0
  77. package/static/v3/js/core/shell.js +363 -0
  78. package/static/v3/js/core/store.34ebd5e6.js +113 -0
  79. package/static/v3/js/core/store.js +113 -0
  80. package/static/v3/js/views/admin-audit.660a1fb1.js +185 -0
  81. package/static/v3/js/views/admin-audit.js +185 -0
  82. package/static/v3/js/views/admin-permissions.a7ae5f09.js +177 -0
  83. package/static/v3/js/views/admin-permissions.js +177 -0
  84. package/static/v3/js/views/admin-policies.3658fd86.js +102 -0
  85. package/static/v3/js/views/admin-policies.js +102 -0
  86. package/static/v3/js/views/admin-private-vpc.7d342d36.js +135 -0
  87. package/static/v3/js/views/admin-private-vpc.js +135 -0
  88. package/static/v3/js/views/admin-security.07c66b72.js +180 -0
  89. package/static/v3/js/views/admin-security.js +180 -0
  90. package/static/v3/js/views/admin-users.03bac88c.js +168 -0
  91. package/static/v3/js/views/admin-users.js +168 -0
  92. package/static/v3/js/views/agents.14e48bdd.js +193 -0
  93. package/static/v3/js/views/agents.js +193 -0
  94. package/static/v3/js/views/chat.718144ce.js +449 -0
  95. package/static/v3/js/views/chat.js +449 -0
  96. package/static/v3/js/views/files.4935197e.js +186 -0
  97. package/static/v3/js/views/files.js +186 -0
  98. package/static/v3/js/views/home.cdde3b32.js +119 -0
  99. package/static/v3/js/views/home.js +119 -0
  100. package/static/v3/js/views/hybrid-search.b22b97e0.js +195 -0
  101. package/static/v3/js/views/hybrid-search.js +195 -0
  102. package/static/v3/js/views/knowledge-graph.a14ea7e7.js +237 -0
  103. package/static/v3/js/views/knowledge-graph.js +237 -0
  104. package/static/v3/js/views/models.a1ffa147.js +256 -0
  105. package/static/v3/js/views/models.js +256 -0
  106. package/static/v3/js/views/my-computer.1b2ff621.js +237 -0
  107. package/static/v3/js/views/my-computer.js +237 -0
  108. package/static/v3/js/views/pipeline.c522f1ce.js +157 -0
  109. package/static/v3/js/views/pipeline.js +157 -0
  110. package/static/v3/js/views/settings.4f777210.js +250 -0
  111. package/static/v3/js/views/settings.js +250 -0
  112. package/static/workflows.html +4 -4
  113. package/static/workspace.css +340 -2
  114. package/static/workspace.html +43 -24
  115. package/docs/images/tmp_frames/frame_00.png +0 -0
  116. package/docs/images/tmp_frames/frame_01.png +0 -0
  117. package/docs/images/tmp_frames/frame_02.png +0 -0
  118. package/docs/images/tmp_frames/frame_03.png +0 -0
  119. package/docs/images/tmp_frames/hero_00.png +0 -0
  120. package/docs/images/tmp_frames/hero_01.png +0 -0
  121. package/docs/images/tmp_frames/hero_02.png +0 -0
  122. package/docs/images/tmp_frames/hero_03.png +0 -0
@@ -16,6 +16,7 @@ import platform
16
16
  import re
17
17
  import shutil
18
18
  import sqlite3
19
+ import time
19
20
  import zipfile
20
21
  from collections import Counter
21
22
  from datetime import datetime
@@ -30,6 +31,8 @@ except Exception: # pragma: no cover - v2 schema is optional at import time
30
31
  EdgeType = None # type: ignore[assignment]
31
32
  _exec_script = None # type: ignore[assignment]
32
33
 
34
+ from latticeai.core.local_embeddings import LocalEmbeddingModel
35
+
33
36
  # Default read source for the graph queries: v2 reconstruction views.
34
37
  # Override with LATTICEAI_KG_READ_V2=0 to fall back to the legacy tables.
35
38
  _READ_FROM_V2_DEFAULT = os.getenv("LATTICEAI_KG_READ_V2", "1") != "0"
@@ -806,11 +809,16 @@ def _topic_candidates(text: str, limit: int = 8) -> List[str]:
806
809
 
807
810
 
808
811
  class KnowledgeGraphStore:
809
- def __init__(self, db_path: Path, blob_dir: Path):
812
+ def __init__(self, db_path: Path, blob_dir: Path, embedder: Any = None):
810
813
  self.db_path = Path(db_path)
811
814
  self.blob_dir = Path(blob_dir)
812
815
  self.db_path.parent.mkdir(parents=True, exist_ok=True)
813
816
  self.blob_dir.mkdir(parents=True, exist_ok=True)
817
+ # The embedder is swappable behind a fixed interface
818
+ # (model_id/dim/embed/encode/decode/similarity). Defaults to the
819
+ # deterministic, offline hash model so the store works with no config;
820
+ # server_app injects a provider-backed embedder from Config.
821
+ self._embedding_model = embedder if embedder is not None else LocalEmbeddingModel()
814
822
  self._init_db()
815
823
  # Read graph queries from the v2 projection (kgv2_* views) when available.
816
824
  # Toggle off (e.g. in tests) to compare against the legacy tables.
@@ -909,6 +917,31 @@ class KnowledgeGraphStore:
909
917
  UNIQUE(source_id, relative_path),
910
918
  FOREIGN KEY(source_id) REFERENCES knowledge_sources(id) ON DELETE CASCADE
911
919
  );
920
+ CREATE TABLE IF NOT EXISTS vector_embeddings (
921
+ item_id TEXT PRIMARY KEY,
922
+ item_type TEXT NOT NULL,
923
+ source_node TEXT NOT NULL,
924
+ text_hash TEXT NOT NULL,
925
+ embedding BLOB NOT NULL,
926
+ embedding_dim INTEGER NOT NULL,
927
+ embedding_model TEXT NOT NULL,
928
+ metadata_json TEXT NOT NULL CHECK (json_valid(metadata_json)),
929
+ indexed_at TEXT NOT NULL,
930
+ FOREIGN KEY(source_node) REFERENCES nodes(id) ON DELETE CASCADE
931
+ );
932
+ CREATE TABLE IF NOT EXISTS vector_index_operations (
933
+ id TEXT PRIMARY KEY,
934
+ operation TEXT NOT NULL,
935
+ status TEXT NOT NULL,
936
+ requested_at TEXT NOT NULL,
937
+ started_at TEXT,
938
+ completed_at TEXT,
939
+ items_total INTEGER NOT NULL DEFAULT 0,
940
+ items_indexed INTEGER NOT NULL DEFAULT 0,
941
+ items_skipped INTEGER NOT NULL DEFAULT 0,
942
+ error_message TEXT,
943
+ metadata_json TEXT NOT NULL CHECK (json_valid(metadata_json))
944
+ );
912
945
  CREATE INDEX IF NOT EXISTS idx_nodes_type ON nodes(type);
913
946
  CREATE INDEX IF NOT EXISTS idx_edges_from ON edges(from_node);
914
947
  CREATE INDEX IF NOT EXISTS idx_edges_to ON edges(to_node);
@@ -917,6 +950,10 @@ class KnowledgeGraphStore:
917
950
  CREATE INDEX IF NOT EXISTS idx_local_file_index_source ON local_file_index(source_id);
918
951
  CREATE INDEX IF NOT EXISTS idx_local_file_index_status ON local_file_index(status);
919
952
  CREATE INDEX IF NOT EXISTS idx_local_file_index_graph_node ON local_file_index(graph_node_id);
953
+ CREATE INDEX IF NOT EXISTS idx_vector_embeddings_type ON vector_embeddings(item_type);
954
+ CREATE INDEX IF NOT EXISTS idx_vector_embeddings_source ON vector_embeddings(source_node);
955
+ CREATE INDEX IF NOT EXISTS idx_vector_embeddings_model ON vector_embeddings(embedding_model);
956
+ CREATE INDEX IF NOT EXISTS idx_vector_index_operations_requested ON vector_index_operations(requested_at);
920
957
  """
921
958
  )
922
959
  conn.execute(
@@ -1198,6 +1235,15 @@ class KnowledgeGraphStore:
1198
1235
  # dual-write: project into the v2 graph on the same transaction
1199
1236
  self._v2_project_node(conn, node_id, node_type, title_s, summary_s, meta_json,
1200
1237
  created_at=now, updated_at=now)
1238
+ if node_type != "Chunk":
1239
+ self._upsert_vector_item(
1240
+ conn,
1241
+ item_id=node_id,
1242
+ item_type="node",
1243
+ source_node=node_id,
1244
+ text=self._vector_text_for_node(title=title_s, summary=summary_s, metadata=metadata),
1245
+ metadata={"node_type": node_type, **(metadata or {})},
1246
+ )
1201
1247
  return node_id
1202
1248
 
1203
1249
  def _upsert_edge(
@@ -1227,6 +1273,110 @@ class KnowledgeGraphStore:
1227
1273
  edge_id=edge_id, created_at=now)
1228
1274
  return edge_id
1229
1275
 
1276
def _vector_text_for_node(
    self,
    *,
    title: str,
    summary: str = "",
    metadata: Optional[Dict[str, Any]] = None,
) -> str:
    """Build the text that gets embedded for a graph node.

    The result is three newline-separated parts passed through
    ``_clean_text``: the title, the summary, and a space-joined list of the
    truthy values found under a fixed set of descriptive metadata keys.
    """
    fields = metadata or {}
    descriptive_keys = (
        "filename", "relative_path", "file_path", "conversation_id", "source",
        "category", "ext", "role",
    )
    # Falsy metadata values (missing, empty string, 0, ...) contribute nothing.
    present_values = (fields.get(name) for name in descriptive_keys)
    meta_line = " ".join(str(value) for value in present_values if value)
    sections = [str(title or ""), str(summary or ""), meta_line]
    return _clean_text("\n".join(sections))
1293
+
1294
def _upsert_vector_item(
    self,
    conn: sqlite3.Connection,
    *,
    item_id: str,
    item_type: str,
    source_node: str,
    text: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> bool:
    """Insert or refresh the stored embedding for one item.

    Returns ``True`` when a row was written. Returns ``False`` when nothing
    was embedded: either the cleaned text is shorter than two characters
    (any existing row for ``item_id`` is deleted), or the stored row already
    has the same text hash, embedding dimension and model id.
    """
    model = self._embedding_model
    cleaned = _clean_text(text)
    if len(cleaned) < 2:
        conn.execute("DELETE FROM vector_embeddings WHERE item_id=?", (item_id,))
        return False

    digest = _sha256_text(cleaned)
    stored = conn.execute(
        """
        SELECT text_hash, embedding_dim, embedding_model
        FROM vector_embeddings
        WHERE item_id=?
        """,
        (item_id,),
    ).fetchone()
    if stored:
        unchanged = (
            stored["text_hash"] == digest
            and stored["embedding_dim"] == model.dim
            and stored["embedding_model"] == model.model_id
        )
        if unchanged:
            return False

    # Only the first 50k characters are embedded; the hash covers the full text.
    vector = model.embed(cleaned[:50_000])
    blob = model.encode(vector)
    values = (
        item_id,
        item_type,
        source_node,
        digest,
        blob,
        model.dim,
        model.model_id,
        _json(metadata),
        _now(),
    )
    conn.execute(
        """
        INSERT INTO vector_embeddings(
            item_id, item_type, source_node, text_hash, embedding,
            embedding_dim, embedding_model, metadata_json, indexed_at
        )
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(item_id) DO UPDATE SET
            item_type=excluded.item_type,
            source_node=excluded.source_node,
            text_hash=excluded.text_hash,
            embedding=excluded.embedding,
            embedding_dim=excluded.embedding_dim,
            embedding_model=excluded.embedding_model,
            metadata_json=excluded.metadata_json,
            indexed_at=excluded.indexed_at
        """,
        values,
    )
    return True
1355
+
1356
def _upsert_chunk(
    self,
    conn: sqlite3.Connection,
    *,
    chunk_id: str,
    source_node: str,
    text: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> None:
    """Store a chunk row and refresh its vector entry.

    The chunk is written with ``INSERT OR REPLACE``. Its embedding is keyed
    by ``chunk_id``, and the owning node is recorded in the vector metadata
    under ``parent_source_node``.
    """
    chunk_meta = metadata or {}
    chunk_row = (chunk_id, source_node, text, _json(chunk_meta), _now())
    conn.execute(
        "INSERT OR REPLACE INTO chunks(id, source_node, text, metadata_json, created_at) "
        "VALUES (?, ?, ?, ?, ?)",
        chunk_row,
    )

    vector_meta = dict(chunk_meta)
    vector_meta["parent_source_node"] = source_node
    self._upsert_vector_item(
        conn,
        item_id=chunk_id,
        item_type="chunk",
        source_node=chunk_id,
        text=text,
        metadata=vector_meta,
    )
1379
+
1230
1380
  # ── Local folder sources → Graph RAG ──────────────────────────────────
1231
1381
 
1232
1382
  def discover_local_roots(self) -> Dict[str, Any]:
@@ -2052,16 +2202,12 @@ class KnowledgeGraphStore:
2052
2202
  summary=chunk[:500],
2053
2203
  metadata={"index": index, "source_node": file_node_id, "source_id": source_id},
2054
2204
  )
2055
- conn.execute(
2056
- "INSERT OR REPLACE INTO chunks(id, source_node, text, metadata_json, created_at) "
2057
- "VALUES (?, ?, ?, ?, ?)",
2058
- (
2059
- chunk_id,
2060
- file_node_id,
2061
- chunk,
2062
- _json({"index": index, "source_node": file_node_id, "source_id": source_id}),
2063
- _now(),
2064
- ),
2205
+ self._upsert_chunk(
2206
+ conn,
2207
+ chunk_id=chunk_id,
2208
+ source_node=file_node_id,
2209
+ text=chunk,
2210
+ metadata={"index": index, "source_node": file_node_id, "source_id": source_id},
2065
2211
  )
2066
2212
  self._upsert_edge(conn, file_node_id, chunk_id, "포함함", weight=0.7, metadata={"source": "local_scan"})
2067
2213
 
@@ -2494,11 +2640,12 @@ class KnowledgeGraphStore:
2494
2640
  summary=chunk[:500],
2495
2641
  metadata={"index": index, "source_node": node_id},
2496
2642
  )
2497
- conn.execute(
2498
- "INSERT OR REPLACE INTO chunks(id, source_node, text, metadata_json, created_at) "
2499
- "VALUES (?, ?, ?, ?, ?)",
2500
- (chunk_id, node_id, chunk,
2501
- _json({"index": index, "source_node": node_id}), _now()),
2643
+ self._upsert_chunk(
2644
+ conn,
2645
+ chunk_id=chunk_id,
2646
+ source_node=node_id,
2647
+ text=chunk,
2648
+ metadata={"index": index, "source_node": node_id},
2502
2649
  )
2503
2650
  self._upsert_edge(conn, node_id, chunk_id, "포함함")
2504
2651
 
@@ -2621,11 +2768,12 @@ class KnowledgeGraphStore:
2621
2768
  summary=chunk[:500],
2622
2769
  metadata={"index": index, "source_node": file_id},
2623
2770
  )
2624
- conn.execute(
2625
- "INSERT OR REPLACE INTO chunks(id, source_node, text, metadata_json, created_at) "
2626
- "VALUES (?, ?, ?, ?, ?)",
2627
- (chunk_id, file_id, chunk,
2628
- _json({"index": index, "source_node": file_id}), _now()),
2771
+ self._upsert_chunk(
2772
+ conn,
2773
+ chunk_id=chunk_id,
2774
+ source_node=file_id,
2775
+ text=chunk,
2776
+ metadata={"index": index, "source_node": file_id},
2629
2777
  )
2630
2778
  self._upsert_edge(conn, file_id, chunk_id, "포함함")
2631
2779
 
@@ -3168,6 +3316,486 @@ class KnowledgeGraphStore:
3168
3316
  ]
3169
3317
  return {"node_id": node_id, "neighbors": nodes, "edges": edges}
3170
3318
 
3319
def get_node(self, node_id: str) -> Dict[str, Any]:
    """Return one graph node together with its edge count.

    Raises ``ValueError`` when ``node_id`` is blank or no such node exists.
    ``degree`` counts edges where the node is either endpoint.
    """
    wanted = str(node_id or "").strip()
    if not wanted:
        raise ValueError("node_id required")

    node_table, edge_table = self._read_tables()
    with self._connect() as conn:
        record = conn.execute(
            f"""
            SELECT id, type, title, summary, metadata_json, updated_at
            FROM {node_table}
            WHERE id=?
            """,
            (wanted,),
        ).fetchone()
        if not record:
            raise ValueError(f"graph node not found: {wanted}")

        degree_row = conn.execute(
            f"SELECT COUNT(*) AS c FROM {edge_table} WHERE from_node=? OR to_node=?",
            (wanted, wanted),
        ).fetchone()

        node = {column: record[column] for column in ("id", "type", "title", "summary")}
        node["metadata"] = _safe_loads(record["metadata_json"])
        node["updated_at"] = record["updated_at"]
        node["degree"] = degree_row["c"]
        return node
3348
+
3349
def relationship_search(
    self,
    *,
    query: str = "",
    node_id: str = "",
    relationship_type: str = "",
    limit: int = 30,
) -> Dict[str, Any]:
    """Search edges by endpoint, relationship type and/or free text.

    Every supplied filter is AND-ed together:

    * ``node_id`` keeps edges where the node is either endpoint.
    * ``relationship_type`` is a substring match on the edge type.
    * ``query`` is a substring match on the edge type, the edge metadata
      JSON, or the title/summary of either endpoint.

    ``limit`` is clamped to 1..200. Results are ordered by weight (highest
    first), then newest ``created_at``, then edge id.
    """

    def _contains(term: str) -> str:
        # Escape LIKE metacharacters so user text such as "100%" or
        # "file_name" is matched literally instead of acting as a wildcard.
        escaped = term.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        return f"%{escaped}%"

    query = str(query or "").strip()
    node_id = str(node_id or "").strip()
    relationship_type = str(relationship_type or "").strip()
    limit = max(1, min(int(limit or 30), 200))
    nt, et = self._read_tables()

    # SQLite string literals do not interpret backslashes, so the SQL text
    # below declares a single backslash as the escape character.
    like = "LIKE ? ESCAPE '\\'"
    where: List[str] = []
    params: List[Any] = []
    if node_id:
        where.append("(e.from_node=? OR e.to_node=?)")
        params.extend([node_id, node_id])
    if relationship_type:
        where.append(f"e.type {like}")
        params.append(_contains(relationship_type))
    if query:
        searched = (
            "e.type", "e.metadata_json", "src.title", "dst.title",
            "src.summary", "dst.summary",
        )
        where.append("(" + " OR ".join(f"{column} {like}" for column in searched) + ")")
        params.extend([_contains(query)] * len(searched))
    where_sql = ("WHERE " + " AND ".join(where)) if where else ""

    with self._connect() as conn:
        rows = conn.execute(
            f"""
            SELECT
                e.id, e.from_node, e.to_node, e.type, e.weight, e.metadata_json, e.created_at,
                src.type AS source_type, src.title AS source_title, src.summary AS source_summary,
                src.metadata_json AS source_metadata,
                dst.type AS target_type, dst.title AS target_title, dst.summary AS target_summary,
                dst.metadata_json AS target_metadata
            FROM {et} e
            JOIN {nt} src ON src.id=e.from_node
            JOIN {nt} dst ON dst.id=e.to_node
            {where_sql}
            ORDER BY e.weight DESC, e.created_at DESC, e.id ASC
            LIMIT ?
            """,
            (*params, limit),
        ).fetchall()

    return {
        "query": query,
        "node_id": node_id,
        "relationship_type": relationship_type,
        "relationships": [
            {
                "id": row["id"],
                "type": row["type"],
                "weight": row["weight"],
                "metadata": _safe_loads(row["metadata_json"]),
                "created_at": row["created_at"],
                "source": {
                    "id": row["from_node"],
                    "type": row["source_type"],
                    "title": row["source_title"],
                    "summary": row["source_summary"],
                    "metadata": _safe_loads(row["source_metadata"]),
                },
                "target": {
                    "id": row["to_node"],
                    "type": row["target_type"],
                    "title": row["target_title"],
                    "summary": row["target_summary"],
                    "metadata": _safe_loads(row["target_metadata"]),
                },
            }
            for row in rows
        ],
    }
3423
+
3424
def traverse(self, node_id: str, *, depth: int = 1, limit: int = 100) -> Dict[str, Any]:
    """Collect the neighbourhood of ``node_id`` by breadth-first expansion.

    ``depth`` is the number of expansion rounds, clamped to 0..4. An explicit
    ``0`` returns only the root; ``None`` or an empty string falls back to 1.
    ``limit`` caps the number of visited nodes and is clamped to 1..500.
    Each round reads at most ``limit * 3`` edges, highest weight first.

    Only edges whose two endpoints were both visited are returned, so the
    edge list never points at a node that the ``limit`` cap left out.

    Raises ``ValueError`` when ``node_id`` is blank.
    """
    node_id = str(node_id or "").strip()
    if not node_id:
        raise ValueError("node_id required")
    # ``depth or 1`` would silently turn a requested depth of 0 into 1.
    depth = 1 if depth in (None, "") else int(depth)
    depth = max(0, min(depth, 4))
    limit = max(1, min(int(limit or 100), 500))
    nt, et = self._read_tables()
    visited = {node_id}
    frontier = {node_id}
    edges_by_id: Dict[str, Dict[str, Any]] = {}
    with self._connect() as conn:
        for _ in range(depth):
            if not frontier or len(visited) >= limit:
                break
            batch = list(frontier)
            placeholders = ",".join("?" * len(batch))
            rows = conn.execute(
                f"""
                SELECT id, from_node, to_node, type, weight, metadata_json
                FROM {et}
                WHERE from_node IN ({placeholders}) OR to_node IN ({placeholders})
                ORDER BY weight DESC, id ASC
                LIMIT ?
                """,
                (*batch, *batch, limit * 3),
            ).fetchall()
            next_frontier = set()
            for row in rows:
                edges_by_id[row["id"]] = {
                    "id": row["id"],
                    "from": row["from_node"],
                    "to": row["to_node"],
                    "type": row["type"],
                    "weight": row["weight"],
                    "metadata": _safe_loads(row["metadata_json"]),
                }
                for candidate in (row["from_node"], row["to_node"]):
                    if candidate not in visited and len(visited) < limit:
                        visited.add(candidate)
                        next_frontier.add(candidate)
            frontier = next_frontier
        placeholders = ",".join("?" * len(visited))
        node_rows = conn.execute(
            f"""
            SELECT id, type, title, summary, metadata_json, updated_at
            FROM {nt}
            WHERE id IN ({placeholders})
            ORDER BY updated_at DESC, id ASC
            """,
            list(visited),
        ).fetchall()
    # Drop edges that lead to a node skipped once the visit cap was reached.
    edges = [
        edge
        for edge in edges_by_id.values()
        if edge["from"] in visited and edge["to"] in visited
    ]
    return {
        "root": node_id,
        "depth": depth,
        "nodes": [
            {
                "id": row["id"],
                "type": row["type"],
                "title": row["title"],
                "summary": row["summary"],
                "metadata": _safe_loads(row["metadata_json"]),
                "updated_at": row["updated_at"],
            }
            for row in node_rows
        ],
        "edges": edges,
    }
3490
+
3491
+ def _iter_vector_source_items(
3492
+ self,
3493
+ conn: sqlite3.Connection,
3494
+ *,
3495
+ include_nodes: bool = True,
3496
+ include_chunks: bool = True,
3497
+ ) -> List[Dict[str, Any]]:
3498
+ items: List[Dict[str, Any]] = []
3499
+ if include_nodes:
3500
+ for row in conn.execute(
3501
+ """
3502
+ SELECT id, type, title, summary, metadata_json
3503
+ FROM nodes
3504
+ WHERE type <> 'Chunk'
3505
+ ORDER BY updated_at DESC, id ASC
3506
+ """
3507
+ ).fetchall():
3508
+ metadata = _safe_loads(row["metadata_json"])
3509
+ text = self._vector_text_for_node(
3510
+ title=row["title"],
3511
+ summary=row["summary"] or "",
3512
+ metadata=metadata,
3513
+ )
3514
+ if text:
3515
+ items.append({
3516
+ "item_id": row["id"],
3517
+ "item_type": "node",
3518
+ "source_node": row["id"],
3519
+ "text": text,
3520
+ "metadata": {"node_type": row["type"], **metadata},
3521
+ })
3522
+ if include_chunks:
3523
+ for row in conn.execute(
3524
+ """
3525
+ SELECT c.id, c.source_node AS parent_source_node, c.text, c.metadata_json
3526
+ FROM chunks c
3527
+ JOIN nodes n ON n.id=c.id
3528
+ ORDER BY c.created_at DESC, c.id ASC
3529
+ """
3530
+ ).fetchall():
3531
+ metadata = _safe_loads(row["metadata_json"])
3532
+ text = _clean_text(row["text"] or "")
3533
+ if text:
3534
+ items.append({
3535
+ "item_id": row["id"],
3536
+ "item_type": "chunk",
3537
+ "source_node": row["id"],
3538
+ "text": text,
3539
+ "metadata": {**metadata, "parent_source_node": row["parent_source_node"]},
3540
+ })
3541
+ return items
3542
+
3543
def rebuild_vector_index(
    self,
    *,
    full: bool = False,
    include_nodes: bool = True,
    include_chunks: bool = True,
) -> Dict[str, Any]:
    """Rebuild the derived vector index without mutating graph content.

    With ``full=True`` the stored embeddings of the selected item types are
    deleted first, so every item is re-embedded. Otherwise
    ``_upsert_vector_item`` skips items whose stored row is still current.

    Each run is logged in ``vector_index_operations``. On failure a
    ``failed`` row is written (or the existing row updated) with the error
    message, and the original exception is re-raised. The failed row keeps
    the ``include_*`` request flags alongside the duration.

    Returns a summary dict with the operation id, item counters, duration
    in milliseconds and the embedding model id/dimension.
    """
    op_id = f"vector-op:{_sha256_text(f'{time.time()}:{os.getpid()}')[:24]}"
    operation = "rebuild_full" if full else "rebuild_incremental"
    request_meta = {"include_nodes": include_nodes, "include_chunks": include_chunks}
    requested_at = _now()
    started = time.perf_counter()
    try:
        with self._connect() as conn:
            conn.execute(
                """
                INSERT INTO vector_index_operations(
                    id, operation, status, requested_at, started_at, metadata_json
                )
                VALUES (?, ?, 'running', ?, ?, ?)
                """,
                (op_id, operation, requested_at, requested_at, _json(request_meta)),
            )
            if full:
                item_types = [
                    name
                    for name, enabled in (("node", include_nodes), ("chunk", include_chunks))
                    if enabled
                ]
                if item_types:
                    marks = ",".join("?" * len(item_types))
                    conn.execute(
                        f"DELETE FROM vector_embeddings WHERE item_type IN ({marks})",
                        item_types,
                    )
            items = self._iter_vector_source_items(
                conn,
                include_nodes=include_nodes,
                include_chunks=include_chunks,
            )
            indexed = skipped = 0
            for item in items:
                if self._upsert_vector_item(conn, **item):
                    indexed += 1
                else:
                    skipped += 1
            duration_ms = round((time.perf_counter() - started) * 1000, 2)
            conn.execute(
                """
                UPDATE vector_index_operations
                SET status='completed', completed_at=?, items_total=?,
                    items_indexed=?, items_skipped=?, metadata_json=?
                WHERE id=?
                """,
                (
                    _now(),
                    len(items),
                    indexed,
                    skipped,
                    _json({
                        **request_meta,
                        "duration_ms": duration_ms,
                        "embedding_model": self._embedding_model.model_id,
                        "embedding_dim": self._embedding_model.dim,
                    }),
                    op_id,
                ),
            )
            return {
                "status": "completed",
                "operation_id": op_id,
                "full": bool(full),
                "items_total": len(items),
                "items_indexed": indexed,
                "items_skipped": skipped,
                "duration_ms": duration_ms,
                "embedding_model": self._embedding_model.model_id,
                "embedding_dim": self._embedding_model.dim,
            }
    except Exception as exc:
        duration_ms = round((time.perf_counter() - started) * 1000, 2)
        # Uses a fresh connection and an upsert, so the failure is recorded
        # whether or not the 'running' row from the try block still exists.
        with self._connect() as conn:
            conn.execute(
                """
                INSERT INTO vector_index_operations(
                    id, operation, status, requested_at, started_at, completed_at,
                    error_message, metadata_json
                )
                VALUES (?, ?, 'failed', ?, ?, ?, ?, ?)
                ON CONFLICT(id) DO UPDATE SET
                    status='failed',
                    completed_at=excluded.completed_at,
                    error_message=excluded.error_message,
                    metadata_json=excluded.metadata_json
                """,
                (
                    op_id,
                    operation,
                    requested_at,
                    requested_at,
                    _now(),
                    str(exc),
                    _json({**request_meta, "duration_ms": duration_ms}),
                ),
            )
        raise
3652
+
3653
def index_status(self) -> Dict[str, Any]:
    """Report how well the stored embeddings cover the current graph.

    Each source item is classed as *missing* (no stored row), *stale*
    (stored hash, dimension or model differs from the current ones) or
    *ready*. The overall status is ``"ready"`` only when nothing is missing
    or stale. The five most recent index operations are included.
    """
    model = self._embedding_model
    with self._connect() as conn:
        counts_by_type = {}
        for rec in conn.execute(
            "SELECT item_type, COUNT(*) AS count FROM vector_embeddings GROUP BY item_type"
        ):
            counts_by_type[rec["item_type"]] = rec["count"]

        sources = self._iter_vector_source_items(conn)

        stored = {}
        for rec in conn.execute(
            """
            SELECT item_id, text_hash, embedding_dim, embedding_model, indexed_at
            FROM vector_embeddings
            """
        ).fetchall():
            stored[rec["item_id"]] = rec

        recent_ops = conn.execute(
            """
            SELECT id, operation, status, requested_at, started_at, completed_at,
                   items_total, items_indexed, items_skipped, error_message, metadata_json
            FROM vector_index_operations
            ORDER BY requested_at DESC, id DESC
            LIMIT 5
            """
        ).fetchall()

    tally = {"missing": 0, "stale": 0, "ready": 0}
    for source in sources:
        indexed_row = stored.get(source["item_id"])
        digest = _sha256_text(_clean_text(source["text"]))
        if not indexed_row:
            tally["missing"] += 1
            continue
        is_current = (
            indexed_row["text_hash"] == digest
            and indexed_row["embedding_dim"] == model.dim
            and indexed_row["embedding_model"] == model.model_id
        )
        tally["ready" if is_current else "stale"] += 1
    pending = tally["missing"] + tally["stale"]

    plain_columns = (
        "id", "operation", "status", "requested_at", "started_at", "completed_at",
        "items_total", "items_indexed", "items_skipped", "error_message",
    )
    operations = []
    for rec in recent_ops:
        entry = {column: rec[column] for column in plain_columns}
        entry["metadata"] = _safe_loads(rec["metadata_json"])
        operations.append(entry)

    return {
        "status": "needs_reindex" if pending else "ready",
        "storage": {
            "db_path": str(self.db_path),
            "backend": "sqlite",
            "embedding_model": model.model_id,
            "embedding_dim": model.dim,
        },
        "source_items": len(sources),
        "indexed_items": sum(counts_by_type.values()),
        "ready_items": tally["ready"],
        "missing_items": tally["missing"],
        "stale_items": tally["stale"],
        "pending_items": pending,
        "by_item_type": counts_by_type,
        "operations": operations,
    }
3727
+
3728
def vector_search(
    self,
    query: str,
    *,
    limit: int = 30,
    min_score: float = 0.0,
    max_candidates: int = 10_000,
) -> Dict[str, Any]:
    """Rank stored embeddings by similarity to ``query``.

    Only rows written with the current embedding model id and dimension are
    considered. At most ``max_candidates`` of the most recently indexed rows
    are scored (clamped to between ``limit`` and 50,000). ``limit`` is
    clamped to 1..100. Rows scoring below ``min_score`` are dropped.

    Chunk hits report the owning node's id, title, metadata and timestamp
    when available, with the chunk text as the summary.

    The response always carries ``query``, ``embedding_model``,
    ``embedding_dim`` and ``matches``, including for a blank query (which
    returns no matches and embeds nothing).
    """
    query = str(query or "").strip()
    limit = max(1, min(int(limit or 30), 100))
    min_score = float(min_score or 0.0)
    if not query:
        # Same shape as a normal response so callers can read the model
        # fields unconditionally.
        return {
            "query": query,
            "embedding_model": self._embedding_model.model_id,
            "embedding_dim": self._embedding_model.dim,
            "matches": [],
        }
    query_vector = self._embedding_model.embed(query)
    max_candidates = max(limit, min(int(max_candidates or 10_000), 50_000))
    with self._connect() as conn:
        rows = conn.execute(
            """
            SELECT
                ve.item_id, ve.item_type, ve.source_node, ve.embedding,
                ve.embedding_dim, ve.embedding_model, ve.metadata_json AS vector_metadata,
                n.type AS node_type, n.title AS node_title, n.summary AS node_summary,
                n.metadata_json AS node_metadata, n.updated_at AS node_updated_at,
                c.text AS chunk_text, c.source_node AS parent_node_id,
                pn.type AS parent_type, pn.title AS parent_title,
                pn.summary AS parent_summary, pn.metadata_json AS parent_metadata,
                pn.updated_at AS parent_updated_at
            FROM vector_embeddings ve
            LEFT JOIN nodes n ON n.id=ve.source_node
            LEFT JOIN chunks c ON c.id=ve.item_id
            LEFT JOIN nodes pn ON pn.id=c.source_node
            WHERE ve.embedding_model=? AND ve.embedding_dim=?
            ORDER BY ve.indexed_at DESC
            LIMIT ?
            """,
            (self._embedding_model.model_id, self._embedding_model.dim, max_candidates),
        ).fetchall()
    scored = []
    for row in rows:
        vector = self._embedding_model.decode(row["embedding"], row["embedding_dim"])
        score = self._embedding_model.similarity(query_vector, vector)
        if score < min_score:
            continue
        is_chunk = row["item_type"] == "chunk"
        summary = row["chunk_text"] if is_chunk and row["chunk_text"] else row["node_summary"]
        parent_metadata = _safe_loads(row["parent_metadata"])
        node_metadata = _safe_loads(row["node_metadata"])
        scored.append({
            "id": row["item_id"],
            "node_id": row["parent_node_id"] if is_chunk and row["parent_node_id"] else row["source_node"],
            "item_type": row["item_type"],
            "type": "Chunk" if is_chunk else row["node_type"],
            "title": row["parent_title"] if is_chunk and row["parent_title"] else row["node_title"],
            "summary": _clean_text(summary or "")[:1000],
            "score": round(float(score), 6),
            "metadata": {
                **(parent_metadata if is_chunk else node_metadata),
                "vector": _safe_loads(row["vector_metadata"]),
                "parent_node_id": row["parent_node_id"],
                "parent_type": row["parent_type"],
            },
            "updated_at": row["parent_updated_at"] if is_chunk and row["parent_updated_at"] else row["node_updated_at"],
        })
    # Best score first; ties go to the most recently updated item.
    scored.sort(key=lambda item: (item["score"], item.get("updated_at") or ""), reverse=True)
    return {
        "query": query,
        "embedding_model": self._embedding_model.model_id,
        "embedding_dim": self._embedding_model.dim,
        "matches": scored[:limit],
    }
3798
+
3171
3799
  def delete_conversation(self, conversation_id: str) -> Dict[str, Any]:
3172
3800
  conversation_id = str(conversation_id or "").strip()
3173
3801
  if not conversation_id: