npm - ltcai - Versions diffs - 2.2.2 → 3.0.1 - Mend

ltcai 2.2.2 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78)

package/README.md +66 -27
package/codex_telegram_bot.py +6 -2
package/docs/CHANGELOG.md +154 -0
package/docs/V3_BACKEND_ARCHITECTURE.md +138 -0
package/docs/V3_FRONTEND.md +136 -0
package/knowledge_graph.py +649 -21
package/latticeai/__init__.py +1 -1
package/latticeai/api/admin.py +47 -0
package/latticeai/api/agents.py +54 -31
package/latticeai/api/auth.py +1 -1
package/latticeai/api/chat.py +10 -2
package/latticeai/api/search.py +236 -0
package/latticeai/api/static_routes.py +21 -2
package/latticeai/core/config.py +16 -0
package/latticeai/core/embedding_providers.py +502 -0
package/latticeai/core/local_embeddings.py +86 -0
package/latticeai/core/logging_safety.py +62 -0
package/latticeai/core/workspace_os.py +1 -1
package/latticeai/server_app.py +49 -1
package/latticeai/services/agent_runtime.py +245 -0
package/latticeai/services/search_service.py +346 -0
package/package.json +8 -4
package/static/account.html +9 -4
package/static/activity.html +4 -4
package/static/admin.html +8 -3
package/static/agents.html +4 -4
package/static/chat.html +16 -11
package/static/css/reference/account.css +439 -0
package/static/css/reference/admin.css +610 -0
package/static/css/reference/base.css +1658 -0
package/static/{lattice-reference.css → css/reference/chat.css} +271 -3633
package/static/css/reference/graph.css +1016 -0
package/static/css/responsive.css +248 -1
package/static/css/tokens.css +132 -126
package/static/favicon.ico +0 -0
package/static/graph.html +9 -4
package/static/manifest.json +3 -3
package/static/platform.css +1 -1
package/static/plugins.html +4 -4
package/static/scripts/account.js +4 -4
package/static/scripts/chat.js +227 -77
package/static/scripts/workspace.js +78 -0
package/static/sw.js +5 -3
package/static/v3/css/lattice.base.css +128 -0
package/static/v3/css/lattice.components.css +447 -0
package/static/v3/css/lattice.shell.css +407 -0
package/static/v3/css/lattice.tokens.css +132 -0
package/static/v3/css/lattice.views.css +277 -0
package/static/v3/index.html +40 -0
package/static/v3/js/app.js +26 -0
package/static/v3/js/core/api.js +327 -0
package/static/v3/js/core/components.js +215 -0
package/static/v3/js/core/dom.js +148 -0
package/static/v3/js/core/fixtures.js +171 -0
package/static/v3/js/core/router.js +37 -0
package/static/v3/js/core/routes.js +73 -0
package/static/v3/js/core/shell.js +363 -0
package/static/v3/js/core/store.js +113 -0
package/static/v3/js/views/admin-audit.js +185 -0
package/static/v3/js/views/admin-permissions.js +178 -0
package/static/v3/js/views/admin-policies.js +103 -0
package/static/v3/js/views/admin-private-vpc.js +138 -0
package/static/v3/js/views/admin-security.js +181 -0
package/static/v3/js/views/admin-users.js +168 -0
package/static/v3/js/views/agents.js +194 -0
package/static/v3/js/views/chat.js +450 -0
package/static/v3/js/views/files.js +180 -0
package/static/v3/js/views/home.js +119 -0
package/static/v3/js/views/hybrid-search.js +195 -0
package/static/v3/js/views/knowledge-graph.js +238 -0
package/static/v3/js/views/models.js +247 -0
package/static/v3/js/views/my-computer.js +237 -0
package/static/v3/js/views/pipeline.js +161 -0
package/static/v3/js/views/settings.js +258 -0
package/static/workflows.html +4 -4
package/static/workspace.css +408 -14
package/static/workspace.html +43 -24
package/telegram_bot.py +18 -14

package/knowledge_graph.py CHANGED

@@ -16,6 +16,7 @@ import platform
 import re
 import shutil
 import sqlite3
+import time
 import zipfile
 from collections import Counter
 from datetime import datetime
@@ -30,6 +31,8 @@ except Exception:  # pragma: no cover - v2 schema is optional at import time
     EdgeType = None   # type: ignore[assignment]
     _exec_script = None  # type: ignore[assignment]
+from latticeai.core.local_embeddings import LocalEmbeddingModel
 # Default read source for the graph queries: v2 reconstruction views.
 # Override with LATTICEAI_KG_READ_V2=0 to fall back to the legacy tables.
 _READ_FROM_V2_DEFAULT = os.getenv("LATTICEAI_KG_READ_V2", "1") != "0"
@@ -806,11 +809,16 @@ def _topic_candidates(text: str, limit: int = 8) -> List[str]:
 class KnowledgeGraphStore:
-    def __init__(self, db_path: Path, blob_dir: Path):
+    def __init__(self, db_path: Path, blob_dir: Path, embedder: Any = None):
         self.db_path = Path(db_path)
         self.blob_dir = Path(blob_dir)
         self.db_path.parent.mkdir(parents=True, exist_ok=True)
         self.blob_dir.mkdir(parents=True, exist_ok=True)
+        # The embedder is swappable behind a fixed interface
+        # (model_id/dim/embed/encode/decode/similarity). Defaults to the
+        # deterministic, offline hash model so the store works with no config;
+        # server_app injects a provider-backed embedder from Config.
+        self._embedding_model = embedder if embedder is not None else LocalEmbeddingModel()
         self._init_db()
         # Read graph queries from the v2 projection (kgv2_* views) when available.
         # Toggle off (e.g. in tests) to compare against the legacy tables.
@@ -909,6 +917,31 @@ class KnowledgeGraphStore:
                   UNIQUE(source_id, relative_path),
                   FOREIGN KEY(source_id) REFERENCES knowledge_sources(id) ON DELETE CASCADE
                 );
+                CREATE TABLE IF NOT EXISTS vector_embeddings (
+                  item_id TEXT PRIMARY KEY,
+                  item_type TEXT NOT NULL,
+                  source_node TEXT NOT NULL,
+                  text_hash TEXT NOT NULL,
+                  embedding BLOB NOT NULL,
+                  embedding_dim INTEGER NOT NULL,
+                  embedding_model TEXT NOT NULL,
+                  metadata_json TEXT NOT NULL CHECK (json_valid(metadata_json)),
+                  indexed_at TEXT NOT NULL,
+                  FOREIGN KEY(source_node) REFERENCES nodes(id) ON DELETE CASCADE
+                );
+                CREATE TABLE IF NOT EXISTS vector_index_operations (
+                  id TEXT PRIMARY KEY,
+                  operation TEXT NOT NULL,
+                  status TEXT NOT NULL,
+                  requested_at TEXT NOT NULL,
+                  started_at TEXT,
+                  completed_at TEXT,
+                  items_total INTEGER NOT NULL DEFAULT 0,
+                  items_indexed INTEGER NOT NULL DEFAULT 0,
+                  items_skipped INTEGER NOT NULL DEFAULT 0,
+                  error_message TEXT,
+                  metadata_json TEXT NOT NULL CHECK (json_valid(metadata_json))
+                );
                 CREATE INDEX IF NOT EXISTS idx_nodes_type ON nodes(type);
                 CREATE INDEX IF NOT EXISTS idx_edges_from ON edges(from_node);
                 CREATE INDEX IF NOT EXISTS idx_edges_to ON edges(to_node);
@@ -917,6 +950,10 @@ class KnowledgeGraphStore:
                 CREATE INDEX IF NOT EXISTS idx_local_file_index_source ON local_file_index(source_id);
                 CREATE INDEX IF NOT EXISTS idx_local_file_index_status ON local_file_index(status);
                 CREATE INDEX IF NOT EXISTS idx_local_file_index_graph_node ON local_file_index(graph_node_id);
+                CREATE INDEX IF NOT EXISTS idx_vector_embeddings_type ON vector_embeddings(item_type);
+                CREATE INDEX IF NOT EXISTS idx_vector_embeddings_source ON vector_embeddings(source_node);
+                CREATE INDEX IF NOT EXISTS idx_vector_embeddings_model ON vector_embeddings(embedding_model);
+                CREATE INDEX IF NOT EXISTS idx_vector_index_operations_requested ON vector_index_operations(requested_at);
                 """
             )
             conn.execute(
@@ -1198,6 +1235,15 @@ class KnowledgeGraphStore:
         # dual-write: project into the v2 graph on the same transaction
         self._v2_project_node(conn, node_id, node_type, title_s, summary_s, meta_json,
                               created_at=now, updated_at=now)
+        if node_type != "Chunk":
+            self._upsert_vector_item(
+                conn,
+                item_id=node_id,
+                item_type="node",
+                source_node=node_id,
+                text=self._vector_text_for_node(title=title_s, summary=summary_s, metadata=metadata),
+                metadata={"node_type": node_type, **(metadata or {})},
+            )
         return node_id
     def _upsert_edge(
@@ -1227,6 +1273,110 @@ class KnowledgeGraphStore:
                               edge_id=edge_id, created_at=now)
         return edge_id
+    def _vector_text_for_node(
+        self,
+        *,
+        title: str,
+        summary: str = "",
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> str:
+        metadata = metadata or {}
+        meta_parts = []
+        for key in (
+            "filename", "relative_path", "file_path", "conversation_id", "source",
+            "category", "ext", "role",
+        ):
+            value = metadata.get(key)
+            if value:
+                meta_parts.append(str(value))
+        return _clean_text("\n".join([str(title or ""), str(summary or ""), " ".join(meta_parts)]))
+    def _upsert_vector_item(
+        self,
+        conn: sqlite3.Connection,
+        *,
+        item_id: str,
+        item_type: str,
+        source_node: str,
+        text: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> bool:
+        text = _clean_text(text)
+        if len(text) < 2:
+            conn.execute("DELETE FROM vector_embeddings WHERE item_id=?", (item_id,))
+            return False
+        text_hash = _sha256_text(text)
+        existing = conn.execute(
+            """
+            SELECT text_hash, embedding_dim, embedding_model
+            FROM vector_embeddings
+            WHERE item_id=?
+            """,
+            (item_id,),
+        ).fetchone()
+        if (
+            existing
+            and existing["text_hash"] == text_hash
+            and existing["embedding_dim"] == self._embedding_model.dim
+            and existing["embedding_model"] == self._embedding_model.model_id
+        ):
+            return False
+        embedding = self._embedding_model.encode(self._embedding_model.embed(text[:50_000]))
+        conn.execute(
+            """
+            INSERT INTO vector_embeddings(
+              item_id, item_type, source_node, text_hash, embedding,
+              embedding_dim, embedding_model, metadata_json, indexed_at
+            )
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+            ON CONFLICT(item_id) DO UPDATE SET
+              item_type=excluded.item_type,
+              source_node=excluded.source_node,
+              text_hash=excluded.text_hash,
+              embedding=excluded.embedding,
+              embedding_dim=excluded.embedding_dim,
+              embedding_model=excluded.embedding_model,
+              metadata_json=excluded.metadata_json,
+              indexed_at=excluded.indexed_at
+            """,
+            (
+                item_id,
+                item_type,
+                source_node,
+                text_hash,
+                embedding,
+                self._embedding_model.dim,
+                self._embedding_model.model_id,
+                _json(metadata),
+                _now(),
+            ),
+        )
+        return True
+    def _upsert_chunk(
+        self,
+        conn: sqlite3.Connection,
+        *,
+        chunk_id: str,
+        source_node: str,
+        text: str,
+        metadata: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        metadata = metadata or {}
+        conn.execute(
+            "INSERT OR REPLACE INTO chunks(id, source_node, text, metadata_json, created_at) "
+            "VALUES (?, ?, ?, ?, ?)",
+            (chunk_id, source_node, text, _json(metadata), _now()),
+        )
+        self._upsert_vector_item(
+            conn,
+            item_id=chunk_id,
+            item_type="chunk",
+            source_node=chunk_id,
+            text=text,
+            metadata={**metadata, "parent_source_node": source_node},
+        )
     # ── Local folder sources → Graph RAG ──────────────────────────────────
     def discover_local_roots(self) -> Dict[str, Any]:
@@ -2052,16 +2202,12 @@ class KnowledgeGraphStore:
                 summary=chunk[:500],
                 metadata={"index": index, "source_node": file_node_id, "source_id": source_id},
             )
-            conn.execute(
-                "INSERT OR REPLACE INTO chunks(id, source_node, text, metadata_json, created_at) "
-                "VALUES (?, ?, ?, ?, ?)",
-                (
-                    chunk_id,
-                    file_node_id,
-                    chunk,
-                    _json({"index": index, "source_node": file_node_id, "source_id": source_id}),
-                    _now(),
-                ),
+            self._upsert_chunk(
+                conn,
+                chunk_id=chunk_id,
+                source_node=file_node_id,
+                text=chunk,
+                metadata={"index": index, "source_node": file_node_id, "source_id": source_id},
             )
             self._upsert_edge(conn, file_node_id, chunk_id, "포함함", weight=0.7, metadata={"source": "local_scan"})
@@ -2494,11 +2640,12 @@ class KnowledgeGraphStore:
                     summary=chunk[:500],
                     metadata={"index": index, "source_node": node_id},
                 )
-                conn.execute(
-                    "INSERT OR REPLACE INTO chunks(id, source_node, text, metadata_json, created_at) "
-                    "VALUES (?, ?, ?, ?, ?)",
-                    (chunk_id, node_id, chunk,
-                     _json({"index": index, "source_node": node_id}), _now()),
+                self._upsert_chunk(
+                    conn,
+                    chunk_id=chunk_id,
+                    source_node=node_id,
+                    text=chunk,
+                    metadata={"index": index, "source_node": node_id},
                 )
                 self._upsert_edge(conn, node_id, chunk_id, "포함함")
@@ -2621,11 +2768,12 @@ class KnowledgeGraphStore:
                     summary=chunk[:500],
                     metadata={"index": index, "source_node": file_id},
                 )
-                conn.execute(
-                    "INSERT OR REPLACE INTO chunks(id, source_node, text, metadata_json, created_at) "
-                    "VALUES (?, ?, ?, ?, ?)",
-                    (chunk_id, file_id, chunk,
-                     _json({"index": index, "source_node": file_id}), _now()),
+                self._upsert_chunk(
+                    conn,
+                    chunk_id=chunk_id,
+                    source_node=file_id,
+                    text=chunk,
+                    metadata={"index": index, "source_node": file_id},
                 )
                 self._upsert_edge(conn, file_id, chunk_id, "포함함")
@@ -3168,6 +3316,486 @@ class KnowledgeGraphStore:
                 ]
         return {"node_id": node_id, "neighbors": nodes, "edges": edges}
+    def get_node(self, node_id: str) -> Dict[str, Any]:
+        node_id = str(node_id or "").strip()
+        if not node_id:
+            raise ValueError("node_id required")
+        nt, et = self._read_tables()
+        with self._connect() as conn:
+            row = conn.execute(
+                f"""
+                SELECT id, type, title, summary, metadata_json, updated_at
+                FROM {nt}
+                WHERE id=?
+                """,
+                (node_id,),
+            ).fetchone()
+            if not row:
+                raise ValueError(f"graph node not found: {node_id}")
+            degree = conn.execute(
+                f"SELECT COUNT(*) AS c FROM {et} WHERE from_node=? OR to_node=?",
+                (node_id, node_id),
+            ).fetchone()["c"]
+        return {
+            "id": row["id"],
+            "type": row["type"],
+            "title": row["title"],
+            "summary": row["summary"],
+            "metadata": _safe_loads(row["metadata_json"]),
+            "updated_at": row["updated_at"],
+            "degree": degree,
+        }
+    def relationship_search(
+        self,
+        *,
+        query: str = "",
+        node_id: str = "",
+        relationship_type: str = "",
+        limit: int = 30,
+    ) -> Dict[str, Any]:
+        query = str(query or "").strip()
+        node_id = str(node_id or "").strip()
+        relationship_type = str(relationship_type or "").strip()
+        limit = max(1, min(int(limit or 30), 200))
+        nt, et = self._read_tables()
+        where = []
+        params: List[Any] = []
+        if node_id:
+            where.append("(e.from_node=? OR e.to_node=?)")
+            params.extend([node_id, node_id])
+        if relationship_type:
+            where.append("e.type LIKE ?")
+            params.append(f"%{relationship_type}%")
+        if query:
+            where.append(
+                "(e.type LIKE ? OR e.metadata_json LIKE ? OR src.title LIKE ? OR dst.title LIKE ? OR src.summary LIKE ? OR dst.summary LIKE ?)"
+            )
+            params.extend([f"%{query}%"] * 6)
+        where_sql = "WHERE " + " AND ".join(where) if where else ""
+        with self._connect() as conn:
+            rows = conn.execute(
+                f"""
+                SELECT
+                  e.id, e.from_node, e.to_node, e.type, e.weight, e.metadata_json, e.created_at,
+                  src.type AS source_type, src.title AS source_title, src.summary AS source_summary,
+                  src.metadata_json AS source_metadata,
+                  dst.type AS target_type, dst.title AS target_title, dst.summary AS target_summary,
+                  dst.metadata_json AS target_metadata
+                FROM {et} e
+                JOIN {nt} src ON src.id=e.from_node
+                JOIN {nt} dst ON dst.id=e.to_node
+                {where_sql}
+                ORDER BY e.weight DESC, e.created_at DESC, e.id ASC
+                LIMIT ?
+                """,
+                (*params, limit),
+            ).fetchall()
+        return {
+            "query": query,
+            "node_id": node_id,
+            "relationship_type": relationship_type,
+            "relationships": [
+                {
+                    "id": row["id"],
+                    "type": row["type"],
+                    "weight": row["weight"],
+                    "metadata": _safe_loads(row["metadata_json"]),
+                    "created_at": row["created_at"],
+                    "source": {
+                        "id": row["from_node"],
+                        "type": row["source_type"],
+                        "title": row["source_title"],
+                        "summary": row["source_summary"],
+                        "metadata": _safe_loads(row["source_metadata"]),
+                    },
+                    "target": {
+                        "id": row["to_node"],
+                        "type": row["target_type"],
+                        "title": row["target_title"],
+                        "summary": row["target_summary"],
+                        "metadata": _safe_loads(row["target_metadata"]),
+                    },
+                }
+                for row in rows
+            ],
+        }
+    def traverse(self, node_id: str, *, depth: int = 1, limit: int = 100) -> Dict[str, Any]:
+        node_id = str(node_id or "").strip()
+        if not node_id:
+            raise ValueError("node_id required")
+        depth = max(0, min(int(depth or 1), 4))
+        limit = max(1, min(int(limit or 100), 500))
+        nt, et = self._read_tables()
+        visited = {node_id}
+        frontier = {node_id}
+        edges_by_id: Dict[str, Dict[str, Any]] = {}
+        with self._connect() as conn:
+            for _ in range(depth):
+                if not frontier or len(visited) >= limit:
+                    break
+                placeholders = ",".join("?" * len(frontier))
+                rows = conn.execute(
+                    f"""
+                    SELECT id, from_node, to_node, type, weight, metadata_json
+                    FROM {et}
+                    WHERE from_node IN ({placeholders}) OR to_node IN ({placeholders})
+                    ORDER BY weight DESC, id ASC
+                    LIMIT ?
+                    """,
+                    (*frontier, *frontier, limit * 3),
+                ).fetchall()
+                next_frontier = set()
+                for row in rows:
+                    edges_by_id[row["id"]] = {
+                        "id": row["id"],
+                        "from": row["from_node"],
+                        "to": row["to_node"],
+                        "type": row["type"],
+                        "weight": row["weight"],
+                        "metadata": _safe_loads(row["metadata_json"]),
+                    }
+                    for candidate in (row["from_node"], row["to_node"]):
+                        if candidate not in visited and len(visited) < limit:
+                            visited.add(candidate)
+                            next_frontier.add(candidate)
+                frontier = next_frontier
+            placeholders = ",".join("?" * len(visited))
+            node_rows = conn.execute(
+                f"""
+                SELECT id, type, title, summary, metadata_json, updated_at
+                FROM {nt}
+                WHERE id IN ({placeholders})
+                ORDER BY updated_at DESC, id ASC
+                """,
+                list(visited),
+            ).fetchall()
+        return {
+            "root": node_id,
+            "depth": depth,
+            "nodes": [
+                {
+                    "id": row["id"],
+                    "type": row["type"],
+                    "title": row["title"],
+                    "summary": row["summary"],
+                    "metadata": _safe_loads(row["metadata_json"]),
+                    "updated_at": row["updated_at"],
+                }
+                for row in node_rows
+            ],
+            "edges": list(edges_by_id.values()),
+        }
+    def _iter_vector_source_items(
+        self,
+        conn: sqlite3.Connection,
+        *,
+        include_nodes: bool = True,
+        include_chunks: bool = True,
+    ) -> List[Dict[str, Any]]:
+        items: List[Dict[str, Any]] = []
+        if include_nodes:
+            for row in conn.execute(
+                """
+                SELECT id, type, title, summary, metadata_json
+                FROM nodes
+                WHERE type <> 'Chunk'
+                ORDER BY updated_at DESC, id ASC
+                """
+            ).fetchall():
+                metadata = _safe_loads(row["metadata_json"])
+                text = self._vector_text_for_node(
+                    title=row["title"],
+                    summary=row["summary"] or "",
+                    metadata=metadata,
+                )
+                if text:
+                    items.append({
+                        "item_id": row["id"],
+                        "item_type": "node",
+                        "source_node": row["id"],
+                        "text": text,
+                        "metadata": {"node_type": row["type"], **metadata},
+                    })
+        if include_chunks:
+            for row in conn.execute(
+                """
+                SELECT c.id, c.source_node AS parent_source_node, c.text, c.metadata_json
+                FROM chunks c
+                JOIN nodes n ON n.id=c.id
+                ORDER BY c.created_at DESC, c.id ASC
+                """
+            ).fetchall():
+                metadata = _safe_loads(row["metadata_json"])
+                text = _clean_text(row["text"] or "")
+                if text:
+                    items.append({
+                        "item_id": row["id"],
+                        "item_type": "chunk",
+                        "source_node": row["id"],
+                        "text": text,
+                        "metadata": {**metadata, "parent_source_node": row["parent_source_node"]},
+                    })
+        return items
+    def rebuild_vector_index(
+        self,
+        *,
+        full: bool = False,
+        include_nodes: bool = True,
+        include_chunks: bool = True,
+    ) -> Dict[str, Any]:
+        """Rebuild the derived vector index without mutating graph content."""
+        op_id = f"vector-op:{_sha256_text(f'{time.time()}:{os.getpid()}')[:24]}"
+        requested_at = _now()
+        started = time.perf_counter()
+        try:
+            with self._connect() as conn:
+                conn.execute(
+                    """
+                    INSERT INTO vector_index_operations(
+                      id, operation, status, requested_at, started_at, metadata_json
+                    )
+                    VALUES (?, ?, 'running', ?, ?, ?)
+                    """,
+                    (
+                        op_id,
+                        "rebuild_full" if full else "rebuild_incremental",
+                        requested_at,
+                        requested_at,
+                        _json({"include_nodes": include_nodes, "include_chunks": include_chunks}),
+                    ),
+                )
+                if full:
+                    filters = []
+                    if include_nodes:
+                        filters.append("'node'")
+                    if include_chunks:
+                        filters.append("'chunk'")
+                    if filters:
+                        conn.execute(f"DELETE FROM vector_embeddings WHERE item_type IN ({','.join(filters)})")
+                items = self._iter_vector_source_items(
+                    conn,
+                    include_nodes=include_nodes,
+                    include_chunks=include_chunks,
+                )
+                indexed = skipped = 0
+                for item in items:
+                    changed = self._upsert_vector_item(conn, **item)
+                    if changed:
+                        indexed += 1
+                    else:
+                        skipped += 1
+                duration_ms = round((time.perf_counter() - started) * 1000, 2)
+                conn.execute(
+                    """
+                    UPDATE vector_index_operations
+                    SET status='completed', completed_at=?, items_total=?,
+                        items_indexed=?, items_skipped=?, metadata_json=?
+                    WHERE id=?
+                    """,
+                    (
+                        _now(),
+                        len(items),
+                        indexed,
+                        skipped,
+                        _json({
+                            "include_nodes": include_nodes,
+                            "include_chunks": include_chunks,
+                            "duration_ms": duration_ms,
+                            "embedding_model": self._embedding_model.model_id,
+                            "embedding_dim": self._embedding_model.dim,
+                        }),
+                        op_id,
+                    ),
+                )
+            return {
+                "status": "completed",
+                "operation_id": op_id,
+                "full": bool(full),
+                "items_total": len(items),
+                "items_indexed": indexed,
+                "items_skipped": skipped,
+                "duration_ms": duration_ms,
+                "embedding_model": self._embedding_model.model_id,
+                "embedding_dim": self._embedding_model.dim,
+            }
+        except Exception as exc:
+            duration_ms = round((time.perf_counter() - started) * 1000, 2)
+            with self._connect() as conn:
+                conn.execute(
+                    """
+                    INSERT INTO vector_index_operations(
+                      id, operation, status, requested_at, started_at, completed_at,
+                      error_message, metadata_json
+                    )
+                    VALUES (?, ?, 'failed', ?, ?, ?, ?, ?)
+                    ON CONFLICT(id) DO UPDATE SET
+                      status='failed',
+                      completed_at=excluded.completed_at,
+                      error_message=excluded.error_message,
+                      metadata_json=excluded.metadata_json
+                    """,
+                    (
+                        op_id,
+                        "rebuild_full" if full else "rebuild_incremental",
+                        requested_at,
+                        requested_at,
+                        _now(),
+                        str(exc),
+                        _json({"duration_ms": duration_ms}),
+                    ),
+                )
+            raise
+    def index_status(self) -> Dict[str, Any]:
+        with self._connect() as conn:
+            vector_counts = {
+                row["item_type"]: row["count"]
+                for row in conn.execute(
+                    "SELECT item_type, COUNT(*) AS count FROM vector_embeddings GROUP BY item_type"
+                )
+            }
+            source_items = self._iter_vector_source_items(conn)
+            vector_rows = {
+                row["item_id"]: row
+                for row in conn.execute(
+                    """
+                    SELECT item_id, text_hash, embedding_dim, embedding_model, indexed_at
+                    FROM vector_embeddings
+                    """
+                ).fetchall()
+            }
+            latest_rows = conn.execute(
+                """
+                SELECT id, operation, status, requested_at, started_at, completed_at,
+                       items_total, items_indexed, items_skipped, error_message, metadata_json
+                FROM vector_index_operations
+                ORDER BY requested_at DESC, id DESC
+                LIMIT 5
+                """
+            ).fetchall()
+        missing = stale = ready = 0
+        for item in source_items:
+            vector_row = vector_rows.get(item["item_id"])
+            expected_hash = _sha256_text(_clean_text(item["text"]))
+            if not vector_row:
+                missing += 1
+            elif (
+                vector_row["text_hash"] != expected_hash
+                or vector_row["embedding_dim"] != self._embedding_model.dim
+                or vector_row["embedding_model"] != self._embedding_model.model_id
+            ):
+                stale += 1
+            else:
+                ready += 1
+        pending = missing + stale
+        return {
+            "status": "ready" if pending == 0 else "needs_reindex",
+            "storage": {
+                "db_path": str(self.db_path),
+                "backend": "sqlite",
+                "embedding_model": self._embedding_model.model_id,
+                "embedding_dim": self._embedding_model.dim,
+            },
+            "source_items": len(source_items),
+            "indexed_items": sum(vector_counts.values()),
+            "ready_items": ready,
+            "missing_items": missing,
+            "stale_items": stale,
+            "pending_items": pending,
+            "by_item_type": vector_counts,
+            "operations": [
+                {
+                    "id": row["id"],
+                    "operation": row["operation"],
+                    "status": row["status"],
+                    "requested_at": row["requested_at"],
+                    "started_at": row["started_at"],
+                    "completed_at": row["completed_at"],
+                    "items_total": row["items_total"],
+                    "items_indexed": row["items_indexed"],
+                    "items_skipped": row["items_skipped"],
+                    "error_message": row["error_message"],
+                    "metadata": _safe_loads(row["metadata_json"]),
+                }
+                for row in latest_rows
+            ],
+        }
+    def vector_search(
+        self,
+        query: str,
+        *,
+        limit: int = 30,
+        min_score: float = 0.0,
+        max_candidates: int = 10_000,
+    ) -> Dict[str, Any]:
+        query = str(query or "").strip()
+        limit = max(1, min(int(limit or 30), 100))
+        min_score = float(min_score or 0.0)
+        if not query:
+            return {"query": query, "matches": []}
+        query_vector = self._embedding_model.embed(query)
+        max_candidates = max(limit, min(int(max_candidates or 10_000), 50_000))
+        with self._connect() as conn:
+            rows = conn.execute(
+                """
+                SELECT
+                  ve.item_id, ve.item_type, ve.source_node, ve.embedding,
+                  ve.embedding_dim, ve.embedding_model, ve.metadata_json AS vector_metadata,
+                  n.type AS node_type, n.title AS node_title, n.summary AS node_summary,
+                  n.metadata_json AS node_metadata, n.updated_at AS node_updated_at,
+                  c.text AS chunk_text, c.source_node AS parent_node_id,
+                  pn.type AS parent_type, pn.title AS parent_title,
+                  pn.summary AS parent_summary, pn.metadata_json AS parent_metadata,
+                  pn.updated_at AS parent_updated_at
+                FROM vector_embeddings ve
+                LEFT JOIN nodes n ON n.id=ve.source_node
+                LEFT JOIN chunks c ON c.id=ve.item_id
+                LEFT JOIN nodes pn ON pn.id=c.source_node
+                WHERE ve.embedding_model=? AND ve.embedding_dim=?
+                ORDER BY ve.indexed_at DESC
+                LIMIT ?
+                """,
+                (self._embedding_model.model_id, self._embedding_model.dim, max_candidates),
+            ).fetchall()
+        scored = []
+        for row in rows:
+            vector = self._embedding_model.decode(row["embedding"], row["embedding_dim"])
+            score = self._embedding_model.similarity(query_vector, vector)
+            if score < min_score:
+                continue
+            is_chunk = row["item_type"] == "chunk"
+            summary = row["chunk_text"] if is_chunk and row["chunk_text"] else row["node_summary"]
+            parent_metadata = _safe_loads(row["parent_metadata"])
+            node_metadata = _safe_loads(row["node_metadata"])
+            scored.append({
+                "id": row["item_id"],
+                "node_id": row["parent_node_id"] if is_chunk and row["parent_node_id"] else row["source_node"],
+                "item_type": row["item_type"],
+                "type": "Chunk" if is_chunk else row["node_type"],
+                "title": row["parent_title"] if is_chunk and row["parent_title"] else row["node_title"],
+                "summary": _clean_text(summary or "")[:1000],
+                "score": round(float(score), 6),
+                "metadata": {
+                    **(parent_metadata if is_chunk else node_metadata),
+                    "vector": _safe_loads(row["vector_metadata"]),
+                    "parent_node_id": row["parent_node_id"],
+                    "parent_type": row["parent_type"],
+                },
+                "updated_at": row["parent_updated_at"] if is_chunk and row["parent_updated_at"] else row["node_updated_at"],
+            })
+        scored.sort(key=lambda item: (item["score"], item.get("updated_at") or ""), reverse=True)
+        return {
+            "query": query,
+            "embedding_model": self._embedding_model.model_id,
+            "embedding_dim": self._embedding_model.dim,
+            "matches": scored[:limit],
+        }
     def delete_conversation(self, conversation_id: str) -> Dict[str, Any]:
         conversation_id = str(conversation_id or "").strip()
         if not conversation_id: