npm - ltcai - Versions diffs - 0.2.1 → 0.3.0 - Mend

ltcai 0.2.1 → 0.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (20)

package/README.md +8 -2
package/auto_setup.py +15 -1
package/docs/CHANGELOG.md +67 -0
package/kg_schema.py +64 -15
package/knowledge_graph.py +499 -31
package/latticeai/core/__init__.py +1 -1
package/latticeai/core/context_builder.py +191 -0
package/latticeai/core/document_generator.py +103 -0
package/llm_router.py +148 -1
package/package.json +2 -2
package/server.py +207 -27
package/static/css/tokens.css +26 -0
package/static/lattice-reference.css +390 -375
package/latticeai/__pycache__/__init__.cpython-314.pyc +0 -0
package/latticeai/api/__pycache__/admin.cpython-314.pyc +0 -0
package/latticeai/api/__pycache__/auth.cpython-314.pyc +0 -0
package/latticeai/core/__pycache__/__init__.cpython-314.pyc +0 -0
package/latticeai/core/__pycache__/audit.cpython-314.pyc +0 -0
package/latticeai/core/__pycache__/security.cpython-314.pyc +0 -0
package/latticeai/core/__pycache__/sessions.cpython-314.pyc +0 -0

package/knowledge_graph.py CHANGED

@@ -6,6 +6,7 @@ portable database so it can later migrate to Neo4j/Postgres without changing
 the ingestion contract.
 """
+import asyncio
 import hashlib
 import json
 import logging
@@ -26,6 +27,12 @@ try:
 except Exception:  # pragma: no cover - v2 schema is optional at import time
     KGStoreV2 = None  # type: ignore[assignment]
+_llm_router_ref = None
+def set_llm_router(router_instance):
+    global _llm_router_ref
+    _llm_router_ref = router_instance
 GRAPH_SCHEMA_VERSION = 1
@@ -365,6 +372,109 @@ def _chunks(text: str, size: int = 1200, overlap: int = 160) -> List[str]:
     return chunks
+_LLM_EXTRACT_CONCEPT_PROMPT = """Extract the key concepts from the following text.
+Return ONLY a JSON array of objects, each with "concept" (string) and "importance" (float 0-1).
+Extract up to {limit} concepts. Focus on named entities, technical terms, and domain-specific nouns.
+Do NOT include common words, stop words, or generic terms.
+Text:
+{text}
+JSON:"""
+_LLM_EXTRACT_TRIPLE_PROMPT = """Extract relationship triples from the following text.
+Return ONLY a JSON array of objects, each with:
+- "subject": source concept (string)
+- "relation": relationship verb (string, Korean or English)
+- "object": target concept (string)
+- "evidence": the sentence supporting this triple (string, max 240 chars)
+- "confidence": how confident you are (float 0-1)
+Extract up to {limit} triples. Focus on meaningful semantic relationships.
+Text:
+{text}
+Concepts already identified: {concepts}
+JSON:"""
+ENABLE_LLM_EXTRACTION = os.getenv("LATTICEAI_LLM_EXTRACTION", "true").lower() in ("1", "true", "yes")
+def _llm_extract_concepts(text: str, limit: int = 12) -> Optional[List[str]]:
+    if not ENABLE_LLM_EXTRACTION or not _llm_router_ref:
+        return None
+    if not _llm_router_ref.current_model_id:
+        return None
+    prompt = _LLM_EXTRACT_CONCEPT_PROMPT.format(text=text[:3000], limit=limit)
+    try:
+        loop = asyncio.get_event_loop()
+        if loop.is_running():
+            import concurrent.futures
+            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+                future = pool.submit(asyncio.run, _llm_router_ref.generate(prompt, max_tokens=1024, temperature=0.1))
+                raw = future.result(timeout=30)
+        else:
+            raw = asyncio.run(_llm_router_ref.generate(prompt, max_tokens=1024, temperature=0.1))
+        raw = raw.strip()
+        if raw.startswith("```"):
+            raw = re.sub(r"^```(?:json)?\s*", "", raw)
+            raw = re.sub(r"\s*```$", "", raw)
+        parsed = json.loads(raw)
+        if isinstance(parsed, list):
+            concepts = []
+            for item in parsed[:limit]:
+                if isinstance(item, dict) and "concept" in item:
+                    concepts.append(item["concept"])
+                elif isinstance(item, str):
+                    concepts.append(item)
+            return concepts if concepts else None
+    except Exception as e:
+        logging.debug("LLM concept extraction failed (falling back to rules): %s", e)
+    return None
+def _llm_extract_triples(text: str, concepts: List[str], limit: int = 20) -> Optional[List[Dict[str, str]]]:
+    if not ENABLE_LLM_EXTRACTION or not _llm_router_ref:
+        return None
+    if not _llm_router_ref.current_model_id:
+        return None
+    prompt = _LLM_EXTRACT_TRIPLE_PROMPT.format(
+        text=text[:3000], limit=limit,
+        concepts=", ".join(concepts[:15]),
+    )
+    try:
+        loop = asyncio.get_event_loop()
+        if loop.is_running():
+            import concurrent.futures
+            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
+                future = pool.submit(asyncio.run, _llm_router_ref.generate(prompt, max_tokens=2048, temperature=0.1))
+                raw = future.result(timeout=30)
+        else:
+            raw = asyncio.run(_llm_router_ref.generate(prompt, max_tokens=2048, temperature=0.1))
+        raw = raw.strip()
+        if raw.startswith("```"):
+            raw = re.sub(r"^```(?:json)?\s*", "", raw)
+            raw = re.sub(r"\s*```$", "", raw)
+        parsed = json.loads(raw)
+        if isinstance(parsed, list):
+            triples = []
+            for item in parsed[:limit]:
+                if isinstance(item, dict) and "subject" in item and "object" in item:
+                    triples.append({
+                        "subject": str(item["subject"]),
+                        "relation": str(item.get("relation", "관련됨")),
+                        "object": str(item["object"]),
+                        "context": str(item.get("evidence", ""))[:240],
+                        "confidence": float(item.get("confidence", 0.8)),
+                    })
+            return triples if triples else None
+    except Exception as e:
+        logging.debug("LLM triple extraction failed (falling back to rules): %s", e)
+    return None
 _CONCEPT_STOP: set = {
     # English stop words
     "the", "and", "for", "with", "this", "that", "from", "into", "which",
@@ -385,7 +495,15 @@ _CONCEPT_STOP: set = {
 def _extract_concepts(text: str, limit: int = 12) -> List[str]:
-    """Extract meaningful named concepts from text.
+    """LLM-first concept extraction with rule-based fallback."""
+    llm_result = _llm_extract_concepts(text, limit)
+    if llm_result:
+        return llm_result
+    return _extract_concepts_rules(text, limit)
+def _extract_concepts_rules(text: str, limit: int = 12) -> List[str]:
+    """Extract meaningful named concepts from text (rule-based).
     Priority order:
     1. Backtick / quoted terms (explicitly technical)
@@ -586,7 +704,19 @@ def _extract_triples(
     concepts: List[str],
     limit: int = 20,
 ) -> List[Dict[str, str]]:
-    """Extract (subject, verb-edge, object, context) triples from text.
+    """LLM-first triple extraction with rule-based fallback."""
+    llm_result = _llm_extract_triples(text, concepts, limit)
+    if llm_result:
+        return llm_result
+    return _extract_triples_rules(text, concepts, limit)
+def _extract_triples_rules(
+    text: str,
+    concepts: List[str],
+    limit: int = 20,
+) -> List[Dict[str, str]]:
+    """Extract (subject, verb-edge, object, context) triples from text (rule-based).
     For each sentence containing ≥2 concepts, infer the verb-form edge label
     from surrounding context and create a directed triple.
@@ -1223,21 +1353,44 @@ class KnowledgeGraphStore:
             from docx import Document
             doc = Document(str(path))
             paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
+            table_lines = []
+            for table in doc.tables:
+                for row in table.rows:
+                    cells = [_clean_text(cell.text) for cell in row.cells]
+                    if any(cells):
+                        table_lines.append("\t".join(cells))
             meta["paragraphs"] = len(paragraphs)
             meta["tables"] = len(doc.tables)
-            text = "\n\n".join(paragraphs)
+            meta["table_rows"] = len(table_lines)
+            text = "\n\n".join([*paragraphs, *table_lines])
         elif ext == ".xlsx":
             from openpyxl import load_workbook
             wb = load_workbook(str(path), read_only=True, data_only=True)
             rows_all = []
+            non_empty_rows = 0
+            non_empty_cells = 0
+            char_count = 0
             for ws in wb.worksheets:
-                rows_all.append(f"[Sheet: {ws.title}]")
+                sheet_rows = []
                 for row in ws.iter_rows(values_only=True):
-                    cells = [str(cell) if cell is not None else "" for cell in row]
-                    rows_all.append("\t".join(cells))
-                    if len("\n".join(rows_all)) > 200_000:
+                    cells = [str(cell).strip() if cell is not None else "" for cell in row]
+                    if not any(cells):
+                        continue
+                    line = "\t".join(cells)
+                    non_empty_rows += 1
+                    non_empty_cells += sum(1 for cell in cells if cell)
+                    sheet_rows.append(line)
+                    char_count += len(line) + 1
+                    if char_count > 200_000:
                         break
+                if sheet_rows:
+                    rows_all.append(f"[Sheet: {ws.title}]")
+                    rows_all.extend(sheet_rows)
+                if char_count > 200_000:
+                    break
             meta["sheets"] = len(wb.worksheets)
+            meta["rows"] = non_empty_rows
+            meta["cells"] = non_empty_cells
             text = "\n".join(rows_all)
         elif ext == ".pptx":
             from pptx import Presentation
@@ -1247,9 +1400,13 @@ class KnowledgeGraphStore:
                 parts = []
                 for shape in slide.shapes:
                     if getattr(shape, "has_text_frame", False):
-                        parts.append(shape.text_frame.text)
-                slides_text.append(f"[Slide {index}]\n" + "\n".join(parts))
+                        slide_text = shape.text_frame.text.strip()
+                        if slide_text:
+                            parts.append(slide_text)
+                if parts:
+                    slides_text.append(f"[Slide {index}]\n" + "\n".join(parts))
             meta["slides"] = len(prs.slides)
+            meta["text_slides"] = len(slides_text)
             text = "\n\n".join(slides_text)
         elif category == "image":
             from PIL import Image
@@ -1362,13 +1519,13 @@ class KnowledgeGraphStore:
               extension=excluded.extension,
               size_bytes=excluded.size_bytes,
               modified_at=excluded.modified_at,
-              sha256=COALESCE(excluded.sha256, local_file_index.sha256),
+              sha256=excluded.sha256,
               last_scanned_at=excluded.last_scanned_at,
-              last_indexed_at=COALESCE(excluded.last_indexed_at, local_file_index.last_indexed_at),
+              last_indexed_at=excluded.last_indexed_at,
               parser_type=excluded.parser_type,
               status=excluded.status,
               error_message=excluded.error_message,
-              graph_node_id=COALESCE(excluded.graph_node_id, local_file_index.graph_node_id),
+              graph_node_id=excluded.graph_node_id,
               deleted=excluded.deleted,
               metadata_json=excluded.metadata_json
             """,
@@ -1381,6 +1538,113 @@ class KnowledgeGraphStore:
         )
         return index_id
+    def _delete_local_file_graph(self, conn: sqlite3.Connection, file_node_id: Optional[str]) -> None:
+        if not file_node_id:
+            return
+        file_row = conn.execute(
+            "SELECT metadata_json FROM nodes WHERE id=?",
+            (file_node_id,),
+        ).fetchone()
+        source_id = None
+        if file_row:
+            source_id = _safe_loads(file_row["metadata_json"]).get("source_id")
+        linked_rows = conn.execute(
+            """
+            SELECT n.id, n.type, n.metadata_json
+            FROM edges e
+            JOIN nodes n ON n.id=e.to_node
+            WHERE e.from_node=?
+            """,
+            (file_node_id,),
+        ).fetchall()
+        owned_ids: set = set()
+        auto_candidate_ids: set = set()
+        for row in linked_rows:
+            metadata = _safe_loads(row["metadata_json"])
+            if row["type"] in {"Chunk", "ImageText", "Section"} or metadata.get("source_node") == file_node_id:
+                owned_ids.add(row["id"])
+            elif metadata.get("auto_extracted") and metadata.get("source") == "local_folder":
+                auto_candidate_ids.add(row["id"])
+        conn.execute("DELETE FROM chunks WHERE source_node=?", (file_node_id,))
+        conn.execute("DELETE FROM edges WHERE from_node=? OR to_node=?", (file_node_id, file_node_id))
+        conn.execute("DELETE FROM nodes WHERE id=?", (file_node_id,))
+        def delete_nodes(node_ids: set) -> None:
+            if not node_ids:
+                return
+            placeholders = ",".join("?" * len(node_ids))
+            params = list(node_ids)
+            conn.execute(f"DELETE FROM chunks WHERE source_node IN ({placeholders})", params)
+            conn.execute(f"DELETE FROM edges WHERE from_node IN ({placeholders}) OR to_node IN ({placeholders})", params * 2)
+            conn.execute(f"DELETE FROM nodes WHERE id IN ({placeholders})", params)
+        delete_nodes(owned_ids)
+        removable_auto_ids: set = set()
+        for node_id in auto_candidate_ids:
+            remaining_edges = conn.execute(
+                "SELECT from_node, to_node FROM edges WHERE from_node=? OR to_node=?",
+                (node_id, node_id),
+            ).fetchall()
+            if all(
+                (row["from_node"] in auto_candidate_ids and row["to_node"] in auto_candidate_ids)
+                for row in remaining_edges
+            ):
+                removable_auto_ids.add(node_id)
+        delete_nodes(removable_auto_ids)
+        if source_id:
+            self._cleanup_local_graph_orphans(conn, str(source_id))
+    def _cleanup_local_graph_orphans(self, conn: sqlite3.Connection, source_id: str) -> None:
+        while True:
+            folder_rows = conn.execute(
+                "SELECT id, metadata_json FROM nodes WHERE type='Folder'"
+            ).fetchall()
+            leaf_ids = []
+            for row in folder_rows:
+                metadata = _safe_loads(row["metadata_json"])
+                if metadata.get("source_id") != source_id:
+                    continue
+                has_children = conn.execute(
+                    "SELECT 1 FROM edges WHERE from_node=? LIMIT 1",
+                    (row["id"],),
+                ).fetchone()
+                if not has_children:
+                    leaf_ids.append(row["id"])
+            if not leaf_ids:
+                break
+            placeholders = ",".join("?" * len(leaf_ids))
+            conn.execute(f"DELETE FROM edges WHERE from_node IN ({placeholders}) OR to_node IN ({placeholders})", leaf_ids * 2)
+            conn.execute(f"DELETE FROM nodes WHERE id IN ({placeholders})", leaf_ids)
+        for node_type in ("Drive", "Computer"):
+            rows = conn.execute("SELECT id FROM nodes WHERE type=?", (node_type,)).fetchall()
+            removable = []
+            for row in rows:
+                has_children = conn.execute(
+                    "SELECT 1 FROM edges WHERE from_node=? LIMIT 1",
+                    (row["id"],),
+                ).fetchone()
+                if not has_children:
+                    removable.append(row["id"])
+            if removable:
+                placeholders = ",".join("?" * len(removable))
+                conn.execute(f"DELETE FROM edges WHERE from_node IN ({placeholders}) OR to_node IN ({placeholders})", removable * 2)
+                conn.execute(f"DELETE FROM nodes WHERE id IN ({placeholders})", removable)
+    def _local_file_index_has_extracted_text(self, row: sqlite3.Row) -> bool:
+        metadata = _safe_loads(row["metadata_json"])
+        parser = metadata.get("parser") if isinstance(metadata, dict) else {}
+        if not isinstance(parser, dict):
+            return False
+        try:
+            return int(parser.get("extracted_chars") or 0) > 0
+        except (TypeError, ValueError):
+            return False
     def _upsert_local_file_node(
         self,
         conn: sqlite3.Connection,
@@ -1397,6 +1661,9 @@ class KnowledgeGraphStore:
         text: str,
         parser_meta: Dict[str, Any],
     ) -> str:
+        text = _clean_text(text)
+        if not text:
+            raise ValueError("텍스트 추출 결과가 비어 있습니다.")
         try:
             relative_path = file_path.relative_to(root).as_posix()
         except ValueError:
@@ -1446,7 +1713,7 @@ class KnowledgeGraphStore:
             file_node_id,
             _node_type_for_category(category),
             file_path.name,
-            summary=(_clean_text(text) or relative_path)[:700],
+            summary=text[:700],
             metadata=metadata,
             raw=metadata,
         )
@@ -1488,7 +1755,7 @@ class KnowledgeGraphStore:
             )
             self._upsert_edge(conn, file_node_id, chunk_id, "포함함", weight=0.7, metadata={"source": "local_scan"})
-        concepts = _extract_concepts(f"{file_path.name}\n{target_for_concepts}", limit=18)
+        concepts = _extract_concepts(target_for_concepts, limit=18)
         concept_ids: Dict[str, str] = {}
         for concept in concepts:
             node_t = _classify_node_type(concept, target_for_concepts)
@@ -1620,10 +1887,21 @@ class KnowledgeGraphStore:
                 except ValueError:
                     relative_path = file_path.name
                 seen_relative_paths.add(relative_path)
+                modified_at = _safe_iso_from_stat_mtime(stat.st_mtime)
+                existing = conn.execute(
+                    """
+                    SELECT size_bytes, modified_at, sha256, graph_node_id, status, metadata_json
+                    FROM local_file_index
+                    WHERE source_id=? AND relative_path=?
+                    """,
+                    (source_id, relative_path),
+                ).fetchone()
                 decision = self._local_file_decision(file_path, root, stat)
                 parser_type = decision["parser_type"]
                 if not decision["indexable"]:
                     counts[decision["status"]] += 1
+                    if existing and existing["graph_node_id"]:
+                        self._delete_local_file_graph(conn, existing["graph_node_id"])
                     self._upsert_local_file_index(
                         conn,
                         source_id=source_id,
@@ -1638,19 +1916,11 @@ class KnowledgeGraphStore:
                     )
                     continue
-                modified_at = _safe_iso_from_stat_mtime(stat.st_mtime)
-                existing = conn.execute(
-                    """
-                    SELECT size_bytes, modified_at, sha256, graph_node_id, status
-                    FROM local_file_index
-                    WHERE source_id=? AND relative_path=?
-                    """,
-                    (source_id, relative_path),
-                ).fetchone()
                 if (
                     existing
                     and existing["status"] == "indexed"
                     and existing["graph_node_id"]
+                    and self._local_file_index_has_extracted_text(existing)
                     and existing["size_bytes"] == stat.st_size
                     and existing["modified_at"] == modified_at
                 ):
@@ -1667,7 +1937,7 @@ class KnowledgeGraphStore:
                         parser_type=parser_type,
                         sha256=existing["sha256"],
                         graph_node_id=existing["graph_node_id"],
-                        metadata={"category": decision["category"], "unchanged": True},
+                        metadata={**_safe_loads(existing["metadata_json"]), "category": decision["category"], "unchanged": True},
                     )
                     continue
@@ -1677,6 +1947,8 @@ class KnowledgeGraphStore:
                 except Exception as exc:
                     counts["failed"] += 1
                     errors.append({"path": str(file_path), "error": str(exc)})
+                    if existing and existing["graph_node_id"]:
+                        self._delete_local_file_graph(conn, existing["graph_node_id"])
                     self._upsert_local_file_index(
                         conn,
                         source_id=source_id,
@@ -1692,7 +1964,12 @@ class KnowledgeGraphStore:
                     )
                     continue
-                if existing and existing["sha256"] == digest and existing["graph_node_id"]:
+                if (
+                    existing
+                    and existing["sha256"] == digest
+                    and existing["graph_node_id"]
+                    and self._local_file_index_has_extracted_text(existing)
+                ):
                     counts["skipped_unchanged"] += 1
                     self._upsert_local_file_index(
                         conn,
@@ -1706,7 +1983,7 @@ class KnowledgeGraphStore:
                         parser_type=parser_type,
                         sha256=digest,
                         graph_node_id=existing["graph_node_id"],
-                        metadata={"category": decision["category"], "sha256_unchanged": True},
+                        metadata={**_safe_loads(existing["metadata_json"]), "category": decision["category"], "sha256_unchanged": True},
                     )
                     continue
@@ -1716,6 +1993,27 @@ class KnowledgeGraphStore:
                         decision["category"],
                         include_ocr=include_ocr,
                     )
+                    text = _clean_text(text)
+                    parser_meta = {**parser_meta, "extracted_chars": len(text)}
+                    if not text:
+                        counts["skipped_empty_text"] += 1
+                        if existing and existing["graph_node_id"]:
+                            self._delete_local_file_graph(conn, existing["graph_node_id"])
+                        self._upsert_local_file_index(
+                            conn,
+                            source_id=source_id,
+                            root=root,
+                            file_path=file_path,
+                            stat=stat,
+                            os_type=os_type,
+                            drive_id=drive_id,
+                            status="skipped_empty_text",
+                            parser_type=parser_type,
+                            sha256=digest,
+                            error_message="텍스트 추출 결과가 비어 있습니다.",
+                            metadata={"category": decision["category"], "parser": parser_meta},
+                        )
+                        continue
                     graph_node_id = self._upsert_local_file_node(
                         conn,
                         source_id=source_id,
@@ -1749,6 +2047,8 @@ class KnowledgeGraphStore:
                 except Exception as exc:
                     counts["failed"] += 1
                     errors.append({"path": str(file_path), "error": str(exc)})
+                    if existing and existing["graph_node_id"]:
+                        self._delete_local_file_graph(conn, existing["graph_node_id"])
                     self._upsert_local_file_index(
                         conn,
                         source_id=source_id,
@@ -1765,19 +2065,20 @@ class KnowledgeGraphStore:
                     )
             if not limit_reached:
-                existing_paths = {
-                    row["relative_path"]
+                existing_rows = {
+                    row["relative_path"]: row["graph_node_id"]
                     for row in conn.execute(
-                        "SELECT relative_path FROM local_file_index WHERE source_id=?",
+                        "SELECT relative_path, graph_node_id FROM local_file_index WHERE source_id=?",
                         (source_id,),
                     )
                 }
-                deleted_paths = existing_paths - seen_relative_paths
+                deleted_paths = set(existing_rows) - seen_relative_paths
                 for relative_path in deleted_paths:
+                    self._delete_local_file_graph(conn, existing_rows.get(relative_path))
                     conn.execute(
                         """
                         UPDATE local_file_index
-                        SET status='deleted', deleted=1, last_scanned_at=?, error_message=NULL
+                        SET status='deleted', deleted=1, last_scanned_at=?, error_message=NULL, graph_node_id=NULL
                         WHERE source_id=? AND relative_path=?
                         """,
                         (_now(), source_id, relative_path),
@@ -2639,3 +2940,170 @@ class KnowledgeGraphStore:
             "local_file_status": local_file_status,
             "v2": v2,
         }
+    def search_for_document_generation(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
+        """Hybrid retrieval optimized for document generation.
+        Scoring: 0.5*text_relevance + 0.3*graph_relationship + 0.2*recency
+        Returns nodes with rich context for document generation prompts.
+        """
+        query = str(query or "").strip()
+        if not query:
+            return []
+        limit = max(1, min(int(limit or 10), 50))
+        terms = _topic_candidates(query, limit=12)
+        now = datetime.now()
+        with self._connect() as conn:
+            candidate_rows = []
+            seen_ids = set()
+            if query:
+                q = f"%{query}%"
+                rows = conn.execute(
+                    """
+                    SELECT id, type, title, summary, metadata_json, updated_at
+                    FROM nodes
+                    WHERE (title LIKE ? OR summary LIKE ? OR metadata_json LIKE ?)
+                      AND type IN ('Document', 'File', 'CodeFile', 'SlideDeck',
+                                   'Spreadsheet', 'Image', 'ImageText', 'Chat',
+                                   'Decision', 'Task', 'Concept', 'Feature',
+                                   'Page', 'Slide')
+                    ORDER BY updated_at DESC
+                    LIMIT ?
+                    """,
+                    (q, q, q, limit * 5),
+                ).fetchall()
+                for row in rows:
+                    if row["id"] not in seen_ids:
+                        seen_ids.add(row["id"])
+                        candidate_rows.append(row)
+            for term in terms:
+                t = f"%{term}%"
+                rows = conn.execute(
+                    """
+                    SELECT id, type, title, summary, metadata_json, updated_at
+                    FROM nodes
+                    WHERE (title LIKE ? OR summary LIKE ? OR metadata_json LIKE ?)
+                      AND type IN ('Document', 'File', 'CodeFile', 'SlideDeck',
+                                   'Spreadsheet', 'Image', 'ImageText', 'Chat',
+                                   'Decision', 'Task', 'Concept', 'Feature',
+                                   'Page', 'Slide')
+                    ORDER BY updated_at DESC
+                    LIMIT ?
+                    """,
+                    (t, t, t, limit * 3),
+                ).fetchall()
+                for row in rows:
+                    if row["id"] not in seen_ids:
+                        seen_ids.add(row["id"])
+                        candidate_rows.append(row)
+            scored_results = []
+            for row in candidate_rows:
+                haystack = f"{row['title']} {row['summary']} {row['metadata_json']}".lower()
+                text_hits = sum(1 for term in terms if term.lower() in haystack)
+                text_score = min(1.0, text_hits / max(len(terms), 1))
+                edge_count = conn.execute(
+                    "SELECT COUNT(*) AS c FROM edges WHERE from_node=? OR to_node=?",
+                    (row["id"], row["id"]),
+                ).fetchone()["c"]
+                graph_score = min(1.0, math.log1p(edge_count) / 4.0)
+                recency = _recency_score(row["updated_at"], now=now, half_life_days=14.0)
+                doc_type_boost = 1.2 if row["type"] in (
+                    "Document", "File", "SlideDeck", "Decision",
+                ) else 1.0
+                hybrid_score = (
+                    0.5 * text_score
+                    + 0.3 * graph_score
+                    + 0.2 * recency
+                ) * doc_type_boost
+                meta = _safe_loads(row["metadata_json"])
+                neighbor_concepts = []
+                neighbor_rows = conn.execute(
+                    """
+                    SELECT n.title, n.type FROM edges e
+                    JOIN nodes n ON n.id = CASE WHEN e.from_node = ? THEN e.to_node ELSE e.from_node END
+                    WHERE (e.from_node = ? OR e.to_node = ?)
+                      AND n.type IN ('Concept', 'Feature', 'Decision', 'Task')
+                    LIMIT 8
+                    """,
+                    (row["id"], row["id"], row["id"]),
+                ).fetchall()
+                for nr in neighbor_rows:
+                    neighbor_concepts.append({"title": nr["title"], "type": nr["type"]})
+                scored_results.append({
+                    "id": row["id"],
+                    "type": row["type"],
+                    "title": row["title"],
+                    "summary": row["summary"],
+                    "metadata": meta,
+                    "updated_at": row["updated_at"],
+                    "hybrid_score": round(hybrid_score, 4),
+                    "scores": {
+                        "text": round(text_score, 4),
+                        "graph": round(graph_score, 4),
+                        "recency": round(recency, 4),
+                    },
+                    "related_concepts": neighbor_concepts,
+                })
+            scored_results.sort(key=lambda x: x["hybrid_score"], reverse=True)
+            return scored_results[:limit]
+    def multi_hop_context(self, node_ids: List[str], max_hops: int = 2) -> Dict[str, Any]:
+        """Multi-hop graph traversal from seed nodes for richer context."""
+        visited_nodes = set()
+        visited_edges = set()
+        all_nodes = []
+        all_edges = []
+        frontier = set(node_ids)
+        with self._connect() as conn:
+            for hop in range(max_hops):
+                if not frontier:
+                    break
+                next_frontier = set()
+                for nid in frontier:
+                    if nid in visited_nodes:
+                        continue
+                    visited_nodes.add(nid)
+                    row = conn.execute(
+                        "SELECT id, type, title, summary, metadata_json, updated_at FROM nodes WHERE id=?",
+                        (nid,),
+                    ).fetchone()
+                    if row:
+                        all_nodes.append({
+                            "id": row["id"], "type": row["type"],
+                            "title": row["title"], "summary": row["summary"],
+                            "metadata": _safe_loads(row["metadata_json"]),
+                            "hop": hop,
+                        })
+                    edge_rows = conn.execute(
+                        """
+                        SELECT id, from_node, to_node, type, weight
+                        FROM edges WHERE from_node=? OR to_node=?
+                        """,
+                        (nid, nid),
+                    ).fetchall()
+                    for er in edge_rows:
+                        if er["id"] not in visited_edges:
+                            visited_edges.add(er["id"])
+                            all_edges.append({
+                                "from": er["from_node"], "to": er["to_node"],
+                                "type": er["type"], "weight": er["weight"],
+                            })
+                            other = er["to_node"] if er["from_node"] == nid else er["from_node"]
+                            if other not in visited_nodes:
+                                next_frontier.add(other)
+                frontier = next_frontier
+        return {"nodes": all_nodes, "edges": all_edges}