npm - superlocalmemory - Versions diffs - 3.3.19 → 3.3.21 - Mend

superlocalmemory 3.3.19 → 3.3.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81) hide show

package/package.json +1 -1
package/pyproject.toml +9 -1
package/src/superlocalmemory/cli/commands.py +140 -23
package/src/superlocalmemory/cli/daemon.py +372 -0
package/src/superlocalmemory/cli/main.py +10 -2
package/src/superlocalmemory/cli/pending_store.py +158 -0
package/src/superlocalmemory/cli/setup_wizard.py +39 -6
package/src/superlocalmemory/code_graph/__init__.py +46 -0
package/src/superlocalmemory/code_graph/blast_radius.py +177 -0
package/src/superlocalmemory/code_graph/bridge/__init__.py +36 -0
package/src/superlocalmemory/code_graph/bridge/entity_resolver.py +464 -0
package/src/superlocalmemory/code_graph/bridge/event_listeners.py +195 -0
package/src/superlocalmemory/code_graph/bridge/fact_enricher.py +159 -0
package/src/superlocalmemory/code_graph/bridge/hebbian_linker.py +170 -0
package/src/superlocalmemory/code_graph/bridge/temporal_checker.py +152 -0
package/src/superlocalmemory/code_graph/changes.py +363 -0
package/src/superlocalmemory/code_graph/communities.py +299 -0
package/src/superlocalmemory/code_graph/config.py +88 -0
package/src/superlocalmemory/code_graph/database.py +482 -0
package/src/superlocalmemory/code_graph/extractors/__init__.py +78 -0
package/src/superlocalmemory/code_graph/extractors/python.py +413 -0
package/src/superlocalmemory/code_graph/extractors/typescript.py +556 -0
package/src/superlocalmemory/code_graph/flows.py +350 -0
package/src/superlocalmemory/code_graph/git_hooks.py +226 -0
package/src/superlocalmemory/code_graph/graph_engine.py +295 -0
package/src/superlocalmemory/code_graph/graph_store.py +158 -0
package/src/superlocalmemory/code_graph/incremental.py +200 -0
package/src/superlocalmemory/code_graph/models.py +130 -0
package/src/superlocalmemory/code_graph/parser.py +507 -0
package/src/superlocalmemory/code_graph/resolver.py +321 -0
package/src/superlocalmemory/code_graph/search.py +460 -0
package/src/superlocalmemory/code_graph/service.py +95 -0
package/src/superlocalmemory/code_graph/watcher.py +207 -0
package/src/superlocalmemory/core/config.py +4 -3
package/src/superlocalmemory/core/embedding_worker.py +4 -2
package/src/superlocalmemory/core/embeddings.py +8 -2
package/src/superlocalmemory/core/engine.py +32 -0
package/src/superlocalmemory/core/engine_wiring.py +5 -0
package/src/superlocalmemory/core/recall_pipeline.py +7 -3
package/src/superlocalmemory/core/store_pipeline.py +23 -1
package/src/superlocalmemory/encoding/fact_extractor.py +68 -7
package/src/superlocalmemory/infra/event_bus.py +5 -0
package/src/superlocalmemory/mcp/server.py +23 -0
package/src/superlocalmemory/mcp/tools_code_graph.py +1592 -0
package/src/superlocalmemory/retrieval/agentic.py +89 -17
package/src/superlocalmemory/retrieval/engine.py +137 -2
package/src/superlocalmemory/retrieval/semantic_channel.py +6 -2
package/src/superlocalmemory/retrieval/spreading_activation.py +5 -3
package/src/superlocalmemory/retrieval/strategy.py +16 -0
package/src/superlocalmemory/server/api.py +4 -2
package/src/superlocalmemory/server/ui.py +5 -2
package/src/superlocalmemory/storage/schema_code_graph.py +239 -0
package/src/superlocalmemory/ui/index.html +1879 -0
package/src/superlocalmemory/ui/js/agents.js +192 -0
package/src/superlocalmemory/ui/js/auto-settings.js +399 -0
package/src/superlocalmemory/ui/js/behavioral.js +276 -0
package/src/superlocalmemory/ui/js/clusters.js +206 -0
package/src/superlocalmemory/ui/js/compliance.js +252 -0
package/src/superlocalmemory/ui/js/core.js +246 -0
package/src/superlocalmemory/ui/js/dashboard.js +110 -0
package/src/superlocalmemory/ui/js/events.js +178 -0
package/src/superlocalmemory/ui/js/fact-detail.js +92 -0
package/src/superlocalmemory/ui/js/feedback.js +333 -0
package/src/superlocalmemory/ui/js/graph-core.js +447 -0
package/src/superlocalmemory/ui/js/graph-filters.js +220 -0
package/src/superlocalmemory/ui/js/graph-interactions.js +351 -0
package/src/superlocalmemory/ui/js/graph-ui.js +214 -0
package/src/superlocalmemory/ui/js/ide-status.js +102 -0
package/src/superlocalmemory/ui/js/init.js +45 -0
package/src/superlocalmemory/ui/js/learning.js +435 -0
package/src/superlocalmemory/ui/js/lifecycle.js +298 -0
package/src/superlocalmemory/ui/js/math-health.js +98 -0
package/src/superlocalmemory/ui/js/memories.js +264 -0
package/src/superlocalmemory/ui/js/modal.js +357 -0
package/src/superlocalmemory/ui/js/patterns.js +93 -0
package/src/superlocalmemory/ui/js/profiles.js +236 -0
package/src/superlocalmemory/ui/js/recall-lab.js +292 -0
package/src/superlocalmemory/ui/js/search.js +59 -0
package/src/superlocalmemory/ui/js/settings.js +224 -0
package/src/superlocalmemory/ui/js/timeline.js +32 -0
package/src/superlocalmemory/ui/js/trust-dashboard.js +73 -0

package/src/superlocalmemory/retrieval/agentic.py CHANGED Viewed

@@ -31,7 +31,10 @@ logger = logging.getLogger(__name__)
 _MAX_ROUNDS = 2
 _SUFFICIENCY_SCORE_THRESHOLD = 0.6
-_SKIP_TYPES = frozenset({"temporal"})  # S15: agentic harms temporal queries
+# V3.3.19: Removed "temporal" from skip list. S15's lesson was with
+# weak alias expansion. The new rule-based decomposer (v3.3.19) helps
+# temporal queries by generating entity+action sub-queries.
+_SKIP_TYPES: frozenset[str] = frozenset()  # No types skipped
 _SUFFICIENCY_SYSTEM = (
     "You evaluate whether retrieved context is sufficient to answer a query. "
@@ -241,22 +244,91 @@ class AgenticRetriever:
     def _heuristic_expand(
         self, query: str, profile_id: str,
     ) -> list[str]:
-        """Mode A: expand query with entity aliases (no LLM)."""
-        if self._db is None:
-            return []
-        expanded_parts: list[str] = []
-        entities = re.findall(r"\b[A-Z][a-z]{2,}\b", query)
-        for name in entities:
-            entity = self._db.get_entity_by_name(name, profile_id)
-            if entity:
-                aliases = self._db.get_aliases_for_entity(entity.entity_id)
-                for a in aliases[:3]:
-                    expanded_parts.append(a.alias)
-        if expanded_parts:
-            return [query + " " + " ".join(expanded_parts)]
-        return []
+        """Mode A: rule-based query decomposition (no LLM).
+        V3.3.19: Full rewrite. Generates targeted sub-queries by:
+        1. Extracting person/place names (real proper nouns only)
+        2. Extracting action/event keywords (non-stopwords minus entities)
+        3. Combining entity + action for focused retrieval
+        4. Entity-only and action-only lookups for broader context
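+        For LoCoMo "When did [Person] [Action]?" patterns, e.g.
+        "When did Caroline join the support group?", this generates (in order):
+          "Caroline join support group"    (entity + action)
+          "join support group"             (action only)
+          "Caroline"                       (entity only)
+        All-caps abbreviations (e.g. LGBTQ) are treated as entities, so they
+        are excluded from the action phrase. At most 3 sub-queries are returned.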
+        """
+        sub_queries: list[str] = []
+        # Extract REAL proper nouns from original query (not title-cased)
+        # This avoids the extract_query_entities trap where "Support Group"
+        # from title-casing gets treated as entities.
+        _STARTERS = {
+            "What", "Where", "Who", "Which", "How", "When", "Does", "Did",
+            "Can", "Could", "Would", "Should", "Are", "Is", "Was", "Were",
+            "Has", "Have", "The", "Tell", "Do",
+        }
+        entities = [
+            m for m in re.findall(r"\b[A-Z][a-z]{2,}\b", query)
+            if m not in _STARTERS
+        ]
+        # Also grab all-caps abbreviations (LGBTQ, MIT, NYC)
+        abbrevs = re.findall(r"\b[A-Z]{2,}\b", query)
+        entities.extend(abbrevs)
+        # Extract action/event keywords (remove question words + entity names)
+        _STOP = {
+            "when", "did", "does", "do", "what", "where", "who", "which",
+            "how", "is", "was", "were", "are", "has", "have", "had",
+            "the", "a", "an", "to", "for", "of", "in", "on", "at",
+            "and", "or", "but", "with", "from", "about", "that", "this",
+            "it", "they", "she", "he", "her", "his", "their", "its",
+            "been", "being", "would", "could", "should", "will", "can",
+            "may", "might", "not", "no", "so", "if", "by", "up",
+            "go", "going", "went", "get", "got", "ago",
+            "many", "much", "some", "any", "ever",
+        }
+        entity_lower = {e.lower() for e in entities}
+        words = re.sub(r"[^\w\s]", "", query.lower()).split()
+        action_words = [
+            w for w in words
+            if w not in _STOP and w not in entity_lower and len(w) > 2
+        ]
+        # Strategy 1: Entity + action keywords (most targeted)
+        if entities and action_words:
+            action_phrase = " ".join(action_words)
+            for ent in entities[:2]:
+                sub_queries.append(f"{ent} {action_phrase}")
+        # Strategy 2: Action keywords only (finds the event regardless of entity)
+        if action_words:
+            sub_queries.append(" ".join(action_words))
+        # Strategy 3: Entity-only lookup (broad context)
+        for ent in entities[:2]:
+            sub_queries.append(ent)
+        # Strategy 4: Alias expansion (original approach, still useful)
+        if self._db is not None:
+            for name in entities[:2]:
+                entity = self._db.get_entity_by_name(name, profile_id)
+                if entity:
+                    try:
+                        aliases = self._db.get_aliases_for_entity(entity.entity_id)
+                        for a in aliases[:2]:
+                            sub_queries.append(f"{a.alias} {' '.join(action_words)}")
+                    except Exception:
+                        pass
+        # Deduplicate, limit to 3 sub-queries (keep round 2 fast)
+        seen: set[str] = set()
+        unique: list[str] = []
+        for sq in sub_queries:
+            sq_lower = sq.strip().lower()
+            if sq_lower and sq_lower not in seen and sq_lower != query.lower():
+                seen.add(sq_lower)
+                unique.append(sq.strip())
+        return unique[:3]
 # ---------------------------------------------------------------------------

package/src/superlocalmemory/retrieval/engine.py CHANGED Viewed

@@ -134,7 +134,7 @@ class RetrievalEngine:
             profile_hits = []
         # Dynamic top-k for aggregation queries
-        effective_limit = 50 if strat.query_type == "aggregation" else limit
+        effective_limit = 100 if strat.query_type == "aggregation" else limit
         # 3. Run 4 channels
         ch_results = self._run_channels(query, profile_id, strat)
@@ -145,6 +145,14 @@ class RetrievalEngine:
         # 3. Single-pass RRF fusion
         fused = weighted_rrf(ch_results, strat.weights, k=self._config.rrf_k)
+        # V3.3.21: Cross-channel intersection boost. NOTE: the guard below applies it only to "multi_hop" queries.
+        # Problem: channels work in ISOLATION. "When did Caroline go to X?" needs
+        # entity(Caroline) ∩ temporal(date). RRF averages scores but doesn't enforce
+        # the intersection constraint. Fix: boost facts that appear in 2+ signal-type
+        # channels (entity+temporal, entity+semantic, temporal+semantic).
+        if strat.query_type == "multi_hop" and len(ch_results) >= 2:
+            fused = self._apply_cross_channel_intersection(fused, ch_results, strat)
         # Bridge discovery for multi-hop queries
         # V3.3.19: Only bridge.discover() (86ms). Removed bridge.spreading_activation()
         # which did per-node SQL queries across 254K edges → 78s latency.
@@ -184,9 +192,23 @@ class RetrievalEngine:
         top = fused[:pool]
         facts = self._load_facts(top, profile_id)
+        # V3.3.21: Session diversity for aggregation queries.
+        # Cat 1 (single-hop/aggregation) needs facts from MULTIPLE sessions.
+        # Without diversity enforcement, top-20 may all come from 1-2 sessions,
+        # missing scattered mentions across 19+ sessions.
+        if strat.query_type == "aggregation" and facts:
+            top = self._enforce_session_diversity(top, facts, min_sessions=3, top_k=20)
         # 5. Cross-encoder rerank (optional)
         # Bug 4 fix: reduced alpha for multi-hop/temporal to preserve diversity
-        if self._reranker is not None and facts:
+        # V3.3.21: Skip reranker if worker isn't ready yet (cold start).
+        # Returns results without CE reranking (~5-10pp lower quality) but instant
+        # instead of blocking 15-19s on first recall. Worker warms up in background.
+        reranker_ready = (
+            self._reranker is not None
+            and getattr(self._reranker, '_worker_ready', False)
+        )
+        if reranker_ready and facts:
             ce_alpha = 0.5 if strat.query_type in ("multi_hop", "temporal") else 0.75
             top = self._apply_reranker(query, top, facts, alpha=ce_alpha)
@@ -199,6 +221,119 @@ class RetrievalEngine:
             total_candidates=total, retrieval_time_ms=ms,
         )
+    # -- Cross-channel intersection boost -----------------------------------
+    @staticmethod
+    def _apply_cross_channel_intersection(
+        fused: list[FusionResult],
+        ch_results: dict[str, list[tuple[str, float]]],
+        strat: QueryStrategy,
+    ) -> list[FusionResult]:
+        """Boost facts that appear across multiple signal-type channels.
+        V3.3.21: Solves the channel isolation problem. When a query has both
+        entity and temporal signals (e.g., "When did Caroline go to X?"), facts
+        matching BOTH dimensions should rank higher than facts matching only one.
+        Channel groups:
+          - content: semantic, bm25, profile (text similarity)
+          - structure: entity_graph, spreading_activation (graph structure)
+          - temporal: temporal (date proximity)
+          - associative: hopfield (pattern completion)
+        Any channel not listed above counts as its own group.
+        Boost: facts in 3+ groups get 2.0x; facts in exactly 2 groups get 1.5x,
+        or 1.8x when those two groups are temporal and structure.
+        """
+        # Map channels to signal groups
+        _CHANNEL_GROUPS = {
+            "semantic": "content", "bm25": "content",
+            "entity_graph": "structure", "spreading_activation": "structure",
+            "temporal": "temporal",
+            "hopfield": "associative",
+            "profile": "content",
+        }
+        # Build fact_id -> set of signal groups it appears in
+        fact_groups: dict[str, set[str]] = {}
+        for ch_name, results in ch_results.items():
+            group = _CHANNEL_GROUPS.get(ch_name, ch_name)
+            for fid, _score in results:
+                if fid not in fact_groups:
+                    fact_groups[fid] = set()
+                fact_groups[fid].add(group)
+        # Apply boost based on cross-group coverage
+        boosted: list[FusionResult] = []
+        for fr in fused:
+            groups = fact_groups.get(fr.fact_id, set())
+            n_groups = len(groups)
+            if n_groups >= 3:
+                boost = 2.0
+            elif n_groups >= 2:
+                # Extra boost for temporal+structure intersection (the exact gap)
+                if "temporal" in groups and "structure" in groups:
+                    boost = 1.8
+                else:
+                    boost = 1.5
+            else:
+                boost = 1.0
+            boosted.append(FusionResult(
+                fact_id=fr.fact_id,
+                fused_score=fr.fused_score * boost,
+                channel_ranks=fr.channel_ranks,
+                channel_scores=fr.channel_scores,
+            ))
+        boosted.sort(key=lambda r: r.fused_score, reverse=True)
+        return boosted
+    # -- Session diversity enforcement ----------------------------------------
+    @staticmethod
+    def _enforce_session_diversity(
+        fused: list[FusionResult],
+        fact_map: dict[str, AtomicFact],
+        min_sessions: int = 3,
+        top_k: int = 20,
+    ) -> list[FusionResult]:
+        """Ensure top-k results span at least min_sessions different session_ids.
+        V3.3.21: Category 1 (aggregation) needs facts from MULTIPLE sessions —
+        95.7% of cat 1 questions require cross-session evidence. Without this,
+        top-20 may cluster around 1-2 sessions, missing scattered mentions.
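+        Algorithm: if top-k has < min_sessions, take the first fact (in fused
+        order) from each session not yet present in the top-k, until
+        min_sessions is reached, and place those facts immediately after the
+        top-k entries, ahead of the remaining results. The original top-k
+        entries are never displaced.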
+        """
+        if len(fused) <= top_k:
+            return fused
+        top = fused[:top_k]
+        rest = fused[top_k:]
+        sessions_in_top: set[str] = set()
+        for fr in top:
+            fact = fact_map.get(fr.fact_id)
+            if fact and fact.session_id:
+                sessions_in_top.add(fact.session_id)
+        if len(sessions_in_top) >= min_sessions:
+            return fused
+        promoted: list[FusionResult] = []
+        for fr in rest:
+            fact = fact_map.get(fr.fact_id)
+            if fact and fact.session_id and fact.session_id not in sessions_in_top:
+                sessions_in_top.add(fact.session_id)
+                promoted.append(fr)
+                if len(sessions_in_top) >= min_sessions:
+                    break
+        if not promoted:
+            return fused
+        promoted_ids = {fr.fact_id for fr in promoted}
+        remaining = [fr for fr in rest if fr.fact_id not in promoted_ids]
+        return top + promoted + remaining
     # -- Channel execution --------------------------------------------------
     def _embed_query(self, query: str) -> list[float] | None:

package/src/superlocalmemory/retrieval/semantic_channel.py CHANGED Viewed

@@ -183,8 +183,12 @@ class SemanticChannel:
         for fact in facts:
             cos_sim = knn_scores.get(fact.fact_id, 0.0)
-            # Graduated Fisher-Rao ramp (preserved from original)
-            fisher_weight = min(1.2, (fact.access_count or 0) / 10.0 * 1.2)
+            # V3.3.21: Fisher-Rao ramp with minimum floor.
+            # Bug fix: access_count=0 for fresh facts → Fisher weight=0 → metric DEAD.
+            # Paper 2's +12pp on multi-hop came from Fisher-Rao. A 0.15 floor ensures
+            # fresh facts still benefit from variance-weighted similarity, while
+            # frequently accessed facts get progressively stronger Fisher influence.
+            fisher_weight = max(0.15, min(1.2, (fact.access_count or 0) / 10.0 * 1.2))
             if (fisher_weight > 0.01
                     and fact.fisher_variance is not None

package/src/superlocalmemory/retrieval/spreading_activation.py CHANGED Viewed

@@ -46,10 +46,12 @@ class SpreadingActivationConfig:
     alpha: float = 1.0           # Seed scaling factor
     delta: float = 0.5           # Node retention / self-decay per iteration
     spreading_factor: float = 0.8  # S: energy diffusion rate
-    theta: float = 0.5           # Activation threshold for sigmoid
-    top_m: int = 7               # Lateral inhibition: max active nodes
+    # V3.3.20: Recalibrated for SLM graph density (254K edges, 768d).
+    # SYNAPSE defaults (theta=0.5, top_m=7) were for 384d sparse graphs.
+    theta: float = 0.2           # Activation threshold for sigmoid (was 0.5)
+    top_m: int = 20              # Lateral inhibition: max active nodes (was 7)
     max_iterations: int = 3      # T: propagation depth
-    tau_gate: float = 0.12       # FOK confidence gate
+    tau_gate: float = 0.05       # FOK confidence gate (was 0.12)
     enabled: bool = True         # Ships enabled by default

package/src/superlocalmemory/retrieval/strategy.py CHANGED Viewed

@@ -66,8 +66,21 @@ _CAUSAL_TEMPORAL_WORDS: frozenset[str] = frozenset({
 _AGGREGATION_WORDS: frozenset[str] = frozenset({
     "all", "list", "every", "everything", "various", "different",
     "many", "several", "multiple", "summarize", "overview",
+    # V3.3.21 R5: LoCoMo cat 1 patterns — "What X does/did Y Z?" needs aggregation.
+    # "What activities does Melanie partake in?" = aggregation, not factual.
+    "activities", "events", "hobbies", "instruments", "types",
+    "things", "places", "jobs", "skills", "interests", "pets",
 })
+# V3.3.21 R5: Plural noun patterns that signal aggregation queries.
+# "What [noun]s has/does [entity] [verb]?" = needs cross-session aggregation.
+_AGGREGATION_PATTERNS: tuple[str, ...] = (
+    r"what (?:\w+ )?(?:activities|events|hobbies|types|things|places|jobs)",
+    r"what (?:\w+ )?has .+ (?:done|visited|attended|participated|played|practiced)",
+    r"how many (?:\w+ )?(?:times|events|things|places)",
+    r"what are .+(?:'s|s') (?:\w+ )?(?:hobbies|interests|activities|skills)",
+)
 _OPINION_WORDS: tuple[str, ...] = (
     "think", "feel", "opinion", "prefer", "favorite", "best", "worst",
     "believe", "like about", "dislike", "enjoy", "hate", "love",
@@ -126,6 +139,9 @@ class QueryStrategyClassifier:
             return "temporal"
         if words & _AGGREGATION_WORDS:
             return "aggregation"
+        # V3.3.21 R5: Regex patterns for aggregation questions
+        if any(re.search(p, q) for p in _AGGREGATION_PATTERNS):
+            return "aggregation"
         if any(w in q for w in _OPINION_WORDS):
             return "opinion"
         if len(proper_nouns) >= 2:

package/src/superlocalmemory/server/api.py CHANGED Viewed

@@ -30,8 +30,10 @@ logger = logging.getLogger("superlocalmemory.api_server")
 # V3 paths
 MEMORY_DIR = Path.home() / ".superlocalmemory"
 DB_PATH = MEMORY_DIR / "memory.db"
-# ui/ is at repo root, 4 levels up from src/superlocalmemory/server/api.py
-UI_DIR = Path(__file__).resolve().parent.parent.parent.parent / "ui"
+# V3.3.21: UI shipped inside the package for pip/npm installs.
+_PKG_UI = Path(__file__).resolve().parent.parent / "ui"
+_REPO_UI = Path(__file__).resolve().parent.parent.parent.parent / "ui"
+UI_DIR = _PKG_UI if (_PKG_UI / "index.html").exists() else _REPO_UI
 # ============================================================================

package/src/superlocalmemory/server/ui.py CHANGED Viewed

@@ -48,8 +48,11 @@ from superlocalmemory.server.security_middleware import SecurityHeadersMiddlewar
 # V3 Paths (migrated from ~/.claude-memory to ~/.superlocalmemory)
 MEMORY_DIR = Path.home() / ".superlocalmemory"
 DB_PATH = MEMORY_DIR / "memory.db"
-# ui/ is at repo root, 4 levels up from src/superlocalmemory/server/ui.py
-UI_DIR = Path(__file__).resolve().parent.parent.parent.parent / "ui"
+# V3.3.21: UI shipped inside the package for pip/npm installs.
+# Check package location first, then fall back to repo root for dev mode.
+_PKG_UI = Path(__file__).resolve().parent.parent / "ui"
+_REPO_UI = Path(__file__).resolve().parent.parent.parent.parent / "ui"
+UI_DIR = _PKG_UI if (_PKG_UI / "index.html").exists() else _REPO_UI
 def create_app() -> FastAPI:

package/src/superlocalmemory/storage/schema_code_graph.py ADDED Viewed

@@ -0,0 +1,239 @@
+# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
+# Licensed under the MIT License - see LICENSE file
+# Part of SuperLocalMemory v3.4 — CodeGraph Module
+"""DDL for the code_graph.db database.
+Single source of truth for all CodeGraph tables.
+No other module should contain CREATE TABLE statements.
+Tables:
+  1. graph_nodes       — Code entities (functions, classes, files, modules)
+  2. graph_edges       — Relationships (calls, imports, inherits, contains, tested_by)
+  3. graph_files       — File tracking for incremental updates
+  4. graph_metadata    — Key-value store for graph-level config
+  5. code_memory_links — Bridge table linking code nodes to SLM memory facts
+  6. code_node_embeddings — vec0 virtual table for semantic search (optional)
+  7. graph_nodes_fts   — FTS5 virtual table for text search
+"""
+from __future__ import annotations
+import logging
+import sqlite3
+logger = logging.getLogger(__name__)
+# ---------------------------------------------------------------------------
+# DDL Statements (executed in order)
+# ---------------------------------------------------------------------------
+_DDL_STATEMENTS: tuple[str, ...] = (
+    # ── Table 1: graph_nodes ──────────────────────────────────────────
+    """
+    CREATE TABLE IF NOT EXISTS graph_nodes (
+        node_id         TEXT PRIMARY KEY,
+        kind            TEXT NOT NULL CHECK (kind IN ('file', 'class', 'function', 'method', 'module')),
+        name            TEXT NOT NULL,
+        qualified_name  TEXT NOT NULL UNIQUE,
+        file_path       TEXT NOT NULL,
+        line_start      INTEGER NOT NULL DEFAULT 0,
+        line_end        INTEGER NOT NULL DEFAULT 0,
+        language        TEXT NOT NULL DEFAULT '',
+        parent_name     TEXT,
+        signature       TEXT,
+        docstring       TEXT,
+        is_test         INTEGER NOT NULL DEFAULT 0,
+        content_hash    TEXT,
+        community_id    INTEGER,
+        extra_json      TEXT NOT NULL DEFAULT '{}',
+        created_at      REAL NOT NULL,
+        updated_at      REAL NOT NULL
+    )
+    """,
+    # ── Table 2: graph_edges ──────────────────────────────────────────
+    """
+    CREATE TABLE IF NOT EXISTS graph_edges (
+        edge_id         TEXT PRIMARY KEY,
+        kind            TEXT NOT NULL CHECK (kind IN ('calls', 'imports', 'inherits', 'contains', 'tested_by', 'depends_on')),
+        source_node_id  TEXT NOT NULL REFERENCES graph_nodes(node_id) ON DELETE CASCADE,
+        target_node_id  TEXT NOT NULL REFERENCES graph_nodes(node_id) ON DELETE CASCADE,
+        file_path       TEXT NOT NULL,
+        line            INTEGER NOT NULL DEFAULT 0,
+        confidence      REAL NOT NULL DEFAULT 1.0 CHECK (confidence >= 0.0 AND confidence <= 1.0),
+        extra_json      TEXT NOT NULL DEFAULT '{}',
+        created_at      REAL NOT NULL,
+        updated_at      REAL NOT NULL
+    )
+    """,
+    # ── Table 3: graph_files ──────────────────────────────────────────
+    """
+    CREATE TABLE IF NOT EXISTS graph_files (
+        file_path       TEXT PRIMARY KEY,
+        content_hash    TEXT NOT NULL,
+        mtime           REAL NOT NULL,
+        language        TEXT NOT NULL,
+        node_count      INTEGER NOT NULL DEFAULT 0,
+        edge_count      INTEGER NOT NULL DEFAULT 0,
+        last_indexed    REAL NOT NULL
+    )
+    """,
+    # ── Table 4: graph_metadata ───────────────────────────────────────
+    """
+    CREATE TABLE IF NOT EXISTS graph_metadata (
+        key             TEXT PRIMARY KEY,
+        value           TEXT NOT NULL,
+        updated_at      REAL NOT NULL
+    )
+    """,
+    # ── Table 5: code_memory_links ────────────────────────────────────
+    """
+    CREATE TABLE IF NOT EXISTS code_memory_links (
+        link_id         TEXT PRIMARY KEY,
+        code_node_id    TEXT NOT NULL REFERENCES graph_nodes(node_id) ON DELETE CASCADE,
+        slm_fact_id     TEXT NOT NULL,
+        slm_entity_id   TEXT,
+        link_type       TEXT NOT NULL CHECK (link_type IN (
+            'mentions', 'decision_about', 'bug_fix', 'refactor', 'design_rationale'
+        )),
+        confidence      REAL NOT NULL DEFAULT 0.8 CHECK (confidence >= 0.0 AND confidence <= 1.0),
+        created_at      TEXT NOT NULL DEFAULT (datetime('now')),
+        last_verified   TEXT,
+        is_stale        INTEGER NOT NULL DEFAULT 0
+    )
+    """,
+)
+# Indexes (separate from tables for clarity)
+_INDEX_STATEMENTS: tuple[str, ...] = (
+    # graph_nodes indexes
+    "CREATE INDEX IF NOT EXISTS idx_gn_file_path ON graph_nodes(file_path)",
+    "CREATE INDEX IF NOT EXISTS idx_gn_kind ON graph_nodes(kind)",
+    "CREATE INDEX IF NOT EXISTS idx_gn_name ON graph_nodes(name)",
+    "CREATE INDEX IF NOT EXISTS idx_gn_qualified ON graph_nodes(qualified_name)",
+    "CREATE INDEX IF NOT EXISTS idx_gn_parent ON graph_nodes(parent_name)",
+    "CREATE INDEX IF NOT EXISTS idx_gn_language ON graph_nodes(language)",
+    "CREATE INDEX IF NOT EXISTS idx_gn_community ON graph_nodes(community_id)",
+    # graph_edges indexes
+    "CREATE INDEX IF NOT EXISTS idx_ge_source ON graph_edges(source_node_id)",
+    "CREATE INDEX IF NOT EXISTS idx_ge_target ON graph_edges(target_node_id)",
+    "CREATE INDEX IF NOT EXISTS idx_ge_kind ON graph_edges(kind)",
+    "CREATE INDEX IF NOT EXISTS idx_ge_file ON graph_edges(file_path)",
+    "CREATE INDEX IF NOT EXISTS idx_ge_source_kind ON graph_edges(source_node_id, kind)",
+    "CREATE INDEX IF NOT EXISTS idx_ge_target_kind ON graph_edges(target_node_id, kind)",
+    # code_memory_links indexes
+    "CREATE INDEX IF NOT EXISTS idx_cml_node ON code_memory_links(code_node_id)",
+    "CREATE INDEX IF NOT EXISTS idx_cml_fact ON code_memory_links(slm_fact_id)",
+    "CREATE INDEX IF NOT EXISTS idx_cml_entity ON code_memory_links(slm_entity_id)",
+    "CREATE INDEX IF NOT EXISTS idx_cml_type ON code_memory_links(link_type)",
+    "CREATE INDEX IF NOT EXISTS idx_cml_stale ON code_memory_links(is_stale)",
+)
+# FTS5 virtual table + sync triggers
+_FTS5_STATEMENTS: tuple[str, ...] = (
+    """
+    CREATE VIRTUAL TABLE IF NOT EXISTS graph_nodes_fts USING fts5(
+        name,
+        qualified_name,
+        file_path,
+        signature,
+        content='graph_nodes',
+        content_rowid='rowid',
+        tokenize='porter unicode61'
+    )
+    """,
+    # Auto-sync trigger: INSERT
+    """
+    CREATE TRIGGER IF NOT EXISTS trg_gn_fts_insert AFTER INSERT ON graph_nodes
+    BEGIN
+        INSERT INTO graph_nodes_fts(rowid, name, qualified_name, file_path, signature)
+        VALUES (NEW.rowid, NEW.name, NEW.qualified_name, NEW.file_path, NEW.signature);
+    END
+    """,
+    # Auto-sync trigger: DELETE
+    """
+    CREATE TRIGGER IF NOT EXISTS trg_gn_fts_delete AFTER DELETE ON graph_nodes
+    BEGIN
+        INSERT INTO graph_nodes_fts(graph_nodes_fts, rowid, name, qualified_name, file_path, signature)
+        VALUES ('delete', OLD.rowid, OLD.name, OLD.qualified_name, OLD.file_path, OLD.signature);
+    END
+    """,
+    # Auto-sync trigger: UPDATE
+    """
+    CREATE TRIGGER IF NOT EXISTS trg_gn_fts_update AFTER UPDATE ON graph_nodes
+    BEGIN
+        INSERT INTO graph_nodes_fts(graph_nodes_fts, rowid, name, qualified_name, file_path, signature)
+        VALUES ('delete', OLD.rowid, OLD.name, OLD.qualified_name, OLD.file_path, OLD.signature);
+        INSERT INTO graph_nodes_fts(rowid, name, qualified_name, file_path, signature)
+        VALUES (NEW.rowid, NEW.name, NEW.qualified_name, NEW.file_path, NEW.signature);
+    END
+    """,
+)
+# ---------------------------------------------------------------------------
+# Public API (matches SLM's schema.py pattern)
+# ---------------------------------------------------------------------------
+def create_all_tables(conn: sqlite3.Connection) -> None:
+    """Create all CodeGraph tables, indexes, and triggers.
+    Idempotent — safe to call multiple times (all DDL uses IF NOT EXISTS).
+    """
+    cursor = conn.cursor()
+    # Enable foreign keys
+    cursor.execute("PRAGMA foreign_keys = ON")
+    # Core tables
+    for ddl in _DDL_STATEMENTS:
+        cursor.execute(ddl)
+    # Indexes
+    for idx in _INDEX_STATEMENTS:
+        cursor.execute(idx)
+    # FTS5 + triggers (may fail if SQLite lacks FTS5 — non-fatal)
+    for stmt in _FTS5_STATEMENTS:
+        try:
+            cursor.execute(stmt)
+        except sqlite3.OperationalError as exc:
+            logger.warning("FTS5 setup failed (non-fatal): %s", exc)
+    # vec0 virtual table for embeddings (may fail if sqlite-vec not loaded)
+    try:
+        cursor.execute("""
+            CREATE VIRTUAL TABLE IF NOT EXISTS code_node_embeddings USING vec0(
+                node_id TEXT PRIMARY KEY,
+                embedding float[768] distance_metric=cosine
+            )
+        """)
+    except sqlite3.OperationalError as exc:
+        logger.warning("vec0 setup failed (non-fatal, embeddings disabled): %s", exc)
+    conn.commit()
+    logger.info("CodeGraph schema initialized (%d tables, %d indexes)",
+                len(_DDL_STATEMENTS), len(_INDEX_STATEMENTS))
+def drop_all_tables(conn: sqlite3.Connection) -> None:
+    """Drop all CodeGraph tables. Used in tests only."""
+    cursor = conn.cursor()
+    for table in (
+        "graph_nodes_fts", "code_node_embeddings",
+        "code_memory_links", "graph_metadata",
+        "graph_files", "graph_edges", "graph_nodes",
+    ):
+        try:
+            cursor.execute(f"DROP TABLE IF EXISTS {table}")
+        except sqlite3.OperationalError:
+            pass
+    # Drop triggers
+    for trigger in ("trg_gn_fts_insert", "trg_gn_fts_delete", "trg_gn_fts_update"):
+        cursor.execute(f"DROP TRIGGER IF EXISTS {trigger}")
+    conn.commit()