npm - superlocalmemory - Versions diffs - 3.2.1 → 3.2.2 - Mend

superlocalmemory 3.2.1 → 3.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/CHANGELOG.md +23 -1
package/README.md +61 -1
package/package.json +1 -1
package/pyproject.toml +26 -1
package/src/superlocalmemory/attribution/signer.py +6 -1
package/src/superlocalmemory/core/config.py +114 -1
package/src/superlocalmemory/core/consolidation_engine.py +595 -0
package/src/superlocalmemory/core/embeddings.py +0 -1
package/src/superlocalmemory/core/engine.py +164 -674
package/src/superlocalmemory/core/engine_wiring.py +474 -0
package/src/superlocalmemory/core/graph_analyzer.py +199 -0
package/src/superlocalmemory/core/recall_pipeline.py +247 -0
package/src/superlocalmemory/core/store_pipeline.py +483 -0
package/src/superlocalmemory/core/worker_pool.py +35 -12
package/src/superlocalmemory/encoding/auto_linker.py +308 -0
package/src/superlocalmemory/encoding/context_generator.py +175 -0
package/src/superlocalmemory/encoding/temporal_validator.py +513 -0
package/src/superlocalmemory/hooks/auto_invoker.py +484 -0
package/src/superlocalmemory/retrieval/channel_registry.py +154 -0
package/src/superlocalmemory/retrieval/engine.py +12 -0
package/src/superlocalmemory/retrieval/semantic_channel.py +87 -3
package/src/superlocalmemory/retrieval/spreading_activation.py +311 -0
package/src/superlocalmemory/retrieval/strategy.py +6 -6
package/src/superlocalmemory/retrieval/vector_store.py +386 -0
package/src/superlocalmemory/server/routes/v3_api.py +576 -0
package/src/superlocalmemory/storage/access_log.py +169 -0
package/src/superlocalmemory/storage/database.py +288 -0
package/src/superlocalmemory/storage/schema.py +10 -0
package/src/superlocalmemory/storage/schema_v32.py +252 -0
package/src/superlocalmemory/storage/v2_migrator.py +24 -2

package/src/superlocalmemory/retrieval/semantic_channel.py CHANGED Viewed

@@ -20,7 +20,7 @@ from __future__ import annotations
 import logging
 import math
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 import numpy as np
@@ -71,6 +71,8 @@ class SemanticChannel:
     fresh facts (low access_count) use cosine, frequently-accessed facts
     transition to Fisher-Rao distance for uncertainty-aware similarity.
+    V3.2: VectorStore KNN fast path when available, falls back to full scan.
     Graduated ramp: weight = min(1.2, access_count / 10 * 1.2)
     Final sim = fisher_weight * fisher_sim + (1 - fisher_weight) * cosine_sim
     """
@@ -81,6 +83,7 @@ class SemanticChannel:
         fisher_temperature: float = 15.0,
         embedder: object | None = None,
         fisher_mode: str = "simplified",
+        vector_store: Any | None = None,
     ) -> None:
         self._db = db
         self._temperature = fisher_temperature
@@ -88,6 +91,7 @@ class SemanticChannel:
         self._fisher_mode = fisher_mode if fisher_mode in ("simplified", "full") else "simplified"
         # Lazily instantiated full metric (avoids import cost when not needed)
         self._full_metric: object | None = None
+        self._vector_store = vector_store
     def search(
         self,
@@ -97,8 +101,8 @@ class SemanticChannel:
     ) -> list[tuple[str, float]]:
         """Search for semantically similar facts.
-        Uses graduated Fisher-Rao ramp: access_count < 1 = pure cosine,
-        access_count >= 10 = full Fisher-Rao (1.2x weight).
+        Uses VectorStore KNN if available, otherwise full-table scan.
+        Fisher-Rao scoring preserved as post-KNN secondary signal.
         Args:
             query_embedding: Dense vector for the query.
@@ -114,6 +118,86 @@ class SemanticChannel:
         q_vec = np.array(query_embedding, dtype=np.float32)
+        # --- FAST PATH: sqlite-vec KNN ---
+        if self._vector_store and self._vector_store.available:
+            results = self._search_via_vector_store(
+                query_embedding, q_vec, profile_id, top_k,
+            )
+            if results:  # If vec0 returned results, use them
+                return results
+            # If vec0 is empty (cold start), fall through to full scan
+        # --- FALLBACK: full-table scan (original code, unchanged) ---
+        return self._search_full_scan(query_embedding, q_vec, profile_id, top_k)
+    def _search_via_vector_store(
+        self,
+        query_embedding: list[float],
+        q_vec: np.ndarray,
+        profile_id: str,
+        top_k: int,
+    ) -> list[tuple[str, float]]:
+        """KNN via VectorStore, then Fisher-Rao re-scoring on top-K subset.
+        Args:
+            query_embedding: Query vector as passed to the vector store and
+                to the embedder's ``compute_fisher_params`` (when present).
+            q_vec: The same query as a float32 array, used for the
+                Fisher-Rao comparison.
+            profile_id: Profile used to scope both the KNN search and the
+                fact lookup.
+            top_k: Maximum number of results to return.
+        Returns:
+            Up to ``top_k`` ``(fact_id, similarity)`` pairs, best first.
+            An empty list when the KNN search finds nothing, or when every
+            re-scored candidate is at or below the 0.3 floor. If the
+            candidate facts cannot be loaded, the raw KNN scores are
+            returned without re-scoring and without the 0.3 floor.
+        """
+        # Step 1: Fast KNN -- get 2x top_k candidates for Fisher re-ranking
+        knn_results = self._vector_store.search(
+            query_embedding, top_k=top_k * 2, profile_id=profile_id,
+        )
+        if not knn_results:
+            return []  # Caller falls through to full scan
+        # Step 2: Load only the candidate facts (NOT all facts)
+        candidate_ids = [fid for fid, _ in knn_results]
+        knn_scores = {fid: score for fid, score in knn_results}
+        facts = self._db.get_facts_by_ids(candidate_ids, profile_id)
+        if not facts:
+            # No fact rows came back: fall back to the raw KNN ranking.
+            return [(fid, score) for fid, score in knn_results[:top_k]]
+        # Step 3: Fisher-Rao re-scoring on the subset
+        # Query-side Fisher parameters are optional; they stay None when the
+        # embedder is missing or does not expose compute_fisher_params.
+        q_mean: np.ndarray | None = None
+        q_var: np.ndarray | None = None
+        if self._embedder and hasattr(self._embedder, 'compute_fisher_params'):
+            qm, qv = self._embedder.compute_fisher_params(query_embedding)
+            q_mean = np.array(qm, dtype=np.float32)
+            q_var = np.array(qv, dtype=np.float32)
+        scored: list[tuple[str, float]] = []
+        for fact in facts:
+            # The KNN score is used as the cosine term; a fact the KNN did
+            # not return scores 0.0 here.
+            cos_sim = knn_scores.get(fact.fact_id, 0.0)
+            # Graduated Fisher-Rao ramp (preserved from original)
+            fisher_weight = min(1.2, (fact.access_count or 0) / 10.0 * 1.2)
+            # Fisher-Rao applies only when the fact has been accessed, has an
+            # embedding and variance, and the variance matches the query size.
+            if (fisher_weight > 0.01
+                    and fact.fisher_variance is not None
+                    and fact.embedding is not None
+                    and len(fact.fisher_variance) == len(q_vec)):
+                f_vec = np.array(fact.embedding, dtype=np.float32)
+                var_vec = np.array(fact.fisher_variance, dtype=np.float32)
+                f_sim = self._compute_fisher_sim(
+                    q_vec, f_vec, var_vec, fact, q_mean, q_var,
+                )
+                # The ramp tops out at 1.2 but is clipped to 1.0 for blending,
+                # so the two weights always sum to 1.
+                capped_w = min(1.0, fisher_weight)
+                sim = capped_w * f_sim + (1.0 - capped_w) * cos_sim
+            else:
+                sim = cos_sim
+            # Drop weak matches: only similarities above 0.3 are kept.
+            if sim > 0.3:
+                scored.append((fact.fact_id, sim))
+        scored.sort(key=lambda x: x[1], reverse=True)
+        return scored[:top_k]
+    def _search_full_scan(
+        self,
+        query_embedding: list[float],
+        q_vec: np.ndarray,
+        profile_id: str,
+        top_k: int,
+    ) -> list[tuple[str, float]]:
+        """Original full-table-scan search. Used as fallback when VectorStore
+        is unavailable or empty (cold start).
+        """
         # Compute query Fisher params for Bayesian comparison (F45 fix)
         q_mean: np.ndarray | None = None
         q_var: np.ndarray | None = None

package/src/superlocalmemory/retrieval/spreading_activation.py ADDED Viewed

@@ -0,0 +1,311 @@
+# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
+# Licensed under the MIT License - see LICENSE file
+# Part of SuperLocalMemory V3
+"""SYNAPSE spreading activation -- 5th retrieval channel.
+SYNAPSE (arXiv 2601.02744) 5-step algorithm adapted for SLM.
+Pure math -- no LLM calls at query time. With M=7, T=3 the
+computation is ~21 neighbor lookups (<5ms on SQLite with indexes).
+Reads BOTH graph_edges + association_edges via UNION query (Rule 13).
+Registered as 5th channel via ChannelRegistry (needs_embedding=True).
+Part of Qualixar | Author: Varun Pratap Bhardwaj
+License: MIT
+"""
+from __future__ import annotations
+import hashlib
+import logging
+import math
+from dataclasses import dataclass
+from typing import Any
+import numpy as np
+from superlocalmemory.storage.models import _new_id
+logger = logging.getLogger(__name__)
+# ---------------------------------------------------------------------------
+# Configuration (frozen dataclass, Rule 10)
+# ---------------------------------------------------------------------------
+@dataclass(frozen=True)
+class SpreadingActivationConfig:
+    """Immutable hyperparameters for SYNAPSE spreading activation.
+    All hyperparameters from the SYNAPSE paper (arXiv 2601.02744).
+    SYNAPSE tuned on 384d (all-MiniLM-L6-v2). SLM uses 768d
+    (nomic-embed-text). Phase 3 calibration test verifies convergence.
+    The channel is off by default: ``enabled`` must be set to True before
+    SpreadingActivation.search returns anything.
+    """
+    alpha: float = 1.0           # Seed scaling: initial activation = alpha * seed similarity
+    delta: float = 0.5           # Self-retention: share of a node's activation kept each iteration
+    spreading_factor: float = 0.8  # S: multiplier on the energy passed along each edge
+    theta: float = 0.5           # Threshold subtracted from raw activation before the sigmoid
+    top_m: int = 7               # Lateral inhibition: max nodes kept per iteration; also the KNN seed count
+    max_iterations: int = 3      # T: number of propagation rounds
+    tau_gate: float = 0.12       # FOK gate: results are rejected when max activation is below this
+    enabled: bool = False        # Feature flag (Rule 12); search() returns [] while False
+# ---------------------------------------------------------------------------
+# SpreadingActivation Channel
+# ---------------------------------------------------------------------------
+class SpreadingActivation:
+    """SYNAPSE 5-step spreading activation as 5th retrieval channel.
+    Algorithm:
+        Step 1: Initialization with ALPHA seed scaling
+        Step 2: Propagation with fan effect (out-degree normalization)
+        Step 3: Lateral inhibition (top-M=7 pruning)
+        Step 4: Nonlinear sigmoid gating
+        Step 5: Iterate T=3 times, then FOK gate
+    Registered as 5th channel via ChannelRegistry (Rule 07).
+    Reads BOTH graph_edges + association_edges via UNION query (Rule 13).
+    """
+    def __init__(
+        self,
+        db: Any,
+        vector_store: Any,
+        config: SpreadingActivationConfig | None = None,
+    ) -> None:
+        self._db = db
+        self._vector_store = vector_store
+        self._config = config or SpreadingActivationConfig()
+    def search(
+        self,
+        query: Any,
+        profile_id: str = "",
+        top_k: int = 7,
+    ) -> list[tuple[str, float]]:
+        """Channel-compatible interface: (query, top_k) -> [(fact_id, score)].
+        Matches ANNSearchable protocol (Rule 07).
+        """
+        if not self._config.enabled:
+            return []
+        try:
+            # Step 0: Get seed nodes from VectorStore KNN
+            seed_results = self._vector_store.search(
+                query, top_k=self._config.top_m,
+            )
+            if not seed_results:
+                return []
+            # Check cache first
+            query_hash = self._compute_query_hash(query, profile_id)
+            cached = self._get_cached_results(query_hash, profile_id)
+            if cached:
+                return cached[:top_k]
+            # Run 5-step spreading activation
+            activations = self._propagate(seed_results, profile_id)
+            # FOK gating
+            if not self._fok_check(activations):
+                return []
+            # Cache results
+            self._cache_results(query_hash, profile_id, activations)
+            # Return top-K sorted by activation
+            results = sorted(
+                activations.items(), key=lambda x: x[1], reverse=True,
+            )
+            return results[:top_k]
+        except Exception as exc:
+            logger.debug(
+                "SpreadingActivation.search failed for profile %s: %s",
+                profile_id, exc,
+            )
+            return []
+    def _propagate(
+        self,
+        seeds: list[tuple[str, float]],
+        profile_id: str,
+    ) -> dict[str, float]:
+        """Execute the 5-step SYNAPSE algorithm.
+        Step 1: a_i^(0) = alpha * sim(h_i, h_q) for seeds, 0 otherwise
+        Step 2: u_i^(t+1) = delta * a_i^(t) + S * SUM(w_ji/deg(j) * a_j^(t))
+        Step 3: Lateral inhibition -- keep top-M=7 only
+        Step 4: sigmoid(u - theta)
+        Step 5: Iterate T=3 times
+        (M=7 and T=3 are the config defaults ``top_m`` and ``max_iterations``.)
+        Args:
+            seeds: (fact_id, similarity) pairs used as initial activations.
+            profile_id: Profile used to scope the neighbor lookups.
+        Returns:
+            Node id -> sigmoid-gated activation for the at most ``top_m``
+            nodes surviving the last iteration. With ``max_iterations`` of 0
+            the alpha-scaled seeds are returned ungated.
+        """
+        cfg = self._config
+        # Step 1: Initialization
+        activations: dict[str, float] = {}
+        for fact_id, similarity in seeds:
+            activations[fact_id] = cfg.alpha * similarity
+        # Out-degree cache for the fan effect, filled lazily the first time a
+        # node spreads. Only the degree is cached: the neighbor list itself
+        # is fetched again on every iteration.
+        degree_cache: dict[str, int] = {}
+        # Steps 2-4, repeated T times
+        for _iteration in range(cfg.max_iterations):
+            new_activations: dict[str, float] = {}
+            for node_id, activation in activations.items():
+                # Near-zero nodes do not spread (they still receive the
+                # self-retention term below).
+                if activation < 0.001:
+                    continue
+                # Get neighbors from BOTH tables (Rule 13)
+                neighbors = self._get_unified_neighbors(node_id, profile_id)
+                # Out-degree for fan effect normalization: the number of
+                # neighbor rows returned, floored at 1.
+                if node_id not in degree_cache:
+                    degree_cache[node_id] = max(len(neighbors), 1)
+                out_degree = degree_cache[node_id]
+                # Step 2: Propagation with fan effect
+                for neighbor_id, edge_weight in neighbors:
+                    spread = (
+                        cfg.spreading_factor
+                        * (edge_weight / out_degree)
+                        * activation
+                    )
+                    # Contributions from several sources accumulate.
+                    new_activations[neighbor_id] = (
+                        new_activations.get(neighbor_id, 0.0) + spread
+                    )
+            # Add self-retention (delta * current activation)
+            for node_id, activation in activations.items():
+                new_activations[node_id] = (
+                    new_activations.get(node_id, 0.0) + cfg.delta * activation
+                )
+            # Step 3: Lateral inhibition -- keep only top-M
+            # sorted() is stable, so equal activations keep the insertion
+            # order of new_activations.
+            sorted_nodes = sorted(
+                new_activations.items(), key=lambda x: x[1], reverse=True,
+            )
+            top_m_nodes = sorted_nodes[: cfg.top_m]
+            # Step 4: Nonlinear activation (sigmoid with threshold shift)
+            # The gated values replace the activations for the next round.
+            activations = {}
+            for node_id, raw_activation in top_m_nodes:
+                gated = 1.0 / (1.0 + math.exp(-(raw_activation - cfg.theta)))
+                activations[node_id] = gated
+        return activations
+    def _get_unified_neighbors(
+        self, node_id: str, profile_id: str,
+    ) -> list[tuple[str, float]]:
+        """Get neighbors from BOTH graph_edges and association_edges.
+        Uses bidirectional UNION query (Section 4 of LLD).
+        """
+        try:
+            rows = self._db.execute(
+                """
+                SELECT target_id AS neighbor_id, weight FROM graph_edges
+                    WHERE source_id = ? AND profile_id = ?
+                UNION ALL
+                SELECT target_fact_id AS neighbor_id, weight FROM association_edges
+                    WHERE source_fact_id = ? AND profile_id = ?
+                UNION ALL
+                SELECT source_id AS neighbor_id, weight FROM graph_edges
+                    WHERE target_id = ? AND profile_id = ?
+                UNION ALL
+                SELECT source_fact_id AS neighbor_id, weight FROM association_edges
+                    WHERE target_fact_id = ? AND profile_id = ?
+                """,
+                (node_id, profile_id, node_id, profile_id,
+                 node_id, profile_id, node_id, profile_id),
+            )
+            return [
+                (dict(r)["neighbor_id"], dict(r)["weight"]) for r in rows
+            ]
+        except Exception as exc:
+            logger.debug(
+                "SpreadingActivation: UNION query failed for node %s "
+                "profile %s: %s",
+                node_id, profile_id, exc,
+            )
+            return []
+    def _fok_check(self, activations: dict[str, float]) -> bool:
+        """Feeling-of-Knowing gate.
+        If max activation < tau_gate (0.12), reject results as noise.
+        """
+        if not activations:
+            return False
+        return max(activations.values()) >= self._config.tau_gate
+    def _compute_query_hash(self, query: Any, profile_id: str) -> str:
+        """Deterministic hash for cache key."""
+        if isinstance(query, np.ndarray):
+            data = query.tobytes() + profile_id.encode()
+        elif isinstance(query, list):
+            data = np.array(query, dtype=np.float32).tobytes() + profile_id.encode()
+        else:
+            data = str(query).encode() + profile_id.encode()
+        return hashlib.sha256(data).hexdigest()[:16]
+    def _get_cached_results(
+        self, query_hash: str, profile_id: str,
+    ) -> list[tuple[str, float]] | None:
+        """Check activation_cache for recent results."""
+        try:
+            rows = self._db.execute(
+                "SELECT node_id, activation_value FROM activation_cache "
+                "WHERE profile_id = ? AND query_hash = ? "
+                "AND expires_at > datetime('now') "
+                "ORDER BY activation_value DESC",
+                (profile_id, query_hash),
+            )
+            if not rows:
+                return None
+            return [
+                (dict(r)["node_id"], dict(r)["activation_value"])
+                for r in rows
+            ]
+        except Exception:
+            return None
+    def _cache_results(
+        self,
+        query_hash: str,
+        profile_id: str,
+        activations: dict[str, float],
+    ) -> None:
+        """Store results in activation_cache with 1-hour TTL."""
+        try:
+            for node_id, value in activations.items():
+                self._db.execute(
+                    "INSERT OR REPLACE INTO activation_cache "
+                    "(cache_id, profile_id, query_hash, node_id, "
+                    " activation_value, iteration, created_at, expires_at) "
+                    "VALUES (?, ?, ?, ?, ?, ?, datetime('now'), "
+                    "datetime('now', '+1 hour'))",
+                    (_new_id(), profile_id, query_hash, node_id, value,
+                     self._config.max_iterations),
+                )
+        except Exception as exc:
+            logger.debug("Cache write failed: %s", exc)
+    def cleanup_expired_cache(self) -> int:
+        """Delete expired cache entries. Called by maintenance."""
+        try:
+            result = self._db.execute(
+                "DELETE FROM activation_cache "
+                "WHERE expires_at < datetime('now')",
+                (),
+            )
+            return len(result) if result else 0
+        except Exception:
+            return 0

package/src/superlocalmemory/retrieval/strategy.py CHANGED Viewed

@@ -16,12 +16,12 @@ import re
 from dataclasses import dataclass, field
 STRATEGY_PRESETS: dict[str, dict[str, float]] = {
-    "temporal": {"semantic": 0.8, "bm25": 1.5, "entity_graph": 0.8, "temporal": 2.0},
-    "multi_hop": {"semantic": 1.0, "bm25": 0.8, "entity_graph": 2.0, "temporal": 0.5},
-    "aggregation": {"semantic": 1.2, "bm25": 1.5, "entity_graph": 1.0, "temporal": 0.5},
-    "opinion": {"semantic": 1.8, "bm25": 0.6, "entity_graph": 0.8, "temporal": 0.3},
-    "factual": {"semantic": 1.2, "bm25": 1.4, "entity_graph": 1.0, "temporal": 0.6},
-    "entity": {"semantic": 1.0, "bm25": 1.5, "entity_graph": 1.2, "temporal": 0.5},
+    "temporal": {"semantic": 0.8, "bm25": 1.5, "entity_graph": 0.8, "temporal": 2.0, "spreading_activation": 0.5},
+    "multi_hop": {"semantic": 1.0, "bm25": 0.8, "entity_graph": 2.0, "temporal": 0.5, "spreading_activation": 2.0},
+    "aggregation": {"semantic": 1.2, "bm25": 1.5, "entity_graph": 1.0, "temporal": 0.5, "spreading_activation": 0.8},
+    "opinion": {"semantic": 1.8, "bm25": 0.6, "entity_graph": 0.8, "temporal": 0.3, "spreading_activation": 0.5},
+    "factual": {"semantic": 1.2, "bm25": 1.4, "entity_graph": 1.0, "temporal": 0.6, "spreading_activation": 0.8},
+    "entity": {"semantic": 1.0, "bm25": 1.5, "entity_graph": 1.2, "temporal": 0.5, "spreading_activation": 1.0},
     "general": {},
 }