npm - superlocalmemory - Versions diffs - 3.3.12 → 3.3.14 - Mend

superlocalmemory 3.3.12 → 3.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/package.json +1 -1
package/pyproject.toml +2 -3
package/src/superlocalmemory/core/config.py +2 -2
package/src/superlocalmemory/core/embedding_worker.py +5 -1
package/src/superlocalmemory/core/engine.py +14 -0
package/src/superlocalmemory/core/engine_wiring.py +15 -0
package/src/superlocalmemory/core/maintenance_scheduler.py +94 -0
package/src/superlocalmemory/core/reranker_worker.py +33 -7
package/src/superlocalmemory/encoding/graph_builder.py +7 -0
package/src/superlocalmemory/retrieval/engine.py +8 -3
package/src/superlocalmemory/retrieval/reranker.py +1 -1
package/src/superlocalmemory/storage/database.py +11 -3

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superlocalmemory",
-  "version": "3.3.12",
+  "version": "3.3.14",
   "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
   "keywords": [
     "ai-memory",

package/pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "superlocalmemory"
-version = "3.3.12"
+version = "3.3.14"
 description = "Information-geometric agent memory with mathematical guarantees"
 readme = "README.md"
 license = {text = "MIT"}
@@ -48,8 +48,7 @@ dependencies = [
 [project.optional-dependencies]
 search = [
-    "sentence-transformers>=4.0.0",
-    "sentence-transformers[onnx]>=4.0.0",
+    "sentence-transformers[onnx]>=5.0.0",
     "einops>=0.8.2",
     "torch>=2.2.0",
     "scikit-learn>=1.3.0,<2.0.0",

package/src/superlocalmemory/core/config.py CHANGED Viewed

@@ -154,7 +154,7 @@ class RetrievalConfig:
     # Reranking (V3.3.2: ONNX backend enabled for all modes)
     use_cross_encoder: bool = True
-    cross_encoder_model: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
+    cross_encoder_model: str = "cross-encoder/ms-marco-MiniLM-L-12-v2"
     cross_encoder_backend: str = "onnx"  # "onnx" (~200MB) or "" (PyTorch, ~1.5GB)
     # Agentic (Mode C only)
@@ -618,7 +618,7 @@ class SLMConfig:
             # but NEVER override an explicit use_cross_encoder setting.
             # The user's explicit choice always wins.
             if "cross_encoder_backend" not in rt:
-                rt.setdefault("cross_encoder_model", "cross-encoder/ms-marco-MiniLM-L-6-v2")
+                rt.setdefault("cross_encoder_model", "cross-encoder/ms-marco-MiniLM-L-12-v2")
                 rt["cross_encoder_backend"] = "onnx"
                 # Only auto-enable if user didn't explicitly set the field
                 rt.setdefault("use_cross_encoder", True)

package/src/superlocalmemory/core/embedding_worker.py CHANGED Viewed

@@ -156,4 +156,8 @@ def _respond(data: dict) -> None:
 if __name__ == "__main__":
-    _worker_main()
+    try:
+        _worker_main()
+    except KeyboardInterrupt:
+        # V3.3.13: Windows CI sends KeyboardInterrupt on test completion.
+        sys.exit(0)

package/src/superlocalmemory/core/engine.py CHANGED Viewed

@@ -79,6 +79,7 @@ class MemoryEngine:
         self._auto_linker = None
         self._graph_analyzer = None
         self._consolidation_engine = None
+        self._maintenance_scheduler = None
         self._hooks = HookRegistry()
     # -- Public properties (Phase 2+ access) --------------------------------
@@ -194,6 +195,17 @@ class MemoryEngine:
         # V3.3: Check for embedding model migration on mode switch
         self._check_embedding_migration()
+        # V3.3.13: Background maintenance scheduler (Langevin/Ebbinghaus/Sheaf)
+        if self._config.forgetting.enabled:
+            try:
+                from superlocalmemory.core.maintenance_scheduler import MaintenanceScheduler
+                self._maintenance_scheduler = MaintenanceScheduler(
+                    self._db, self._config, self._profile_id,
+                )
+                self._maintenance_scheduler.start()
+            except Exception as exc:
+                logger.debug("Maintenance scheduler init failed: %s", exc)
         self._initialized = True
         logger.info(
             "MemoryEngine initialized: mode=%s profile=%s",
@@ -306,6 +318,8 @@ class MemoryEngine:
     # -- Lifecycle ----------------------------------------------------------
     def close(self) -> None:
+        if self._maintenance_scheduler is not None:
+            self._maintenance_scheduler.stop()
         self._initialized = False
     @property

package/src/superlocalmemory/core/engine_wiring.py CHANGED Viewed

@@ -454,6 +454,21 @@ def init_retrieval(
         trust_scorer=trust_scorer,
     )
+    # V3.3.13: Ensure reranker warmup is in progress.
+    # The CrossEncoderReranker constructor starts background warmup, but
+    # callers can also call warmup_sync() to block until ready.
+    # Here we just log warmup status — benchmark scripts call warmup_sync() explicitly.
+    if reranker is not None:
+        import threading
+        def _log_warmup_status() -> None:
+            ready = reranker.warmup_sync(timeout=180)
+            if ready:
+                logger.info("Cross-encoder reranker warm and ready")
+            else:
+                logger.warning("Cross-encoder reranker warmup failed — recalls will use fallback scoring")
+        t = threading.Thread(target=_log_warmup_status, daemon=True, name="ce-init-warmup")
+        t.start()
     # Phase A: Register forgetting filter into the channel registry
     try:
         from superlocalmemory.retrieval.forgetting_filter import register_forgetting_filter

package/src/superlocalmemory/core/maintenance_scheduler.py ADDED Viewed

@@ -0,0 +1,94 @@
+# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
+# Licensed under the MIT License - see LICENSE file
+# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
+"""SuperLocalMemory V3 — Background Maintenance Scheduler.
+V3.3.13: Periodically triggers Langevin/Ebbinghaus/Sheaf maintenance
+so users don't need to call run_maintenance manually.
+Configurable interval via ForgettingConfig.scheduler_interval_minutes.
+Defaults to 30 min. Disabled during benchmarks (no config.forgetting.enabled).
+Part of Qualixar | Author: Varun Pratap Bhardwaj
+License: MIT
+"""
+from __future__ import annotations
+import logging
+import threading
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from superlocalmemory.core.config import SLMConfig
+    from superlocalmemory.storage.database import DatabaseManager
+logger = logging.getLogger(__name__)
+class MaintenanceScheduler:
+    """Background scheduler for periodic math maintenance.
+    Runs Langevin/Sheaf/Fisher maintenance at configurable intervals.
+    Thread-safe. Auto-stops on garbage collection or explicit stop().
+    """
+    def __init__(
+        self,
+        db: DatabaseManager,
+        config: SLMConfig,
+        profile_id: str = "default",
+    ) -> None:
+        self._db = db
+        self._config = config
+        self._profile_id = profile_id
+        self._timer: threading.Timer | None = None
+        self._running = False
+        self._interval = config.forgetting.scheduler_interval_minutes * 60.0
+    def start(self) -> None:
+        """Start the periodic scheduler. Idempotent."""
+        if self._running:
+            return
+        self._running = True
+        self._schedule_next()
+        logger.info(
+            "Maintenance scheduler started (interval=%dm)",
+            self._config.forgetting.scheduler_interval_minutes,
+        )
+    def stop(self) -> None:
+        """Stop the scheduler. Idempotent."""
+        self._running = False
+        if self._timer is not None:
+            self._timer.cancel()
+            self._timer = None
+        logger.info("Maintenance scheduler stopped")
+    def _schedule_next(self) -> None:
+        """Schedule the next maintenance run."""
+        if not self._running:
+            return
+        self._timer = threading.Timer(self._interval, self._run)
+        self._timer.daemon = True
+        self._timer.start()
+    def _run(self) -> None:
+        """Execute maintenance and schedule next run."""
+        if not self._running:
+            return
+        try:
+            from superlocalmemory.core.maintenance import run_maintenance
+            counts = run_maintenance(self._db, self._config, self._profile_id)
+            logger.info("Scheduled maintenance complete: %s", counts)
+        except Exception as exc:
+            logger.warning("Scheduled maintenance failed: %s", exc)
+        finally:
+            self._schedule_next()
+    def __del__(self) -> None:
+        try:
+            self.stop()
+        except Exception:
+            pass

package/src/superlocalmemory/core/reranker_worker.py CHANGED Viewed

@@ -115,7 +115,7 @@ def _worker_main() -> None:
             continue
         if cmd == "load":
-            name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-6-v2")
+            name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-12-v2")
             backend = req.get("backend", "onnx")
             model, active_backend, model_name = _load_model(name, backend)
             _respond({
@@ -133,7 +133,7 @@ def _worker_main() -> None:
                 continue
             if model is None:
                 # Auto-load with defaults
-                name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-6-v2")
+                name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-12-v2")
                 backend = req.get("backend", "onnx")
                 model, active_backend, model_name = _load_model(name, backend)
             if model is None:
@@ -162,7 +162,7 @@ def _worker_main() -> None:
                 _respond({"ok": False, "error": "Missing query or document"})
                 continue
             if model is None:
-                name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-6-v2")
+                name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-12-v2")
                 backend = req.get("backend", "onnx")
                 model, active_backend, model_name = _load_model(name, backend)
             if model is None:
@@ -186,22 +186,43 @@ def _worker_main() -> None:
 def _load_model(
     name: str, backend: str,
 ) -> tuple:
-    """Load cross-encoder model. Returns (model, backend_name, model_name)."""
+    """Load cross-encoder model. Returns (model, backend_name, model_name).
+    V3.3.13: sentence-transformers 5.x+ supports backend='onnx' for
+    CrossEncoder. We use a 3-tier fallback chain:
+      1. ONNX + platform-quantized model (fastest, ~200MB, 2.4ms/pair)
+      2. ONNX + generic model (fast, auto-exported on first use)
+      3. PyTorch (always works, ~500MB, 6ms/pair)
+    Cross-platform:
+      Mac ARM64 → model_qint8_arm64.onnx
+      x86_64    → model_quint8_avx2.onnx
+      Fallback  → model.onnx (generic)
+    """
     try:
         from sentence_transformers import CrossEncoder
         if backend == "onnx":
+            # Tier 1: Platform-specific quantized ONNX (fastest)
             try:
                 onnx_file = _detect_onnx_variant()
                 m = CrossEncoder(
                     name, backend="onnx",
                     model_kwargs={"file_name": onnx_file},
                 )
+                return m, f"onnx-quantized({onnx_file})", name
+            except Exception:
+                pass
+            # Tier 2: Generic ONNX (auto-exported by optimum)
+            try:
+                m = CrossEncoder(name, backend="onnx")
                 return m, "onnx", name
             except Exception:
-                # ONNX failed → try PyTorch
                 pass
-        # PyTorch fallback (or explicit pytorch backend)
+        # Tier 3: PyTorch (always works, no ONNX dependency needed)
         m = CrossEncoder(name)
         return m, "pytorch", name
     except ImportError:
@@ -217,4 +238,9 @@ def _respond(data: dict) -> None:
 if __name__ == "__main__":
-    _worker_main()
+    try:
+        _worker_main()
+    except KeyboardInterrupt:
+        # V3.3.13: Windows CI sends KeyboardInterrupt on test completion.
+        # Exit cleanly instead of printing a traceback that fails CI.
+        sys.exit(0)

package/src/superlocalmemory/encoding/graph_builder.py CHANGED Viewed

@@ -261,6 +261,9 @@ class GraphBuilder:
                 break
         return edges
+    # V3.3.13: Cap causal edges per entity to prevent O(n²) explosion (same as entity/temporal).
+    _MAX_CAUSAL_EDGES_PER_ENTITY: int = 20
     def _build_causal_edges(
         self, new_fact: AtomicFact, profile_id: str,
     ) -> list[GraphEdge]:
@@ -273,7 +276,10 @@ class GraphBuilder:
         edges: list[GraphEdge] = []
         seen: set[str] = set()
         for entity_id in new_fact.canonical_entities:
+            causal_edge_count = 0
             for other in self._db.get_facts_by_entity(entity_id, profile_id):
+                if causal_edge_count >= self._MAX_CAUSAL_EDGES_PER_ENTITY:
+                    break
                 if other.fact_id == new_fact.fact_id or other.fact_id in seen:
                     continue
                 if self._edge_exists(other.fact_id, new_fact.fact_id, EdgeType.CAUSAL, profile_id):
@@ -284,6 +290,7 @@ class GraphBuilder:
                     target_id=new_fact.fact_id, edge_type=EdgeType.CAUSAL,
                     weight=_CAUSAL_WEIGHT,
                 ))
+                causal_edge_count += 1
         return edges
     # -- Helpers -----------------------------------------------------------

package/src/superlocalmemory/retrieval/engine.py CHANGED Viewed

@@ -301,11 +301,16 @@ class RetrievalEngine:
     def _load_facts(
         self, fused: list[FusionResult], profile_id: str,
     ) -> dict[str, AtomicFact]:
-        needed = {fr.fact_id for fr in fused}
+        """Load facts by ID — targeted query, not full-table scan.
+        V3.3.13: Was loading ALL facts (O(n) memory) then filtering.
+        Now uses get_facts_by_ids() for O(k) where k = pool size (~60).
+        """
+        needed = [fr.fact_id for fr in fused]
         if not needed:
             return {}
-        all_facts = self._db.get_all_facts(profile_id)
-        return {f.fact_id: f for f in all_facts if f.fact_id in needed}
+        facts = self._db.get_facts_by_ids(needed, profile_id)
+        return {f.fact_id: f for f in facts}
     # -- Cross-encoder rerank -----------------------------------------------

package/src/superlocalmemory/retrieval/reranker.py CHANGED Viewed

@@ -59,7 +59,7 @@ class CrossEncoderReranker:
     def __init__(
         self,
-        model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2",
+        model_name: str = "cross-encoder/ms-marco-MiniLM-L-12-v2",
         backend: str = "onnx",
     ) -> None:
         self._model_name = model_name

package/src/superlocalmemory/storage/database.py CHANGED Viewed

@@ -250,12 +250,20 @@ class DatabaseManager:
         )
         return [self._row_to_fact(r) for r in rows]
+    _MAX_FACTS_PER_ENTITY_LOOKUP: int = 100
     def get_facts_by_entity(self, entity_id: str, profile_id: str) -> list[AtomicFact]:
-        """Facts whose canonical_entities JSON array contains *entity_id*."""
+        """Facts whose canonical_entities JSON array contains *entity_id*.
+        V3.3.14: LIMIT to _MAX_FACTS_PER_ENTITY_LOOKUP (100) to prevent
+        unbounded memory growth during ingestion. Previously loaded ALL
+        facts for popular entities (500+) causing 17GB+ memory usage.
+        Ordered by created_at DESC so newest facts are always included.
+        """
         rows = self.execute(
             "SELECT * FROM atomic_facts WHERE profile_id = ? AND canonical_entities_json LIKE ? "
-            "ORDER BY created_at DESC",
-            (profile_id, f'%"{entity_id}"%'),
+            "ORDER BY created_at DESC LIMIT ?",
+            (profile_id, f'%"{entity_id}"%', self._MAX_FACTS_PER_ENTITY_LOOKUP),
         )
         return [self._row_to_fact(r) for r in rows]