npm - superlocalmemory - Versions diffs - 3.3.14 → 3.3.16 - Mend

superlocalmemory 3.3.14 → 3.3.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/package.json +1 -1
package/pyproject.toml +1 -1
package/src/superlocalmemory/core/recall_pipeline.py +44 -33
package/src/superlocalmemory/core/reranker_worker.py +21 -0
package/src/superlocalmemory/retrieval/engine.py +14 -18
package/src/superlocalmemory/retrieval/reranker.py +4 -3

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superlocalmemory",
-  "version": "3.3.14",
+  "version": "3.3.16",
   "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
   "keywords": [
     "ai-memory",

package/pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "superlocalmemory"
-version = "3.3.14"
+version = "3.3.16"
 description = "Information-geometric agent memory with mathematical guarantees"
 readme = "README.md"
 license = {text = "MIT"}

package/src/superlocalmemory/core/recall_pipeline.py CHANGED Viewed

@@ -24,6 +24,36 @@ from superlocalmemory.storage.models import Mode, RecallResponse
 logger = logging.getLogger(__name__)
+# ---------------------------------------------------------------------------
+# V3.3.16: Module-level singletons for recall hot-path objects.
+# Prevents creating new BehavioralTracker / ForgettingScheduler per recall
+# (304 recalls = 304 objects that fragment pymalloc arenas → 25GB).
+# ---------------------------------------------------------------------------
+_behavioral_tracker_cache: dict[int, object] = {}
+_forgetting_scheduler_cache: dict[int, object] = {}
+def _get_behavioral_tracker(db: Any) -> Any:
+    """Get or create a cached BehavioralTracker for this DB instance."""
+    key = id(db)
+    if key not in _behavioral_tracker_cache:
+        from superlocalmemory.learning.behavioral import BehavioralTracker
+        _behavioral_tracker_cache[key] = BehavioralTracker(db)
+    return _behavioral_tracker_cache[key]
+def _get_forgetting_scheduler(db: Any, config: Any) -> Any:
+    """Get or create a cached ForgettingScheduler for this DB instance."""
+    key = id(db)
+    if key not in _forgetting_scheduler_cache:
+        from superlocalmemory.learning.forgetting_scheduler import ForgettingScheduler
+        from superlocalmemory.math.ebbinghaus import EbbinghausCurve
+        ebbinghaus = EbbinghausCurve(config.forgetting)
+        _forgetting_scheduler_cache[key] = ForgettingScheduler(db, ebbinghaus, config.forgetting)
+    return _forgetting_scheduler_cache[key]
 # ---------------------------------------------------------------------------
 # apply_adaptive_ranking  (was MemoryEngine._apply_adaptive_ranking)
 # ---------------------------------------------------------------------------
@@ -192,11 +222,11 @@ def run_recall(
         except Exception as exc:
             logger.debug("Access log batch store failed: %s", exc)
-    # V3.3.12: Wire BehavioralTracker.record_query() into live recall pipeline
+    # V3.3.16: Behavioral tracking + spaced repetition use module-level
+    # singletons to avoid creating new objects per recall (was causing
+    # object accumulation across 304 benchmark recalls).
     try:
-        from superlocalmemory.learning.behavioral import BehavioralTracker
-        _tracker = BehavioralTracker(db)
-        _tracker.record_query(
+        _get_behavioral_tracker(db).record_query(
             profile_id=profile_id, query=query,
             query_type=response.query_type,
             result_count=len(response.results),
@@ -204,15 +234,11 @@ def run_recall(
     except Exception as exc:
         logger.debug("Behavioral tracking: %s", exc)
-    # V3.3.12: Spaced repetition update on recall (Ebbinghaus on_access_event)
     if response.results:
         try:
-            from superlocalmemory.learning.forgetting_scheduler import ForgettingScheduler
-            from superlocalmemory.math.ebbinghaus import EbbinghausCurve
-            _ebbinghaus = EbbinghausCurve(config.forgetting)
-            _fsched = ForgettingScheduler(db, _ebbinghaus, config.forgetting)
+            fsched = _get_forgetting_scheduler(db, config)
             for r in response.results[:10]:
-                _fsched.on_access_event(r.fact.fact_id, profile_id)
+                fsched.on_access_event(r.fact.fact_id, profile_id)
         except Exception as exc:
             logger.debug("Spaced repetition update: %s", exc)
@@ -237,31 +263,16 @@ def run_recall(
         for r in response.results:
             trust_scorer.update_on_access("fact", r.fact.fact_id, profile_id)
-    # Fisher Bayesian update on recall
-    q_emb = embedder.embed(query) if embedder else None
-    q_var_arr = None
-    if embedder and q_emb:
-        _, q_var_list = embedder.compute_fisher_params(q_emb)
-        import numpy as _np
-        q_var_arr = _np.array(q_var_list, dtype=_np.float64)
+    # V3.3.16: Access count update only — no redundant embedding call.
+    # Fisher Bayesian variance update moved to store_pipeline (write-time)
+    # to avoid per-recall memory pressure from numpy array creation.
+    # Previously: embedder.embed(query) here duplicated the embed call
+    # already done in retrieval engine, creating 768-dim numpy arrays
+    # 304 times during benchmark → pymalloc arena fragmentation → 25GB.
     for r in response.results:
-        updates: dict[str, object] = {
+        db.update_fact(r.fact.fact_id, {
             "access_count": r.fact.access_count + 1,
-        }
-        # Bayesian variance narrowing after 3+ accesses
-        if (q_var_arr is not None
-                and r.fact.fisher_variance
-                and len(r.fact.fisher_variance) == len(q_var_arr)
-                and r.fact.access_count >= 3):
-            import numpy as _np
-            f_var = _np.array(r.fact.fisher_variance, dtype=_np.float64)
-            # Conjugate Gaussian update: 1/new_var = 1/f_var + 1/q_var
-            new_var = 1.0 / (1.0 / _np.maximum(f_var, 0.05) + 1.0 / _np.maximum(q_var_arr, 0.05))
-            new_var = _np.clip(new_var, 0.05, 2.0)
-            updates["fisher_variance"] = new_var.tolist()
-        db.update_fact(r.fact.fact_id, updates)
+        })
     # Post-operation hooks (audit, trust signal, learning)
     hook_ctx["result_count"] = len(response.results)

package/src/superlocalmemory/core/reranker_worker.py CHANGED Viewed

@@ -118,10 +118,31 @@ def _worker_main() -> None:
             name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-12-v2")
             backend = req.get("backend", "onnx")
             model, active_backend, model_name = _load_model(name, backend)
+            # V3.3.16: Run real inference to trigger ONNX CoreML JIT compilation.
+            # Without this, first real rerank call triggers 30-60s compilation
+            # that exceeds the caller's timeout, killing the worker.
+            warmup_ok = False
+            if model is not None:
+                try:
+                    dummy_pairs = [
+                        ("What is the capital of France?", "Paris is the capital of France."),
+                        ("Who wrote Hamlet?", "Shakespeare wrote many plays."),
+                        ("What color is the sky?", "The sky is blue on a clear day."),
+                    ]
+                    try:
+                        import torch
+                        with torch.inference_mode():
+                            _scores = model.predict(dummy_pairs)
+                    except ImportError:
+                        _scores = model.predict(dummy_pairs)
+                    warmup_ok = True
+                except Exception:
+                    pass
             _respond({
                 "ok": model is not None,
                 "backend": active_backend,
                 "model": model_name,
+                "warmup_inference": warmup_ok,
             })
             continue

package/src/superlocalmemory/retrieval/engine.py CHANGED Viewed

@@ -338,31 +338,27 @@ class RetrievalEngine:
         if not candidates:
             return fused
-        # Bug 3 fix: strip speaker tags from content before CE scoring
-        clean_candidates: list[tuple[AtomicFact, float]] = []
-        for fact, score in candidates:
-            cleaned_content = re.sub(r'^\[[A-Za-z]+\]:\s*', '', fact.content)
-            clean_fact = AtomicFact(
-                fact_id=fact.fact_id, memory_id=fact.memory_id,
-                profile_id=fact.profile_id, content=cleaned_content,
-                fact_type=fact.fact_type, entities=fact.entities,
-                canonical_entities=fact.canonical_entities,
-                observation_date=fact.observation_date,
-                referenced_date=fact.referenced_date,
-                confidence=fact.confidence, importance=fact.importance,
-                evidence_count=fact.evidence_count,
-                access_count=fact.access_count,
-                embedding=fact.embedding, created_at=fact.created_at,
-            )
-            clean_candidates.append((clean_fact, score))
+        # V3.3.16: Strip speaker tags WITHOUT copying full AtomicFact objects.
+        # Previously created full copies including 768-dim embeddings (~6KB each),
+        # which over 304 recalls caused pymalloc arena fragmentation → 25GB.
+        # Now: temporarily patch .content on originals, rerank, then restore.
+        originals: list[tuple[AtomicFact, str]] = []  # (fact, original_content)
+        for fact, _ in candidates:
+            orig = fact.content
+            fact.content = re.sub(r'^\[[A-Za-z]+\]:\s*', '', orig)
+            originals.append((fact, orig))
         try:
             scored = self._reranker.rerank(  # type: ignore[union-attr]
-                query, clean_candidates, top_k=len(clean_candidates),
+                query, candidates, top_k=len(candidates),
             )
         except Exception as exc:
             logger.warning("Cross-encoder rerank failed: %s", exc)
             return fused
+        finally:
+            # Restore original content (with speaker tags)
+            for fact, orig_content in originals:
+                fact.content = orig_content
         score_map = {fact.fact_id: score for fact, score in scored}

package/src/superlocalmemory/retrieval/reranker.py CHANGED Viewed

@@ -321,13 +321,14 @@ class CrossEncoderReranker:
         documents = [fact.content for fact, _ in candidates]
-        # V3.3.12: Increased timeout 10s→60s — L-12-v2 needs PyTorch + ONNX load.
-        # Critical: Paper 2 ablation showed -30.7pp without reranking.
+        # V3.3.16: Timeout 180s — ONNX CoreML compilation can take 30-60s on
+        # first inference even after model load. The warmup_inference in the
+        # worker should prevent this, but 180s is a safety net.
         resp = self._send_request({
             "cmd": "rerank",
             "query": query,
             "documents": documents,
-        }, timeout=60.0)
+        }, timeout=180.0)
         if resp is None or not resp.get("ok"):
             # Fallback: return by existing score