npm - superlocalmemory - Versions diffs - 3.3.11 → 3.3.13 - Mend

superlocalmemory 3.3.11 → 3.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

package/package.json +1 -1
package/pyproject.toml +2 -3
package/src/superlocalmemory/core/config.py +9 -6
package/src/superlocalmemory/core/embedding_worker.py +5 -1
package/src/superlocalmemory/core/embeddings.py +3 -1
package/src/superlocalmemory/core/engine.py +14 -0
package/src/superlocalmemory/core/engine_wiring.py +16 -1
package/src/superlocalmemory/core/maintenance_scheduler.py +94 -0
package/src/superlocalmemory/core/recall_pipeline.py +24 -0
package/src/superlocalmemory/core/recall_worker.py +22 -4
package/src/superlocalmemory/core/reranker_worker.py +246 -0
package/src/superlocalmemory/core/store_pipeline.py +12 -2
package/src/superlocalmemory/encoding/fact_extractor.py +16 -8
package/src/superlocalmemory/encoding/graph_builder.py +21 -1
package/src/superlocalmemory/learning/adaptive.py +2 -2
package/src/superlocalmemory/math/fisher_quantized.py +8 -4
package/src/superlocalmemory/math/langevin.py +15 -2
package/src/superlocalmemory/mcp/resources.py +2 -2
package/src/superlocalmemory/mcp/shared.py +27 -0
package/src/superlocalmemory/mcp/tools_active.py +31 -1
package/src/superlocalmemory/mcp/tools_core.py +15 -9
package/src/superlocalmemory/mcp/tools_v28.py +2 -2
package/src/superlocalmemory/mcp/tools_v3.py +3 -0
package/src/superlocalmemory/mcp/tools_v33.py +68 -7
package/src/superlocalmemory/retrieval/agentic.py +1 -1
package/src/superlocalmemory/retrieval/bm25_channel.py +21 -1
package/src/superlocalmemory/retrieval/engine.py +44 -9
package/src/superlocalmemory/retrieval/entity_channel.py +6 -0
package/src/superlocalmemory/retrieval/fusion.py +2 -2
package/src/superlocalmemory/retrieval/hopfield_channel.py +2 -2
package/src/superlocalmemory/retrieval/reranker.py +24 -7
package/src/superlocalmemory/retrieval/semantic_channel.py +2 -2
package/src/superlocalmemory/retrieval/temporal_channel.py +14 -1
package/src/superlocalmemory/storage/schema.py +2 -0

package/src/superlocalmemory/retrieval/reranker.py CHANGED

@@ -35,7 +35,9 @@ _live_rerankers: set[weakref.ref] = set()
 logger = logging.getLogger(__name__)
 _IDLE_TIMEOUT_SECONDS = 120  # 2 min → kill worker
-_SUBPROCESS_RESPONSE_TIMEOUT = 120  # 120s for ONNX cold start
+# V3.3.12: Configurable via SLM_RERANKER_IDLE_TIMEOUT env var
+_IDLE_TIMEOUT_SECONDS = int(os.environ.get("SLM_RERANKER_IDLE_TIMEOUT", _IDLE_TIMEOUT_SECONDS))
+_SUBPROCESS_RESPONSE_TIMEOUT = 180  # V3.3.12: 180s (was 120s) for stressed system respawns
 _WORKER_RECYCLE_AFTER = 500  # Recycle after N requests
@@ -57,7 +59,7 @@ class CrossEncoderReranker:
     def __init__(
         self,
-        model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2",
+        model_name: str = "cross-encoder/ms-marco-MiniLM-L-12-v2",
         backend: str = "onnx",
     ) -> None:
         self._model_name = model_name
@@ -129,8 +131,23 @@ class CrossEncoderReranker:
             finally:
                 self._worker_loading = False
-        t = threading.Thread(target=_warmup, daemon=True, name="ce-warmup")
-        t.start()
+        self._warmup_thread = threading.Thread(target=_warmup, daemon=True, name="ce-warmup")
+        self._warmup_thread.start()
+    def warmup_sync(self, timeout: float = 120.0) -> bool:
+        """Block until reranker model is loaded. Returns True if ready.
+        V3.3.12: Critical for benchmarks and first-recall quality.
+        Without this, first 30-60s of recalls get no reranking (-30.7pp).
+        """
+        if self._model_loaded:
+            return True
+        if not self._worker_loading and not self._model_loaded:
+            self._start_background_warmup()
+        t = getattr(self, '_warmup_thread', None)
+        if t is not None:
+            t.join(timeout=timeout)
+        return self._model_loaded
     # ------------------------------------------------------------------
     # Worker management (mirrors EmbeddingService pattern)
@@ -304,13 +321,13 @@ class CrossEncoderReranker:
         documents = [fact.content for fact, _ in candidates]
-        # Short timeout (10s) — model should already be loaded by warmup.
-        # If worker crashed or is still loading, fallback immediately.
+        # V3.3.12: Increased timeout 10s→60s — L-12-v2 needs PyTorch + ONNX load.
+        # Critical: Paper 2 ablation showed -30.7pp without reranking.
         resp = self._send_request({
             "cmd": "rerank",
             "query": query,
             "documents": documents,
-        }, timeout=10.0)
+        }, timeout=60.0)
         if resp is None or not resp.get("ok"):
             # Fallback: return by existing score

package/src/superlocalmemory/retrieval/semantic_channel.py CHANGED

@@ -182,7 +182,7 @@ class SemanticChannel:
             else:
                 sim = cos_sim
-            if sim > 0.3:
+            if sim > 0.05:
                 scored.append((fact.fact_id, sim))
         scored.sort(key=lambda x: x[1], reverse=True)
@@ -235,7 +235,7 @@ class SemanticChannel:
             else:
                 sim = cos_sim
-            if sim > 0.3:
+            if sim > 0.05:
                 scored.append((fact.fact_id, sim))
         scored.sort(key=lambda x: x[1], reverse=True)

package/src/superlocalmemory/retrieval/temporal_channel.py CHANGED

@@ -124,6 +124,16 @@ class TemporalChannel:
         import re
         _PROPER_RE = re.compile(r"\b([A-Z][a-z]+)\b")
         names = [m.group(1) for m in _PROPER_RE.finditer(query)]
+        # Also try title-cased version for lowercase queries
+        if not names:
+            names = [m.group(1) for m in _PROPER_RE.finditer(query.title())]
+        # Filter out common words from title-casing
+        _stop = {"What", "When", "Where", "Who", "Which", "How", "Does", "Did",
+                 "The", "That", "This", "There", "Then", "Have", "Has", "Had",
+                 "About", "After", "Before", "From", "With", "Would", "Could",
+                 "Should", "Will", "Because", "Also", "Just", "Like", "Know",
+                 "Think", "Tell", "Said"}
+        names = [n for n in names if n not in _stop]
         if not names:
             return []
@@ -146,7 +156,10 @@ class TemporalChannel:
                 fid = dict(row)["fact_id"]
                 if fid not in seen:
                     seen.add(fid)
-                    results.append((fid, 0.85))  # High base score for entity-temporal
+                    # Rank by position (first events more likely relevant) instead
+                    # of flat 0.85 which loses discrimination
+                    rank_score = 0.85 - len(seen) * 0.02
+                    results.append((fid, max(0.3, rank_score)))
         return results

package/src/superlocalmemory/storage/schema.py CHANGED

@@ -442,6 +442,8 @@ CREATE INDEX IF NOT EXISTS idx_edges_target
     ON graph_edges (profile_id, target_id);
 CREATE INDEX IF NOT EXISTS idx_edges_type
     ON graph_edges (profile_id, edge_type);
+CREATE INDEX IF NOT EXISTS idx_edges_exists_check
+    ON graph_edges (profile_id, source_id, target_id, edge_type);
 """