npm - superlocalmemory - Versions diffs - 3.0.34 → 3.0.35 - Mend

superlocalmemory 3.0.34 → 3.0.35

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.

Files changed (6)

package/package.json +1 -1
package/pyproject.toml +1 -1
package/src/superlocalmemory/core/config.py +2 -2
package/src/superlocalmemory/core/recall_worker.py +14 -0
package/src/superlocalmemory/core/worker_pool.py +32 -2
package/src/superlocalmemory/server/ui.py +15 -4

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superlocalmemory",
-  "version": "3.0.34",
+  "version": "3.0.35",
   "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
   "keywords": [
     "ai-memory",

package/pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "superlocalmemory"
-version = "3.0.34"
+version = "3.0.35"
 description = "Information-geometric agent memory with mathematical guarantees"
 readme = "README.md"
 license = {text = "MIT"}

package/src/superlocalmemory/core/config.py CHANGED Viewed

@@ -348,7 +348,7 @@ class SLMConfig:
                 ),
                 llm=LLMConfig(),  # No LLM
                 retrieval=RetrievalConfig(
-                    use_cross_encoder=False,  # Disabled: 30s PyTorch cold start kills UX
+                    use_cross_encoder=True,
                 ),
                 math=MathConfig(
                     sheaf_contradiction_threshold=0.45,  # 768d threshold
@@ -370,7 +370,7 @@ class SLMConfig:
                     api_base=llm_api_base or "http://localhost:11434",
                     api_key=llm_api_key or "",
                 ),
-                retrieval=RetrievalConfig(use_cross_encoder=False),
+                retrieval=RetrievalConfig(use_cross_encoder=True),
             )
         # Mode C — FULL POWER, UNRESTRICTED

package/src/superlocalmemory/core/recall_worker.py CHANGED Viewed

@@ -222,6 +222,20 @@ def _worker_main() -> None:
             _respond({"ok": True})
             continue
+        if cmd == "warmup":
+            # Pre-load engine + all models (embedding, reranker, BM25, LLM)
+            # Called at dashboard/MCP startup so first real request is fast.
+            # A dummy recall triggers lazy-loaded components (cross-encoder, BM25 index).
+            try:
+                engine = _get_engine()
+                fact_count = engine._db.get_fact_count(engine._profile_id) if engine._db else 0
+                if fact_count > 0:
+                    engine.recall("warmup", limit=1)
+                _respond({"ok": True, "message": "Engine warm", "facts": fact_count})
+            except Exception as exc:
+                _respond({"ok": False, "error": f"Warmup failed: {exc}"})
+            continue
         try:
             if cmd == "recall":
                 result = _handle_recall(req.get("query", ""), req.get("limit", 10))

package/src/superlocalmemory/core/worker_pool.py CHANGED Viewed

@@ -28,8 +28,9 @@ import time
 logger = logging.getLogger(__name__)
-_IDLE_TIMEOUT = 120  # 2 min — kill worker after idle
+_IDLE_TIMEOUT = 120   # 2 min — kill worker after idle
 _REQUEST_TIMEOUT = 60  # 60 sec max per request
+_WARMUP_TIMEOUT = 120  # 2 min — first cold start loads PyTorch + models
 class WorkerPool:
@@ -102,6 +103,31 @@ class WorkerPool:
         with self._lock:
             self._kill()
+    def warmup(self) -> None:
+        """Pre-spawn and warm up the worker in a background thread.
+        Spawns the recall_worker subprocess so that PyTorch, models, and
+        the engine are all loaded BEFORE the first user request. This
+        amortizes the 30s cold-start at dashboard/MCP startup time.
+        Call from startup events — non-blocking, runs in background.
+        """
+        def _do_warmup() -> None:
+            logger.info("Worker warmup starting (background)...")
+            try:
+                result = self._send_with_timeout(
+                    {"cmd": "warmup"}, timeout=_WARMUP_TIMEOUT,
+                )
+                if result.get("ok"):
+                    logger.info("Worker warmup complete (engine + models ready)")
+                else:
+                    logger.warning("Worker warmup returned: %s", result)
+            except Exception as exc:
+                logger.warning("Worker warmup failed: %s", exc)
+        t = threading.Thread(target=_do_warmup, daemon=True, name="worker-warmup")
+        t.start()
     @property
     def worker_pid(self) -> int | None:
         """PID of the worker process, or None if not running."""
@@ -115,6 +141,10 @@ class WorkerPool:
     def _send(self, request: dict) -> dict:
         """Send request to worker and get response. Thread-safe."""
+        return self._send_with_timeout(request, timeout=_REQUEST_TIMEOUT)
+    def _send_with_timeout(self, request: dict, timeout: float) -> dict:
+        """Send request with configurable timeout. Thread-safe."""
         with self._lock:
             self._ensure_worker()
             if self._proc is None:
@@ -129,7 +159,7 @@ class WorkerPool:
                 import selectors
                 sel = selectors.DefaultSelector()
                 sel.register(self._proc.stdout, selectors.EVENT_READ)
-                ready = sel.select(timeout=_REQUEST_TIMEOUT)
+                ready = sel.select(timeout=timeout)
                 sel.close()
                 if not ready:

package/src/superlocalmemory/server/ui.py CHANGED Viewed

@@ -199,14 +199,25 @@ def create_app() -> FastAPI:
     @application.on_event("startup")
     async def startup_event():
-        """Initialize event bus. Engine runs in subprocess worker (never in this process)."""
-        # Engine is NEVER loaded in the dashboard process.
-        # All recall/search operations go through WorkerPool subprocess.
-        # This keeps the dashboard permanently at ~60 MB.
+        """Initialize event bus and warm up worker subprocess.
+        Engine runs in subprocess worker (never in this process).
+        Background warmup pre-loads PyTorch + models so first recall is fast.
+        """
         application.state.engine = None
         logger.info("Dashboard started (~60 MB, engine runs in subprocess worker)")
         register_event_listener()
+        # Background warmup: pre-spawn worker and load all models.
+        # This runs in a daemon thread — dashboard is responsive immediately.
+        # Worker will be ready by the time user does first search (~10-30s).
+        try:
+            from superlocalmemory.core.worker_pool import WorkerPool
+            WorkerPool.shared().warmup()
+            logger.info("Worker warmup initiated (background)")
+        except Exception as exc:
+            logger.warning("Worker warmup failed to start: %s", exc)
     @application.on_event("shutdown")
     async def shutdown_event():
         """Kill worker subprocess on dashboard shutdown."""