superlocalmemory 3.0.34 → 3.0.35

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlocalmemory",
3
- "version": "3.0.34",
3
+ "version": "3.0.35",
4
4
  "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
5
5
  "keywords": [
6
6
  "ai-memory",
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "superlocalmemory"
3
- version = "3.0.34"
3
+ version = "3.0.35"
4
4
  description = "Information-geometric agent memory with mathematical guarantees"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -348,7 +348,7 @@ class SLMConfig:
348
348
  ),
349
349
  llm=LLMConfig(), # No LLM
350
350
  retrieval=RetrievalConfig(
351
- use_cross_encoder=False, # Disabled: 30s PyTorch cold start kills UX
351
+ use_cross_encoder=True,
352
352
  ),
353
353
  math=MathConfig(
354
354
  sheaf_contradiction_threshold=0.45, # 768d threshold
@@ -370,7 +370,7 @@ class SLMConfig:
370
370
  api_base=llm_api_base or "http://localhost:11434",
371
371
  api_key=llm_api_key or "",
372
372
  ),
373
- retrieval=RetrievalConfig(use_cross_encoder=False),
373
+ retrieval=RetrievalConfig(use_cross_encoder=True),
374
374
  )
375
375
 
376
376
  # Mode C — FULL POWER, UNRESTRICTED
@@ -222,6 +222,20 @@ def _worker_main() -> None:
222
222
  _respond({"ok": True})
223
223
  continue
224
224
 
225
+ if cmd == "warmup":
226
+ # Pre-load engine + all models (embedding, reranker, BM25, LLM)
227
+ # Called at dashboard/MCP startup so first real request is fast.
228
+ # A dummy recall triggers lazy-loaded components (cross-encoder, BM25 index).
229
+ try:
230
+ engine = _get_engine()
231
+ fact_count = engine._db.get_fact_count(engine._profile_id) if engine._db else 0
232
+ if fact_count > 0:
233
+ engine.recall("warmup", limit=1)
234
+ _respond({"ok": True, "message": "Engine warm", "facts": fact_count})
235
+ except Exception as exc:
236
+ _respond({"ok": False, "error": f"Warmup failed: {exc}"})
237
+ continue
238
+
225
239
  try:
226
240
  if cmd == "recall":
227
241
  result = _handle_recall(req.get("query", ""), req.get("limit", 10))
@@ -28,8 +28,9 @@ import time
28
28
 
29
29
  logger = logging.getLogger(__name__)
30
30
 
31
- _IDLE_TIMEOUT = 120 # 2 min — kill worker after idle
31
+ _IDLE_TIMEOUT = 120 # 2 min — kill worker after idle
32
32
  _REQUEST_TIMEOUT = 60 # 60 sec max per request
33
+ _WARMUP_TIMEOUT = 120 # 2 min — first cold start loads PyTorch + models
33
34
 
34
35
 
35
36
  class WorkerPool:
@@ -102,6 +103,31 @@ class WorkerPool:
102
103
  with self._lock:
103
104
  self._kill()
104
105
 
106
+ def warmup(self) -> None:
107
+ """Pre-spawn and warm up the worker in a background thread.
108
+
109
+ Spawns the recall_worker subprocess so that PyTorch, models, and
110
+ the engine are all loaded BEFORE the first user request. This
111
+ amortizes the 30s cold-start at dashboard/MCP startup time.
112
+
113
+ Call from startup events — non-blocking, runs in background.
114
+ """
115
+ def _do_warmup() -> None:
116
+ logger.info("Worker warmup starting (background)...")
117
+ try:
118
+ result = self._send_with_timeout(
119
+ {"cmd": "warmup"}, timeout=_WARMUP_TIMEOUT,
120
+ )
121
+ if result.get("ok"):
122
+ logger.info("Worker warmup complete (engine + models ready)")
123
+ else:
124
+ logger.warning("Worker warmup returned: %s", result)
125
+ except Exception as exc:
126
+ logger.warning("Worker warmup failed: %s", exc)
127
+
128
+ t = threading.Thread(target=_do_warmup, daemon=True, name="worker-warmup")
129
+ t.start()
130
+
105
131
  @property
106
132
  def worker_pid(self) -> int | None:
107
133
  """PID of the worker process, or None if not running."""
@@ -115,6 +141,10 @@ class WorkerPool:
115
141
 
116
142
  def _send(self, request: dict) -> dict:
117
143
  """Send request to worker and get response. Thread-safe."""
144
+ return self._send_with_timeout(request, timeout=_REQUEST_TIMEOUT)
145
+
146
+ def _send_with_timeout(self, request: dict, timeout: float) -> dict:
147
+ """Send request with configurable timeout. Thread-safe."""
118
148
  with self._lock:
119
149
  self._ensure_worker()
120
150
  if self._proc is None:
@@ -129,7 +159,7 @@ class WorkerPool:
129
159
  import selectors
130
160
  sel = selectors.DefaultSelector()
131
161
  sel.register(self._proc.stdout, selectors.EVENT_READ)
132
- ready = sel.select(timeout=_REQUEST_TIMEOUT)
162
+ ready = sel.select(timeout=timeout)
133
163
  sel.close()
134
164
 
135
165
  if not ready:
@@ -199,14 +199,25 @@ def create_app() -> FastAPI:
199
199
 
200
200
  @application.on_event("startup")
201
201
  async def startup_event():
202
- """Initialize event bus. Engine runs in subprocess worker (never in this process)."""
203
- # Engine is NEVER loaded in the dashboard process.
204
- # All recall/search operations go through WorkerPool subprocess.
205
- # This keeps the dashboard permanently at ~60 MB.
202
+ """Initialize event bus and warm up worker subprocess.
203
+
204
+ Engine runs in subprocess worker (never in this process).
205
+ Background warmup pre-loads PyTorch + models so first recall is fast.
206
+ """
206
207
  application.state.engine = None
207
208
  logger.info("Dashboard started (~60 MB, engine runs in subprocess worker)")
208
209
  register_event_listener()
209
210
 
211
+ # Background warmup: pre-spawn worker and load all models.
212
+ # This runs in a daemon thread — dashboard is responsive immediately.
213
+ # Worker will be ready by the time user does first search (~10-30s).
214
+ try:
215
+ from superlocalmemory.core.worker_pool import WorkerPool
216
+ WorkerPool.shared().warmup()
217
+ logger.info("Worker warmup initiated (background)")
218
+ except Exception as exc:
219
+ logger.warning("Worker warmup failed to start: %s", exc)
220
+
210
221
  @application.on_event("shutdown")
211
222
  async def shutdown_event():
212
223
  """Kill worker subprocess on dashboard shutdown."""