superlocalmemory 3.0.34 → 3.0.35
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superlocalmemory",
|
|
3
|
-
"version": "3.0.34",
|
|
3
|
+
"version": "3.0.35",
|
|
4
4
|
"description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-memory",
|
package/pyproject.toml
CHANGED
|
@@ -348,7 +348,7 @@ class SLMConfig:
|
|
|
348
348
|
),
|
|
349
349
|
llm=LLMConfig(), # No LLM
|
|
350
350
|
retrieval=RetrievalConfig(
|
|
351
|
-
use_cross_encoder=False,
|
|
351
|
+
use_cross_encoder=True,
|
|
352
352
|
),
|
|
353
353
|
math=MathConfig(
|
|
354
354
|
sheaf_contradiction_threshold=0.45, # 768d threshold
|
|
@@ -370,7 +370,7 @@ class SLMConfig:
|
|
|
370
370
|
api_base=llm_api_base or "http://localhost:11434",
|
|
371
371
|
api_key=llm_api_key or "",
|
|
372
372
|
),
|
|
373
|
-
retrieval=RetrievalConfig(use_cross_encoder=False),
|
|
373
|
+
retrieval=RetrievalConfig(use_cross_encoder=True),
|
|
374
374
|
)
|
|
375
375
|
|
|
376
376
|
# Mode C — FULL POWER, UNRESTRICTED
|
|
@@ -222,6 +222,20 @@ def _worker_main() -> None:
|
|
|
222
222
|
_respond({"ok": True})
|
|
223
223
|
continue
|
|
224
224
|
|
|
225
|
+
if cmd == "warmup":
|
|
226
|
+
# Pre-load engine + all models (embedding, reranker, BM25, LLM)
|
|
227
|
+
# Called at dashboard/MCP startup so first real request is fast.
|
|
228
|
+
# A dummy recall triggers lazy-loaded components (cross-encoder, BM25 index).
|
|
229
|
+
try:
|
|
230
|
+
engine = _get_engine()
|
|
231
|
+
fact_count = engine._db.get_fact_count(engine._profile_id) if engine._db else 0
|
|
232
|
+
if fact_count > 0:
|
|
233
|
+
engine.recall("warmup", limit=1)
|
|
234
|
+
_respond({"ok": True, "message": "Engine warm", "facts": fact_count})
|
|
235
|
+
except Exception as exc:
|
|
236
|
+
_respond({"ok": False, "error": f"Warmup failed: {exc}"})
|
|
237
|
+
continue
|
|
238
|
+
|
|
225
239
|
try:
|
|
226
240
|
if cmd == "recall":
|
|
227
241
|
result = _handle_recall(req.get("query", ""), req.get("limit", 10))
|
|
@@ -28,8 +28,9 @@ import time
|
|
|
28
28
|
|
|
29
29
|
logger = logging.getLogger(__name__)
|
|
30
30
|
|
|
31
|
-
_IDLE_TIMEOUT = 120
|
|
31
|
+
_IDLE_TIMEOUT = 120 # 2 min — kill worker after idle
|
|
32
32
|
_REQUEST_TIMEOUT = 60 # 60 sec max per request
|
|
33
|
+
_WARMUP_TIMEOUT = 120 # 2 min — first cold start loads PyTorch + models
|
|
33
34
|
|
|
34
35
|
|
|
35
36
|
class WorkerPool:
|
|
@@ -102,6 +103,31 @@ class WorkerPool:
|
|
|
102
103
|
with self._lock:
|
|
103
104
|
self._kill()
|
|
104
105
|
|
|
106
|
+
def warmup(self) -> None:
|
|
107
|
+
"""Pre-spawn and warm up the worker in a background thread.
|
|
108
|
+
|
|
109
|
+
Spawns the recall_worker subprocess so that PyTorch, models, and
|
|
110
|
+
the engine are all loaded BEFORE the first user request. This
|
|
111
|
+
amortizes the 30s cold-start at dashboard/MCP startup time.
|
|
112
|
+
|
|
113
|
+
Call from startup events — non-blocking, runs in background.
|
|
114
|
+
"""
|
|
115
|
+
def _do_warmup() -> None:
|
|
116
|
+
logger.info("Worker warmup starting (background)...")
|
|
117
|
+
try:
|
|
118
|
+
result = self._send_with_timeout(
|
|
119
|
+
{"cmd": "warmup"}, timeout=_WARMUP_TIMEOUT,
|
|
120
|
+
)
|
|
121
|
+
if result.get("ok"):
|
|
122
|
+
logger.info("Worker warmup complete (engine + models ready)")
|
|
123
|
+
else:
|
|
124
|
+
logger.warning("Worker warmup returned: %s", result)
|
|
125
|
+
except Exception as exc:
|
|
126
|
+
logger.warning("Worker warmup failed: %s", exc)
|
|
127
|
+
|
|
128
|
+
t = threading.Thread(target=_do_warmup, daemon=True, name="worker-warmup")
|
|
129
|
+
t.start()
|
|
130
|
+
|
|
105
131
|
@property
|
|
106
132
|
def worker_pid(self) -> int | None:
|
|
107
133
|
"""PID of the worker process, or None if not running."""
|
|
@@ -115,6 +141,10 @@ class WorkerPool:
|
|
|
115
141
|
|
|
116
142
|
def _send(self, request: dict) -> dict:
|
|
117
143
|
"""Send request to worker and get response. Thread-safe."""
|
|
144
|
+
return self._send_with_timeout(request, timeout=_REQUEST_TIMEOUT)
|
|
145
|
+
|
|
146
|
+
def _send_with_timeout(self, request: dict, timeout: float) -> dict:
|
|
147
|
+
"""Send request with configurable timeout. Thread-safe."""
|
|
118
148
|
with self._lock:
|
|
119
149
|
self._ensure_worker()
|
|
120
150
|
if self._proc is None:
|
|
@@ -129,7 +159,7 @@ class WorkerPool:
|
|
|
129
159
|
import selectors
|
|
130
160
|
sel = selectors.DefaultSelector()
|
|
131
161
|
sel.register(self._proc.stdout, selectors.EVENT_READ)
|
|
132
|
-
ready = sel.select(timeout=_REQUEST_TIMEOUT)
|
|
162
|
+
ready = sel.select(timeout=timeout)
|
|
133
163
|
sel.close()
|
|
134
164
|
|
|
135
165
|
if not ready:
|
|
@@ -199,14 +199,25 @@ def create_app() -> FastAPI:
|
|
|
199
199
|
|
|
200
200
|
@application.on_event("startup")
|
|
201
201
|
async def startup_event():
|
|
202
|
-
"""Initialize event bus
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
202
|
+
"""Initialize event bus and warm up worker subprocess.
|
|
203
|
+
|
|
204
|
+
Engine runs in subprocess worker (never in this process).
|
|
205
|
+
Background warmup pre-loads PyTorch + models so first recall is fast.
|
|
206
|
+
"""
|
|
206
207
|
application.state.engine = None
|
|
207
208
|
logger.info("Dashboard started (~60 MB, engine runs in subprocess worker)")
|
|
208
209
|
register_event_listener()
|
|
209
210
|
|
|
211
|
+
# Background warmup: pre-spawn worker and load all models.
|
|
212
|
+
# This runs in a daemon thread — dashboard is responsive immediately.
|
|
213
|
+
# Worker will be ready by the time user does first search (~10-30s).
|
|
214
|
+
try:
|
|
215
|
+
from superlocalmemory.core.worker_pool import WorkerPool
|
|
216
|
+
WorkerPool.shared().warmup()
|
|
217
|
+
logger.info("Worker warmup initiated (background)")
|
|
218
|
+
except Exception as exc:
|
|
219
|
+
logger.warning("Worker warmup failed to start: %s", exc)
|
|
220
|
+
|
|
210
221
|
@application.on_event("shutdown")
|
|
211
222
|
async def shutdown_event():
|
|
212
223
|
"""Kill worker subprocess on dashboard shutdown."""
|