superlocalmemory 3.3.16 → 3.3.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/src/superlocalmemory/core/embedding_worker.py +10 -0
- package/src/superlocalmemory/core/embeddings.py +20 -2
- package/src/superlocalmemory/core/recall_pipeline.py +24 -8
- package/src/superlocalmemory/core/recall_worker.py +11 -1
- package/src/superlocalmemory/core/reranker_worker.py +12 -3
- package/src/superlocalmemory/retrieval/reranker.py +13 -19
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superlocalmemory",
|
|
3
|
-
"version": "3.3.
|
|
3
|
+
"version": "3.3.17",
|
|
4
4
|
"description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-memory",
|
package/pyproject.toml
CHANGED
|
@@ -144,6 +144,16 @@ def _worker_main() -> None:
|
|
|
144
144
|
_respond({"ok": True, "vectors": result, "dim": dim})
|
|
145
145
|
except Exception as exc:
|
|
146
146
|
_respond({"ok": False, "error": str(exc)})
|
|
147
|
+
|
|
148
|
+
# V3.3.16: RSS watchdog — self-terminate if peak RSS exceeds 2500 MB (~2.5GB; ru_maxrss is bytes on macOS but KiB on Linux).
|
|
149
|
+
# PyTorch on ARM64 Mac never returns memory to OS. After ~200 embeds
|
|
150
|
+
# a worker that started at 300MB grows to 17GB+. Parent auto-respawns
|
|
151
|
+
# a fresh worker on next request (existing mechanism in embeddings.py).
|
|
152
|
+
import resource
|
|
153
|
+
rss_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
|
|
154
|
+
if rss_mb > 2500:
|
|
155
|
+
sys.exit(0)
|
|
156
|
+
|
|
147
157
|
continue
|
|
148
158
|
|
|
149
159
|
_respond({"ok": False, "error": f"Unknown command: {cmd}"})
|
|
@@ -207,11 +207,29 @@ class EmbeddingService:
|
|
|
207
207
|
return resp["vectors"]
|
|
208
208
|
except (BrokenPipeError, OSError, json.JSONDecodeError) as exc:
|
|
209
209
|
logger.warning(
|
|
210
|
-
"Embedding worker communication failed: %s.
|
|
211
|
-
"Run 'slm doctor' to check dependencies and Python version.",
|
|
210
|
+
"Embedding worker communication failed: %s — respawning.",
|
|
212
211
|
exc,
|
|
213
212
|
)
|
|
214
213
|
self._kill_worker()
|
|
214
|
+
# V3.3.16: Auto-retry once after worker death (RSS watchdog
|
|
215
|
+
# or crash). Respawn + re-send instead of returning None.
|
|
216
|
+
try:
|
|
217
|
+
self._ensure_worker()
|
|
218
|
+
if self._worker_proc is not None:
|
|
219
|
+
self._worker_proc.stdin.write(req)
|
|
220
|
+
self._worker_proc.stdin.flush()
|
|
221
|
+
resp_line = self._readline_with_timeout(
|
|
222
|
+
self._worker_proc.stdout,
|
|
223
|
+
_SUBPROCESS_RESPONSE_TIMEOUT,
|
|
224
|
+
)
|
|
225
|
+
if resp_line:
|
|
226
|
+
resp = json.loads(resp_line)
|
|
227
|
+
if resp.get("ok"):
|
|
228
|
+
self._reset_idle_timer()
|
|
229
|
+
self._request_count = 1
|
|
230
|
+
return resp["vectors"]
|
|
231
|
+
except Exception:
|
|
232
|
+
self._kill_worker()
|
|
215
233
|
return None
|
|
216
234
|
|
|
217
235
|
@staticmethod
|
|
@@ -263,16 +263,32 @@ def run_recall(
|
|
|
263
263
|
for r in response.results:
|
|
264
264
|
trust_scorer.update_on_access("fact", r.fact.fact_id, profile_id)
|
|
265
265
|
|
|
266
|
-
#
|
|
267
|
-
#
|
|
268
|
-
#
|
|
269
|
-
#
|
|
270
|
-
|
|
271
|
-
|
|
266
|
+
# Fisher Bayesian update on recall — narrows variance on accessed facts
|
|
267
|
+
# so they score higher on subsequent recalls (critical for benchmark: +24pp).
|
|
268
|
+
# V3.3.16: Reuse query embedding from retrieval engine cache instead of
|
|
269
|
+
# calling embedder.embed() again (which was the memory leak source).
|
|
270
|
+
q_var_arr = None
|
|
271
|
+
if embedder and hasattr(retrieval_engine, '_query_embedding_cache'):
|
|
272
|
+
cached_emb = retrieval_engine._query_embedding_cache.get(query)
|
|
273
|
+
if cached_emb is not None:
|
|
274
|
+
import numpy as _np
|
|
275
|
+
_, q_var_list = embedder.compute_fisher_params(cached_emb)
|
|
276
|
+
q_var_arr = _np.array(q_var_list, dtype=_np.float64)
|
|
277
|
+
|
|
272
278
|
for r in response.results:
|
|
273
|
-
|
|
279
|
+
updates: dict[str, object] = {
|
|
274
280
|
"access_count": r.fact.access_count + 1,
|
|
275
|
-
}
|
|
281
|
+
}
|
|
282
|
+
if (q_var_arr is not None
|
|
283
|
+
and r.fact.fisher_variance
|
|
284
|
+
and len(r.fact.fisher_variance) == len(q_var_arr)
|
|
285
|
+
and r.fact.access_count >= 3):
|
|
286
|
+
import numpy as _np
|
|
287
|
+
f_var = _np.array(r.fact.fisher_variance, dtype=_np.float64)
|
|
288
|
+
new_var = 1.0 / (1.0 / _np.maximum(f_var, 0.05) + 1.0 / _np.maximum(q_var_arr, 0.05))
|
|
289
|
+
new_var = _np.clip(new_var, 0.05, 2.0)
|
|
290
|
+
updates["fisher_variance"] = new_var.tolist()
|
|
291
|
+
db.update_fact(r.fact.fact_id, updates)
|
|
276
292
|
|
|
277
293
|
# Post-operation hooks (audit, trust signal, learning)
|
|
278
294
|
hook_ctx["result_count"] = len(response.results)
|
|
@@ -321,6 +321,13 @@ def _worker_main() -> None:
|
|
|
321
321
|
except Exception as exc:
|
|
322
322
|
_respond({"ok": False, "error": str(exc)})
|
|
323
323
|
|
|
324
|
+
# V3.3.16: RSS watchdog — self-terminate if peak RSS exceeds 2500 MB (~2.5GB; ru_maxrss is bytes on macOS but KiB on Linux).
|
|
325
|
+
# Parent auto-respawns a fresh worker on next request.
|
|
326
|
+
import resource
|
|
327
|
+
rss_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
|
|
328
|
+
if rss_mb > 2500:
|
|
329
|
+
sys.exit(0)
|
|
330
|
+
|
|
324
331
|
|
|
325
332
|
def _respond(data: dict) -> None:
|
|
326
333
|
sys.stdout.write(json.dumps(data) + "\n")
|
|
@@ -328,4 +335,7 @@ def _respond(data: dict) -> None:
|
|
|
328
335
|
|
|
329
336
|
|
|
330
337
|
if __name__ == "__main__":
|
|
331
|
-
|
|
338
|
+
try:
|
|
339
|
+
_worker_main()
|
|
340
|
+
except KeyboardInterrupt:
|
|
341
|
+
sys.exit(0)
|
|
@@ -124,10 +124,12 @@ def _worker_main() -> None:
|
|
|
124
124
|
warmup_ok = False
|
|
125
125
|
if model is not None:
|
|
126
126
|
try:
|
|
127
|
+
# Use 60 pairs (realistic batch size) to trigger CoreML
|
|
128
|
+
# compilation for the actual workload. 3 pairs compiled a
|
|
129
|
+
# different execution plan that got recompiled on 60 pairs.
|
|
127
130
|
dummy_pairs = [
|
|
128
|
-
("What
|
|
129
|
-
|
|
130
|
-
("What color is the sky?", "The sky is blue on a clear day."),
|
|
131
|
+
(f"What happened to person {i}?", f"Person {i} went to location {i} and did activity {i} last summer with friends.")
|
|
132
|
+
for i in range(60)
|
|
131
133
|
]
|
|
132
134
|
try:
|
|
133
135
|
import torch
|
|
@@ -174,6 +176,13 @@ def _worker_main() -> None:
|
|
|
174
176
|
})
|
|
175
177
|
except Exception as exc:
|
|
176
178
|
_respond({"ok": False, "error": str(exc)})
|
|
179
|
+
|
|
180
|
+
# V3.3.16: RSS watchdog — same as embedding_worker
|
|
181
|
+
import resource
|
|
182
|
+
rss_mb = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 / 1024
|
|
183
|
+
if rss_mb > 2500:
|
|
184
|
+
sys.exit(0)
|
|
185
|
+
|
|
177
186
|
continue
|
|
178
187
|
|
|
179
188
|
if cmd == "score":
|
|
@@ -94,8 +94,10 @@ class CrossEncoderReranker:
|
|
|
94
94
|
def _start_background_warmup(self) -> None:
|
|
95
95
|
"""Start worker and load model in background thread.
|
|
96
96
|
|
|
97
|
-
|
|
98
|
-
|
|
97
|
+
V3.3.16: Uses _send_request (lock-protected) instead of raw
|
|
98
|
+
stdin/stdout access. Previous code wrote to stdin without the
|
|
99
|
+
lock, creating a race where the warmup's readline thread could
|
|
100
|
+
steal responses meant for _send_request → deadlock → timeout.
|
|
99
101
|
"""
|
|
100
102
|
if self._worker_loading or self._model_loaded:
|
|
101
103
|
return
|
|
@@ -106,26 +108,18 @@ class CrossEncoderReranker:
|
|
|
106
108
|
self._ensure_worker()
|
|
107
109
|
if self._worker_proc is None:
|
|
108
110
|
return
|
|
109
|
-
|
|
110
|
-
req = json.dumps({
|
|
111
|
+
resp = self._send_request({
|
|
111
112
|
"cmd": "load",
|
|
112
113
|
"model_name": self._model_name,
|
|
113
114
|
"backend": self._backend,
|
|
114
|
-
})
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
if resp.get("ok"):
|
|
123
|
-
self._model_loaded = True
|
|
124
|
-
logger.info(
|
|
125
|
-
"Reranker worker warm (backend=%s)",
|
|
126
|
-
resp.get("backend", "?"),
|
|
127
|
-
)
|
|
128
|
-
self._reset_idle_timer()
|
|
115
|
+
}, timeout=_SUBPROCESS_RESPONSE_TIMEOUT)
|
|
116
|
+
if resp and resp.get("ok"):
|
|
117
|
+
self._model_loaded = True
|
|
118
|
+
logger.info(
|
|
119
|
+
"Reranker worker warm (backend=%s, warmup_inference=%s)",
|
|
120
|
+
resp.get("backend", "?"),
|
|
121
|
+
resp.get("warmup_inference", False),
|
|
122
|
+
)
|
|
129
123
|
except Exception as exc:
|
|
130
124
|
logger.debug("Background reranker warmup failed: %s", exc)
|
|
131
125
|
finally:
|