superlocalmemory 3.3.14 → 3.3.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superlocalmemory",
|
|
3
|
-
"version": "3.3.14",
|
|
3
|
+
"version": "3.3.16",
|
|
4
4
|
"description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-memory",
|
package/pyproject.toml
CHANGED
|
@@ -24,6 +24,36 @@ from superlocalmemory.storage.models import Mode, RecallResponse
|
|
|
24
24
|
logger = logging.getLogger(__name__)
|
|
25
25
|
|
|
26
26
|
|
|
27
|
+
# ---------------------------------------------------------------------------
|
|
28
|
+
# V3.3.16: Module-level singletons for recall hot-path objects.
|
|
29
|
+
# Prevents creating new BehavioralTracker / ForgettingScheduler per recall
|
|
30
|
+
# (304 recalls = 304 objects that fragment pymalloc arenas → 25GB).
|
|
31
|
+
# ---------------------------------------------------------------------------
|
|
32
|
+
|
|
33
|
+
_behavioral_tracker_cache: dict[int, object] = {}
|
|
34
|
+
_forgetting_scheduler_cache: dict[int, object] = {}
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _get_behavioral_tracker(db: Any) -> Any:
|
|
38
|
+
"""Get or create a cached BehavioralTracker for this DB instance."""
|
|
39
|
+
key = id(db)
|
|
40
|
+
if key not in _behavioral_tracker_cache:
|
|
41
|
+
from superlocalmemory.learning.behavioral import BehavioralTracker
|
|
42
|
+
_behavioral_tracker_cache[key] = BehavioralTracker(db)
|
|
43
|
+
return _behavioral_tracker_cache[key]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _get_forgetting_scheduler(db: Any, config: Any) -> Any:
|
|
47
|
+
"""Get or create a cached ForgettingScheduler for this DB instance."""
|
|
48
|
+
key = id(db)
|
|
49
|
+
if key not in _forgetting_scheduler_cache:
|
|
50
|
+
from superlocalmemory.learning.forgetting_scheduler import ForgettingScheduler
|
|
51
|
+
from superlocalmemory.math.ebbinghaus import EbbinghausCurve
|
|
52
|
+
ebbinghaus = EbbinghausCurve(config.forgetting)
|
|
53
|
+
_forgetting_scheduler_cache[key] = ForgettingScheduler(db, ebbinghaus, config.forgetting)
|
|
54
|
+
return _forgetting_scheduler_cache[key]
|
|
55
|
+
|
|
56
|
+
|
|
27
57
|
# ---------------------------------------------------------------------------
|
|
28
58
|
# apply_adaptive_ranking (was MemoryEngine._apply_adaptive_ranking)
|
|
29
59
|
# ---------------------------------------------------------------------------
|
|
@@ -192,11 +222,11 @@ def run_recall(
|
|
|
192
222
|
except Exception as exc:
|
|
193
223
|
logger.debug("Access log batch store failed: %s", exc)
|
|
194
224
|
|
|
195
|
-
# V3.3.
|
|
225
|
+
# V3.3.16: Behavioral tracking + spaced repetition use module-level
|
|
226
|
+
# singletons to avoid creating new objects per recall (was causing
|
|
227
|
+
# object accumulation across 304 benchmark recalls).
|
|
196
228
|
try:
|
|
197
|
-
|
|
198
|
-
_tracker = BehavioralTracker(db)
|
|
199
|
-
_tracker.record_query(
|
|
229
|
+
_get_behavioral_tracker(db).record_query(
|
|
200
230
|
profile_id=profile_id, query=query,
|
|
201
231
|
query_type=response.query_type,
|
|
202
232
|
result_count=len(response.results),
|
|
@@ -204,15 +234,11 @@ def run_recall(
|
|
|
204
234
|
except Exception as exc:
|
|
205
235
|
logger.debug("Behavioral tracking: %s", exc)
|
|
206
236
|
|
|
207
|
-
# V3.3.12: Spaced repetition update on recall (Ebbinghaus on_access_event)
|
|
208
237
|
if response.results:
|
|
209
238
|
try:
|
|
210
|
-
|
|
211
|
-
from superlocalmemory.math.ebbinghaus import EbbinghausCurve
|
|
212
|
-
_ebbinghaus = EbbinghausCurve(config.forgetting)
|
|
213
|
-
_fsched = ForgettingScheduler(db, _ebbinghaus, config.forgetting)
|
|
239
|
+
fsched = _get_forgetting_scheduler(db, config)
|
|
214
240
|
for r in response.results[:10]:
|
|
215
|
-
|
|
241
|
+
fsched.on_access_event(r.fact.fact_id, profile_id)
|
|
216
242
|
except Exception as exc:
|
|
217
243
|
logger.debug("Spaced repetition update: %s", exc)
|
|
218
244
|
|
|
@@ -237,31 +263,16 @@ def run_recall(
|
|
|
237
263
|
for r in response.results:
|
|
238
264
|
trust_scorer.update_on_access("fact", r.fact.fact_id, profile_id)
|
|
239
265
|
|
|
240
|
-
#
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
q_var_arr = _np.array(q_var_list, dtype=_np.float64)
|
|
247
|
-
|
|
266
|
+
# V3.3.16: Access count update only — no redundant embedding call.
|
|
267
|
+
# Fisher Bayesian variance update moved to store_pipeline (write-time)
|
|
268
|
+
# to avoid per-recall memory pressure from numpy array creation.
|
|
269
|
+
# Previously: embedder.embed(query) here duplicated the embed call
|
|
270
|
+
# already done in retrieval engine, creating 768-dim numpy arrays
|
|
271
|
+
# 304 times during benchmark → pymalloc arena fragmentation → 25GB.
|
|
248
272
|
for r in response.results:
|
|
249
|
-
|
|
273
|
+
db.update_fact(r.fact.fact_id, {
|
|
250
274
|
"access_count": r.fact.access_count + 1,
|
|
251
|
-
}
|
|
252
|
-
# Bayesian variance narrowing after 3+ accesses
|
|
253
|
-
if (q_var_arr is not None
|
|
254
|
-
and r.fact.fisher_variance
|
|
255
|
-
and len(r.fact.fisher_variance) == len(q_var_arr)
|
|
256
|
-
and r.fact.access_count >= 3):
|
|
257
|
-
import numpy as _np
|
|
258
|
-
f_var = _np.array(r.fact.fisher_variance, dtype=_np.float64)
|
|
259
|
-
# Conjugate Gaussian update: 1/new_var = 1/f_var + 1/q_var
|
|
260
|
-
new_var = 1.0 / (1.0 / _np.maximum(f_var, 0.05) + 1.0 / _np.maximum(q_var_arr, 0.05))
|
|
261
|
-
new_var = _np.clip(new_var, 0.05, 2.0)
|
|
262
|
-
updates["fisher_variance"] = new_var.tolist()
|
|
263
|
-
|
|
264
|
-
db.update_fact(r.fact.fact_id, updates)
|
|
275
|
+
})
|
|
265
276
|
|
|
266
277
|
# Post-operation hooks (audit, trust signal, learning)
|
|
267
278
|
hook_ctx["result_count"] = len(response.results)
|
|
@@ -118,10 +118,31 @@ def _worker_main() -> None:
|
|
|
118
118
|
name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-12-v2")
|
|
119
119
|
backend = req.get("backend", "onnx")
|
|
120
120
|
model, active_backend, model_name = _load_model(name, backend)
|
|
121
|
+
# V3.3.16: Run real inference to trigger ONNX CoreML JIT compilation.
|
|
122
|
+
# Without this, first real rerank call triggers 30-60s compilation
|
|
123
|
+
# that exceeds the caller's timeout, killing the worker.
|
|
124
|
+
warmup_ok = False
|
|
125
|
+
if model is not None:
|
|
126
|
+
try:
|
|
127
|
+
dummy_pairs = [
|
|
128
|
+
("What is the capital of France?", "Paris is the capital of France."),
|
|
129
|
+
("Who wrote Hamlet?", "Shakespeare wrote many plays."),
|
|
130
|
+
("What color is the sky?", "The sky is blue on a clear day."),
|
|
131
|
+
]
|
|
132
|
+
try:
|
|
133
|
+
import torch
|
|
134
|
+
with torch.inference_mode():
|
|
135
|
+
_scores = model.predict(dummy_pairs)
|
|
136
|
+
except ImportError:
|
|
137
|
+
_scores = model.predict(dummy_pairs)
|
|
138
|
+
warmup_ok = True
|
|
139
|
+
except Exception:
|
|
140
|
+
pass
|
|
121
141
|
_respond({
|
|
122
142
|
"ok": model is not None,
|
|
123
143
|
"backend": active_backend,
|
|
124
144
|
"model": model_name,
|
|
145
|
+
"warmup_inference": warmup_ok,
|
|
125
146
|
})
|
|
126
147
|
continue
|
|
127
148
|
|
|
@@ -338,31 +338,27 @@ class RetrievalEngine:
|
|
|
338
338
|
if not candidates:
|
|
339
339
|
return fused
|
|
340
340
|
|
|
341
|
-
#
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
observation_date=fact.observation_date,
|
|
351
|
-
referenced_date=fact.referenced_date,
|
|
352
|
-
confidence=fact.confidence, importance=fact.importance,
|
|
353
|
-
evidence_count=fact.evidence_count,
|
|
354
|
-
access_count=fact.access_count,
|
|
355
|
-
embedding=fact.embedding, created_at=fact.created_at,
|
|
356
|
-
)
|
|
357
|
-
clean_candidates.append((clean_fact, score))
|
|
341
|
+
# V3.3.16: Strip speaker tags WITHOUT copying full AtomicFact objects.
|
|
342
|
+
# Previously created full copies including 768-dim embeddings (~6KB each),
|
|
343
|
+
# which over 304 recalls caused pymalloc arena fragmentation → 25GB.
|
|
344
|
+
# Now: temporarily patch .content on originals, rerank, then restore.
|
|
345
|
+
originals: list[tuple[AtomicFact, str]] = [] # (fact, original_content)
|
|
346
|
+
for fact, _ in candidates:
|
|
347
|
+
orig = fact.content
|
|
348
|
+
fact.content = re.sub(r'^\[[A-Za-z]+\]:\s*', '', orig)
|
|
349
|
+
originals.append((fact, orig))
|
|
358
350
|
|
|
359
351
|
try:
|
|
360
352
|
scored = self._reranker.rerank( # type: ignore[union-attr]
|
|
361
|
-
query,
|
|
353
|
+
query, candidates, top_k=len(candidates),
|
|
362
354
|
)
|
|
363
355
|
except Exception as exc:
|
|
364
356
|
logger.warning("Cross-encoder rerank failed: %s", exc)
|
|
365
357
|
return fused
|
|
358
|
+
finally:
|
|
359
|
+
# Restore original content (with speaker tags)
|
|
360
|
+
for fact, orig_content in originals:
|
|
361
|
+
fact.content = orig_content
|
|
366
362
|
|
|
367
363
|
score_map = {fact.fact_id: score for fact, score in scored}
|
|
368
364
|
|
|
@@ -321,13 +321,14 @@ class CrossEncoderReranker:
|
|
|
321
321
|
|
|
322
322
|
documents = [fact.content for fact, _ in candidates]
|
|
323
323
|
|
|
324
|
-
# V3.3.
|
|
325
|
-
#
|
|
324
|
+
# V3.3.16: Timeout 180s — ONNX CoreML compilation can take 30-60s on
|
|
325
|
+
# first inference even after model load. The warmup_inference in the
|
|
326
|
+
# worker should prevent this, but 180s is a safety net.
|
|
326
327
|
resp = self._send_request({
|
|
327
328
|
"cmd": "rerank",
|
|
328
329
|
"query": query,
|
|
329
330
|
"documents": documents,
|
|
330
|
-
}, timeout=
|
|
331
|
+
}, timeout=180.0)
|
|
331
332
|
|
|
332
333
|
if resp is None or not resp.get("ok"):
|
|
333
334
|
# Fallback: return by existing score
|