superlocalmemory 3.3.15 → 3.3.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlocalmemory",
3
- "version": "3.3.15",
3
+ "version": "3.3.16",
4
4
  "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
5
5
  "keywords": [
6
6
  "ai-memory",
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "superlocalmemory"
3
- version = "3.3.15"
3
+ version = "3.3.16"
4
4
  description = "Information-geometric agent memory with mathematical guarantees"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -24,6 +24,36 @@ from superlocalmemory.storage.models import Mode, RecallResponse
24
24
  logger = logging.getLogger(__name__)
25
25
 
26
26
 
27
+ # ---------------------------------------------------------------------------
28
+ # V3.3.16: Module-level singletons for recall hot-path objects.
29
+ # Prevents creating new BehavioralTracker / ForgettingScheduler per recall
30
+ # (304 recalls = 304 objects that fragment pymalloc arenas → 25GB).
31
+ # ---------------------------------------------------------------------------
32
+
33
+ _behavioral_tracker_cache: dict[int, object] = {}
34
+ _forgetting_scheduler_cache: dict[int, object] = {}
35
+
36
+
37
+ def _get_behavioral_tracker(db: Any) -> Any:
38
+ """Get or create a cached BehavioralTracker for this DB instance."""
39
+ key = id(db)
40
+ if key not in _behavioral_tracker_cache:
41
+ from superlocalmemory.learning.behavioral import BehavioralTracker
42
+ _behavioral_tracker_cache[key] = BehavioralTracker(db)
43
+ return _behavioral_tracker_cache[key]
44
+
45
+
46
+ def _get_forgetting_scheduler(db: Any, config: Any) -> Any:
47
+ """Get or create a cached ForgettingScheduler for this DB instance."""
48
+ key = id(db)
49
+ if key not in _forgetting_scheduler_cache:
50
+ from superlocalmemory.learning.forgetting_scheduler import ForgettingScheduler
51
+ from superlocalmemory.math.ebbinghaus import EbbinghausCurve
52
+ ebbinghaus = EbbinghausCurve(config.forgetting)
53
+ _forgetting_scheduler_cache[key] = ForgettingScheduler(db, ebbinghaus, config.forgetting)
54
+ return _forgetting_scheduler_cache[key]
55
+
56
+
27
57
  # ---------------------------------------------------------------------------
28
58
  # apply_adaptive_ranking (was MemoryEngine._apply_adaptive_ranking)
29
59
  # ---------------------------------------------------------------------------
@@ -192,11 +222,11 @@ def run_recall(
192
222
  except Exception as exc:
193
223
  logger.debug("Access log batch store failed: %s", exc)
194
224
 
195
- # V3.3.12: Wire BehavioralTracker.record_query() into live recall pipeline
225
+ # V3.3.16: Behavioral tracking + spaced repetition use module-level
226
+ # singletons to avoid creating new objects per recall (was causing
227
+ # object accumulation across 304 benchmark recalls).
196
228
  try:
197
- from superlocalmemory.learning.behavioral import BehavioralTracker
198
- _tracker = BehavioralTracker(db)
199
- _tracker.record_query(
229
+ _get_behavioral_tracker(db).record_query(
200
230
  profile_id=profile_id, query=query,
201
231
  query_type=response.query_type,
202
232
  result_count=len(response.results),
@@ -204,15 +234,11 @@ def run_recall(
204
234
  except Exception as exc:
205
235
  logger.debug("Behavioral tracking: %s", exc)
206
236
 
207
- # V3.3.12: Spaced repetition update on recall (Ebbinghaus on_access_event)
208
237
  if response.results:
209
238
  try:
210
- from superlocalmemory.learning.forgetting_scheduler import ForgettingScheduler
211
- from superlocalmemory.math.ebbinghaus import EbbinghausCurve
212
- _ebbinghaus = EbbinghausCurve(config.forgetting)
213
- _fsched = ForgettingScheduler(db, _ebbinghaus, config.forgetting)
239
+ fsched = _get_forgetting_scheduler(db, config)
214
240
  for r in response.results[:10]:
215
- _fsched.on_access_event(r.fact.fact_id, profile_id)
241
+ fsched.on_access_event(r.fact.fact_id, profile_id)
216
242
  except Exception as exc:
217
243
  logger.debug("Spaced repetition update: %s", exc)
218
244
 
@@ -237,31 +263,16 @@ def run_recall(
237
263
  for r in response.results:
238
264
  trust_scorer.update_on_access("fact", r.fact.fact_id, profile_id)
239
265
 
240
- # Fisher Bayesian update on recall
241
- q_emb = embedder.embed(query) if embedder else None
242
- q_var_arr = None
243
- if embedder and q_emb:
244
- _, q_var_list = embedder.compute_fisher_params(q_emb)
245
- import numpy as _np
246
- q_var_arr = _np.array(q_var_list, dtype=_np.float64)
247
-
266
+ # V3.3.16: Access count update only — no redundant embedding call.
267
+ # Fisher Bayesian variance update moved to store_pipeline (write-time)
268
+ # to avoid per-recall memory pressure from numpy array creation.
269
+ # Previously: embedder.embed(query) here duplicated the embed call
270
+ # already done in retrieval engine, creating 768-dim numpy arrays
271
+ # 304 times during benchmark → pymalloc arena fragmentation → 25GB.
248
272
  for r in response.results:
249
- updates: dict[str, object] = {
273
+ db.update_fact(r.fact.fact_id, {
250
274
  "access_count": r.fact.access_count + 1,
251
- }
252
- # Bayesian variance narrowing after 3+ accesses
253
- if (q_var_arr is not None
254
- and r.fact.fisher_variance
255
- and len(r.fact.fisher_variance) == len(q_var_arr)
256
- and r.fact.access_count >= 3):
257
- import numpy as _np
258
- f_var = _np.array(r.fact.fisher_variance, dtype=_np.float64)
259
- # Conjugate Gaussian update: 1/new_var = 1/f_var + 1/q_var
260
- new_var = 1.0 / (1.0 / _np.maximum(f_var, 0.05) + 1.0 / _np.maximum(q_var_arr, 0.05))
261
- new_var = _np.clip(new_var, 0.05, 2.0)
262
- updates["fisher_variance"] = new_var.tolist()
263
-
264
- db.update_fact(r.fact.fact_id, updates)
275
+ })
265
276
 
266
277
  # Post-operation hooks (audit, trust signal, learning)
267
278
  hook_ctx["result_count"] = len(response.results)
@@ -118,10 +118,31 @@ def _worker_main() -> None:
118
118
  name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-12-v2")
119
119
  backend = req.get("backend", "onnx")
120
120
  model, active_backend, model_name = _load_model(name, backend)
121
+ # V3.3.16: Run real inference to trigger ONNX CoreML JIT compilation.
122
+ # Without this, first real rerank call triggers 30-60s compilation
123
+ # that exceeds the caller's timeout, killing the worker.
124
+ warmup_ok = False
125
+ if model is not None:
126
+ try:
127
+ dummy_pairs = [
128
+ ("What is the capital of France?", "Paris is the capital of France."),
129
+ ("Who wrote Hamlet?", "Shakespeare wrote many plays."),
130
+ ("What color is the sky?", "The sky is blue on a clear day."),
131
+ ]
132
+ try:
133
+ import torch
134
+ with torch.inference_mode():
135
+ _scores = model.predict(dummy_pairs)
136
+ except ImportError:
137
+ _scores = model.predict(dummy_pairs)
138
+ warmup_ok = True
139
+ except Exception:
140
+ pass
121
141
  _respond({
122
142
  "ok": model is not None,
123
143
  "backend": active_backend,
124
144
  "model": model_name,
145
+ "warmup_inference": warmup_ok,
125
146
  })
126
147
  continue
127
148
 
@@ -338,31 +338,27 @@ class RetrievalEngine:
338
338
  if not candidates:
339
339
  return fused
340
340
 
341
- # Bug 3 fix: strip speaker tags from content before CE scoring
342
- clean_candidates: list[tuple[AtomicFact, float]] = []
343
- for fact, score in candidates:
344
- cleaned_content = re.sub(r'^\[[A-Za-z]+\]:\s*', '', fact.content)
345
- clean_fact = AtomicFact(
346
- fact_id=fact.fact_id, memory_id=fact.memory_id,
347
- profile_id=fact.profile_id, content=cleaned_content,
348
- fact_type=fact.fact_type, entities=fact.entities,
349
- canonical_entities=fact.canonical_entities,
350
- observation_date=fact.observation_date,
351
- referenced_date=fact.referenced_date,
352
- confidence=fact.confidence, importance=fact.importance,
353
- evidence_count=fact.evidence_count,
354
- access_count=fact.access_count,
355
- embedding=fact.embedding, created_at=fact.created_at,
356
- )
357
- clean_candidates.append((clean_fact, score))
341
+ # V3.3.16: Strip speaker tags WITHOUT copying full AtomicFact objects.
342
+ # Previously created full copies including 768-dim embeddings (~6KB each),
343
+ # which over 304 recalls caused pymalloc arena fragmentation → 25GB.
344
+ # Now: temporarily patch .content on originals, rerank, then restore.
345
+ originals: list[tuple[AtomicFact, str]] = [] # (fact, original_content)
346
+ for fact, _ in candidates:
347
+ orig = fact.content
348
+ fact.content = re.sub(r'^\[[A-Za-z]+\]:\s*', '', orig)
349
+ originals.append((fact, orig))
358
350
 
359
351
  try:
360
352
  scored = self._reranker.rerank( # type: ignore[union-attr]
361
- query, clean_candidates, top_k=len(clean_candidates),
353
+ query, candidates, top_k=len(candidates),
362
354
  )
363
355
  except Exception as exc:
364
356
  logger.warning("Cross-encoder rerank failed: %s", exc)
365
357
  return fused
358
+ finally:
359
+ # Restore original content (with speaker tags)
360
+ for fact, orig_content in originals:
361
+ fact.content = orig_content
366
362
 
367
363
  score_map = {fact.fact_id: score for fact, score in scored}
368
364
 
@@ -321,13 +321,14 @@ class CrossEncoderReranker:
321
321
 
322
322
  documents = [fact.content for fact, _ in candidates]
323
323
 
324
- # V3.3.12: Increased timeout 10s→60s — L-12-v2 needs PyTorch + ONNX load.
325
- # Critical: Paper 2 ablation showed -30.7pp without reranking.
324
+ # V3.3.16: Timeout 180s — ONNX CoreML compilation can take 30-60s on
325
+ # first inference even after model load. The warmup_inference in the
326
+ # worker should prevent this, but 180s is a safety net.
326
327
  resp = self._send_request({
327
328
  "cmd": "rerank",
328
329
  "query": query,
329
330
  "documents": documents,
330
- }, timeout=60.0)
331
+ }, timeout=180.0)
331
332
 
332
333
  if resp is None or not resp.get("ok"):
333
334
  # Fallback: return by existing score