superlocalmemory 3.3.11 → 3.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34):
  1. package/package.json +1 -1
  2. package/pyproject.toml +2 -3
  3. package/src/superlocalmemory/core/config.py +9 -6
  4. package/src/superlocalmemory/core/embedding_worker.py +5 -1
  5. package/src/superlocalmemory/core/embeddings.py +3 -1
  6. package/src/superlocalmemory/core/engine.py +14 -0
  7. package/src/superlocalmemory/core/engine_wiring.py +16 -1
  8. package/src/superlocalmemory/core/maintenance_scheduler.py +94 -0
  9. package/src/superlocalmemory/core/recall_pipeline.py +24 -0
  10. package/src/superlocalmemory/core/recall_worker.py +22 -4
  11. package/src/superlocalmemory/core/reranker_worker.py +246 -0
  12. package/src/superlocalmemory/core/store_pipeline.py +12 -2
  13. package/src/superlocalmemory/encoding/fact_extractor.py +16 -8
  14. package/src/superlocalmemory/encoding/graph_builder.py +21 -1
  15. package/src/superlocalmemory/learning/adaptive.py +2 -2
  16. package/src/superlocalmemory/math/fisher_quantized.py +8 -4
  17. package/src/superlocalmemory/math/langevin.py +15 -2
  18. package/src/superlocalmemory/mcp/resources.py +2 -2
  19. package/src/superlocalmemory/mcp/shared.py +27 -0
  20. package/src/superlocalmemory/mcp/tools_active.py +31 -1
  21. package/src/superlocalmemory/mcp/tools_core.py +15 -9
  22. package/src/superlocalmemory/mcp/tools_v28.py +2 -2
  23. package/src/superlocalmemory/mcp/tools_v3.py +3 -0
  24. package/src/superlocalmemory/mcp/tools_v33.py +68 -7
  25. package/src/superlocalmemory/retrieval/agentic.py +1 -1
  26. package/src/superlocalmemory/retrieval/bm25_channel.py +21 -1
  27. package/src/superlocalmemory/retrieval/engine.py +44 -9
  28. package/src/superlocalmemory/retrieval/entity_channel.py +6 -0
  29. package/src/superlocalmemory/retrieval/fusion.py +2 -2
  30. package/src/superlocalmemory/retrieval/hopfield_channel.py +2 -2
  31. package/src/superlocalmemory/retrieval/reranker.py +24 -7
  32. package/src/superlocalmemory/retrieval/semantic_channel.py +2 -2
  33. package/src/superlocalmemory/retrieval/temporal_channel.py +14 -1
  34. package/src/superlocalmemory/storage/schema.py +2 -0
@@ -35,7 +35,9 @@ _live_rerankers: set[weakref.ref] = set()
35
35
  logger = logging.getLogger(__name__)
36
36
 
37
37
  _IDLE_TIMEOUT_SECONDS = 120 # 2 min → kill worker
38
- _SUBPROCESS_RESPONSE_TIMEOUT = 120 # 120s for ONNX cold start
38
+ # V3.3.12: Configurable via SLM_RERANKER_IDLE_TIMEOUT env var
39
+ _IDLE_TIMEOUT_SECONDS = int(os.environ.get("SLM_RERANKER_IDLE_TIMEOUT", _IDLE_TIMEOUT_SECONDS))
40
+ _SUBPROCESS_RESPONSE_TIMEOUT = 180 # V3.3.12: 180s (was 120s) for stressed system respawns
39
41
  _WORKER_RECYCLE_AFTER = 500 # Recycle after N requests
40
42
 
41
43
 
@@ -57,7 +59,7 @@ class CrossEncoderReranker:
57
59
 
58
60
  def __init__(
59
61
  self,
60
- model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2",
62
+ model_name: str = "cross-encoder/ms-marco-MiniLM-L-12-v2",
61
63
  backend: str = "onnx",
62
64
  ) -> None:
63
65
  self._model_name = model_name
@@ -129,8 +131,23 @@ class CrossEncoderReranker:
129
131
  finally:
130
132
  self._worker_loading = False
131
133
 
132
- t = threading.Thread(target=_warmup, daemon=True, name="ce-warmup")
133
- t.start()
134
+ self._warmup_thread = threading.Thread(target=_warmup, daemon=True, name="ce-warmup")
135
+ self._warmup_thread.start()
136
+
137
+ def warmup_sync(self, timeout: float = 120.0) -> bool:
138
+ """Block until reranker model is loaded. Returns True if ready.
139
+
140
+ V3.3.12: Critical for benchmarks and first-recall quality.
141
+ Without this, first 30-60s of recalls get no reranking (-30.7pp).
142
+ """
143
+ if self._model_loaded:
144
+ return True
145
+ if not self._worker_loading and not self._model_loaded:
146
+ self._start_background_warmup()
147
+ t = getattr(self, '_warmup_thread', None)
148
+ if t is not None:
149
+ t.join(timeout=timeout)
150
+ return self._model_loaded
134
151
 
135
152
  # ------------------------------------------------------------------
136
153
  # Worker management (mirrors EmbeddingService pattern)
@@ -304,13 +321,13 @@ class CrossEncoderReranker:
304
321
 
305
322
  documents = [fact.content for fact, _ in candidates]
306
323
 
307
- # Short timeout (10s) — model should already be loaded by warmup.
308
- # If worker crashed or is still loading, fallback immediately.
324
+ # V3.3.12: Increased timeout 10s→60s — L-12-v2 needs PyTorch + ONNX load.
325
+ # Critical: Paper 2 ablation showed -30.7pp without reranking.
309
326
  resp = self._send_request({
310
327
  "cmd": "rerank",
311
328
  "query": query,
312
329
  "documents": documents,
313
- }, timeout=10.0)
330
+ }, timeout=60.0)
314
331
 
315
332
  if resp is None or not resp.get("ok"):
316
333
  # Fallback: return by existing score
@@ -182,7 +182,7 @@ class SemanticChannel:
182
182
  else:
183
183
  sim = cos_sim
184
184
 
185
- if sim > 0.3:
185
+ if sim > 0.05:
186
186
  scored.append((fact.fact_id, sim))
187
187
 
188
188
  scored.sort(key=lambda x: x[1], reverse=True)
@@ -235,7 +235,7 @@ class SemanticChannel:
235
235
  else:
236
236
  sim = cos_sim
237
237
 
238
- if sim > 0.3:
238
+ if sim > 0.05:
239
239
  scored.append((fact.fact_id, sim))
240
240
 
241
241
  scored.sort(key=lambda x: x[1], reverse=True)
@@ -124,6 +124,16 @@ class TemporalChannel:
124
124
  import re
125
125
  _PROPER_RE = re.compile(r"\b([A-Z][a-z]+)\b")
126
126
  names = [m.group(1) for m in _PROPER_RE.finditer(query)]
127
+ # Also try title-cased version for lowercase queries
128
+ if not names:
129
+ names = [m.group(1) for m in _PROPER_RE.finditer(query.title())]
130
+ # Filter out common words from title-casing
131
+ _stop = {"What", "When", "Where", "Who", "Which", "How", "Does", "Did",
132
+ "The", "That", "This", "There", "Then", "Have", "Has", "Had",
133
+ "About", "After", "Before", "From", "With", "Would", "Could",
134
+ "Should", "Will", "Because", "Also", "Just", "Like", "Know",
135
+ "Think", "Tell", "Said"}
136
+ names = [n for n in names if n not in _stop]
127
137
  if not names:
128
138
  return []
129
139
 
@@ -146,7 +156,10 @@ class TemporalChannel:
146
156
  fid = dict(row)["fact_id"]
147
157
  if fid not in seen:
148
158
  seen.add(fid)
149
- results.append((fid, 0.85)) # High base score for entity-temporal
159
+ # Rank by position (first events more likely relevant) instead
160
+ # of flat 0.85 which loses discrimination
161
+ rank_score = 0.85 - len(seen) * 0.02
162
+ results.append((fid, max(0.3, rank_score)))
150
163
 
151
164
  return results
152
165
 
@@ -442,6 +442,8 @@ CREATE INDEX IF NOT EXISTS idx_edges_target
442
442
  ON graph_edges (profile_id, target_id);
443
443
  CREATE INDEX IF NOT EXISTS idx_edges_type
444
444
  ON graph_edges (profile_id, edge_type);
445
+ CREATE INDEX IF NOT EXISTS idx_edges_exists_check
446
+ ON graph_edges (profile_id, source_id, target_id, edge_type);
445
447
  """
446
448
 
447
449