superlocalmemory 3.3.11 → 3.3.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/pyproject.toml +2 -3
- package/src/superlocalmemory/core/config.py +9 -6
- package/src/superlocalmemory/core/embedding_worker.py +5 -1
- package/src/superlocalmemory/core/embeddings.py +3 -1
- package/src/superlocalmemory/core/engine.py +14 -0
- package/src/superlocalmemory/core/engine_wiring.py +16 -1
- package/src/superlocalmemory/core/maintenance_scheduler.py +94 -0
- package/src/superlocalmemory/core/recall_pipeline.py +24 -0
- package/src/superlocalmemory/core/recall_worker.py +22 -4
- package/src/superlocalmemory/core/reranker_worker.py +246 -0
- package/src/superlocalmemory/core/store_pipeline.py +12 -2
- package/src/superlocalmemory/encoding/fact_extractor.py +16 -8
- package/src/superlocalmemory/encoding/graph_builder.py +21 -1
- package/src/superlocalmemory/learning/adaptive.py +2 -2
- package/src/superlocalmemory/math/fisher_quantized.py +8 -4
- package/src/superlocalmemory/math/langevin.py +15 -2
- package/src/superlocalmemory/mcp/resources.py +2 -2
- package/src/superlocalmemory/mcp/shared.py +27 -0
- package/src/superlocalmemory/mcp/tools_active.py +31 -1
- package/src/superlocalmemory/mcp/tools_core.py +15 -9
- package/src/superlocalmemory/mcp/tools_v28.py +2 -2
- package/src/superlocalmemory/mcp/tools_v3.py +3 -0
- package/src/superlocalmemory/mcp/tools_v33.py +68 -7
- package/src/superlocalmemory/retrieval/agentic.py +1 -1
- package/src/superlocalmemory/retrieval/bm25_channel.py +21 -1
- package/src/superlocalmemory/retrieval/engine.py +44 -9
- package/src/superlocalmemory/retrieval/entity_channel.py +6 -0
- package/src/superlocalmemory/retrieval/fusion.py +2 -2
- package/src/superlocalmemory/retrieval/hopfield_channel.py +2 -2
- package/src/superlocalmemory/retrieval/reranker.py +24 -7
- package/src/superlocalmemory/retrieval/semantic_channel.py +2 -2
- package/src/superlocalmemory/retrieval/temporal_channel.py +14 -1
- package/src/superlocalmemory/storage/schema.py +2 -0
|
@@ -35,7 +35,9 @@ _live_rerankers: set[weakref.ref] = set()
|
|
|
35
35
|
logger = logging.getLogger(__name__)
|
|
36
36
|
|
|
37
37
|
_IDLE_TIMEOUT_SECONDS = 120 # 2 min → kill worker
|
|
38
|
-
|
|
38
|
+
# V3.3.12: Configurable via SLM_RERANKER_IDLE_TIMEOUT env var
|
|
39
|
+
_IDLE_TIMEOUT_SECONDS = int(os.environ.get("SLM_RERANKER_IDLE_TIMEOUT", _IDLE_TIMEOUT_SECONDS))
|
|
40
|
+
_SUBPROCESS_RESPONSE_TIMEOUT = 180 # V3.3.12: 180s (was 120s) for stressed system respawns
|
|
39
41
|
_WORKER_RECYCLE_AFTER = 500 # Recycle after N requests
|
|
40
42
|
|
|
41
43
|
|
|
@@ -57,7 +59,7 @@ class CrossEncoderReranker:
|
|
|
57
59
|
|
|
58
60
|
def __init__(
|
|
59
61
|
self,
|
|
60
|
-
model_name: str = "cross-encoder/ms-marco-MiniLM-L-
|
|
62
|
+
model_name: str = "cross-encoder/ms-marco-MiniLM-L-12-v2",
|
|
61
63
|
backend: str = "onnx",
|
|
62
64
|
) -> None:
|
|
63
65
|
self._model_name = model_name
|
|
@@ -129,8 +131,23 @@ class CrossEncoderReranker:
|
|
|
129
131
|
finally:
|
|
130
132
|
self._worker_loading = False
|
|
131
133
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
+
self._warmup_thread = threading.Thread(target=_warmup, daemon=True, name="ce-warmup")
|
|
135
|
+
self._warmup_thread.start()
|
|
136
|
+
|
|
137
|
+
def warmup_sync(self, timeout: float = 120.0) -> bool:
|
|
138
|
+
"""Block until reranker model is loaded. Returns True if ready.
|
|
139
|
+
|
|
140
|
+
V3.3.12: Critical for benchmarks and first-recall quality.
|
|
141
|
+
Without this, first 30-60s of recalls get no reranking (-30.7pp).
|
|
142
|
+
"""
|
|
143
|
+
if self._model_loaded:
|
|
144
|
+
return True
|
|
145
|
+
if not self._worker_loading and not self._model_loaded:
|
|
146
|
+
self._start_background_warmup()
|
|
147
|
+
t = getattr(self, '_warmup_thread', None)
|
|
148
|
+
if t is not None:
|
|
149
|
+
t.join(timeout=timeout)
|
|
150
|
+
return self._model_loaded
|
|
134
151
|
|
|
135
152
|
# ------------------------------------------------------------------
|
|
136
153
|
# Worker management (mirrors EmbeddingService pattern)
|
|
@@ -304,13 +321,13 @@ class CrossEncoderReranker:
|
|
|
304
321
|
|
|
305
322
|
documents = [fact.content for fact, _ in candidates]
|
|
306
323
|
|
|
307
|
-
#
|
|
308
|
-
#
|
|
324
|
+
# V3.3.12: Increased timeout 10s→60s — L-12-v2 needs PyTorch + ONNX load.
|
|
325
|
+
# Critical: Paper 2 ablation showed -30.7pp without reranking.
|
|
309
326
|
resp = self._send_request({
|
|
310
327
|
"cmd": "rerank",
|
|
311
328
|
"query": query,
|
|
312
329
|
"documents": documents,
|
|
313
|
-
}, timeout=10.0)
|
|
330
|
+
}, timeout=60.0)
|
|
314
331
|
|
|
315
332
|
if resp is None or not resp.get("ok"):
|
|
316
333
|
# Fallback: return by existing score
|
|
@@ -182,7 +182,7 @@ class SemanticChannel:
|
|
|
182
182
|
else:
|
|
183
183
|
sim = cos_sim
|
|
184
184
|
|
|
185
|
-
if sim > 0.
|
|
185
|
+
if sim > 0.05:
|
|
186
186
|
scored.append((fact.fact_id, sim))
|
|
187
187
|
|
|
188
188
|
scored.sort(key=lambda x: x[1], reverse=True)
|
|
@@ -235,7 +235,7 @@ class SemanticChannel:
|
|
|
235
235
|
else:
|
|
236
236
|
sim = cos_sim
|
|
237
237
|
|
|
238
|
-
if sim > 0.
|
|
238
|
+
if sim > 0.05:
|
|
239
239
|
scored.append((fact.fact_id, sim))
|
|
240
240
|
|
|
241
241
|
scored.sort(key=lambda x: x[1], reverse=True)
|
|
@@ -124,6 +124,16 @@ class TemporalChannel:
|
|
|
124
124
|
import re
|
|
125
125
|
_PROPER_RE = re.compile(r"\b([A-Z][a-z]+)\b")
|
|
126
126
|
names = [m.group(1) for m in _PROPER_RE.finditer(query)]
|
|
127
|
+
# Also try title-cased version for lowercase queries
|
|
128
|
+
if not names:
|
|
129
|
+
names = [m.group(1) for m in _PROPER_RE.finditer(query.title())]
|
|
130
|
+
# Filter out common words from title-casing
|
|
131
|
+
_stop = {"What", "When", "Where", "Who", "Which", "How", "Does", "Did",
|
|
132
|
+
"The", "That", "This", "There", "Then", "Have", "Has", "Had",
|
|
133
|
+
"About", "After", "Before", "From", "With", "Would", "Could",
|
|
134
|
+
"Should", "Will", "Because", "Also", "Just", "Like", "Know",
|
|
135
|
+
"Think", "Tell", "Said"}
|
|
136
|
+
names = [n for n in names if n not in _stop]
|
|
127
137
|
if not names:
|
|
128
138
|
return []
|
|
129
139
|
|
|
@@ -146,7 +156,10 @@ class TemporalChannel:
|
|
|
146
156
|
fid = dict(row)["fact_id"]
|
|
147
157
|
if fid not in seen:
|
|
148
158
|
seen.add(fid)
|
|
149
|
-
|
|
159
|
+
# Rank by position (first events more likely relevant) instead
|
|
160
|
+
# of flat 0.85 which loses discrimination
|
|
161
|
+
rank_score = 0.85 - len(seen) * 0.02
|
|
162
|
+
results.append((fid, max(0.3, rank_score)))
|
|
150
163
|
|
|
151
164
|
return results
|
|
152
165
|
|
|
@@ -442,6 +442,8 @@ CREATE INDEX IF NOT EXISTS idx_edges_target
|
|
|
442
442
|
ON graph_edges (profile_id, target_id);
|
|
443
443
|
CREATE INDEX IF NOT EXISTS idx_edges_type
|
|
444
444
|
ON graph_edges (profile_id, edge_type);
|
|
445
|
+
CREATE INDEX IF NOT EXISTS idx_edges_exists_check
|
|
446
|
+
ON graph_edges (profile_id, source_id, target_id, edge_type);
|
|
445
447
|
"""
|
|
446
448
|
|
|
447
449
|
|