superlocalmemory 3.3.11 → 3.3.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/src/superlocalmemory/core/config.py +7 -4
- package/src/superlocalmemory/core/embeddings.py +3 -1
- package/src/superlocalmemory/core/engine_wiring.py +1 -1
- package/src/superlocalmemory/core/recall_pipeline.py +24 -0
- package/src/superlocalmemory/core/recall_worker.py +22 -4
- package/src/superlocalmemory/core/reranker_worker.py +220 -0
- package/src/superlocalmemory/core/store_pipeline.py +12 -2
- package/src/superlocalmemory/encoding/fact_extractor.py +16 -8
- package/src/superlocalmemory/encoding/graph_builder.py +14 -1
- package/src/superlocalmemory/learning/adaptive.py +2 -2
- package/src/superlocalmemory/math/fisher_quantized.py +8 -4
- package/src/superlocalmemory/math/langevin.py +15 -2
- package/src/superlocalmemory/mcp/resources.py +2 -2
- package/src/superlocalmemory/mcp/shared.py +27 -0
- package/src/superlocalmemory/mcp/tools_active.py +31 -1
- package/src/superlocalmemory/mcp/tools_core.py +15 -9
- package/src/superlocalmemory/mcp/tools_v28.py +2 -2
- package/src/superlocalmemory/mcp/tools_v3.py +3 -0
- package/src/superlocalmemory/mcp/tools_v33.py +68 -7
- package/src/superlocalmemory/retrieval/agentic.py +1 -1
- package/src/superlocalmemory/retrieval/bm25_channel.py +21 -1
- package/src/superlocalmemory/retrieval/engine.py +44 -9
- package/src/superlocalmemory/retrieval/entity_channel.py +6 -0
- package/src/superlocalmemory/retrieval/fusion.py +2 -2
- package/src/superlocalmemory/retrieval/hopfield_channel.py +2 -2
- package/src/superlocalmemory/retrieval/reranker.py +23 -6
- package/src/superlocalmemory/retrieval/semantic_channel.py +2 -2
- package/src/superlocalmemory/retrieval/temporal_channel.py +14 -1
- package/src/superlocalmemory/storage/schema.py +2 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superlocalmemory",
|
|
3
|
-
"version": "3.3.11",
|
|
3
|
+
"version": "3.3.12",
|
|
4
4
|
"description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-memory",
|
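package/pyproject.toml
CHANGED
(hunk not captured in this extract; listed above as +1 -1)
package/src/superlocalmemory/core/config.py
CHANGED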
|
@@ -86,10 +86,10 @@ class LLMConfig:
|
|
|
86
86
|
class ChannelWeights:
|
|
87
87
|
"""Retrieval channel weights — 5 channels, query-adaptive."""
|
|
88
88
|
|
|
89
|
-
#
|
|
90
|
-
semantic: float = 1.
|
|
89
|
+
# Semantic should dominate for conversational retrieval (paraphrase matters most).
|
|
90
|
+
semantic: float = 1.5
|
|
91
91
|
bm25: float = 1.0
|
|
92
|
-
entity_graph: float = 1.
|
|
92
|
+
entity_graph: float = 1.0
|
|
93
93
|
temporal: float = 1.0
|
|
94
94
|
spreading_activation: float = 1.0 # Phase 3: 5th channel (BC-08: default value)
|
|
95
95
|
hopfield: float = 0.8 # Phase G: 6th channel (Hopfield associative memory)
|
|
@@ -143,7 +143,7 @@ class RetrievalConfig:
|
|
|
143
143
|
"""Configuration for the retrieval (recall) pipeline."""
|
|
144
144
|
|
|
145
145
|
# Fusion
|
|
146
|
-
rrf_k: int =
|
|
146
|
+
rrf_k: int = 15 # RRF smoothing constant (k=15 for candidate pools of 50-200)
|
|
147
147
|
top_k: int = 20 # Final results to return
|
|
148
148
|
|
|
149
149
|
# Per-channel
|
|
@@ -740,6 +740,9 @@ class SLMConfig:
|
|
|
740
740
|
retrieval=RetrievalConfig(
|
|
741
741
|
# V3.3.2: ONNX cross-encoder enabled for all modes (~200MB)
|
|
742
742
|
use_cross_encoder=True,
|
|
743
|
+
# Mode A is zero-LLM: disable agentic retrieval (it replaces
|
|
744
|
+
# precision-tuned fusion with crude heuristic expansions)
|
|
745
|
+
agentic_max_rounds=0,
|
|
743
746
|
),
|
|
744
747
|
math=MathConfig(
|
|
745
748
|
sheaf_contradiction_threshold=0.45, # 768d threshold
|
|
@@ -50,7 +50,9 @@ class DimensionMismatchError(RuntimeError):
|
|
|
50
50
|
|
|
51
51
|
|
|
52
52
|
_IDLE_TIMEOUT_SECONDS = 120 # 2 minutes — kill worker after idle
|
|
53
|
-
|
|
53
|
+
# V3.3.12: Configurable via SLM_EMBED_IDLE_TIMEOUT env var (seconds)
|
|
54
|
+
_IDLE_TIMEOUT_SECONDS = int(os.environ.get("SLM_EMBED_IDLE_TIMEOUT", _IDLE_TIMEOUT_SECONDS))
|
|
55
|
+
_SUBPROCESS_RESPONSE_TIMEOUT = 180 # V3.3.12: 180s (was 120s) — respawns on stressed systems need more time
|
|
54
56
|
_WORKER_RECYCLE_AFTER = 1000 # Recycle worker after N requests (C++ fragmentation prevention)
|
|
55
57
|
|
|
56
58
|
|
|
@@ -339,7 +339,7 @@ def _init_spreading_activation(
|
|
|
339
339
|
SpreadingActivation,
|
|
340
340
|
SpreadingActivationConfig,
|
|
341
341
|
)
|
|
342
|
-
sa_config = SpreadingActivationConfig(enabled=
|
|
342
|
+
sa_config = SpreadingActivationConfig(enabled=True)
|
|
343
343
|
return SpreadingActivation(
|
|
344
344
|
db=db, vector_store=vector_store, config=sa_config,
|
|
345
345
|
)
|
|
@@ -192,6 +192,30 @@ def run_recall(
|
|
|
192
192
|
except Exception as exc:
|
|
193
193
|
logger.debug("Access log batch store failed: %s", exc)
|
|
194
194
|
|
|
195
|
+
# V3.3.12: Wire BehavioralTracker.record_query() into live recall pipeline
|
|
196
|
+
try:
|
|
197
|
+
from superlocalmemory.learning.behavioral import BehavioralTracker
|
|
198
|
+
_tracker = BehavioralTracker(db)
|
|
199
|
+
_tracker.record_query(
|
|
200
|
+
profile_id=profile_id, query=query,
|
|
201
|
+
query_type=response.query_type,
|
|
202
|
+
result_count=len(response.results),
|
|
203
|
+
)
|
|
204
|
+
except Exception as exc:
|
|
205
|
+
logger.debug("Behavioral tracking: %s", exc)
|
|
206
|
+
|
|
207
|
+
# V3.3.12: Spaced repetition update on recall (Ebbinghaus on_access_event)
|
|
208
|
+
if response.results:
|
|
209
|
+
try:
|
|
210
|
+
from superlocalmemory.learning.forgetting_scheduler import ForgettingScheduler
|
|
211
|
+
from superlocalmemory.math.ebbinghaus import EbbinghausCurve
|
|
212
|
+
_ebbinghaus = EbbinghausCurve(config.forgetting)
|
|
213
|
+
_fsched = ForgettingScheduler(db, _ebbinghaus, config.forgetting)
|
|
214
|
+
for r in response.results[:10]:
|
|
215
|
+
_fsched.on_access_event(r.fact.fact_id, profile_id)
|
|
216
|
+
except Exception as exc:
|
|
217
|
+
logger.debug("Spaced repetition update: %s", exc)
|
|
218
|
+
|
|
195
219
|
# Phase 3: Hebbian strengthening for co-accessed facts
|
|
196
220
|
if auto_linker and response.results:
|
|
197
221
|
try:
|
|
@@ -187,10 +187,28 @@ def _handle_update_memory(fact_id: str, content: str, agent_id: str = "system")
|
|
|
187
187
|
if not rows:
|
|
188
188
|
return {"ok": False, "error": f"Memory {fact_id} not found"}
|
|
189
189
|
old_content = dict(rows[0]).get("content", "")[:80]
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
190
|
+
# V3.3.12: Re-embed updated content so semantic search + BM25 stay consistent.
|
|
191
|
+
# Previously only the text column was updated, leaving stale embeddings.
|
|
192
|
+
updates: dict = {"content": content}
|
|
193
|
+
if engine._embedder:
|
|
194
|
+
try:
|
|
195
|
+
new_emb = engine._embedder.embed(content)
|
|
196
|
+
if new_emb:
|
|
197
|
+
updates["embedding"] = new_emb
|
|
198
|
+
fm, fv = engine._embedder.compute_fisher_params(new_emb)
|
|
199
|
+
updates["fisher_mean"] = fm
|
|
200
|
+
updates["fisher_variance"] = fv
|
|
201
|
+
except Exception:
|
|
202
|
+
pass
|
|
203
|
+
engine._db.update_fact(fact_id, updates)
|
|
204
|
+
# Update BM25 index for the new content
|
|
205
|
+
if hasattr(engine, '_retrieval_engine') and engine._retrieval_engine:
|
|
206
|
+
bm25 = getattr(engine._retrieval_engine, '_bm25', None)
|
|
207
|
+
if bm25:
|
|
208
|
+
try:
|
|
209
|
+
bm25.add(fact_id, content, pid)
|
|
210
|
+
except Exception:
|
|
211
|
+
pass
|
|
194
212
|
import logging as _logging
|
|
195
213
|
_logging.getLogger("superlocalmemory.audit").info(
|
|
196
214
|
"UPDATE fact_id=%s by agent=%s old=%s new=%s",
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
|
|
4
|
+
|
|
5
|
+
"""Subprocess reranker worker — isolates PyTorch/ONNX from main process.
|
|
6
|
+
|
|
7
|
+
Same pattern as embedding_worker.py. The main process stays at ~60 MB.
|
|
8
|
+
All cross-encoder model memory lives in this worker subprocess.
|
|
9
|
+
|
|
10
|
+
Protocol (JSON over stdin/stdout):
|
|
11
|
+
Request: {"cmd": "rerank", "query": "...", "documents": ["...", ...]}
|
|
12
|
+
Response: {"ok": true, "scores": [0.95, 0.32, ...]}
|
|
13
|
+
|
|
14
|
+
Request: {"cmd": "score", "query": "...", "document": "..."}
|
|
15
|
+
Response: {"ok": true, "score": 0.87}
|
|
16
|
+
|
|
17
|
+
Request: {"cmd": "ping"}
|
|
18
|
+
Response: {"ok": true, "backend": "onnx", "model": "..."}
|
|
19
|
+
|
|
20
|
+
Request: {"cmd": "quit"}
|
|
21
|
+
(worker exits)
|
|
22
|
+
|
|
23
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import json
|
|
29
|
+
import os
|
|
30
|
+
import platform
|
|
31
|
+
import signal
|
|
32
|
+
import struct
|
|
33
|
+
import sys
|
|
34
|
+
import threading
|
|
35
|
+
|
|
36
|
+
# Force CPU BEFORE any torch import
|
|
37
|
+
os.environ["CUDA_VISIBLE_DEVICES"] = ""
|
|
38
|
+
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"
|
|
39
|
+
os.environ["PYTORCH_MPS_MEM_LIMIT"] = "0"
|
|
40
|
+
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
|
|
41
|
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
42
|
+
os.environ["TORCH_DEVICE"] = "cpu"
|
|
43
|
+
|
|
44
|
+
# SIGTERM bridge for Docker/systemd
|
|
45
|
+
if sys.platform != "win32":
|
|
46
|
+
signal.signal(signal.SIGTERM, lambda *_: sys.exit(0))
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _start_parent_watchdog() -> None:
|
|
50
|
+
"""Monitor parent process — self-terminate if parent dies.
|
|
51
|
+
|
|
52
|
+
Prevents orphaned workers that consume 1+ GB each when the parent
|
|
53
|
+
process crashes, is killed, or exits without cleanup.
|
|
54
|
+
|
|
55
|
+
V3.3.7: Added after incident where ~30 orphaned workers consumed 33 GB.
|
|
56
|
+
"""
|
|
57
|
+
parent_pid = os.getppid()
|
|
58
|
+
|
|
59
|
+
def _watch() -> None:
|
|
60
|
+
import time
|
|
61
|
+
while True:
|
|
62
|
+
time.sleep(5)
|
|
63
|
+
try:
|
|
64
|
+
os.kill(parent_pid, 0) # Check if parent is alive (signal 0)
|
|
65
|
+
except OSError:
|
|
66
|
+
# Parent is dead — self-terminate
|
|
67
|
+
os._exit(0)
|
|
68
|
+
|
|
69
|
+
t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
|
|
70
|
+
t.start()
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _detect_onnx_variant() -> str:
|
|
74
|
+
"""Auto-detect the best ONNX model variant for the current platform."""
|
|
75
|
+
arch = platform.machine().lower()
|
|
76
|
+
is_64bit = struct.calcsize("P") * 8 == 64
|
|
77
|
+
|
|
78
|
+
if sys.platform == "darwin" and arch in ("arm64", "aarch64"):
|
|
79
|
+
return "onnx/model_qint8_arm64.onnx"
|
|
80
|
+
if arch in ("x86_64", "amd64") and is_64bit:
|
|
81
|
+
return "onnx/model_quint8_avx2.onnx"
|
|
82
|
+
return "onnx/model.onnx"
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _worker_main() -> None:
|
|
86
|
+
"""Main loop: read JSON requests from stdin, write responses to stdout."""
|
|
87
|
+
_start_parent_watchdog() # V3.3.7: self-terminate if parent dies
|
|
88
|
+
|
|
89
|
+
model = None
|
|
90
|
+
active_backend = ""
|
|
91
|
+
model_name = ""
|
|
92
|
+
|
|
93
|
+
for line in sys.stdin:
|
|
94
|
+
line = line.strip()
|
|
95
|
+
if not line:
|
|
96
|
+
continue
|
|
97
|
+
try:
|
|
98
|
+
req = json.loads(line)
|
|
99
|
+
except json.JSONDecodeError:
|
|
100
|
+
_respond({"ok": False, "error": "Invalid JSON"})
|
|
101
|
+
continue
|
|
102
|
+
|
|
103
|
+
cmd = req.get("cmd", "")
|
|
104
|
+
|
|
105
|
+
if cmd == "quit":
|
|
106
|
+
break
|
|
107
|
+
|
|
108
|
+
if cmd == "ping":
|
|
109
|
+
_respond({
|
|
110
|
+
"ok": True,
|
|
111
|
+
"loaded": model is not None,
|
|
112
|
+
"backend": active_backend,
|
|
113
|
+
"model": model_name,
|
|
114
|
+
})
|
|
115
|
+
continue
|
|
116
|
+
|
|
117
|
+
if cmd == "load":
|
|
118
|
+
name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-6-v2")
|
|
119
|
+
backend = req.get("backend", "onnx")
|
|
120
|
+
model, active_backend, model_name = _load_model(name, backend)
|
|
121
|
+
_respond({
|
|
122
|
+
"ok": model is not None,
|
|
123
|
+
"backend": active_backend,
|
|
124
|
+
"model": model_name,
|
|
125
|
+
})
|
|
126
|
+
continue
|
|
127
|
+
|
|
128
|
+
if cmd == "rerank":
|
|
129
|
+
query = req.get("query", "")
|
|
130
|
+
documents = req.get("documents", [])
|
|
131
|
+
if not query or not documents:
|
|
132
|
+
_respond({"ok": False, "error": "Missing query or documents"})
|
|
133
|
+
continue
|
|
134
|
+
if model is None:
|
|
135
|
+
# Auto-load with defaults
|
|
136
|
+
name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-6-v2")
|
|
137
|
+
backend = req.get("backend", "onnx")
|
|
138
|
+
model, active_backend, model_name = _load_model(name, backend)
|
|
139
|
+
if model is None:
|
|
140
|
+
_respond({"ok": False, "error": "Model load failed"})
|
|
141
|
+
continue
|
|
142
|
+
try:
|
|
143
|
+
pairs = [(query, doc) for doc in documents]
|
|
144
|
+
try:
|
|
145
|
+
import torch
|
|
146
|
+
with torch.inference_mode():
|
|
147
|
+
scores = model.predict(pairs)
|
|
148
|
+
except ImportError:
|
|
149
|
+
scores = model.predict(pairs)
|
|
150
|
+
_respond({
|
|
151
|
+
"ok": True,
|
|
152
|
+
"scores": [float(s) for s in scores],
|
|
153
|
+
})
|
|
154
|
+
except Exception as exc:
|
|
155
|
+
_respond({"ok": False, "error": str(exc)})
|
|
156
|
+
continue
|
|
157
|
+
|
|
158
|
+
if cmd == "score":
|
|
159
|
+
query = req.get("query", "")
|
|
160
|
+
document = req.get("document", "")
|
|
161
|
+
if not query or not document:
|
|
162
|
+
_respond({"ok": False, "error": "Missing query or document"})
|
|
163
|
+
continue
|
|
164
|
+
if model is None:
|
|
165
|
+
name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-6-v2")
|
|
166
|
+
backend = req.get("backend", "onnx")
|
|
167
|
+
model, active_backend, model_name = _load_model(name, backend)
|
|
168
|
+
if model is None:
|
|
169
|
+
_respond({"ok": False, "error": "Model load failed"})
|
|
170
|
+
continue
|
|
171
|
+
try:
|
|
172
|
+
try:
|
|
173
|
+
import torch
|
|
174
|
+
with torch.inference_mode():
|
|
175
|
+
scores = model.predict([(query, document)])
|
|
176
|
+
except ImportError:
|
|
177
|
+
scores = model.predict([(query, document)])
|
|
178
|
+
_respond({"ok": True, "score": float(scores[0])})
|
|
179
|
+
except Exception as exc:
|
|
180
|
+
_respond({"ok": False, "error": str(exc)})
|
|
181
|
+
continue
|
|
182
|
+
|
|
183
|
+
_respond({"ok": False, "error": f"Unknown command: {cmd}"})
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _load_model(
|
|
187
|
+
name: str, backend: str,
|
|
188
|
+
) -> tuple:
|
|
189
|
+
"""Load cross-encoder model. Returns (model, backend_name, model_name)."""
|
|
190
|
+
try:
|
|
191
|
+
from sentence_transformers import CrossEncoder
|
|
192
|
+
|
|
193
|
+
if backend == "onnx":
|
|
194
|
+
try:
|
|
195
|
+
onnx_file = _detect_onnx_variant()
|
|
196
|
+
m = CrossEncoder(
|
|
197
|
+
name, backend="onnx",
|
|
198
|
+
model_kwargs={"file_name": onnx_file},
|
|
199
|
+
)
|
|
200
|
+
return m, "onnx", name
|
|
201
|
+
except Exception:
|
|
202
|
+
# ONNX failed → try PyTorch
|
|
203
|
+
pass
|
|
204
|
+
# PyTorch fallback (or explicit pytorch backend)
|
|
205
|
+
m = CrossEncoder(name)
|
|
206
|
+
return m, "pytorch", name
|
|
207
|
+
except ImportError:
|
|
208
|
+
return None, "", ""
|
|
209
|
+
except Exception:
|
|
210
|
+
return None, "", ""
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def _respond(data: dict) -> None:
|
|
214
|
+
"""Write JSON response to stdout, flush immediately."""
|
|
215
|
+
sys.stdout.write(json.dumps(data) + "\n")
|
|
216
|
+
sys.stdout.flush()
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
if __name__ == "__main__":
|
|
220
|
+
_worker_main()
|
|
@@ -170,13 +170,23 @@ def run_store(
|
|
|
170
170
|
# V3.3.11: Also store raw content as a verbatim fact to preserve details
|
|
171
171
|
# that fact extraction may abstract away (dates, names, specifics).
|
|
172
172
|
# This ensures BM25 and semantic search can always find the original text.
|
|
173
|
+
# V3.3.12: Extract entities from verbatim content so entity channel + temporal
|
|
174
|
+
# channel can find it (was entities=[] which made 4/6 channels blind).
|
|
173
175
|
if content.strip() and len(content.strip()) >= 20:
|
|
174
176
|
import uuid
|
|
177
|
+
import re as _re
|
|
178
|
+
_verbatim_text = content.strip()
|
|
179
|
+
# Extract entities using the same regex as fact_extractor
|
|
180
|
+
_ent_re = _re.compile(r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3})\b")
|
|
181
|
+
_entity_set = {m.group(1) for m in _ent_re.finditer(_verbatim_text)}
|
|
182
|
+
# Also extract all-caps abbreviations (NYU, MIT, etc.) — dedup with first set
|
|
183
|
+
_entity_set |= {m.group(1) for m in _re.finditer(r'\b([A-Z]{2,})\b', _verbatim_text)}
|
|
184
|
+
_verbatim_entities = sorted(_entity_set)
|
|
175
185
|
verbatim = AtomicFact(
|
|
176
186
|
fact_id=uuid.uuid4().hex[:16],
|
|
177
|
-
content=
|
|
187
|
+
content=_verbatim_text,
|
|
178
188
|
fact_type=FactType.EPISODIC,
|
|
179
|
-
entities=
|
|
189
|
+
entities=_verbatim_entities,
|
|
180
190
|
session_id=session_id,
|
|
181
191
|
observation_date=parsed_date,
|
|
182
192
|
confidence=0.9,
|
|
@@ -84,7 +84,8 @@ _INTERVAL_RE = re.compile(
|
|
|
84
84
|
)
|
|
85
85
|
|
|
86
86
|
_ENTITY_RE = re.compile(
|
|
87
|
-
r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3})\b"
|
|
87
|
+
r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3})\b" # Capitalized word sequences
|
|
88
|
+
r"|\b([A-Z]{2,})\b" # ALL-CAPS abbreviations (NYU, MIT)
|
|
88
89
|
)
|
|
89
90
|
|
|
90
91
|
_QUOTED_RE = re.compile(r'"([^"]+)"') # Quoted strings as entities
|
|
@@ -243,7 +244,7 @@ def _extract_entities(text: str) -> list[str]:
|
|
|
243
244
|
|
|
244
245
|
# Capitalized word sequences (proper nouns)
|
|
245
246
|
for match in _ENTITY_RE.finditer(text):
|
|
246
|
-
candidate = match.group(1).strip()
|
|
247
|
+
candidate = (match.group(1) or match.group(2) or "").strip()
|
|
247
248
|
# Filter common English words that start sentences
|
|
248
249
|
# Check first word of multi-word candidates against stop list
|
|
249
250
|
_first_word = candidate.split()[0].lower() if candidate else ""
|
|
@@ -495,10 +496,17 @@ class FactExtractor:
|
|
|
495
496
|
) -> list[AtomicFact]:
|
|
496
497
|
"""Rule-based extraction: regex entities, keyword classification, scoring."""
|
|
497
498
|
combined = "\n".join(turns)
|
|
498
|
-
|
|
499
|
-
if not
|
|
500
|
-
|
|
501
|
-
|
|
499
|
+
raw_sentences = _split_sentences(combined)
|
|
500
|
+
if not raw_sentences:
|
|
501
|
+
raw_sentences = [t.strip() for t in turns if len(t.strip()) >= 8]
|
|
502
|
+
|
|
503
|
+
# V3.3.12: Sliding window of 2 sentences to preserve cross-sentence context.
|
|
504
|
+
# "She enrolled at NYU. Starting January 2024." → becomes one combined fact.
|
|
505
|
+
sentences = list(raw_sentences) # Keep originals
|
|
506
|
+
for i in range(len(raw_sentences) - 1):
|
|
507
|
+
pair = raw_sentences[i].rstrip() + " " + raw_sentences[i + 1].lstrip()
|
|
508
|
+
if len(pair) <= 300: # Only combine if not too long
|
|
509
|
+
sentences.append(pair)
|
|
502
510
|
|
|
503
511
|
# Build entity frequency map for importance scoring
|
|
504
512
|
entity_freq: dict[str, int] = {}
|
|
@@ -549,8 +557,8 @@ class FactExtractor:
|
|
|
549
557
|
if importance < self._config.min_fact_confidence:
|
|
550
558
|
continue
|
|
551
559
|
|
|
552
|
-
#
|
|
553
|
-
speaker
|
|
560
|
+
# V3.3.12: Speaker inference removed — result was never stored in AtomicFact.
|
|
561
|
+
# The speaker info is preserved in verbatim facts via [Speaker]: prefix.
|
|
554
562
|
|
|
555
563
|
facts.append(AtomicFact(
|
|
556
564
|
fact_id=_new_id(),
|
|
@@ -142,17 +142,25 @@ class GraphBuilder:
|
|
|
142
142
|
|
|
143
143
|
# -- Edge builders (private) -------------------------------------------
|
|
144
144
|
|
|
145
|
+
# V3.3.12: Cap entity edges per entity to prevent O(n²) explosion.
|
|
146
|
+
# With 500+ facts sharing a popular entity, creating an edge to each
|
|
147
|
+
# produced 44K+ edges and 22-min ingestion. Cap to 20 most recent per entity.
|
|
148
|
+
_MAX_ENTITY_EDGES_PER_ENTITY: int = 20
|
|
149
|
+
|
|
145
150
|
def _build_entity_edges(
|
|
146
151
|
self, new_fact: AtomicFact, profile_id: str,
|
|
147
152
|
) -> list[GraphEdge]:
|
|
148
|
-
"""ENTITY edges: shared canonical entity —
|
|
153
|
+
"""ENTITY edges: shared canonical entity — capped to most recent per entity."""
|
|
149
154
|
if not new_fact.canonical_entities:
|
|
150
155
|
return []
|
|
151
156
|
edges: list[GraphEdge] = []
|
|
152
157
|
seen: set[str] = set()
|
|
153
158
|
|
|
154
159
|
for entity_id in new_fact.canonical_entities:
|
|
160
|
+
entity_edge_count = 0
|
|
155
161
|
for other in self._db.get_facts_by_entity(entity_id, profile_id):
|
|
162
|
+
if entity_edge_count >= self._MAX_ENTITY_EDGES_PER_ENTITY:
|
|
163
|
+
break
|
|
156
164
|
if other.fact_id == new_fact.fact_id or other.fact_id in seen:
|
|
157
165
|
continue
|
|
158
166
|
if self._edge_exists(new_fact.fact_id, other.fact_id, EdgeType.ENTITY, profile_id):
|
|
@@ -163,6 +171,7 @@ class GraphBuilder:
|
|
|
163
171
|
target_id=other.fact_id, edge_type=EdgeType.ENTITY,
|
|
164
172
|
weight=_ENTITY_WEIGHT,
|
|
165
173
|
))
|
|
174
|
+
entity_edge_count += 1
|
|
166
175
|
return edges
|
|
167
176
|
|
|
168
177
|
def _build_temporal_edges(
|
|
@@ -184,7 +193,10 @@ class GraphBuilder:
|
|
|
184
193
|
seen_pairs: set[tuple[str, str]] = set()
|
|
185
194
|
|
|
186
195
|
for entity_id in new_fact.canonical_entities:
|
|
196
|
+
temporal_edge_count = 0
|
|
187
197
|
for other in self._db.get_facts_by_entity(entity_id, profile_id):
|
|
198
|
+
if temporal_edge_count >= self._MAX_ENTITY_EDGES_PER_ENTITY:
|
|
199
|
+
break # V3.3.12: cap temporal edges like entity edges
|
|
188
200
|
if other.fact_id == new_fact.fact_id:
|
|
189
201
|
continue
|
|
190
202
|
other_dt = _parse_date(other.observation_date)
|
|
@@ -212,6 +224,7 @@ class GraphBuilder:
|
|
|
212
224
|
target_id=other.fact_id, edge_type=EdgeType.TEMPORAL,
|
|
213
225
|
weight=weight,
|
|
214
226
|
))
|
|
227
|
+
temporal_edge_count += 1
|
|
215
228
|
# Reverse: other -> new
|
|
216
229
|
if not self._edge_exists(other.fact_id, new_fact.fact_id, EdgeType.TEMPORAL, profile_id):
|
|
217
230
|
edges.append(GraphEdge(
|
|
@@ -145,10 +145,14 @@ class FRQADMetric:
|
|
|
145
145
|
if bit_width >= 32:
|
|
146
146
|
return np.array(base_variance, dtype=np.float64)
|
|
147
147
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
148
|
+
# V3.3.12: Paper-correct ADDITIVE variance combination (was multiplicative).
|
|
149
|
+
# sigma²_total = sigma²_obs + sigma²_quant
|
|
150
|
+
# sigma²_quant = Delta²/12 where Delta = 2/2^b (uniform quantization step)
|
|
151
|
+
delta = 2.0 / (2 ** bit_width) # Quantization step size
|
|
152
|
+
sigma_q_sq = (delta ** 2) / 12.0 # Uniform quantization noise variance
|
|
153
|
+
sigma_total = np.asarray(base_variance, dtype=np.float64) + sigma_q_sq
|
|
154
|
+
|
|
155
|
+
return np.clip(sigma_total, self._config.variance_floor, self._config.variance_ceiling)
|
|
152
156
|
|
|
153
157
|
# ------------------------------------------------------------------
|
|
154
158
|
# Core distance (THE novel contribution)
|
|
@@ -178,6 +178,19 @@ class LangevinDynamics:
|
|
|
178
178
|
# --- Drift: -lambda^{-2} * grad_U * dt (Eq. 5 term 1) ---
|
|
179
179
|
drift = -(lam_inv ** 2) * grad * self.dt
|
|
180
180
|
|
|
181
|
+
# --- V3.3.12: Ebbinghaus forgetting drift (Eq. 6 in Paper 3) ---
|
|
182
|
+
# λ(m) = 1/S(m) pushes toward boundary (forgetting) based on memory strength.
|
|
183
|
+
# S(m) is computed from access_count + importance. Higher S → less drift.
|
|
184
|
+
strength = max(0.5, 0.3 * math.log(1.0 + access_count) + 0.4 * importance)
|
|
185
|
+
forget_rate = 1.0 / strength # λ(m)
|
|
186
|
+
# F(ξ) = ξ/||ξ|| points outward (toward boundary = archived zone)
|
|
187
|
+
xi_norm = float(np.linalg.norm(xi))
|
|
188
|
+
if xi_norm > _EPS:
|
|
189
|
+
forget_direction = xi / xi_norm
|
|
190
|
+
else:
|
|
191
|
+
forget_direction = np.zeros(self.dim)
|
|
192
|
+
forgetting_drift = forget_rate * forget_direction * self.dt * 0.1 # Scaled down to prevent instability
|
|
193
|
+
|
|
181
194
|
# --- Curvature correction: 0.5 * T * (d-2) * lambda^{-1} * xi * dt (Eq. 5 term 3) ---
|
|
182
195
|
correction = 0.5 * self.temperature * (self.dim - 2) * lam_inv * xi * self.dt
|
|
183
196
|
|
|
@@ -186,8 +199,8 @@ class LangevinDynamics:
|
|
|
186
199
|
noise = rng.standard_normal(self.dim)
|
|
187
200
|
diffusion = math.sqrt(2.0 * self.temperature * self.dt) * lam_inv * noise
|
|
188
201
|
|
|
189
|
-
# --- Full Euler-Maruyama update (Girolami & Calderhead 2011) ---
|
|
190
|
-
new_xi = xi + drift + correction + diffusion
|
|
202
|
+
# --- Full Euler-Maruyama update with forgetting (Eq. 6, Girolami & Calderhead 2011) ---
|
|
203
|
+
new_xi = xi + drift + forgetting_drift + correction + diffusion
|
|
191
204
|
|
|
192
205
|
# --- Project back into the open ball ---
|
|
193
206
|
new_xi = _project_to_ball(new_xi)
|
|
@@ -197,8 +197,8 @@ def register_resources(server, get_engine: Callable) -> None:
|
|
|
197
197
|
|
|
198
198
|
# Behavioral patterns summary
|
|
199
199
|
try:
|
|
200
|
-
from superlocalmemory.learning.behavioral import
|
|
201
|
-
store =
|
|
200
|
+
from superlocalmemory.learning.behavioral import BehavioralPatternStore
|
|
201
|
+
store = BehavioralPatternStore(engine._db.db_path)
|
|
202
202
|
summary = store.get_summary(pid)
|
|
203
203
|
except Exception:
|
|
204
204
|
summary = {}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3
|
|
4
|
+
|
|
5
|
+
"""Shared MCP utilities — single source of truth for helpers used
|
|
6
|
+
across tools_core, tools_active, tools_v28, tools_v3, tools_v33.
|
|
7
|
+
|
|
8
|
+
V3.3.12: Extracted _emit_event to eliminate code duplication.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
_DB_PATH = Path.home() / ".superlocalmemory" / "memory.db"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def emit_event(event_type: str, payload: dict | None = None,
|
|
19
|
+
source_agent: str = "mcp_client") -> None:
|
|
20
|
+
"""Emit an event to the EventBus (best-effort, never raises)."""
|
|
21
|
+
try:
|
|
22
|
+
from superlocalmemory.infra.event_bus import EventBus
|
|
23
|
+
bus = EventBus.get_instance(_DB_PATH)
|
|
24
|
+
bus.emit(event_type, payload=payload, source_agent=source_agent,
|
|
25
|
+
source_protocol="mcp")
|
|
26
|
+
except Exception:
|
|
27
|
+
pass
|
|
@@ -27,7 +27,7 @@ DB_PATH = MEMORY_DIR / "memory.db"
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def _emit_event(event_type: str, payload: dict | None = None,
|
|
30
|
-
source_agent: str = "mcp_client") -> None:
|
|
30
|
+
source_agent: str = "mcp_client") -> None: # V3.3.12: see also mcp/shared.py
|
|
31
31
|
"""Emit an event to the EventBus (best-effort, never raises)."""
|
|
32
32
|
try:
|
|
33
33
|
from superlocalmemory.infra.event_bus import EventBus
|
|
@@ -253,3 +253,33 @@ def register_active_tools(server, get_engine: Callable) -> None:
|
|
|
253
253
|
except Exception as exc:
|
|
254
254
|
logger.exception("report_feedback failed")
|
|
255
255
|
return {"success": False, "error": str(exc)}
|
|
256
|
+
|
|
257
|
+
# ------------------------------------------------------------------
|
|
258
|
+
# close_session — V3.3.12: Expose session closure via MCP
|
|
259
|
+
# ------------------------------------------------------------------
|
|
260
|
+
|
|
261
|
+
@server.tool()
|
|
262
|
+
async def close_session(session_id: str = "") -> dict:
|
|
263
|
+
"""Close the current session and create temporal summary events.
|
|
264
|
+
|
|
265
|
+
Aggregates facts from the session into per-entity temporal summaries,
|
|
266
|
+
enabling temporal queries like "What happened in session X?"
|
|
267
|
+
|
|
268
|
+
Args:
|
|
269
|
+
session_id: Session to close. Defaults to the most recent session.
|
|
270
|
+
"""
|
|
271
|
+
try:
|
|
272
|
+
engine = get_engine()
|
|
273
|
+
pid = engine.profile_id
|
|
274
|
+
sid = session_id or getattr(engine, '_last_session_id', '')
|
|
275
|
+
if not sid:
|
|
276
|
+
return {"success": False, "error": "No session_id provided"}
|
|
277
|
+
count = engine.close_session(sid)
|
|
278
|
+
return {
|
|
279
|
+
"success": True,
|
|
280
|
+
"session_id": sid,
|
|
281
|
+
"summary_events_created": count,
|
|
282
|
+
}
|
|
283
|
+
except Exception as exc:
|
|
284
|
+
logger.exception("close_session failed")
|
|
285
|
+
return {"success": False, "error": str(exc)}
|
|
@@ -139,6 +139,8 @@ def register_core_tools(server, get_engine: Callable) -> None:
|
|
|
139
139
|
"results": result.get("results", []),
|
|
140
140
|
"count": result.get("result_count", 0),
|
|
141
141
|
"query_type": result.get("query_type", "unknown"),
|
|
142
|
+
"channel_weights": result.get("channel_weights", {}),
|
|
143
|
+
"retrieval_time_ms": result.get("retrieval_time_ms", 0),
|
|
142
144
|
}
|
|
143
145
|
return {"success": False, "error": result.get("error", "Recall failed")}
|
|
144
146
|
except Exception as exc:
|
|
@@ -280,11 +282,15 @@ def register_core_tools(server, get_engine: Callable) -> None:
|
|
|
280
282
|
engine.profile_id = profile_id
|
|
281
283
|
|
|
282
284
|
# Persist to both config stores so CLI and Dashboard stay in sync
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
285
|
+
try:
|
|
286
|
+
from superlocalmemory.server.routes.helpers import (
|
|
287
|
+
ensure_profile_in_db, set_active_profile_everywhere,
|
|
288
|
+
)
|
|
289
|
+
ensure_profile_in_db(profile_id)
|
|
290
|
+
set_active_profile_everywhere(profile_id)
|
|
291
|
+
except ImportError:
|
|
292
|
+
# Dashboard not installed — profile switch still works for MCP/CLI
|
|
293
|
+
logger.debug("Dashboard routes not available, profile set in engine only")
|
|
288
294
|
|
|
289
295
|
return {
|
|
290
296
|
"success": True,
|
|
@@ -337,8 +343,8 @@ def register_core_tools(server, get_engine: Callable) -> None:
|
|
|
337
343
|
"""Get learned behavioral patterns (interests, refinements, archival habits)."""
|
|
338
344
|
try:
|
|
339
345
|
engine = get_engine()
|
|
340
|
-
from superlocalmemory.learning.behavioral import
|
|
341
|
-
store =
|
|
346
|
+
from superlocalmemory.learning.behavioral import BehavioralPatternStore
|
|
347
|
+
store = BehavioralPatternStore(engine._db.db_path)
|
|
342
348
|
ptype = pattern_type if pattern_type else None
|
|
343
349
|
patterns = store.get_patterns(
|
|
344
350
|
engine.profile_id, pattern_type=ptype, limit=limit,
|
|
@@ -353,8 +359,8 @@ def register_core_tools(server, get_engine: Callable) -> None:
|
|
|
353
359
|
"""Correct or annotate a learned behavioral pattern to improve retrieval."""
|
|
354
360
|
try:
|
|
355
361
|
engine = get_engine()
|
|
356
|
-
from superlocalmemory.learning.behavioral import
|
|
357
|
-
store =
|
|
362
|
+
from superlocalmemory.learning.behavioral import BehavioralPatternStore
|
|
363
|
+
store = BehavioralPatternStore(engine._db.db_path)
|
|
358
364
|
store.record(
|
|
359
365
|
engine.profile_id,
|
|
360
366
|
pattern_type="correction",
|
|
@@ -181,8 +181,8 @@ def register_v28_tools(server, get_engine: Callable) -> None:
|
|
|
181
181
|
"""
|
|
182
182
|
try:
|
|
183
183
|
engine = get_engine()
|
|
184
|
-
from superlocalmemory.learning.behavioral import
|
|
185
|
-
store =
|
|
184
|
+
from superlocalmemory.learning.behavioral import BehavioralPatternStore
|
|
185
|
+
store = BehavioralPatternStore(engine._db.db_path)
|
|
186
186
|
patterns = store.get_patterns(engine.profile_id, limit=limit)
|
|
187
187
|
summary = store.get_summary(engine.profile_id)
|
|
188
188
|
return {
|
|
@@ -228,6 +228,7 @@ def register_v3_tools(server, get_engine: Callable) -> None:
|
|
|
228
228
|
|
|
229
229
|
facts = engine._db.get_all_facts(pid)[:limit]
|
|
230
230
|
all_contradictions: list[dict] = []
|
|
231
|
+
errors_count = 0
|
|
231
232
|
for fact in facts:
|
|
232
233
|
if not fact.embedding or not fact.canonical_entities:
|
|
233
234
|
continue
|
|
@@ -243,11 +244,13 @@ def register_v3_tools(server, get_engine: Callable) -> None:
|
|
|
243
244
|
"content_a": fact.content[:80],
|
|
244
245
|
})
|
|
245
246
|
except Exception:
|
|
247
|
+
errors_count += 1
|
|
246
248
|
continue
|
|
247
249
|
|
|
248
250
|
return {
|
|
249
251
|
"success": True,
|
|
250
252
|
"facts_checked": len(facts),
|
|
253
|
+
"facts_errored": errors_count,
|
|
251
254
|
"contradictions": all_contradictions[:50],
|
|
252
255
|
"total_contradictions": len(all_contradictions),
|
|
253
256
|
}
|
|
@@ -27,7 +27,7 @@ DB_PATH = MEMORY_DIR / "memory.db"
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def _emit_event(event_type: str, payload: dict | None = None,
|
|
30
|
-
source_agent: str = "mcp_client") -> None:
|
|
30
|
+
source_agent: str = "mcp_client") -> None: # V3.3.12: see also mcp/shared.py
|
|
31
31
|
"""Emit an event to the EventBus (best-effort, never raises)."""
|
|
32
32
|
try:
|
|
33
33
|
from superlocalmemory.infra.event_bus import EventBus
|
|
@@ -76,8 +76,15 @@ def register_v33_tools(server, get_engine: Callable) -> None:
|
|
|
76
76
|
)
|
|
77
77
|
|
|
78
78
|
if dry_run:
|
|
79
|
-
#
|
|
80
|
-
|
|
79
|
+
# Dry run: compute retention stats without applying changes
|
|
80
|
+
from superlocalmemory.math.ebbinghaus import EbbinghausCurve as _EC
|
|
81
|
+
facts = engine._db.get_all_facts(pid)
|
|
82
|
+
zones = {"active": 0, "warm": 0, "cold": 0, "archive": 0, "forgotten": 0}
|
|
83
|
+
for f in facts:
|
|
84
|
+
r = ebbinghaus.compute_retention(f.access_count or 0, f.importance or 0.5, 0, 0.0)
|
|
85
|
+
zone = ebbinghaus.classify_zone(r)
|
|
86
|
+
zones[zone] = zones.get(zone, 0) + 1
|
|
87
|
+
result = {"total": len(facts), "transitions": 0, "dry_run_zones": zones}
|
|
81
88
|
else:
|
|
82
89
|
result = scheduler.run_decay_cycle(pid, force=True)
|
|
83
90
|
|
|
@@ -137,8 +144,9 @@ def register_v33_tools(server, get_engine: Callable) -> None:
|
|
|
137
144
|
)
|
|
138
145
|
|
|
139
146
|
if dry_run:
|
|
140
|
-
#
|
|
141
|
-
|
|
147
|
+
# Dry run: report current quantization state without changes
|
|
148
|
+
facts = engine._db.get_all_facts(pid)
|
|
149
|
+
result = {"total": len(facts), "would_quantize": 0, "dry_run": True}
|
|
142
150
|
else:
|
|
143
151
|
result = scheduler.run_eap_cycle(pid)
|
|
144
152
|
|
|
@@ -185,13 +193,13 @@ def register_v33_tools(server, get_engine: Callable) -> None:
|
|
|
185
193
|
|
|
186
194
|
_emit_event("ccq.consolidation_complete", {
|
|
187
195
|
"profile_id": pid,
|
|
188
|
-
"
|
|
196
|
+
"clusters_processed": result.clusters_processed,
|
|
189
197
|
"blocks_created": result.blocks_created,
|
|
190
198
|
})
|
|
191
199
|
|
|
192
200
|
return {
|
|
193
201
|
"success": True,
|
|
194
|
-
"
|
|
202
|
+
"clusters_processed": result.clusters_processed,
|
|
195
203
|
"blocks_created": result.blocks_created,
|
|
196
204
|
"facts_archived": result.facts_archived,
|
|
197
205
|
"compression_ratio": round(result.compression_ratio, 3),
|
|
@@ -349,3 +357,56 @@ def register_v33_tools(server, get_engine: Callable) -> None:
|
|
|
349
357
|
except Exception as exc:
|
|
350
358
|
logger.exception("get_retention_stats tool failed")
|
|
351
359
|
return {"success": False, "error": str(exc)}
|
|
360
|
+
|
|
361
|
+
# ------------------------------------------------------------------
|
|
362
|
+
# 7. run_maintenance — V3.3.12: Combined periodic maintenance cycle
|
|
363
|
+
# ------------------------------------------------------------------
|
|
364
|
+
@server.tool()
|
|
365
|
+
async def run_maintenance(profile_id: str = "") -> dict:
|
|
366
|
+
"""Run all periodic maintenance tasks in a single call.
|
|
367
|
+
|
|
368
|
+
Combines Langevin dynamics stepping, Ebbinghaus forgetting decay,
|
|
369
|
+
and behavioral pattern mining into one convenient maintenance cycle.
|
|
370
|
+
Clients should call this periodically (e.g., at session end).
|
|
371
|
+
|
|
372
|
+
Args:
|
|
373
|
+
profile_id: Profile to maintain (default: active profile).
|
|
374
|
+
"""
|
|
375
|
+
try:
|
|
376
|
+
engine = get_engine()
|
|
377
|
+
pid = profile_id or engine.profile_id
|
|
378
|
+
results = {}
|
|
379
|
+
|
|
380
|
+
# 1. Langevin dynamics step (lifecycle evolution)
|
|
381
|
+
try:
|
|
382
|
+
from superlocalmemory.core.maintenance import run_maintenance as _run_maint
|
|
383
|
+
maint_result = _run_maint(engine._db, engine._config, pid)
|
|
384
|
+
results["langevin"] = {"updated": maint_result.get("updated", 0)}
|
|
385
|
+
except Exception as exc:
|
|
386
|
+
results["langevin"] = {"error": str(exc)}
|
|
387
|
+
|
|
388
|
+
# 2. Ebbinghaus forgetting decay
|
|
389
|
+
try:
|
|
390
|
+
from superlocalmemory.math.ebbinghaus import EbbinghausCurve
|
|
391
|
+
from superlocalmemory.learning.forgetting_scheduler import ForgettingScheduler
|
|
392
|
+
ebbinghaus = EbbinghausCurve(engine._config.forgetting)
|
|
393
|
+
scheduler = ForgettingScheduler(engine._db, ebbinghaus, engine._config.forgetting)
|
|
394
|
+
decay_result = scheduler.run_decay_cycle(pid, force=False)
|
|
395
|
+
results["forgetting"] = decay_result
|
|
396
|
+
except Exception as exc:
|
|
397
|
+
results["forgetting"] = {"error": str(exc)}
|
|
398
|
+
|
|
399
|
+
# 3. Behavioral pattern mining
|
|
400
|
+
try:
|
|
401
|
+
from superlocalmemory.learning.consolidation_worker import ConsolidationWorker
|
|
402
|
+
cw = ConsolidationWorker(engine._db, engine._config)
|
|
403
|
+
patterns = cw._generate_patterns(pid)
|
|
404
|
+
results["behavioral"] = {"patterns_mined": len(patterns)}
|
|
405
|
+
except Exception as exc:
|
|
406
|
+
results["behavioral"] = {"error": str(exc)}
|
|
407
|
+
|
|
408
|
+
return {"success": True, "profile": pid, **results}
|
|
409
|
+
|
|
410
|
+
except Exception as exc:
|
|
411
|
+
logger.exception("run_maintenance failed")
|
|
412
|
+
return {"success": False, "error": str(exc)}
|
|
@@ -31,7 +31,7 @@ logger = logging.getLogger(__name__)
|
|
|
31
31
|
|
|
32
32
|
_MAX_ROUNDS = 2
|
|
33
33
|
_SUFFICIENCY_SCORE_THRESHOLD = 0.6
|
|
34
|
-
_SKIP_TYPES = frozenset() #
|
|
34
|
+
_SKIP_TYPES = frozenset({"temporal"}) # S15: agentic harms temporal queries
|
|
35
35
|
|
|
36
36
|
_SUFFICIENCY_SYSTEM = (
|
|
37
37
|
"You evaluate whether retrieved context is sufficient to answer a query. "
|
|
@@ -68,6 +68,7 @@ class BM25Channel:
|
|
|
68
68
|
self._corpus: list[list[str]] = []
|
|
69
69
|
self._fact_ids: list[str] = []
|
|
70
70
|
self._fact_id_set: set[str] = set()
|
|
71
|
+
self._raw_texts: list[str] = [] # V3.3.12: raw content for phrase matching
|
|
71
72
|
self._bm25: BM25Plus | None = None
|
|
72
73
|
self._dirty: bool = False
|
|
73
74
|
self._loaded_profiles: set[str] = set()
|
|
@@ -96,15 +97,24 @@ class BM25Channel:
|
|
|
96
97
|
self._corpus.append(tokens)
|
|
97
98
|
self._fact_ids.append(fact.fact_id)
|
|
98
99
|
self._fact_id_set.add(fact.fact_id)
|
|
100
|
+
self._raw_texts.append(fact.content)
|
|
99
101
|
# Persist for next cold start
|
|
100
102
|
self._db.store_bm25_tokens(fact.fact_id, profile_id, tokens)
|
|
101
103
|
else:
|
|
104
|
+
# Load raw texts for phrase matching (V3.3.12)
|
|
105
|
+
fact_content_map = {}
|
|
106
|
+
try:
|
|
107
|
+
facts = self._db.get_all_facts(profile_id)
|
|
108
|
+
fact_content_map = {f.fact_id: f.content for f in facts}
|
|
109
|
+
except Exception:
|
|
110
|
+
pass
|
|
102
111
|
for fid, tokens in token_map.items():
|
|
103
112
|
if fid in self._fact_id_set:
|
|
104
113
|
continue
|
|
105
114
|
self._corpus.append(tokens)
|
|
106
115
|
self._fact_ids.append(fid)
|
|
107
116
|
self._fact_id_set.add(fid)
|
|
117
|
+
self._raw_texts.append(fact_content_map.get(fid, ""))
|
|
108
118
|
|
|
109
119
|
self._dirty = True
|
|
110
120
|
self._loaded_profiles.add(profile_id)
|
|
@@ -128,6 +138,9 @@ class BM25Channel:
|
|
|
128
138
|
self._corpus.append(tokens)
|
|
129
139
|
self._fact_ids.append(fact_id)
|
|
130
140
|
self._fact_id_set.add(fact_id)
|
|
141
|
+
if not hasattr(self, '_raw_texts'):
|
|
142
|
+
self._raw_texts = []
|
|
143
|
+
self._raw_texts.append(content)
|
|
131
144
|
self._dirty = True
|
|
132
145
|
|
|
133
146
|
# Persist for cold start
|
|
@@ -168,9 +181,16 @@ class BM25Channel:
|
|
|
168
181
|
scores = self._bm25.get_scores(query_tokens)
|
|
169
182
|
|
|
170
183
|
scored: list[tuple[str, float]] = []
|
|
184
|
+
# V3.3.12: Exact phrase bonus — boost facts containing the full query phrase
|
|
185
|
+
query_lower = query.lower().strip()
|
|
171
186
|
for i, score in enumerate(scores):
|
|
172
187
|
if score > 0.0:
|
|
173
|
-
|
|
188
|
+
bonus = score
|
|
189
|
+
# Exact phrase match bonus: if the query appears as a substring in the document
|
|
190
|
+
if len(query_lower) >= 5 and i < len(self._raw_texts):
|
|
191
|
+
if query_lower in self._raw_texts[i].lower():
|
|
192
|
+
bonus *= 1.5 # 50% boost for exact phrase match
|
|
193
|
+
scored.append((self._fact_ids[i], bonus))
|
|
174
194
|
|
|
175
195
|
scored.sort(key=lambda x: x[1], reverse=True)
|
|
176
196
|
return scored[:top_k]
|
|
@@ -75,6 +75,8 @@ class RetrievalEngine:
|
|
|
75
75
|
self._temporal: TemporalChannel | None = channels.get("temporal")
|
|
76
76
|
# Phase G: Hopfield channel (6th)
|
|
77
77
|
self._hopfield: HopfieldChannel | None = channels.get("hopfield")
|
|
78
|
+
# Phase 3: Spreading Activation channel
|
|
79
|
+
self._spreading_activation = channels.get("spreading_activation")
|
|
78
80
|
self._embedder = embedder
|
|
79
81
|
self._reranker = reranker
|
|
80
82
|
self._strategy = strategy or QueryStrategyClassifier()
|
|
@@ -101,6 +103,11 @@ class RetrievalEngine:
|
|
|
101
103
|
# Phase G: Hopfield channel (6th) — needs embedding input
|
|
102
104
|
if self._hopfield is not None:
|
|
103
105
|
self._registry.register_channel("hopfield", self._hopfield, needs_embedding=True)
|
|
106
|
+
# Phase 3: Spreading Activation (5th channel) — needs embedding input
|
|
107
|
+
if self._spreading_activation is not None:
|
|
108
|
+
self._registry.register_channel(
|
|
109
|
+
"spreading_activation", self._spreading_activation, needs_embedding=True,
|
|
110
|
+
)
|
|
104
111
|
|
|
105
112
|
def recall(
|
|
106
113
|
self, query: str, profile_id: str,
|
|
@@ -139,7 +146,7 @@ class RetrievalEngine:
|
|
|
139
146
|
fused = weighted_rrf(ch_results, strat.weights, k=self._config.rrf_k)
|
|
140
147
|
|
|
141
148
|
# Bridge discovery for multi-hop queries
|
|
142
|
-
if self._bridge is not None and strat.query_type
|
|
149
|
+
if self._bridge is not None and strat.query_type in ("multi_hop", "entity", "factual", "general"):
|
|
143
150
|
try:
|
|
144
151
|
seed_ids = [fr.fact_id for fr in fused[:10]]
|
|
145
152
|
bridges = self._bridge.discover(seed_ids, profile_id, max_bridges=10)
|
|
@@ -221,6 +228,7 @@ class RetrievalEngine:
|
|
|
221
228
|
needs_embedding = (
|
|
222
229
|
(self._semantic is not None and "semantic" not in disabled)
|
|
223
230
|
or (self._hopfield is not None and "hopfield" not in disabled)
|
|
231
|
+
or (self._spreading_activation is not None and "spreading_activation" not in disabled)
|
|
224
232
|
)
|
|
225
233
|
if needs_embedding:
|
|
226
234
|
try:
|
|
@@ -269,6 +277,23 @@ class RetrievalEngine:
|
|
|
269
277
|
except Exception as exc:
|
|
270
278
|
logger.warning("Hopfield channel: %s", exc)
|
|
271
279
|
|
|
280
|
+
# Phase 3: Spreading Activation channel (5th) — graph-based associative recall
|
|
281
|
+
if self._spreading_activation is not None and q_emb is not None and "spreading_activation" not in disabled:
|
|
282
|
+
try:
|
|
283
|
+
r = self._spreading_activation.search(q_emb, profile_id, self._config.bm25_top_k)
|
|
284
|
+
if r:
|
|
285
|
+
out["spreading_activation"] = r
|
|
286
|
+
except Exception as exc:
|
|
287
|
+
logger.warning("Spreading activation channel: %s", exc)
|
|
288
|
+
|
|
289
|
+
# Apply registered post-retrieval filters (forgetting filter, etc.)
|
|
290
|
+
if hasattr(self, '_registry') and self._registry._filters:
|
|
291
|
+
for fn in self._registry._filters:
|
|
292
|
+
try:
|
|
293
|
+
out = fn(out, profile_id, None)
|
|
294
|
+
except Exception as exc:
|
|
295
|
+
logger.warning("Post-retrieval filter failed: %s", exc)
|
|
296
|
+
|
|
272
297
|
return out
|
|
273
298
|
|
|
274
299
|
# -- Fact loading -------------------------------------------------------
|
|
@@ -336,12 +361,24 @@ class RetrievalEngine:
|
|
|
336
361
|
|
|
337
362
|
score_map = {fact.fact_id: score for fact, score in scored}
|
|
338
363
|
|
|
364
|
+
# Min-max normalize CE scores to [0, 1] within the batch instead of
|
|
365
|
+
# sigmoid (which compresses the useful discrimination range).
|
|
366
|
+
ce_values = list(score_map.values())
|
|
367
|
+
ce_min = min(ce_values) if ce_values else 0.0
|
|
368
|
+
ce_max = max(ce_values) if ce_values else 1.0
|
|
369
|
+
ce_range = ce_max - ce_min if ce_max > ce_min else 1.0
|
|
370
|
+
|
|
371
|
+
# Also normalize RRF scores so both terms contribute meaningfully
|
|
372
|
+
rrf_values = [fr.fused_score for fr in fused]
|
|
373
|
+
rrf_max = max(rrf_values) if rrf_values else 1.0
|
|
374
|
+
rrf_max = rrf_max if rrf_max > 0 else 1.0
|
|
375
|
+
|
|
339
376
|
updated = [
|
|
340
377
|
FusionResult(
|
|
341
378
|
fact_id=fr.fact_id,
|
|
342
379
|
fused_score=(
|
|
343
|
-
alpha *
|
|
344
|
-
+ (1.0 - alpha) * fr.fused_score
|
|
380
|
+
alpha * ((score_map.get(fr.fact_id, ce_min) - ce_min) / ce_range)
|
|
381
|
+
+ (1.0 - alpha) * (fr.fused_score / rrf_max)
|
|
345
382
|
),
|
|
346
383
|
channel_ranks=fr.channel_ranks,
|
|
347
384
|
channel_scores=fr.channel_scores,
|
|
@@ -425,12 +462,10 @@ class RetrievalEngine:
|
|
|
425
462
|
# due to BM25 name-matching (greetings like "Hey Caroline!" score high
|
|
426
463
|
# on BM25 but have zero retrieval value)
|
|
427
464
|
content_len = len(fact.content.strip())
|
|
428
|
-
if content_len <
|
|
429
|
-
quality = 0.
|
|
430
|
-
elif content_len <
|
|
431
|
-
quality = 0.
|
|
432
|
-
elif content_len < 80:
|
|
433
|
-
quality = 0.8
|
|
465
|
+
if content_len < 10:
|
|
466
|
+
quality = 0.3
|
|
467
|
+
elif content_len < 25:
|
|
468
|
+
quality = 0.7
|
|
434
469
|
else:
|
|
435
470
|
quality = 1.0
|
|
436
471
|
|
|
@@ -67,6 +67,12 @@ def extract_query_entities(query: str) -> list[str]:
|
|
|
67
67
|
_add(m.group(0))
|
|
68
68
|
for m in re.finditer(r'"([^"]+)"', query):
|
|
69
69
|
_add(m.group(1).strip())
|
|
70
|
+
# Also extract multi-word capitalized sequences (e.g. "New York", "San Francisco")
|
|
71
|
+
for m in re.finditer(r'\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b', query):
|
|
72
|
+
_add(m.group(1))
|
|
73
|
+
# Extract all-caps abbreviations (e.g. NYU, MIT, UCLA) — min 2 chars
|
|
74
|
+
for m in re.finditer(r'\b([A-Z]{2,})\b', query):
|
|
75
|
+
_add(m.group(1))
|
|
70
76
|
|
|
71
77
|
return candidates
|
|
72
78
|
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
"""SuperLocalMemory V3 — Weighted Reciprocal Rank Fusion.
|
|
6
6
|
|
|
7
|
-
Single-pass RRF with k=
|
|
7
|
+
Single-pass RRF with k=15 for sharp rank discrimination on small candidate pools.
|
|
8
8
|
V1 had triple re-fusion which destroyed rankings — fixed in V2.
|
|
9
9
|
|
|
10
10
|
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
@@ -27,7 +27,7 @@ class FusionResult:
|
|
|
27
27
|
def weighted_rrf(
|
|
28
28
|
channels: dict[str, list[tuple[str, float]]],
|
|
29
29
|
weights: dict[str, float],
|
|
30
|
-
k: int =
|
|
30
|
+
k: int = 15,
|
|
31
31
|
max_rank_penalty: int = 1000,
|
|
32
32
|
) -> list[FusionResult]:
|
|
33
33
|
"""Fuse ranked lists via Weighted Reciprocal Rank Fusion.
|
|
@@ -288,8 +288,8 @@ class HopfieldChannel:
|
|
|
288
288
|
):
|
|
289
289
|
return (self._cached_matrix, self._cached_fact_ids)
|
|
290
290
|
|
|
291
|
-
# Step 2: Load
|
|
292
|
-
facts = self._db.get_all_facts(profile_id)
|
|
291
|
+
# Step 2: Load facts (V3.3.12: cap to most recent 5000 to bound memory)
|
|
292
|
+
facts = self._db.get_all_facts(profile_id)[:5000]
|
|
293
293
|
if not facts:
|
|
294
294
|
return (None, [])
|
|
295
295
|
|
|
@@ -35,7 +35,9 @@ _live_rerankers: set[weakref.ref] = set()
|
|
|
35
35
|
logger = logging.getLogger(__name__)
|
|
36
36
|
|
|
37
37
|
_IDLE_TIMEOUT_SECONDS = 120 # 2 min → kill worker
|
|
38
|
-
|
|
38
|
+
# V3.3.12: Configurable via SLM_RERANKER_IDLE_TIMEOUT env var
|
|
39
|
+
_IDLE_TIMEOUT_SECONDS = int(os.environ.get("SLM_RERANKER_IDLE_TIMEOUT", _IDLE_TIMEOUT_SECONDS))
|
|
40
|
+
_SUBPROCESS_RESPONSE_TIMEOUT = 180 # V3.3.12: 180s (was 120s) for stressed system respawns
|
|
39
41
|
_WORKER_RECYCLE_AFTER = 500 # Recycle after N requests
|
|
40
42
|
|
|
41
43
|
|
|
@@ -129,8 +131,23 @@ class CrossEncoderReranker:
|
|
|
129
131
|
finally:
|
|
130
132
|
self._worker_loading = False
|
|
131
133
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
+
self._warmup_thread = threading.Thread(target=_warmup, daemon=True, name="ce-warmup")
|
|
135
|
+
self._warmup_thread.start()
|
|
136
|
+
|
|
137
|
+
def warmup_sync(self, timeout: float = 120.0) -> bool:
|
|
138
|
+
"""Block until reranker model is loaded. Returns True if ready.
|
|
139
|
+
|
|
140
|
+
V3.3.12: Critical for benchmarks and first-recall quality.
|
|
141
|
+
Without this, first 30-60s of recalls get no reranking (-30.7pp).
|
|
142
|
+
"""
|
|
143
|
+
if self._model_loaded:
|
|
144
|
+
return True
|
|
145
|
+
if not self._worker_loading and not self._model_loaded:
|
|
146
|
+
self._start_background_warmup()
|
|
147
|
+
t = getattr(self, '_warmup_thread', None)
|
|
148
|
+
if t is not None:
|
|
149
|
+
t.join(timeout=timeout)
|
|
150
|
+
return self._model_loaded
|
|
134
151
|
|
|
135
152
|
# ------------------------------------------------------------------
|
|
136
153
|
# Worker management (mirrors EmbeddingService pattern)
|
|
@@ -304,13 +321,13 @@ class CrossEncoderReranker:
|
|
|
304
321
|
|
|
305
322
|
documents = [fact.content for fact, _ in candidates]
|
|
306
323
|
|
|
307
|
-
#
|
|
308
|
-
#
|
|
324
|
+
# V3.3.12: Increased timeout 10s→60s — L-12-v2 needs PyTorch + ONNX load.
|
|
325
|
+
# Critical: Paper 2 ablation showed -30.7pp without reranking.
|
|
309
326
|
resp = self._send_request({
|
|
310
327
|
"cmd": "rerank",
|
|
311
328
|
"query": query,
|
|
312
329
|
"documents": documents,
|
|
313
|
-
}, timeout=
|
|
330
|
+
}, timeout=60.0)
|
|
314
331
|
|
|
315
332
|
if resp is None or not resp.get("ok"):
|
|
316
333
|
# Fallback: return by existing score
|
|
@@ -182,7 +182,7 @@ class SemanticChannel:
|
|
|
182
182
|
else:
|
|
183
183
|
sim = cos_sim
|
|
184
184
|
|
|
185
|
-
if sim > 0.
|
|
185
|
+
if sim > 0.05:
|
|
186
186
|
scored.append((fact.fact_id, sim))
|
|
187
187
|
|
|
188
188
|
scored.sort(key=lambda x: x[1], reverse=True)
|
|
@@ -235,7 +235,7 @@ class SemanticChannel:
|
|
|
235
235
|
else:
|
|
236
236
|
sim = cos_sim
|
|
237
237
|
|
|
238
|
-
if sim > 0.
|
|
238
|
+
if sim > 0.05:
|
|
239
239
|
scored.append((fact.fact_id, sim))
|
|
240
240
|
|
|
241
241
|
scored.sort(key=lambda x: x[1], reverse=True)
|
|
@@ -124,6 +124,16 @@ class TemporalChannel:
|
|
|
124
124
|
import re
|
|
125
125
|
_PROPER_RE = re.compile(r"\b([A-Z][a-z]+)\b")
|
|
126
126
|
names = [m.group(1) for m in _PROPER_RE.finditer(query)]
|
|
127
|
+
# Also try title-cased version for lowercase queries
|
|
128
|
+
if not names:
|
|
129
|
+
names = [m.group(1) for m in _PROPER_RE.finditer(query.title())]
|
|
130
|
+
# Filter out common words from title-casing
|
|
131
|
+
_stop = {"What", "When", "Where", "Who", "Which", "How", "Does", "Did",
|
|
132
|
+
"The", "That", "This", "There", "Then", "Have", "Has", "Had",
|
|
133
|
+
"About", "After", "Before", "From", "With", "Would", "Could",
|
|
134
|
+
"Should", "Will", "Because", "Also", "Just", "Like", "Know",
|
|
135
|
+
"Think", "Tell", "Said"}
|
|
136
|
+
names = [n for n in names if n not in _stop]
|
|
127
137
|
if not names:
|
|
128
138
|
return []
|
|
129
139
|
|
|
@@ -146,7 +156,10 @@ class TemporalChannel:
|
|
|
146
156
|
fid = dict(row)["fact_id"]
|
|
147
157
|
if fid not in seen:
|
|
148
158
|
seen.add(fid)
|
|
149
|
-
|
|
159
|
+
# Rank by position (first events more likely relevant) instead
|
|
160
|
+
# of flat 0.85 which loses discrimination
|
|
161
|
+
rank_score = 0.85 - len(seen) * 0.02
|
|
162
|
+
results.append((fid, max(0.3, rank_score)))
|
|
150
163
|
|
|
151
164
|
return results
|
|
152
165
|
|
|
@@ -442,6 +442,8 @@ CREATE INDEX IF NOT EXISTS idx_edges_target
|
|
|
442
442
|
ON graph_edges (profile_id, target_id);
|
|
443
443
|
CREATE INDEX IF NOT EXISTS idx_edges_type
|
|
444
444
|
ON graph_edges (profile_id, edge_type);
|
|
445
|
+
CREATE INDEX IF NOT EXISTS idx_edges_exists_check
|
|
446
|
+
ON graph_edges (profile_id, source_id, target_id, edge_type);
|
|
445
447
|
"""
|
|
446
448
|
|
|
447
449
|
|