superlocalmemory 3.3.11 → 3.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/package.json +1 -1
  2. package/pyproject.toml +1 -1
  3. package/src/superlocalmemory/core/config.py +7 -4
  4. package/src/superlocalmemory/core/embeddings.py +3 -1
  5. package/src/superlocalmemory/core/engine_wiring.py +1 -1
  6. package/src/superlocalmemory/core/recall_pipeline.py +24 -0
  7. package/src/superlocalmemory/core/recall_worker.py +22 -4
  8. package/src/superlocalmemory/core/reranker_worker.py +220 -0
  9. package/src/superlocalmemory/core/store_pipeline.py +12 -2
  10. package/src/superlocalmemory/encoding/fact_extractor.py +16 -8
  11. package/src/superlocalmemory/encoding/graph_builder.py +14 -1
  12. package/src/superlocalmemory/learning/adaptive.py +2 -2
  13. package/src/superlocalmemory/math/fisher_quantized.py +8 -4
  14. package/src/superlocalmemory/math/langevin.py +15 -2
  15. package/src/superlocalmemory/mcp/resources.py +2 -2
  16. package/src/superlocalmemory/mcp/shared.py +27 -0
  17. package/src/superlocalmemory/mcp/tools_active.py +31 -1
  18. package/src/superlocalmemory/mcp/tools_core.py +15 -9
  19. package/src/superlocalmemory/mcp/tools_v28.py +2 -2
  20. package/src/superlocalmemory/mcp/tools_v3.py +3 -0
  21. package/src/superlocalmemory/mcp/tools_v33.py +68 -7
  22. package/src/superlocalmemory/retrieval/agentic.py +1 -1
  23. package/src/superlocalmemory/retrieval/bm25_channel.py +21 -1
  24. package/src/superlocalmemory/retrieval/engine.py +44 -9
  25. package/src/superlocalmemory/retrieval/entity_channel.py +6 -0
  26. package/src/superlocalmemory/retrieval/fusion.py +2 -2
  27. package/src/superlocalmemory/retrieval/hopfield_channel.py +2 -2
  28. package/src/superlocalmemory/retrieval/reranker.py +23 -6
  29. package/src/superlocalmemory/retrieval/semantic_channel.py +2 -2
  30. package/src/superlocalmemory/retrieval/temporal_channel.py +14 -1
  31. package/src/superlocalmemory/storage/schema.py +2 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlocalmemory",
3
- "version": "3.3.11",
3
+ "version": "3.3.12",
4
4
  "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
5
5
  "keywords": [
6
6
  "ai-memory",
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "superlocalmemory"
3
- version = "3.3.11"
3
+ version = "3.3.12"
4
4
  description = "Information-geometric agent memory with mathematical guarantees"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -86,10 +86,10 @@ class LLMConfig:
86
86
  class ChannelWeights:
87
87
  """Retrieval channel weights — 5 channels, query-adaptive."""
88
88
 
89
- # Entity-linked facts are high-precision matches that rank above BM25.
90
- semantic: float = 1.2
89
+ # Semantic should dominate for conversational retrieval (paraphrase matters most).
90
+ semantic: float = 1.5
91
91
  bm25: float = 1.0
92
- entity_graph: float = 1.3
92
+ entity_graph: float = 1.0
93
93
  temporal: float = 1.0
94
94
  spreading_activation: float = 1.0 # Phase 3: 5th channel (BC-08: default value)
95
95
  hopfield: float = 0.8 # Phase G: 6th channel (Hopfield associative memory)
@@ -143,7 +143,7 @@ class RetrievalConfig:
143
143
  """Configuration for the retrieval (recall) pipeline."""
144
144
 
145
145
  # Fusion
146
- rrf_k: int = 60 # RRF smoothing constant (D116: k=60 for diversity)
146
+ rrf_k: int = 15 # RRF smoothing constant (k=15 for candidate pools of 50-200)
147
147
  top_k: int = 20 # Final results to return
148
148
 
149
149
  # Per-channel
@@ -740,6 +740,9 @@ class SLMConfig:
740
740
  retrieval=RetrievalConfig(
741
741
  # V3.3.2: ONNX cross-encoder enabled for all modes (~200MB)
742
742
  use_cross_encoder=True,
743
+ # Mode A is zero-LLM: disable agentic retrieval (it replaces
744
+ # precision-tuned fusion with crude heuristic expansions)
745
+ agentic_max_rounds=0,
743
746
  ),
744
747
  math=MathConfig(
745
748
  sheaf_contradiction_threshold=0.45, # 768d threshold
@@ -50,7 +50,9 @@ class DimensionMismatchError(RuntimeError):
50
50
 
51
51
 
52
52
  _IDLE_TIMEOUT_SECONDS = 120 # 2 minutes — kill worker after idle
53
- _SUBPROCESS_RESPONSE_TIMEOUT = 120 # V3.3.2: 120s for ONNX cold start
53
+ # V3.3.12: Configurable via SLM_EMBED_IDLE_TIMEOUT env var (seconds)
54
+ _IDLE_TIMEOUT_SECONDS = int(os.environ.get("SLM_EMBED_IDLE_TIMEOUT", _IDLE_TIMEOUT_SECONDS))
55
+ _SUBPROCESS_RESPONSE_TIMEOUT = 180 # V3.3.12: 180s (was 120s) — respawns on stressed systems need more time
54
56
  _WORKER_RECYCLE_AFTER = 1000 # Recycle worker after N requests (C++ fragmentation prevention)
55
57
 
56
58
 
@@ -339,7 +339,7 @@ def _init_spreading_activation(
339
339
  SpreadingActivation,
340
340
  SpreadingActivationConfig,
341
341
  )
342
- sa_config = SpreadingActivationConfig(enabled=False)
342
+ sa_config = SpreadingActivationConfig(enabled=True)
343
343
  return SpreadingActivation(
344
344
  db=db, vector_store=vector_store, config=sa_config,
345
345
  )
@@ -192,6 +192,30 @@ def run_recall(
192
192
  except Exception as exc:
193
193
  logger.debug("Access log batch store failed: %s", exc)
194
194
 
195
+ # V3.3.12: Wire BehavioralTracker.record_query() into live recall pipeline
196
+ try:
197
+ from superlocalmemory.learning.behavioral import BehavioralTracker
198
+ _tracker = BehavioralTracker(db)
199
+ _tracker.record_query(
200
+ profile_id=profile_id, query=query,
201
+ query_type=response.query_type,
202
+ result_count=len(response.results),
203
+ )
204
+ except Exception as exc:
205
+ logger.debug("Behavioral tracking: %s", exc)
206
+
207
+ # V3.3.12: Spaced repetition update on recall (Ebbinghaus on_access_event)
208
+ if response.results:
209
+ try:
210
+ from superlocalmemory.learning.forgetting_scheduler import ForgettingScheduler
211
+ from superlocalmemory.math.ebbinghaus import EbbinghausCurve
212
+ _ebbinghaus = EbbinghausCurve(config.forgetting)
213
+ _fsched = ForgettingScheduler(db, _ebbinghaus, config.forgetting)
214
+ for r in response.results[:10]:
215
+ _fsched.on_access_event(r.fact.fact_id, profile_id)
216
+ except Exception as exc:
217
+ logger.debug("Spaced repetition update: %s", exc)
218
+
195
219
  # Phase 3: Hebbian strengthening for co-accessed facts
196
220
  if auto_linker and response.results:
197
221
  try:
@@ -187,10 +187,28 @@ def _handle_update_memory(fact_id: str, content: str, agent_id: str = "system")
187
187
  if not rows:
188
188
  return {"ok": False, "error": f"Memory {fact_id} not found"}
189
189
  old_content = dict(rows[0]).get("content", "")[:80]
190
- engine._db.execute(
191
- "UPDATE atomic_facts SET content = ? WHERE fact_id = ?",
192
- (content, fact_id),
193
- )
190
+ # V3.3.12: Re-embed updated content so semantic search + BM25 stay consistent.
191
+ # Previously only the text column was updated, leaving stale embeddings.
192
+ updates: dict = {"content": content}
193
+ if engine._embedder:
194
+ try:
195
+ new_emb = engine._embedder.embed(content)
196
+ if new_emb:
197
+ updates["embedding"] = new_emb
198
+ fm, fv = engine._embedder.compute_fisher_params(new_emb)
199
+ updates["fisher_mean"] = fm
200
+ updates["fisher_variance"] = fv
201
+ except Exception:
202
+ pass
203
+ engine._db.update_fact(fact_id, updates)
204
+ # Update BM25 index for the new content
205
+ if hasattr(engine, '_retrieval_engine') and engine._retrieval_engine:
206
+ bm25 = getattr(engine._retrieval_engine, '_bm25', None)
207
+ if bm25:
208
+ try:
209
+ bm25.add(fact_id, content, pid)
210
+ except Exception:
211
+ pass
194
212
  import logging as _logging
195
213
  _logging.getLogger("superlocalmemory.audit").info(
196
214
  "UPDATE fact_id=%s by agent=%s old=%s new=%s",
@@ -0,0 +1,220 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Subprocess reranker worker — isolates PyTorch/ONNX from main process.
6
+
7
+ Same pattern as embedding_worker.py. The main process stays at ~60 MB.
8
+ All cross-encoder model memory lives in this worker subprocess.
9
+
10
+ Protocol (JSON over stdin/stdout):
11
+ Request: {"cmd": "rerank", "query": "...", "documents": ["...", ...]}
12
+ Response: {"ok": true, "scores": [0.95, 0.32, ...]}
13
+
14
+ Request: {"cmd": "score", "query": "...", "document": "..."}
15
+ Response: {"ok": true, "score": 0.87}
16
+
17
+ Request: {"cmd": "ping"}
18
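+ Response: {"ok": true, "loaded": true, "backend": "onnx", "model": "..."}
+
+ Request: {"cmd": "load", "model_name": "...", "backend": "onnx"}
+ Response: {"ok": true, "backend": "onnx", "model": "..."}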
19
+
20
+ Request: {"cmd": "quit"}
21
+ (worker exits)
22
+
23
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import json
29
+ import os
30
+ import platform
31
+ import signal
32
+ import struct
33
+ import sys
34
+ import threading
35
+
36
+ # Force CPU BEFORE any torch import
37
+ os.environ["CUDA_VISIBLE_DEVICES"] = ""
38
+ os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"
39
+ os.environ["PYTORCH_MPS_MEM_LIMIT"] = "0"
40
+ os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
41
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
42
+ os.environ["TORCH_DEVICE"] = "cpu"
43
+
44
+ # SIGTERM bridge for Docker/systemd
45
+ if sys.platform != "win32":
46
+ signal.signal(signal.SIGTERM, lambda *_: sys.exit(0))
47
+
48
+
49
def _pid_alive_windows(pid: int) -> bool:
    """Return True if *pid* names a running process (Windows only).

    ``os.kill(pid, 0)`` must not be used on Windows: signal value 0 is
    ``CTRL_C_EVENT`` there, so it would deliver a console control event to
    the target instead of probing it. The process is opened with SYNCHRONIZE
    access and polled with a zero timeout instead.
    """
    import ctypes
    from ctypes import wintypes

    synchronize = 0x00100000        # SYNCHRONIZE access right
    wait_timeout = 0x00000102       # WAIT_TIMEOUT: handle not signalled yet
    error_access_denied = 5         # process exists but cannot be opened

    kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
    kernel32.OpenProcess.argtypes = (wintypes.DWORD, wintypes.BOOL, wintypes.DWORD)
    kernel32.OpenProcess.restype = wintypes.HANDLE
    kernel32.WaitForSingleObject.argtypes = (wintypes.HANDLE, wintypes.DWORD)
    kernel32.WaitForSingleObject.restype = wintypes.DWORD
    kernel32.CloseHandle.argtypes = (wintypes.HANDLE,)
    kernel32.CloseHandle.restype = wintypes.BOOL

    handle = kernel32.OpenProcess(synchronize, False, pid)
    if not handle:
        return ctypes.get_last_error() == error_access_denied
    try:
        # A process handle becomes signalled once the process has exited.
        return kernel32.WaitForSingleObject(handle, 0) == wait_timeout
    finally:
        kernel32.CloseHandle(handle)


def _parent_alive(parent_pid: int) -> bool:
    """Return True while the process that spawned this worker still exists."""
    if sys.platform == "win32":
        return _pid_alive_windows(parent_pid)
    # POSIX re-parents an orphan, so a changed ppid means the parent is gone
    # even when its PID has already been reused by an unrelated process.
    if os.getppid() != parent_pid:
        return False
    try:
        os.kill(parent_pid, 0)  # signal 0: existence check only, nothing is sent
    except OSError:
        return False
    return True


def _start_parent_watchdog() -> None:
    """Monitor parent process — self-terminate if parent dies.

    Prevents orphaned workers that consume 1+ GB each when the parent
    process crashes, is killed, or exits without cleanup.

    The parent PID is captured once at start-up; a daemon thread named
    ``parent-watchdog`` re-checks it every 5 seconds and calls
    ``os._exit(0)`` as soon as the parent is no longer alive.

    V3.3.7: Added after incident where ~30 orphaned workers consumed 33 GB.
    """
    parent_pid = os.getppid()

    def _watch() -> None:
        import time
        while True:
            time.sleep(5)
            if not _parent_alive(parent_pid):
                # Parent is dead — hard exit, skipping interpreter cleanup.
                os._exit(0)

    watcher = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
    watcher.start()
71
+
72
+
73
def _detect_onnx_variant() -> str:
    """Pick the ONNX model file best suited to the running platform.

    Returns the int8 ARM64 file on macOS/arm64, the AVX2 uint8 file on
    64-bit x86_64/amd64, and the unquantized ``onnx/model.onnx`` otherwise.
    """
    machine = platform.machine().lower()
    pointer_bits = struct.calcsize("P") * 8

    on_apple_silicon = sys.platform == "darwin" and machine in ("arm64", "aarch64")
    if on_apple_silicon:
        return "onnx/model_qint8_arm64.onnx"

    on_x86_64 = machine in ("x86_64", "amd64") and pointer_bits == 64
    return "onnx/model_quint8_avx2.onnx" if on_x86_64 else "onnx/model.onnx"
83
+
84
+
85
_DEFAULT_RERANKER_MODEL = "cross-encoder/ms-marco-MiniLM-L-6-v2"


def _predict_scores(model, pairs: list):
    """Score (query, document) pairs, under torch.inference_mode() if torch imports."""
    try:
        import torch
    except ImportError:
        return model.predict(pairs)
    with torch.inference_mode():
        return model.predict(pairs)


def _worker_main() -> None:
    """Main loop: read JSON requests from stdin, write responses to stdout.

    One request per line, one JSON response per request. Commands handled:
    ``quit`` (leave the loop), ``ping``, ``load``, ``rerank`` and ``score``.
    ``rerank``/``score`` load the model on first use, taking ``model_name``
    and ``backend`` from the request when present. Malformed lines and
    requests that are not JSON objects get an error response instead of
    terminating the worker.
    """
    _start_parent_watchdog()  # V3.3.7: self-terminate if parent dies

    model = None
    active_backend = ""
    model_name = ""

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue
        try:
            req = json.loads(line)
        except json.JSONDecodeError:
            _respond({"ok": False, "error": "Invalid JSON"})
            continue
        if not isinstance(req, dict):
            # Valid JSON but not an object (e.g. `[1]`): req.get would raise
            # AttributeError and take the whole worker down.
            _respond({"ok": False, "error": "Request must be a JSON object"})
            continue

        cmd = req.get("cmd", "")

        if cmd == "quit":
            break

        if cmd == "ping":
            _respond({
                "ok": True,
                "loaded": model is not None,
                "backend": active_backend,
                "model": model_name,
            })
            continue

        if cmd == "load":
            model, active_backend, model_name = _load_model(
                req.get("model_name", _DEFAULT_RERANKER_MODEL),
                req.get("backend", "onnx"),
            )
            _respond({
                "ok": model is not None,
                "backend": active_backend,
                "model": model_name,
            })
            continue

        if cmd in ("rerank", "score"):
            query = req.get("query", "")
            if cmd == "rerank":
                documents = req.get("documents", [])
                missing_msg = "Missing query or documents"
            else:
                document = req.get("document", "")
                documents = [document] if document else []
                missing_msg = "Missing query or document"
            if not query or not documents:
                _respond({"ok": False, "error": missing_msg})
                continue

            if model is None:
                # Auto-load with request-supplied or default settings.
                model, active_backend, model_name = _load_model(
                    req.get("model_name", _DEFAULT_RERANKER_MODEL),
                    req.get("backend", "onnx"),
                )
            if model is None:
                _respond({"ok": False, "error": "Model load failed"})
                continue

            try:
                scores = _predict_scores(model, [(query, doc) for doc in documents])
                if cmd == "rerank":
                    _respond({"ok": True, "scores": [float(s) for s in scores]})
                else:
                    _respond({"ok": True, "score": float(scores[0])})
            except Exception as exc:
                # Report inference failures to the caller; keep the worker alive.
                _respond({"ok": False, "error": str(exc)})
            continue

        _respond({"ok": False, "error": f"Unknown command: {cmd}"})
184
+
185
+
186
def _load_model(
    name: str, backend: str,
) -> tuple:
    """Instantiate the cross-encoder and report which backend ended up in use.

    Returns ``(model, backend_name, model_name)``. With ``backend == "onnx"``
    the platform-specific ONNX file is tried first; if that raises, or for
    any other backend value, the default ``CrossEncoder(name)`` is built and
    reported as ``"pytorch"``. Any failure, including a missing
    ``sentence_transformers`` package, yields ``(None, "", "")``.
    """
    try:
        from sentence_transformers import CrossEncoder

        onnx_model = None
        if backend == "onnx":
            try:
                onnx_model = CrossEncoder(
                    name,
                    backend="onnx",
                    model_kwargs={"file_name": _detect_onnx_variant()},
                )
            except Exception:
                # ONNX failed → fall through to the PyTorch path below.
                onnx_model = None
        if onnx_model is not None:
            return onnx_model, "onnx", name

        # PyTorch fallback (or explicit pytorch backend)
        return CrossEncoder(name), "pytorch", name
    except Exception:
        # ImportError is a subclass of Exception, so one handler covers both.
        return None, "", ""
211
+
212
+
213
def _respond(data: dict) -> None:
    """Serialize *data* as one JSON line on stdout and flush right away."""
    out = sys.stdout
    out.write(f"{json.dumps(data)}\n")
    out.flush()
217
+
218
+
219
+ if __name__ == "__main__":
220
+ _worker_main()
@@ -170,13 +170,23 @@ def run_store(
170
170
  # V3.3.11: Also store raw content as a verbatim fact to preserve details
171
171
  # that fact extraction may abstract away (dates, names, specifics).
172
172
  # This ensures BM25 and semantic search can always find the original text.
173
+ # V3.3.12: Extract entities from verbatim content so entity channel + temporal
174
+ # channel can find it (was entities=[] which made 4/6 channels blind).
173
175
  if content.strip() and len(content.strip()) >= 20:
174
176
  import uuid
177
+ import re as _re
178
+ _verbatim_text = content.strip()
179
+ # Extract entities using the same regex as fact_extractor
180
+ _ent_re = _re.compile(r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3})\b")
181
+ _entity_set = {m.group(1) for m in _ent_re.finditer(_verbatim_text)}
182
+ # Also extract all-caps abbreviations (NYU, MIT, etc.) — dedup with first set
183
+ _entity_set |= {m.group(1) for m in _re.finditer(r'\b([A-Z]{2,})\b', _verbatim_text)}
184
+ _verbatim_entities = sorted(_entity_set)
175
185
  verbatim = AtomicFact(
176
186
  fact_id=uuid.uuid4().hex[:16],
177
- content=content.strip(),
187
+ content=_verbatim_text,
178
188
  fact_type=FactType.EPISODIC,
179
- entities=[],
189
+ entities=_verbatim_entities,
180
190
  session_id=session_id,
181
191
  observation_date=parsed_date,
182
192
  confidence=0.9,
@@ -84,7 +84,8 @@ _INTERVAL_RE = re.compile(
84
84
  )
85
85
 
86
86
  _ENTITY_RE = re.compile(
87
- r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3})\b" # Capitalized word sequences
87
+ r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3})\b" # Capitalized word sequences
88
+ r"|\b([A-Z]{2,})\b" # ALL-CAPS abbreviations (NYU, MIT)
88
89
  )
89
90
 
90
91
  _QUOTED_RE = re.compile(r'"([^"]+)"') # Quoted strings as entities
@@ -243,7 +244,7 @@ def _extract_entities(text: str) -> list[str]:
243
244
 
244
245
  # Capitalized word sequences (proper nouns)
245
246
  for match in _ENTITY_RE.finditer(text):
246
- candidate = match.group(1).strip()
247
+ candidate = (match.group(1) or match.group(2) or "").strip()
247
248
  # Filter common English words that start sentences
248
249
  # Check first word of multi-word candidates against stop list
249
250
  _first_word = candidate.split()[0].lower() if candidate else ""
@@ -495,10 +496,17 @@ class FactExtractor:
495
496
  ) -> list[AtomicFact]:
496
497
  """Rule-based extraction: regex entities, keyword classification, scoring."""
497
498
  combined = "\n".join(turns)
498
- sentences = _split_sentences(combined)
499
- if not sentences:
500
- # If no proper sentences, treat each turn as a sentence
501
- sentences = [t.strip() for t in turns if len(t.strip()) >= 8]
499
+ raw_sentences = _split_sentences(combined)
500
+ if not raw_sentences:
501
+ raw_sentences = [t.strip() for t in turns if len(t.strip()) >= 8]
502
+
503
+ # V3.3.12: Sliding window of 2 sentences to preserve cross-sentence context.
504
+ # "She enrolled at NYU. Starting January 2024." → becomes one combined fact.
505
+ sentences = list(raw_sentences) # Keep originals
506
+ for i in range(len(raw_sentences) - 1):
507
+ pair = raw_sentences[i].rstrip() + " " + raw_sentences[i + 1].lstrip()
508
+ if len(pair) <= 300: # Only combine if not too long
509
+ sentences.append(pair)
502
510
 
503
511
  # Build entity frequency map for importance scoring
504
512
  entity_freq: dict[str, int] = {}
@@ -549,8 +557,8 @@ class FactExtractor:
549
557
  if importance < self._config.min_fact_confidence:
550
558
  continue
551
559
 
552
- # Determine speaker from turn position heuristic
553
- speaker = self._infer_speaker(normalized, turns, speaker_a, speaker_b)
560
+ # V3.3.12: Speaker inference removed — its result was never stored in AtomicFact.
561
+ # The speaker info is preserved in verbatim facts via [Speaker]: prefix.
554
562
 
555
563
  facts.append(AtomicFact(
556
564
  fact_id=_new_id(),
@@ -142,17 +142,25 @@ class GraphBuilder:
142
142
 
143
143
  # -- Edge builders (private) -------------------------------------------
144
144
 
145
+ # V3.3.12: Cap entity edges per entity to prevent O(n²) explosion.
146
+ # With 500+ facts sharing a popular entity, creating an edge to each
147
+ # produced 44K+ edges and 22-min ingestion. Cap to 20 most recent per entity.
148
+ _MAX_ENTITY_EDGES_PER_ENTITY: int = 20
149
+
145
150
  def _build_entity_edges(
146
151
  self, new_fact: AtomicFact, profile_id: str,
147
152
  ) -> list[GraphEdge]:
148
- """ENTITY edges: shared canonical entity — NO 50-memory limit."""
153
+ """ENTITY edges: shared canonical entity — capped to most recent per entity."""
149
154
  if not new_fact.canonical_entities:
150
155
  return []
151
156
  edges: list[GraphEdge] = []
152
157
  seen: set[str] = set()
153
158
 
154
159
  for entity_id in new_fact.canonical_entities:
160
+ entity_edge_count = 0
155
161
  for other in self._db.get_facts_by_entity(entity_id, profile_id):
162
+ if entity_edge_count >= self._MAX_ENTITY_EDGES_PER_ENTITY:
163
+ break
156
164
  if other.fact_id == new_fact.fact_id or other.fact_id in seen:
157
165
  continue
158
166
  if self._edge_exists(new_fact.fact_id, other.fact_id, EdgeType.ENTITY, profile_id):
@@ -163,6 +171,7 @@ class GraphBuilder:
163
171
  target_id=other.fact_id, edge_type=EdgeType.ENTITY,
164
172
  weight=_ENTITY_WEIGHT,
165
173
  ))
174
+ entity_edge_count += 1
166
175
  return edges
167
176
 
168
177
  def _build_temporal_edges(
@@ -184,7 +193,10 @@ class GraphBuilder:
184
193
  seen_pairs: set[tuple[str, str]] = set()
185
194
 
186
195
  for entity_id in new_fact.canonical_entities:
196
+ temporal_edge_count = 0
187
197
  for other in self._db.get_facts_by_entity(entity_id, profile_id):
198
+ if temporal_edge_count >= self._MAX_ENTITY_EDGES_PER_ENTITY:
199
+ break # V3.3.12: cap temporal edges like entity edges
188
200
  if other.fact_id == new_fact.fact_id:
189
201
  continue
190
202
  other_dt = _parse_date(other.observation_date)
@@ -212,6 +224,7 @@ class GraphBuilder:
212
224
  target_id=other.fact_id, edge_type=EdgeType.TEMPORAL,
213
225
  weight=weight,
214
226
  ))
227
+ temporal_edge_count += 1
215
228
  # Reverse: other -> new
216
229
  if not self._edge_exists(other.fact_id, new_fact.fact_id, EdgeType.TEMPORAL, profile_id):
217
230
  edges.append(GraphEdge(
@@ -31,10 +31,10 @@ _MIN_FEEDBACK_FOR_TRAINING = 20
31
31
 
32
32
  # Default channel weights (before learning)
33
33
  _DEFAULT_WEIGHTS = {
34
- "semantic": 1.2,
34
+ "semantic": 1.5,
35
35
  "bm25": 1.0,
36
36
  "entity_graph": 1.0,
37
- "temporal": 0.8,
37
+ "temporal": 1.0,
38
38
  }
39
39
 
40
40
 
@@ -145,10 +145,14 @@ class FRQADMetric:
145
145
  if bit_width >= 32:
146
146
  return np.array(base_variance, dtype=np.float64)
147
147
 
148
- scale = (32.0 / bit_width) ** self._config.kappa
149
- sigma_q = np.asarray(base_variance, dtype=np.float64) * scale
150
-
151
- return np.clip(sigma_q, self._config.variance_floor, self._config.variance_ceiling)
148
+ # V3.3.12: Paper-correct ADDITIVE variance combination (was multiplicative).
149
+ # sigma²_total = sigma²_obs + sigma²_quant
150
+ # sigma²_quant = Delta²/12 where Delta = 2/2^b (uniform quantization step)
151
+ delta = 2.0 / (2 ** bit_width) # Quantization step size
152
+ sigma_q_sq = (delta ** 2) / 12.0 # Uniform quantization noise variance
153
+ sigma_total = np.asarray(base_variance, dtype=np.float64) + sigma_q_sq
154
+
155
+ return np.clip(sigma_total, self._config.variance_floor, self._config.variance_ceiling)
152
156
 
153
157
  # ------------------------------------------------------------------
154
158
  # Core distance (THE novel contribution)
@@ -178,6 +178,19 @@ class LangevinDynamics:
178
178
  # --- Drift: -lambda^{-2} * grad_U * dt (Eq. 5 term 1) ---
179
179
  drift = -(lam_inv ** 2) * grad * self.dt
180
180
 
181
+ # --- V3.3.12: Ebbinghaus forgetting drift (Eq. 6 in Paper 3) ---
182
+ # λ(m) = 1/S(m) pushes toward boundary (forgetting) based on memory strength.
183
+ # S(m) is computed from access_count + importance. Higher S → less drift.
184
+ strength = max(0.5, 0.3 * math.log(1.0 + access_count) + 0.4 * importance)
185
+ forget_rate = 1.0 / strength # λ(m)
186
+ # F(ξ) = ξ/||ξ|| points outward (toward boundary = archived zone)
187
+ xi_norm = float(np.linalg.norm(xi))
188
+ if xi_norm > _EPS:
189
+ forget_direction = xi / xi_norm
190
+ else:
191
+ forget_direction = np.zeros(self.dim)
192
+ forgetting_drift = forget_rate * forget_direction * self.dt * 0.1 # Scaled down to prevent instability
193
+
181
194
  # --- Curvature correction: 0.5 * T * (d-2) * lambda^{-1} * xi * dt (Eq. 5 term 3) ---
182
195
  correction = 0.5 * self.temperature * (self.dim - 2) * lam_inv * xi * self.dt
183
196
 
@@ -186,8 +199,8 @@ class LangevinDynamics:
186
199
  noise = rng.standard_normal(self.dim)
187
200
  diffusion = math.sqrt(2.0 * self.temperature * self.dt) * lam_inv * noise
188
201
 
189
- # --- Full Euler-Maruyama update (Girolami & Calderhead 2011) ---
190
- new_xi = xi + drift + correction + diffusion
202
+ # --- Full Euler-Maruyama update with forgetting (Eq. 6, Girolami & Calderhead 2011) ---
203
+ new_xi = xi + drift + forgetting_drift + correction + diffusion
191
204
 
192
205
  # --- Project back into the open ball ---
193
206
  new_xi = _project_to_ball(new_xi)
@@ -197,8 +197,8 @@ def register_resources(server, get_engine: Callable) -> None:
197
197
 
198
198
  # Behavioral patterns summary
199
199
  try:
200
- from superlocalmemory.learning.behavioral import BehavioralStore
201
- store = BehavioralStore(engine._db.db_path)
200
+ from superlocalmemory.learning.behavioral import BehavioralPatternStore
201
+ store = BehavioralPatternStore(engine._db.db_path)
202
202
  summary = store.get_summary(pid)
203
203
  except Exception:
204
204
  summary = {}
@@ -0,0 +1,27 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3
4
+
5
+ """Shared MCP utilities — single source of truth for helpers used
6
+ across tools_core, tools_active, tools_v28, tools_v3, tools_v33.
7
+
8
+ V3.3.12: Extracted _emit_event to eliminate code duplication.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from pathlib import Path
14
+
15
+ _DB_PATH = Path.home() / ".superlocalmemory" / "memory.db"
16
+
17
+
18
def emit_event(event_type: str, payload: dict | None = None,
               source_agent: str = "mcp_client") -> None:
    """Publish an event on the EventBus, tagged with the "mcp" protocol.

    Best-effort by contract: any failure (including the event bus module
    being unavailable) is swallowed so callers never see an exception.
    """
    try:
        from superlocalmemory.infra.event_bus import EventBus

        EventBus.get_instance(_DB_PATH).emit(
            event_type,
            payload=payload,
            source_agent=source_agent,
            source_protocol="mcp",
        )
    except Exception:
        # Deliberately silent — event emission must never break a tool call.
        return
@@ -27,7 +27,7 @@ DB_PATH = MEMORY_DIR / "memory.db"
27
27
 
28
28
 
29
29
  def _emit_event(event_type: str, payload: dict | None = None,
30
- source_agent: str = "mcp_client") -> None:
30
+ source_agent: str = "mcp_client") -> None: # V3.3.12: see also mcp/shared.py
31
31
  """Emit an event to the EventBus (best-effort, never raises)."""
32
32
  try:
33
33
  from superlocalmemory.infra.event_bus import EventBus
@@ -253,3 +253,33 @@ def register_active_tools(server, get_engine: Callable) -> None:
253
253
  except Exception as exc:
254
254
  logger.exception("report_feedback failed")
255
255
  return {"success": False, "error": str(exc)}
256
+
257
+ # ------------------------------------------------------------------
258
+ # close_session — V3.3.12: Expose session closure via MCP
259
+ # ------------------------------------------------------------------
260
+
261
@server.tool()
async def close_session(session_id: str = "") -> dict:
    """Close the current session and create temporal summary events.

    Aggregates facts from the session into per-entity temporal summaries,
    enabling temporal queries like "What happened in session X?"

    Args:
        session_id: Session to close. When empty, falls back to the engine's
            ``_last_session_id`` attribute if it has one.

    Returns:
        ``{"success": True, "session_id": ..., "summary_events_created": ...}``
        on success, otherwise ``{"success": False, "error": ...}``.
    """
    try:
        engine = get_engine()
        sid = session_id or getattr(engine, '_last_session_id', '')
        if not sid:
            return {"success": False, "error": "No session_id provided"}
        count = engine.close_session(sid)
        return {
            "success": True,
            "session_id": sid,
            "summary_events_created": count,
        }
    except Exception as exc:
        # Tool boundary: log the traceback, return a structured error.
        logger.exception("close_session failed")
        return {"success": False, "error": str(exc)}
@@ -139,6 +139,8 @@ def register_core_tools(server, get_engine: Callable) -> None:
139
139
  "results": result.get("results", []),
140
140
  "count": result.get("result_count", 0),
141
141
  "query_type": result.get("query_type", "unknown"),
142
+ "channel_weights": result.get("channel_weights", {}),
143
+ "retrieval_time_ms": result.get("retrieval_time_ms", 0),
142
144
  }
143
145
  return {"success": False, "error": result.get("error", "Recall failed")}
144
146
  except Exception as exc:
@@ -280,11 +282,15 @@ def register_core_tools(server, get_engine: Callable) -> None:
280
282
  engine.profile_id = profile_id
281
283
 
282
284
  # Persist to both config stores so CLI and Dashboard stay in sync
283
- from superlocalmemory.server.routes.helpers import (
284
- ensure_profile_in_db, set_active_profile_everywhere,
285
- )
286
- ensure_profile_in_db(profile_id)
287
- set_active_profile_everywhere(profile_id)
285
+ try:
286
+ from superlocalmemory.server.routes.helpers import (
287
+ ensure_profile_in_db, set_active_profile_everywhere,
288
+ )
289
+ ensure_profile_in_db(profile_id)
290
+ set_active_profile_everywhere(profile_id)
291
+ except ImportError:
292
+ # Dashboard not installed — profile switch still works for MCP/CLI
293
+ logger.debug("Dashboard routes not available, profile set in engine only")
288
294
 
289
295
  return {
290
296
  "success": True,
@@ -337,8 +343,8 @@ def register_core_tools(server, get_engine: Callable) -> None:
337
343
  """Get learned behavioral patterns (interests, refinements, archival habits)."""
338
344
  try:
339
345
  engine = get_engine()
340
- from superlocalmemory.learning.behavioral import BehavioralStore
341
- store = BehavioralStore(engine._db.db_path)
346
+ from superlocalmemory.learning.behavioral import BehavioralPatternStore
347
+ store = BehavioralPatternStore(engine._db.db_path)
342
348
  ptype = pattern_type if pattern_type else None
343
349
  patterns = store.get_patterns(
344
350
  engine.profile_id, pattern_type=ptype, limit=limit,
@@ -353,8 +359,8 @@ def register_core_tools(server, get_engine: Callable) -> None:
353
359
  """Correct or annotate a learned behavioral pattern to improve retrieval."""
354
360
  try:
355
361
  engine = get_engine()
356
- from superlocalmemory.learning.behavioral import BehavioralStore
357
- store = BehavioralStore(engine._db.db_path)
362
+ from superlocalmemory.learning.behavioral import BehavioralPatternStore
363
+ store = BehavioralPatternStore(engine._db.db_path)
358
364
  store.record(
359
365
  engine.profile_id,
360
366
  pattern_type="correction",
@@ -181,8 +181,8 @@ def register_v28_tools(server, get_engine: Callable) -> None:
181
181
  """
182
182
  try:
183
183
  engine = get_engine()
184
- from superlocalmemory.learning.behavioral import BehavioralStore
185
- store = BehavioralStore(engine._db.db_path)
184
+ from superlocalmemory.learning.behavioral import BehavioralPatternStore
185
+ store = BehavioralPatternStore(engine._db.db_path)
186
186
  patterns = store.get_patterns(engine.profile_id, limit=limit)
187
187
  summary = store.get_summary(engine.profile_id)
188
188
  return {
@@ -228,6 +228,7 @@ def register_v3_tools(server, get_engine: Callable) -> None:
228
228
 
229
229
  facts = engine._db.get_all_facts(pid)[:limit]
230
230
  all_contradictions: list[dict] = []
231
+ errors_count = 0
231
232
  for fact in facts:
232
233
  if not fact.embedding or not fact.canonical_entities:
233
234
  continue
@@ -243,11 +244,13 @@ def register_v3_tools(server, get_engine: Callable) -> None:
243
244
  "content_a": fact.content[:80],
244
245
  })
245
246
  except Exception:
247
+ errors_count += 1
246
248
  continue
247
249
 
248
250
  return {
249
251
  "success": True,
250
252
  "facts_checked": len(facts),
253
+ "facts_errored": errors_count,
251
254
  "contradictions": all_contradictions[:50],
252
255
  "total_contradictions": len(all_contradictions),
253
256
  }
@@ -27,7 +27,7 @@ DB_PATH = MEMORY_DIR / "memory.db"
27
27
 
28
28
 
29
29
  def _emit_event(event_type: str, payload: dict | None = None,
30
- source_agent: str = "mcp_client") -> None:
30
+ source_agent: str = "mcp_client") -> None: # V3.3.12: see also mcp/shared.py
31
31
  """Emit an event to the EventBus (best-effort, never raises)."""
32
32
  try:
33
33
  from superlocalmemory.infra.event_bus import EventBus
@@ -76,8 +76,15 @@ def register_v33_tools(server, get_engine: Callable) -> None:
76
76
  )
77
77
 
78
78
  if dry_run:
79
- # Force run (bypass interval) but don't commit
80
- result = scheduler.run_decay_cycle(pid, force=True)
79
+ # Dry run: compute retention stats without applying changes
80
+ from superlocalmemory.math.ebbinghaus import EbbinghausCurve as _EC
81
+ facts = engine._db.get_all_facts(pid)
82
+ zones = {"active": 0, "warm": 0, "cold": 0, "archive": 0, "forgotten": 0}
83
+ for f in facts:
84
+ r = ebbinghaus.compute_retention(f.access_count or 0, f.importance or 0.5, 0, 0.0)
85
+ zone = ebbinghaus.classify_zone(r)
86
+ zones[zone] = zones.get(zone, 0) + 1
87
+ result = {"total": len(facts), "transitions": 0, "dry_run_zones": zones}
81
88
  else:
82
89
  result = scheduler.run_decay_cycle(pid, force=True)
83
90
 
@@ -137,8 +144,9 @@ def register_v33_tools(server, get_engine: Callable) -> None:
137
144
  )
138
145
 
139
146
  if dry_run:
140
- # Preview: count what would change without committing
141
- result = scheduler.run_eap_cycle(pid)
147
+ # Dry run: report current quantization state without changes
148
+ facts = engine._db.get_all_facts(pid)
149
+ result = {"total": len(facts), "would_quantize": 0, "dry_run": True}
142
150
  else:
143
151
  result = scheduler.run_eap_cycle(pid)
144
152
 
@@ -185,13 +193,13 @@ def register_v33_tools(server, get_engine: Callable) -> None:
185
193
 
186
194
  _emit_event("ccq.consolidation_complete", {
187
195
  "profile_id": pid,
188
- "clusters_found": result.clusters_found,
196
+ "clusters_processed": result.clusters_processed,
189
197
  "blocks_created": result.blocks_created,
190
198
  })
191
199
 
192
200
  return {
193
201
  "success": True,
194
- "clusters_found": result.clusters_found,
202
+ "clusters_processed": result.clusters_processed,
195
203
  "blocks_created": result.blocks_created,
196
204
  "facts_archived": result.facts_archived,
197
205
  "compression_ratio": round(result.compression_ratio, 3),
@@ -349,3 +357,56 @@ def register_v33_tools(server, get_engine: Callable) -> None:
349
357
  except Exception as exc:
350
358
  logger.exception("get_retention_stats tool failed")
351
359
  return {"success": False, "error": str(exc)}
360
+
361
+ # ------------------------------------------------------------------
362
+ # 7. run_maintenance — V3.3.12: Combined periodic maintenance cycle
363
+ # ------------------------------------------------------------------
364
+ @server.tool()
365
+ async def run_maintenance(profile_id: str = "") -> dict:
366
+ """Run all periodic maintenance tasks in a single call.
367
+
368
+ Combines Langevin dynamics stepping, Ebbinghaus forgetting decay,
369
+ and behavioral pattern mining into one convenient maintenance cycle.
370
+ Clients should call this periodically (e.g., at session end).
371
+
372
+ Args:
373
+ profile_id: Profile to maintain (default: active profile).
374
+ """
375
+ try:
376
+ engine = get_engine()
377
+ pid = profile_id or engine.profile_id
378
+ results = {}
379
+
380
+ # 1. Langevin dynamics step (lifecycle evolution)
381
+ try:
382
+ from superlocalmemory.core.maintenance import run_maintenance as _run_maint
383
+ maint_result = _run_maint(engine._db, engine._config, pid)
384
+ results["langevin"] = {"updated": maint_result.get("updated", 0)}
385
+ except Exception as exc:
386
+ results["langevin"] = {"error": str(exc)}
387
+
388
+ # 2. Ebbinghaus forgetting decay
389
+ try:
390
+ from superlocalmemory.math.ebbinghaus import EbbinghausCurve
391
+ from superlocalmemory.learning.forgetting_scheduler import ForgettingScheduler
392
+ ebbinghaus = EbbinghausCurve(engine._config.forgetting)
393
+ scheduler = ForgettingScheduler(engine._db, ebbinghaus, engine._config.forgetting)
394
+ decay_result = scheduler.run_decay_cycle(pid, force=False)
395
+ results["forgetting"] = decay_result
396
+ except Exception as exc:
397
+ results["forgetting"] = {"error": str(exc)}
398
+
399
+ # 3. Behavioral pattern mining
400
+ try:
401
+ from superlocalmemory.learning.consolidation_worker import ConsolidationWorker
402
+ cw = ConsolidationWorker(engine._db, engine._config)
403
+ patterns = cw._generate_patterns(pid)
404
+ results["behavioral"] = {"patterns_mined": len(patterns)}
405
+ except Exception as exc:
406
+ results["behavioral"] = {"error": str(exc)}
407
+
408
+ return {"success": True, "profile": pid, **results}
409
+
410
+ except Exception as exc:
411
+ logger.exception("run_maintenance failed")
412
+ return {"success": False, "error": str(exc)}
@@ -31,7 +31,7 @@ logger = logging.getLogger(__name__)
31
31
 
32
32
  _MAX_ROUNDS = 2
33
33
  _SUFFICIENCY_SCORE_THRESHOLD = 0.6
34
- _SKIP_TYPES = frozenset() # Hotpatch: enable agentic for ALL query types including multi_hop
34
+ _SKIP_TYPES = frozenset({"temporal"}) # S15: agentic harms temporal queries
35
35
 
36
36
  _SUFFICIENCY_SYSTEM = (
37
37
  "You evaluate whether retrieved context is sufficient to answer a query. "
@@ -68,6 +68,7 @@ class BM25Channel:
68
68
  self._corpus: list[list[str]] = []
69
69
  self._fact_ids: list[str] = []
70
70
  self._fact_id_set: set[str] = set()
71
+ self._raw_texts: list[str] = [] # V3.3.12: raw content for phrase matching
71
72
  self._bm25: BM25Plus | None = None
72
73
  self._dirty: bool = False
73
74
  self._loaded_profiles: set[str] = set()
@@ -96,15 +97,24 @@ class BM25Channel:
96
97
  self._corpus.append(tokens)
97
98
  self._fact_ids.append(fact.fact_id)
98
99
  self._fact_id_set.add(fact.fact_id)
100
+ self._raw_texts.append(fact.content)
99
101
  # Persist for next cold start
100
102
  self._db.store_bm25_tokens(fact.fact_id, profile_id, tokens)
101
103
  else:
104
+ # Load raw texts for phrase matching (V3.3.12)
105
+ fact_content_map = {}
106
+ try:
107
+ facts = self._db.get_all_facts(profile_id)
108
+ fact_content_map = {f.fact_id: f.content for f in facts}
109
+ except Exception:
110
+ pass
102
111
  for fid, tokens in token_map.items():
103
112
  if fid in self._fact_id_set:
104
113
  continue
105
114
  self._corpus.append(tokens)
106
115
  self._fact_ids.append(fid)
107
116
  self._fact_id_set.add(fid)
117
+ self._raw_texts.append(fact_content_map.get(fid, ""))
108
118
 
109
119
  self._dirty = True
110
120
  self._loaded_profiles.add(profile_id)
@@ -128,6 +138,9 @@ class BM25Channel:
128
138
  self._corpus.append(tokens)
129
139
  self._fact_ids.append(fact_id)
130
140
  self._fact_id_set.add(fact_id)
141
+ if not hasattr(self, '_raw_texts'):
142
+ self._raw_texts = []
143
+ self._raw_texts.append(content)
131
144
  self._dirty = True
132
145
 
133
146
  # Persist for cold start
@@ -168,9 +181,16 @@ class BM25Channel:
168
181
  scores = self._bm25.get_scores(query_tokens)
169
182
 
170
183
  scored: list[tuple[str, float]] = []
184
+ # V3.3.12: Exact phrase bonus — boost facts containing the full query phrase
185
+ query_lower = query.lower().strip()
171
186
  for i, score in enumerate(scores):
172
187
  if score > 0.0:
173
- scored.append((self._fact_ids[i], float(score)))
188
+ bonus = score
189
+ # Exact phrase match bonus: if the query appears as a substring in the document
190
+ if len(query_lower) >= 5 and i < len(self._raw_texts):
191
+ if query_lower in self._raw_texts[i].lower():
192
+ bonus *= 1.5 # 50% boost for exact phrase match
193
+ scored.append((self._fact_ids[i], bonus))
174
194
 
175
195
  scored.sort(key=lambda x: x[1], reverse=True)
176
196
  return scored[:top_k]
@@ -75,6 +75,8 @@ class RetrievalEngine:
75
75
  self._temporal: TemporalChannel | None = channels.get("temporal")
76
76
  # Phase G: Hopfield channel (6th)
77
77
  self._hopfield: HopfieldChannel | None = channels.get("hopfield")
78
+ # Phase 3: Spreading Activation channel
79
+ self._spreading_activation = channels.get("spreading_activation")
78
80
  self._embedder = embedder
79
81
  self._reranker = reranker
80
82
  self._strategy = strategy or QueryStrategyClassifier()
@@ -101,6 +103,11 @@ class RetrievalEngine:
101
103
  # Phase G: Hopfield channel (6th) — needs embedding input
102
104
  if self._hopfield is not None:
103
105
  self._registry.register_channel("hopfield", self._hopfield, needs_embedding=True)
106
+ # Phase 3: Spreading Activation (5th channel) — needs embedding input
107
+ if self._spreading_activation is not None:
108
+ self._registry.register_channel(
109
+ "spreading_activation", self._spreading_activation, needs_embedding=True,
110
+ )
104
111
 
105
112
  def recall(
106
113
  self, query: str, profile_id: str,
@@ -139,7 +146,7 @@ class RetrievalEngine:
139
146
  fused = weighted_rrf(ch_results, strat.weights, k=self._config.rrf_k)
140
147
 
141
148
  # Bridge discovery for multi-hop queries
142
- if self._bridge is not None and strat.query_type == "multi_hop":
149
+ if self._bridge is not None and strat.query_type in ("multi_hop", "entity", "factual", "general"):
143
150
  try:
144
151
  seed_ids = [fr.fact_id for fr in fused[:10]]
145
152
  bridges = self._bridge.discover(seed_ids, profile_id, max_bridges=10)
@@ -221,6 +228,7 @@ class RetrievalEngine:
221
228
  needs_embedding = (
222
229
  (self._semantic is not None and "semantic" not in disabled)
223
230
  or (self._hopfield is not None and "hopfield" not in disabled)
231
+ or (self._spreading_activation is not None and "spreading_activation" not in disabled)
224
232
  )
225
233
  if needs_embedding:
226
234
  try:
@@ -269,6 +277,23 @@ class RetrievalEngine:
269
277
  except Exception as exc:
270
278
  logger.warning("Hopfield channel: %s", exc)
271
279
 
280
+ # Phase 3: Spreading Activation channel (5th) — graph-based associative recall
281
+ if self._spreading_activation is not None and q_emb is not None and "spreading_activation" not in disabled:
282
+ try:
283
+ r = self._spreading_activation.search(q_emb, profile_id, self._config.bm25_top_k)
284
+ if r:
285
+ out["spreading_activation"] = r
286
+ except Exception as exc:
287
+ logger.warning("Spreading activation channel: %s", exc)
288
+
289
+ # Apply registered post-retrieval filters (forgetting filter, etc.)
290
+ if hasattr(self, '_registry') and self._registry._filters:
291
+ for fn in self._registry._filters:
292
+ try:
293
+ out = fn(out, profile_id, None)
294
+ except Exception as exc:
295
+ logger.warning("Post-retrieval filter failed: %s", exc)
296
+
272
297
  return out
273
298
 
274
299
  # -- Fact loading -------------------------------------------------------
@@ -336,12 +361,24 @@ class RetrievalEngine:
336
361
 
337
362
  score_map = {fact.fact_id: score for fact, score in scored}
338
363
 
364
+ # Min-max normalize CE scores to [0, 1] within the batch instead of
365
+ # sigmoid (which compresses the useful discrimination range).
366
+ ce_values = list(score_map.values())
367
+ ce_min = min(ce_values) if ce_values else 0.0
368
+ ce_max = max(ce_values) if ce_values else 1.0
369
+ ce_range = ce_max - ce_min if ce_max > ce_min else 1.0
370
+
371
+ # Also normalize RRF scores so both terms contribute meaningfully
372
+ rrf_values = [fr.fused_score for fr in fused]
373
+ rrf_max = max(rrf_values) if rrf_values else 1.0
374
+ rrf_max = rrf_max if rrf_max > 0 else 1.0
375
+
339
376
  updated = [
340
377
  FusionResult(
341
378
  fact_id=fr.fact_id,
342
379
  fused_score=(
343
- alpha * self._sigmoid(score_map.get(fr.fact_id, 0.0))
344
- + (1.0 - alpha) * fr.fused_score
380
+ alpha * ((score_map.get(fr.fact_id, ce_min) - ce_min) / ce_range)
381
+ + (1.0 - alpha) * (fr.fused_score / rrf_max)
345
382
  ),
346
383
  channel_ranks=fr.channel_ranks,
347
384
  channel_scores=fr.channel_scores,
@@ -425,12 +462,10 @@ class RetrievalEngine:
425
462
  # due to BM25 name-matching (greetings like "Hey Caroline!" score high
426
463
  # on BM25 but have zero retrieval value)
427
464
  content_len = len(fact.content.strip())
428
- if content_len < 25:
429
- quality = 0.1
430
- elif content_len < 50:
431
- quality = 0.5
432
- elif content_len < 80:
433
- quality = 0.8
465
+ if content_len < 10:
466
+ quality = 0.3
467
+ elif content_len < 25:
468
+ quality = 0.7
434
469
  else:
435
470
  quality = 1.0
436
471
 
@@ -67,6 +67,12 @@ def extract_query_entities(query: str) -> list[str]:
67
67
  _add(m.group(0))
68
68
  for m in re.finditer(r'"([^"]+)"', query):
69
69
  _add(m.group(1).strip())
70
+ # Also extract multi-word capitalized sequences (e.g. "New York", "San Francisco")
71
+ for m in re.finditer(r'\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b', query):
72
+ _add(m.group(1))
73
+ # Extract all-caps abbreviations (e.g. NYU, MIT, UCLA) — min 2 chars
74
+ for m in re.finditer(r'\b([A-Z]{2,})\b', query):
75
+ _add(m.group(1))
70
76
 
71
77
  return candidates
72
78
 
@@ -4,7 +4,7 @@
4
4
 
5
5
  """SuperLocalMemory V3 — Weighted Reciprocal Rank Fusion.
6
6
 
7
- Single-pass RRF with k=60 for diverse retrieval (D116).
7
+ Single-pass RRF with k=15 for sharp rank discrimination on small candidate pools.
8
8
  V1 had triple re-fusion which destroyed rankings — fixed in V2.
9
9
 
10
10
  Part of Qualixar | Author: Varun Pratap Bhardwaj
@@ -27,7 +27,7 @@ class FusionResult:
27
27
  def weighted_rrf(
28
28
  channels: dict[str, list[tuple[str, float]]],
29
29
  weights: dict[str, float],
30
- k: int = 60,
30
+ k: int = 15,
31
31
  max_rank_penalty: int = 1000,
32
32
  ) -> list[FusionResult]:
33
33
  """Fuse ranked lists via Weighted Reciprocal Rank Fusion.
@@ -288,8 +288,8 @@ class HopfieldChannel:
288
288
  ):
289
289
  return (self._cached_matrix, self._cached_fact_ids)
290
290
 
291
- # Step 2: Load all facts
292
- facts = self._db.get_all_facts(profile_id)
291
+ # Step 2: Load facts (V3.3.12: cap to most recent 5000 to bound memory)
292
+ facts = self._db.get_all_facts(profile_id)[:5000]
293
293
  if not facts:
294
294
  return (None, [])
295
295
 
@@ -35,7 +35,9 @@ _live_rerankers: set[weakref.ref] = set()
35
35
  logger = logging.getLogger(__name__)
36
36
 
37
37
  _IDLE_TIMEOUT_SECONDS = 120 # 2 min → kill worker
38
- _SUBPROCESS_RESPONSE_TIMEOUT = 120 # 120s for ONNX cold start
38
+ # V3.3.12: Configurable via SLM_RERANKER_IDLE_TIMEOUT env var
39
+ _IDLE_TIMEOUT_SECONDS = int(os.environ.get("SLM_RERANKER_IDLE_TIMEOUT", _IDLE_TIMEOUT_SECONDS))
40
+ _SUBPROCESS_RESPONSE_TIMEOUT = 180 # V3.3.12: 180s (was 120s) for stressed system respawns
39
41
  _WORKER_RECYCLE_AFTER = 500 # Recycle after N requests
40
42
 
41
43
 
@@ -129,8 +131,23 @@ class CrossEncoderReranker:
129
131
  finally:
130
132
  self._worker_loading = False
131
133
 
132
- t = threading.Thread(target=_warmup, daemon=True, name="ce-warmup")
133
- t.start()
134
+ self._warmup_thread = threading.Thread(target=_warmup, daemon=True, name="ce-warmup")
135
+ self._warmup_thread.start()
136
+
137
+ def warmup_sync(self, timeout: float = 120.0) -> bool:
138
+ """Block until reranker model is loaded. Returns True if ready.
139
+
140
+ V3.3.12: Critical for benchmarks and first-recall quality.
141
+ Without this, first 30-60s of recalls get no reranking (-30.7pp).
142
+ """
143
+ if self._model_loaded:
144
+ return True
145
+ if not self._worker_loading and not self._model_loaded:
146
+ self._start_background_warmup()
147
+ t = getattr(self, '_warmup_thread', None)
148
+ if t is not None:
149
+ t.join(timeout=timeout)
150
+ return self._model_loaded
134
151
 
135
152
  # ------------------------------------------------------------------
136
153
  # Worker management (mirrors EmbeddingService pattern)
@@ -304,13 +321,13 @@ class CrossEncoderReranker:
304
321
 
305
322
  documents = [fact.content for fact, _ in candidates]
306
323
 
307
- # Short timeout (10s) — model should already be loaded by warmup.
308
- # If worker crashed or is still loading, fallback immediately.
324
+ # V3.3.12: Increased timeout 10s→60s — L-12-v2 needs PyTorch + ONNX load.
325
+ # Critical: Paper 2 ablation showed -30.7pp without reranking.
309
326
  resp = self._send_request({
310
327
  "cmd": "rerank",
311
328
  "query": query,
312
329
  "documents": documents,
313
- }, timeout=10.0)
330
+ }, timeout=60.0)
314
331
 
315
332
  if resp is None or not resp.get("ok"):
316
333
  # Fallback: return by existing score
@@ -182,7 +182,7 @@ class SemanticChannel:
182
182
  else:
183
183
  sim = cos_sim
184
184
 
185
- if sim > 0.3:
185
+ if sim > 0.05:
186
186
  scored.append((fact.fact_id, sim))
187
187
 
188
188
  scored.sort(key=lambda x: x[1], reverse=True)
@@ -235,7 +235,7 @@ class SemanticChannel:
235
235
  else:
236
236
  sim = cos_sim
237
237
 
238
- if sim > 0.3:
238
+ if sim > 0.05:
239
239
  scored.append((fact.fact_id, sim))
240
240
 
241
241
  scored.sort(key=lambda x: x[1], reverse=True)
@@ -124,6 +124,16 @@ class TemporalChannel:
124
124
  import re
125
125
  _PROPER_RE = re.compile(r"\b([A-Z][a-z]+)\b")
126
126
  names = [m.group(1) for m in _PROPER_RE.finditer(query)]
127
+ # Also try title-cased version for lowercase queries
128
+ if not names:
129
+ names = [m.group(1) for m in _PROPER_RE.finditer(query.title())]
130
+ # Filter out common words from title-casing
131
+ _stop = {"What", "When", "Where", "Who", "Which", "How", "Does", "Did",
132
+ "The", "That", "This", "There", "Then", "Have", "Has", "Had",
133
+ "About", "After", "Before", "From", "With", "Would", "Could",
134
+ "Should", "Will", "Because", "Also", "Just", "Like", "Know",
135
+ "Think", "Tell", "Said"}
136
+ names = [n for n in names if n not in _stop]
127
137
  if not names:
128
138
  return []
129
139
 
@@ -146,7 +156,10 @@ class TemporalChannel:
146
156
  fid = dict(row)["fact_id"]
147
157
  if fid not in seen:
148
158
  seen.add(fid)
149
- results.append((fid, 0.85)) # High base score for entity-temporal
159
+ # Rank by position (first events more likely relevant) instead
160
+ # of flat 0.85 which loses discrimination
161
+ rank_score = 0.85 - len(seen) * 0.02
162
+ results.append((fid, max(0.3, rank_score)))
150
163
 
151
164
  return results
152
165
 
@@ -442,6 +442,8 @@ CREATE INDEX IF NOT EXISTS idx_edges_target
442
442
  ON graph_edges (profile_id, target_id);
443
443
  CREATE INDEX IF NOT EXISTS idx_edges_type
444
444
  ON graph_edges (profile_id, edge_type);
445
+ CREATE INDEX IF NOT EXISTS idx_edges_exists_check
446
+ ON graph_edges (profile_id, source_id, target_id, edge_type);
445
447
  """
446
448
 
447
449