superlocalmemory 3.3.11 → 3.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/package.json +1 -1
  2. package/pyproject.toml +2 -3
  3. package/src/superlocalmemory/core/config.py +9 -6
  4. package/src/superlocalmemory/core/embedding_worker.py +5 -1
  5. package/src/superlocalmemory/core/embeddings.py +3 -1
  6. package/src/superlocalmemory/core/engine.py +14 -0
  7. package/src/superlocalmemory/core/engine_wiring.py +16 -1
  8. package/src/superlocalmemory/core/maintenance_scheduler.py +94 -0
  9. package/src/superlocalmemory/core/recall_pipeline.py +24 -0
  10. package/src/superlocalmemory/core/recall_worker.py +22 -4
  11. package/src/superlocalmemory/core/reranker_worker.py +246 -0
  12. package/src/superlocalmemory/core/store_pipeline.py +12 -2
  13. package/src/superlocalmemory/encoding/fact_extractor.py +16 -8
  14. package/src/superlocalmemory/encoding/graph_builder.py +21 -1
  15. package/src/superlocalmemory/learning/adaptive.py +2 -2
  16. package/src/superlocalmemory/math/fisher_quantized.py +8 -4
  17. package/src/superlocalmemory/math/langevin.py +15 -2
  18. package/src/superlocalmemory/mcp/resources.py +2 -2
  19. package/src/superlocalmemory/mcp/shared.py +27 -0
  20. package/src/superlocalmemory/mcp/tools_active.py +31 -1
  21. package/src/superlocalmemory/mcp/tools_core.py +15 -9
  22. package/src/superlocalmemory/mcp/tools_v28.py +2 -2
  23. package/src/superlocalmemory/mcp/tools_v3.py +3 -0
  24. package/src/superlocalmemory/mcp/tools_v33.py +68 -7
  25. package/src/superlocalmemory/retrieval/agentic.py +1 -1
  26. package/src/superlocalmemory/retrieval/bm25_channel.py +21 -1
  27. package/src/superlocalmemory/retrieval/engine.py +44 -9
  28. package/src/superlocalmemory/retrieval/entity_channel.py +6 -0
  29. package/src/superlocalmemory/retrieval/fusion.py +2 -2
  30. package/src/superlocalmemory/retrieval/hopfield_channel.py +2 -2
  31. package/src/superlocalmemory/retrieval/reranker.py +24 -7
  32. package/src/superlocalmemory/retrieval/semantic_channel.py +2 -2
  33. package/src/superlocalmemory/retrieval/temporal_channel.py +14 -1
  34. package/src/superlocalmemory/storage/schema.py +2 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlocalmemory",
3
- "version": "3.3.11",
3
+ "version": "3.3.13",
4
4
  "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
5
5
  "keywords": [
6
6
  "ai-memory",
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "superlocalmemory"
3
- version = "3.3.11"
3
+ version = "3.3.13"
4
4
  description = "Information-geometric agent memory with mathematical guarantees"
5
5
  readme = "README.md"
6
6
  license = {text = "MIT"}
@@ -48,8 +48,7 @@ dependencies = [
48
48
 
49
49
  [project.optional-dependencies]
50
50
  search = [
51
- "sentence-transformers>=4.0.0",
52
- "sentence-transformers[onnx]>=4.0.0",
51
+ "sentence-transformers[onnx]>=5.0.0",
53
52
  "einops>=0.8.2",
54
53
  "torch>=2.2.0",
55
54
  "scikit-learn>=1.3.0,<2.0.0",
@@ -86,10 +86,10 @@ class LLMConfig:
86
86
  class ChannelWeights:
87
87
  """Retrieval channel weights — 5 channels, query-adaptive."""
88
88
 
89
- # Entity-linked facts are high-precision matches that rank above BM25.
90
- semantic: float = 1.2
89
+ # Semantic should dominate for conversational retrieval (paraphrase matters most).
90
+ semantic: float = 1.5
91
91
  bm25: float = 1.0
92
- entity_graph: float = 1.3
92
+ entity_graph: float = 1.0
93
93
  temporal: float = 1.0
94
94
  spreading_activation: float = 1.0 # Phase 3: 5th channel (BC-08: default value)
95
95
  hopfield: float = 0.8 # Phase G: 6th channel (Hopfield associative memory)
@@ -143,7 +143,7 @@ class RetrievalConfig:
143
143
  """Configuration for the retrieval (recall) pipeline."""
144
144
 
145
145
  # Fusion
146
- rrf_k: int = 60 # RRF smoothing constant (D116: k=60 for diversity)
146
+ rrf_k: int = 15 # RRF smoothing constant (k=15 for candidate pools of 50-200)
147
147
  top_k: int = 20 # Final results to return
148
148
 
149
149
  # Per-channel
@@ -154,7 +154,7 @@ class RetrievalConfig:
154
154
 
155
155
  # Reranking (V3.3.2: ONNX backend enabled for all modes)
156
156
  use_cross_encoder: bool = True
157
- cross_encoder_model: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
157
+ cross_encoder_model: str = "cross-encoder/ms-marco-MiniLM-L-12-v2"
158
158
  cross_encoder_backend: str = "onnx" # "onnx" (~200MB) or "" (PyTorch, ~1.5GB)
159
159
 
160
160
  # Agentic (Mode C only)
@@ -618,7 +618,7 @@ class SLMConfig:
618
618
  # but NEVER override an explicit use_cross_encoder setting.
619
619
  # The user's explicit choice always wins.
620
620
  if "cross_encoder_backend" not in rt:
621
- rt.setdefault("cross_encoder_model", "cross-encoder/ms-marco-MiniLM-L-6-v2")
621
+ rt.setdefault("cross_encoder_model", "cross-encoder/ms-marco-MiniLM-L-12-v2")
622
622
  rt["cross_encoder_backend"] = "onnx"
623
623
  # Only auto-enable if user didn't explicitly set the field
624
624
  rt.setdefault("use_cross_encoder", True)
@@ -740,6 +740,9 @@ class SLMConfig:
740
740
  retrieval=RetrievalConfig(
741
741
  # V3.3.2: ONNX cross-encoder enabled for all modes (~200MB)
742
742
  use_cross_encoder=True,
743
+ # Mode A is zero-LLM: disable agentic retrieval (it replaces
744
+ # precision-tuned fusion with crude heuristic expansions)
745
+ agentic_max_rounds=0,
743
746
  ),
744
747
  math=MathConfig(
745
748
  sheaf_contradiction_threshold=0.45, # 768d threshold
@@ -156,4 +156,8 @@ def _respond(data: dict) -> None:
156
156
 
157
157
 
158
158
  if __name__ == "__main__":
159
- _worker_main()
159
+ try:
160
+ _worker_main()
161
+ except KeyboardInterrupt:
162
+ # V3.3.13: Windows CI sends KeyboardInterrupt on test completion.
163
+ sys.exit(0)
@@ -50,7 +50,9 @@ class DimensionMismatchError(RuntimeError):
50
50
 
51
51
 
52
52
  _IDLE_TIMEOUT_SECONDS = 120 # 2 minutes — kill worker after idle
53
- _SUBPROCESS_RESPONSE_TIMEOUT = 120 # V3.3.2: 120s for ONNX cold start
53
+ # V3.3.12: Configurable via SLM_EMBED_IDLE_TIMEOUT env var (seconds)
54
+ _IDLE_TIMEOUT_SECONDS = int(os.environ.get("SLM_EMBED_IDLE_TIMEOUT", _IDLE_TIMEOUT_SECONDS))
55
+ _SUBPROCESS_RESPONSE_TIMEOUT = 180 # V3.3.12: 180s (was 120s) — respawns on stressed systems need more time
54
56
  _WORKER_RECYCLE_AFTER = 1000 # Recycle worker after N requests (C++ fragmentation prevention)
55
57
 
56
58
 
@@ -79,6 +79,7 @@ class MemoryEngine:
79
79
  self._auto_linker = None
80
80
  self._graph_analyzer = None
81
81
  self._consolidation_engine = None
82
+ self._maintenance_scheduler = None
82
83
  self._hooks = HookRegistry()
83
84
 
84
85
  # -- Public properties (Phase 2+ access) --------------------------------
@@ -194,6 +195,17 @@ class MemoryEngine:
194
195
  # V3.3: Check for embedding model migration on mode switch
195
196
  self._check_embedding_migration()
196
197
 
198
+ # V3.3.13: Background maintenance scheduler (Langevin/Ebbinghaus/Sheaf)
199
+ if self._config.forgetting.enabled:
200
+ try:
201
+ from superlocalmemory.core.maintenance_scheduler import MaintenanceScheduler
202
+ self._maintenance_scheduler = MaintenanceScheduler(
203
+ self._db, self._config, self._profile_id,
204
+ )
205
+ self._maintenance_scheduler.start()
206
+ except Exception as exc:
207
+ logger.debug("Maintenance scheduler init failed: %s", exc)
208
+
197
209
  self._initialized = True
198
210
  logger.info(
199
211
  "MemoryEngine initialized: mode=%s profile=%s",
@@ -306,6 +318,8 @@ class MemoryEngine:
306
318
  # -- Lifecycle ----------------------------------------------------------
307
319
 
308
320
  def close(self) -> None:
321
+ if self._maintenance_scheduler is not None:
322
+ self._maintenance_scheduler.stop()
309
323
  self._initialized = False
310
324
 
311
325
  @property
@@ -339,7 +339,7 @@ def _init_spreading_activation(
339
339
  SpreadingActivation,
340
340
  SpreadingActivationConfig,
341
341
  )
342
- sa_config = SpreadingActivationConfig(enabled=False)
342
+ sa_config = SpreadingActivationConfig(enabled=True)
343
343
  return SpreadingActivation(
344
344
  db=db, vector_store=vector_store, config=sa_config,
345
345
  )
@@ -454,6 +454,21 @@ def init_retrieval(
454
454
  trust_scorer=trust_scorer,
455
455
  )
456
456
 
457
+ # V3.3.13: Ensure reranker warmup is in progress.
458
+ # The CrossEncoderReranker constructor starts background warmup, but
459
+ # callers can also call warmup_sync() to block until ready.
460
+ # Here we just log warmup status — benchmark scripts call warmup_sync() explicitly.
461
+ if reranker is not None:
462
+ import threading
463
+ def _log_warmup_status() -> None:
464
+ ready = reranker.warmup_sync(timeout=180)
465
+ if ready:
466
+ logger.info("Cross-encoder reranker warm and ready")
467
+ else:
468
+ logger.warning("Cross-encoder reranker warmup failed — recalls will use fallback scoring")
469
+ t = threading.Thread(target=_log_warmup_status, daemon=True, name="ce-init-warmup")
470
+ t.start()
471
+
457
472
  # Phase A: Register forgetting filter into the channel registry
458
473
  try:
459
474
  from superlocalmemory.retrieval.forgetting_filter import register_forgetting_filter
@@ -0,0 +1,94 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """SuperLocalMemory V3 — Background Maintenance Scheduler.
6
+
7
+ V3.3.13: Periodically triggers Langevin/Ebbinghaus/Sheaf maintenance
8
+ so users don't need to call run_maintenance manually.
9
+
10
+ Configurable interval via ForgettingConfig.scheduler_interval_minutes.
11
+ Defaults to 30 min. Only started when config.forgetting.enabled is true (so it stays off in benchmark runs that leave forgetting disabled).
12
+
13
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
14
+ License: MIT
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import logging
20
+ import threading
21
+ from typing import TYPE_CHECKING
22
+
23
+ if TYPE_CHECKING:
24
+ from superlocalmemory.core.config import SLMConfig
25
+ from superlocalmemory.storage.database import DatabaseManager
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
class MaintenanceScheduler:
    """Background scheduler that periodically calls ``run_maintenance``.

    A daemon ``threading.Timer`` is re-armed after every run, using the
    interval ``config.forgetting.scheduler_interval_minutes``.

    ``start()`` and ``stop()`` may be called from any thread: a lock
    serialises them against the timer callback, so no timer is left armed
    once ``stop()`` has returned.

    Note: an armed timer holds a reference to this object, so the scheduler
    is not garbage-collected while it is running. Call ``stop()`` explicitly;
    ``__del__`` is only a last-resort cleanup.
    """

    def __init__(
        self,
        db: DatabaseManager,
        config: SLMConfig,
        profile_id: str = "default",
    ) -> None:
        """Store collaborators and derive the interval in seconds.

        Args:
            db: Database handle forwarded to ``run_maintenance``.
            config: Full configuration; ``config.forgetting`` supplies the
                interval in minutes.
            profile_id: Profile whose data is maintained.
        """
        self._db = db
        self._config = config
        self._profile_id = profile_id
        self._timer: threading.Timer | None = None
        self._running = False
        # Guards _running/_timer against concurrent start/stop/re-arm.
        self._lock = threading.Lock()
        self._interval = config.forgetting.scheduler_interval_minutes * 60.0

    def start(self) -> None:
        """Start the periodic scheduler. Idempotent.

        A non-positive interval is rejected with a warning: a zero or
        negative delay would make the timer fire immediately and run
        maintenance in a tight loop.
        """
        with self._lock:
            if self._running:
                return
            if self._interval <= 0:
                logger.warning(
                    "Maintenance scheduler not started: non-positive interval (%s min)",
                    self._config.forgetting.scheduler_interval_minutes,
                )
                return
            self._running = True
            self._arm_locked()
        logger.info(
            "Maintenance scheduler started (interval=%dm)",
            self._config.forgetting.scheduler_interval_minutes,
        )

    def stop(self) -> None:
        """Stop the scheduler and cancel any pending timer. Idempotent."""
        with self._lock:
            was_running = self._running
            self._running = False
            pending, self._timer = self._timer, None
        if pending is not None:
            pending.cancel()
        # Only announce a real transition (stop() is also called by __del__).
        if was_running:
            logger.info("Maintenance scheduler stopped")

    def _schedule_next(self) -> None:
        """Schedule the next maintenance run if the scheduler is still running."""
        with self._lock:
            self._arm_locked()

    def _arm_locked(self) -> None:
        """Arm a fresh daemon timer. The caller must hold ``self._lock``."""
        if not self._running:
            return
        timer = threading.Timer(self._interval, self._run)
        timer.daemon = True
        self._timer = timer
        timer.start()

    def _run(self) -> None:
        """Execute maintenance once, then schedule the next run."""
        if not self._running:
            return
        try:
            # Imported lazily, inside the try, so an import failure is
            # logged like any other maintenance failure.
            from superlocalmemory.core.maintenance import run_maintenance
            counts = run_maintenance(self._db, self._config, self._profile_id)
            logger.info("Scheduled maintenance complete: %s", counts)
        except Exception as exc:
            logger.warning("Scheduled maintenance failed: %s", exc)
        finally:
            # Re-arm even after a failure; becomes a no-op once stop() ran.
            self._schedule_next()

    def __del__(self) -> None:
        # Best effort only: attributes may be missing (partially built
        # instance) or logging may already be torn down at shutdown.
        try:
            self.stop()
        except Exception:
            pass
@@ -192,6 +192,30 @@ def run_recall(
192
192
  except Exception as exc:
193
193
  logger.debug("Access log batch store failed: %s", exc)
194
194
 
195
+ # V3.3.12: Wire BehavioralTracker.record_query() into live recall pipeline
196
+ try:
197
+ from superlocalmemory.learning.behavioral import BehavioralTracker
198
+ _tracker = BehavioralTracker(db)
199
+ _tracker.record_query(
200
+ profile_id=profile_id, query=query,
201
+ query_type=response.query_type,
202
+ result_count=len(response.results),
203
+ )
204
+ except Exception as exc:
205
+ logger.debug("Behavioral tracking: %s", exc)
206
+
207
+ # V3.3.12: Spaced repetition update on recall (Ebbinghaus on_access_event)
208
+ if response.results:
209
+ try:
210
+ from superlocalmemory.learning.forgetting_scheduler import ForgettingScheduler
211
+ from superlocalmemory.math.ebbinghaus import EbbinghausCurve
212
+ _ebbinghaus = EbbinghausCurve(config.forgetting)
213
+ _fsched = ForgettingScheduler(db, _ebbinghaus, config.forgetting)
214
+ for r in response.results[:10]:
215
+ _fsched.on_access_event(r.fact.fact_id, profile_id)
216
+ except Exception as exc:
217
+ logger.debug("Spaced repetition update: %s", exc)
218
+
195
219
  # Phase 3: Hebbian strengthening for co-accessed facts
196
220
  if auto_linker and response.results:
197
221
  try:
@@ -187,10 +187,28 @@ def _handle_update_memory(fact_id: str, content: str, agent_id: str = "system")
187
187
  if not rows:
188
188
  return {"ok": False, "error": f"Memory {fact_id} not found"}
189
189
  old_content = dict(rows[0]).get("content", "")[:80]
190
- engine._db.execute(
191
- "UPDATE atomic_facts SET content = ? WHERE fact_id = ?",
192
- (content, fact_id),
193
- )
190
+ # V3.3.12: Re-embed updated content so semantic search + BM25 stay consistent.
191
+ # Previously only the text column was updated, leaving stale embeddings.
192
+ updates: dict = {"content": content}
193
+ if engine._embedder:
194
+ try:
195
+ new_emb = engine._embedder.embed(content)
196
+ if new_emb:
197
+ updates["embedding"] = new_emb
198
+ fm, fv = engine._embedder.compute_fisher_params(new_emb)
199
+ updates["fisher_mean"] = fm
200
+ updates["fisher_variance"] = fv
201
+ except Exception:
202
+ pass
203
+ engine._db.update_fact(fact_id, updates)
204
+ # Update BM25 index for the new content
205
+ if hasattr(engine, '_retrieval_engine') and engine._retrieval_engine:
206
+ bm25 = getattr(engine._retrieval_engine, '_bm25', None)
207
+ if bm25:
208
+ try:
209
+ bm25.add(fact_id, content, pid)
210
+ except Exception:
211
+ pass
194
212
  import logging as _logging
195
213
  _logging.getLogger("superlocalmemory.audit").info(
196
214
  "UPDATE fact_id=%s by agent=%s old=%s new=%s",
@@ -0,0 +1,246 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """Subprocess reranker worker — isolates PyTorch/ONNX from main process.
6
+
7
+ Same pattern as embedding_worker.py. The main process stays at ~60 MB.
8
+ All cross-encoder model memory lives in this worker subprocess.
9
+
10
+ Protocol (JSON over stdin/stdout):
11
+ Request: {"cmd": "rerank", "query": "...", "documents": ["...", ...]}
12
+ Response: {"ok": true, "scores": [0.95, 0.32, ...]}
13
+
14
+ Request: {"cmd": "score", "query": "...", "document": "..."}
15
+ Response: {"ok": true, "score": 0.87}
16
+
17
+ Request: {"cmd": "ping"}
18
+ Response: {"ok": true, "loaded": true, "backend": "onnx", "model": "..."}
19
+
20
+ Request: {"cmd": "quit"}
21
+ (worker exits)
22
+
23
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import json
29
+ import os
30
+ import platform
31
+ import signal
32
+ import struct
33
+ import sys
34
+ import threading
35
+
36
# Force CPU BEFORE any torch import: these variables are set at import time,
# ahead of the lazy sentence-transformers/torch imports further down.
os.environ.update({
    "CUDA_VISIBLE_DEVICES": "",
    "PYTORCH_MPS_HIGH_WATERMARK_RATIO": "0.0",
    "PYTORCH_MPS_MEM_LIMIT": "0",
    "PYTORCH_ENABLE_MPS_FALLBACK": "1",
    "TOKENIZERS_PARALLELISM": "false",
    "TORCH_DEVICE": "cpu",
})

# SIGTERM bridge for Docker/systemd: turn SIGTERM into a normal exit.
# Not installed on Windows.
if sys.platform != "win32":
    signal.signal(signal.SIGTERM, lambda _signum, _frame: sys.exit(0))
47
+
48
+
49
def _parent_is_alive(parent_pid: int) -> bool:
    """Return True while the process identified by *parent_pid* is still our parent.

    Windows must not use ``os.kill(pid, 0)``: there, any signal other than
    CTRL_C_EVENT/CTRL_BREAK_EVENT is passed to TerminateProcess, so the
    "probe" would kill the parent. A process handle is opened and polled
    instead.
    """
    if sys.platform == "win32":
        import ctypes
        from ctypes import wintypes

        synchronize = 0x00100000          # SYNCHRONIZE access right
        wait_timeout = 0x00000102         # WAIT_TIMEOUT: handle not signalled
        error_access_denied = 5

        kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
        kernel32.OpenProcess.restype = wintypes.HANDLE
        kernel32.OpenProcess.argtypes = (
            wintypes.DWORD, wintypes.BOOL, wintypes.DWORD,
        )
        kernel32.WaitForSingleObject.restype = wintypes.DWORD
        kernel32.WaitForSingleObject.argtypes = (wintypes.HANDLE, wintypes.DWORD)
        kernel32.CloseHandle.restype = wintypes.BOOL
        kernel32.CloseHandle.argtypes = (wintypes.HANDLE,)

        handle = kernel32.OpenProcess(synchronize, False, parent_pid)
        if not handle:
            # Access denied means the process exists but cannot be opened.
            return ctypes.get_last_error() == error_access_denied
        try:
            # A process handle becomes signalled once the process has exited.
            return kernel32.WaitForSingleObject(handle, 0) == wait_timeout
        finally:
            kernel32.CloseHandle(handle)

    # POSIX: once the parent exits we are re-parented, so getppid() changes.
    # This also protects against the old PID being reused by another process.
    if os.getppid() != parent_pid:
        return False
    try:
        os.kill(parent_pid, 0)  # signal 0: existence check only
    except ProcessLookupError:
        return False
    except PermissionError:
        # The process exists; we merely may not signal it.
        return True
    except OSError:
        return False
    return True


def _start_parent_watchdog() -> None:
    """Monitor parent process — self-terminate if parent dies.

    Prevents orphaned workers that consume 1+ GB each when the parent
    process crashes, is killed, or exits without cleanup.

    V3.3.7: Added after incident where ~30 orphaned workers consumed 33 GB.

    The parent PID is captured once at start-up and polled every 5 seconds
    from a daemon thread. ``os._exit`` is used so termination cannot be
    blocked by the main loop waiting on stdin.
    """
    parent_pid = os.getppid()

    def _watch() -> None:
        import time
        while True:
            time.sleep(5)
            if not _parent_is_alive(parent_pid):
                # Parent is dead — self-terminate
                os._exit(0)

    t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
    t.start()
71
+
72
+
73
def _detect_onnx_variant() -> str:
    """Pick the ONNX model file to request for the current platform.

    Returns the ARM64 int8 file on Apple-silicon macOS, the AVX2 uint8 file
    on 64-bit x86, and the generic ``onnx/model.onnx`` everywhere else.
    """
    machine = platform.machine().lower()
    pointer_bits = struct.calcsize("P") * 8

    on_apple_silicon = sys.platform == "darwin" and machine in {"arm64", "aarch64"}
    if on_apple_silicon:
        return "onnx/model_qint8_arm64.onnx"

    on_x86_64 = machine in {"x86_64", "amd64"} and pointer_bits == 64
    if on_x86_64:
        return "onnx/model_quint8_avx2.onnx"

    return "onnx/model.onnx"
83
+
84
+
85
_DEFAULT_CROSS_ENCODER = "cross-encoder/ms-marco-MiniLM-L-12-v2"


def _predict_scores(model, pairs: list) -> list:
    """Score (query, document) pairs, inside torch.inference_mode() when torch is importable."""
    try:
        import torch
    except ImportError:
        return model.predict(pairs)
    with torch.inference_mode():
        return model.predict(pairs)


def _worker_main() -> None:
    """Main loop: read JSON requests from stdin, write responses to stdout.

    One request per line, one JSON response per request. Supported commands:

    * ``quit``   — leave the loop (no response).
    * ``ping``   — report whether a model is loaded, plus backend and model name.
    * ``load``   — (re)load a model via ``_load_model``.
    * ``rerank`` — score ``documents`` (a non-empty list) against ``query``.
    * ``score``  — score a single ``document`` against ``query``.

    ``rerank`` and ``score`` load the model on first use, taking
    ``model_name``/``backend`` from the request or falling back to defaults.
    Malformed requests get an ``{"ok": false, ...}`` response; they never
    terminate the worker.
    """
    _start_parent_watchdog()  # V3.3.7: self-terminate if parent dies

    model = None
    active_backend = ""
    model_name = ""

    for line in sys.stdin:
        line = line.strip()
        if not line:
            continue
        try:
            req = json.loads(line)
        except json.JSONDecodeError:
            _respond({"ok": False, "error": "Invalid JSON"})
            continue
        # Valid JSON that is not an object (list, number, ...) has no .get();
        # reject it instead of letting AttributeError kill the loop.
        if not isinstance(req, dict):
            _respond({"ok": False, "error": "Request must be a JSON object"})
            continue

        cmd = req.get("cmd", "")

        if cmd == "quit":
            break

        if cmd == "ping":
            _respond({
                "ok": True,
                "loaded": model is not None,
                "backend": active_backend,
                "model": model_name,
            })
            continue

        if cmd == "load":
            model, active_backend, model_name = _load_model(
                req.get("model_name", _DEFAULT_CROSS_ENCODER),
                req.get("backend", "onnx"),
            )
            _respond({
                "ok": model is not None,
                "backend": active_backend,
                "model": model_name,
            })
            continue

        if cmd in ("rerank", "score"):
            query = req.get("query", "")
            if cmd == "rerank":
                documents = req.get("documents", [])
                # A bare string would otherwise be scored character by character.
                usable = isinstance(documents, list) and bool(documents)
                missing = "Missing query or documents"
            else:
                document = req.get("document", "")
                documents = [document]
                usable = bool(document)
                missing = "Missing query or document"
            if not query or not usable:
                _respond({"ok": False, "error": missing})
                continue

            if model is None:
                # Auto-load with defaults (or the overrides carried by the request).
                model, active_backend, model_name = _load_model(
                    req.get("model_name", _DEFAULT_CROSS_ENCODER),
                    req.get("backend", "onnx"),
                )
                if model is None:
                    _respond({"ok": False, "error": "Model load failed"})
                    continue

            try:
                scores = _predict_scores(
                    model, [(query, doc) for doc in documents],
                )
                if cmd == "rerank":
                    payload = {"ok": True, "scores": [float(s) for s in scores]}
                else:
                    payload = {"ok": True, "score": float(scores[0])}
                _respond(payload)
            except Exception as exc:
                # Inference errors are reported to the caller, not fatal.
                _respond({"ok": False, "error": str(exc)})
            continue

        _respond({"ok": False, "error": f"Unknown command: {cmd}"})
184
+
185
+
186
def _load_model(
    name: str, backend: str,
) -> tuple:
    """Load a cross-encoder and return ``(model, backend_name, model_name)``.

    When ``backend == "onnx"`` the loaders are tried in order, each failure
    falling through to the next:

    1. ONNX with the platform-specific file from ``_detect_onnx_variant()``
    2. ONNX without an explicit file name
    3. Plain ``CrossEncoder(name)`` (PyTorch)

    Any other backend goes straight to step 3. If sentence-transformers
    cannot be imported, or the last loader fails too, ``(None, "", "")`` is
    returned instead of raising.
    """
    nothing_loaded = (None, "", "")

    try:
        from sentence_transformers import CrossEncoder
    except Exception:
        return nothing_loaded

    def _quantized_onnx() -> tuple:
        variant = _detect_onnx_variant()
        encoder = CrossEncoder(
            name, backend="onnx",
            model_kwargs={"file_name": variant},
        )
        return encoder, f"onnx-quantized({variant})", name

    def _generic_onnx() -> tuple:
        return CrossEncoder(name, backend="onnx"), "onnx", name

    if backend == "onnx":
        for loader in (_quantized_onnx, _generic_onnx):
            try:
                return loader()
            except Exception:
                continue  # fall through to the next tier

    try:
        return CrossEncoder(name), "pytorch", name
    except Exception:
        return nothing_loaded
232
+
233
+
234
def _respond(data: dict) -> None:
    """Emit *data* as one JSON line on stdout and flush it right away."""
    out = sys.stdout
    out.write(f"{json.dumps(data)}\n")
    out.flush()
238
+
239
+
240
if __name__ == "__main__":
    try:
        _worker_main()
    except KeyboardInterrupt:
        # V3.3.13: Windows CI sends KeyboardInterrupt on test completion.
        # Leave with status 0 rather than a traceback that would fail CI.
        raise SystemExit(0)
@@ -170,13 +170,23 @@ def run_store(
170
170
  # V3.3.11: Also store raw content as a verbatim fact to preserve details
171
171
  # that fact extraction may abstract away (dates, names, specifics).
172
172
  # This ensures BM25 and semantic search can always find the original text.
173
+ # V3.3.12: Extract entities from verbatim content so entity channel + temporal
174
+ # channel can find it (was entities=[] which made 4/6 channels blind).
173
175
  if content.strip() and len(content.strip()) >= 20:
174
176
  import uuid
177
+ import re as _re
178
+ _verbatim_text = content.strip()
179
+ # Extract entities using the same regex as fact_extractor
180
+ _ent_re = _re.compile(r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3})\b")
181
+ _entity_set = {m.group(1) for m in _ent_re.finditer(_verbatim_text)}
182
+ # Also extract all-caps abbreviations (NYU, MIT, etc.) — dedup with first set
183
+ _entity_set |= {m.group(1) for m in _re.finditer(r'\b([A-Z]{2,})\b', _verbatim_text)}
184
+ _verbatim_entities = sorted(_entity_set)
175
185
  verbatim = AtomicFact(
176
186
  fact_id=uuid.uuid4().hex[:16],
177
- content=content.strip(),
187
+ content=_verbatim_text,
178
188
  fact_type=FactType.EPISODIC,
179
- entities=[],
189
+ entities=_verbatim_entities,
180
190
  session_id=session_id,
181
191
  observation_date=parsed_date,
182
192
  confidence=0.9,
@@ -84,7 +84,8 @@ _INTERVAL_RE = re.compile(
84
84
  )
85
85
 
86
86
  _ENTITY_RE = re.compile(
87
- r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3})\b" # Capitalized word sequences
87
+ r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3})\b" # Capitalized word sequences
88
+ r"|\b([A-Z]{2,})\b" # ALL-CAPS abbreviations (NYU, MIT)
88
89
  )
89
90
 
90
91
  _QUOTED_RE = re.compile(r'"([^"]+)"') # Quoted strings as entities
@@ -243,7 +244,7 @@ def _extract_entities(text: str) -> list[str]:
243
244
 
244
245
  # Capitalized word sequences (proper nouns)
245
246
  for match in _ENTITY_RE.finditer(text):
246
- candidate = match.group(1).strip()
247
+ candidate = (match.group(1) or match.group(2) or "").strip()
247
248
  # Filter common English words that start sentences
248
249
  # Check first word of multi-word candidates against stop list
249
250
  _first_word = candidate.split()[0].lower() if candidate else ""
@@ -495,10 +496,17 @@ class FactExtractor:
495
496
  ) -> list[AtomicFact]:
496
497
  """Rule-based extraction: regex entities, keyword classification, scoring."""
497
498
  combined = "\n".join(turns)
498
- sentences = _split_sentences(combined)
499
- if not sentences:
500
- # If no proper sentences, treat each turn as a sentence
501
- sentences = [t.strip() for t in turns if len(t.strip()) >= 8]
499
+ raw_sentences = _split_sentences(combined)
500
+ if not raw_sentences:
501
+ raw_sentences = [t.strip() for t in turns if len(t.strip()) >= 8]
502
+
503
+ # V3.3.12: Sliding window of 2 sentences to preserve cross-sentence context.
504
+ # "She enrolled at NYU. Starting January 2024." → becomes one combined fact.
505
+ sentences = list(raw_sentences) # Keep originals
506
+ for i in range(len(raw_sentences) - 1):
507
+ pair = raw_sentences[i].rstrip() + " " + raw_sentences[i + 1].lstrip()
508
+ if len(pair) <= 300: # Only combine if not too long
509
+ sentences.append(pair)
502
510
 
503
511
  # Build entity frequency map for importance scoring
504
512
  entity_freq: dict[str, int] = {}
@@ -549,8 +557,8 @@ class FactExtractor:
549
557
  if importance < self._config.min_fact_confidence:
550
558
  continue
551
559
 
552
- # Determine speaker from turn position heuristic
553
- speaker = self._infer_speaker(normalized, turns, speaker_a, speaker_b)
560
+ # V3.3.12: Speaker inference removed — its result was never stored in AtomicFact.
561
+ # The speaker info is preserved in verbatim facts via [Speaker]: prefix.
554
562
 
555
563
  facts.append(AtomicFact(
556
564
  fact_id=_new_id(),