npm - superlocalmemory - Versions diffs - 3.3.11 → 3.3.13 - Mend

superlocalmemory 3.3.11 → 3.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34)

package/package.json +1 -1
package/pyproject.toml +2 -3
package/src/superlocalmemory/core/config.py +9 -6
package/src/superlocalmemory/core/embedding_worker.py +5 -1
package/src/superlocalmemory/core/embeddings.py +3 -1
package/src/superlocalmemory/core/engine.py +14 -0
package/src/superlocalmemory/core/engine_wiring.py +16 -1
package/src/superlocalmemory/core/maintenance_scheduler.py +94 -0
package/src/superlocalmemory/core/recall_pipeline.py +24 -0
package/src/superlocalmemory/core/recall_worker.py +22 -4
package/src/superlocalmemory/core/reranker_worker.py +246 -0
package/src/superlocalmemory/core/store_pipeline.py +12 -2
package/src/superlocalmemory/encoding/fact_extractor.py +16 -8
package/src/superlocalmemory/encoding/graph_builder.py +21 -1
package/src/superlocalmemory/learning/adaptive.py +2 -2
package/src/superlocalmemory/math/fisher_quantized.py +8 -4
package/src/superlocalmemory/math/langevin.py +15 -2
package/src/superlocalmemory/mcp/resources.py +2 -2
package/src/superlocalmemory/mcp/shared.py +27 -0
package/src/superlocalmemory/mcp/tools_active.py +31 -1
package/src/superlocalmemory/mcp/tools_core.py +15 -9
package/src/superlocalmemory/mcp/tools_v28.py +2 -2
package/src/superlocalmemory/mcp/tools_v3.py +3 -0
package/src/superlocalmemory/mcp/tools_v33.py +68 -7
package/src/superlocalmemory/retrieval/agentic.py +1 -1
package/src/superlocalmemory/retrieval/bm25_channel.py +21 -1
package/src/superlocalmemory/retrieval/engine.py +44 -9
package/src/superlocalmemory/retrieval/entity_channel.py +6 -0
package/src/superlocalmemory/retrieval/fusion.py +2 -2
package/src/superlocalmemory/retrieval/hopfield_channel.py +2 -2
package/src/superlocalmemory/retrieval/reranker.py +24 -7
package/src/superlocalmemory/retrieval/semantic_channel.py +2 -2
package/src/superlocalmemory/retrieval/temporal_channel.py +14 -1
package/src/superlocalmemory/storage/schema.py +2 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superlocalmemory",
-  "version": "3.3.11",
+  "version": "3.3.13",
   "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
   "keywords": [
     "ai-memory",

package/pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "superlocalmemory"
-version = "3.3.11"
+version = "3.3.13"
 description = "Information-geometric agent memory with mathematical guarantees"
 readme = "README.md"
 license = {text = "MIT"}
@@ -48,8 +48,7 @@ dependencies = [
 [project.optional-dependencies]
 search = [
-    "sentence-transformers>=4.0.0",
-    "sentence-transformers[onnx]>=4.0.0",
+    "sentence-transformers[onnx]>=5.0.0",
     "einops>=0.8.2",
     "torch>=2.2.0",
     "scikit-learn>=1.3.0,<2.0.0",

package/src/superlocalmemory/core/config.py CHANGED Viewed

@@ -86,10 +86,10 @@ class LLMConfig:
 class ChannelWeights:
     """Retrieval channel weights — 5 channels, query-adaptive."""
-    # Entity-linked facts are high-precision matches that rank above BM25.
-    semantic: float = 1.2
+    # Semantic should dominate for conversational retrieval (paraphrase matters most).
+    semantic: float = 1.5
     bm25: float = 1.0
-    entity_graph: float = 1.3
+    entity_graph: float = 1.0
     temporal: float = 1.0
     spreading_activation: float = 1.0  # Phase 3: 5th channel (BC-08: default value)
     hopfield: float = 0.8  # Phase G: 6th channel (Hopfield associative memory)
@@ -143,7 +143,7 @@ class RetrievalConfig:
     """Configuration for the retrieval (recall) pipeline."""
     # Fusion
-    rrf_k: int = 60               # RRF smoothing constant (D116: k=60 for diversity)
+    rrf_k: int = 15               # RRF smoothing constant (k=15 for candidate pools of 50-200)
     top_k: int = 20               # Final results to return
     # Per-channel
@@ -154,7 +154,7 @@ class RetrievalConfig:
     # Reranking (V3.3.2: ONNX backend enabled for all modes)
     use_cross_encoder: bool = True
-    cross_encoder_model: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
+    cross_encoder_model: str = "cross-encoder/ms-marco-MiniLM-L-12-v2"
     cross_encoder_backend: str = "onnx"  # "onnx" (~200MB) or "" (PyTorch, ~1.5GB)
     # Agentic (Mode C only)
@@ -618,7 +618,7 @@ class SLMConfig:
             # but NEVER override an explicit use_cross_encoder setting.
             # The user's explicit choice always wins.
             if "cross_encoder_backend" not in rt:
-                rt.setdefault("cross_encoder_model", "cross-encoder/ms-marco-MiniLM-L-6-v2")
+                rt.setdefault("cross_encoder_model", "cross-encoder/ms-marco-MiniLM-L-12-v2")
                 rt["cross_encoder_backend"] = "onnx"
                 # Only auto-enable if user didn't explicitly set the field
                 rt.setdefault("use_cross_encoder", True)
@@ -740,6 +740,9 @@ class SLMConfig:
                 retrieval=RetrievalConfig(
                     # V3.3.2: ONNX cross-encoder enabled for all modes (~200MB)
                     use_cross_encoder=True,
+                    # Mode A is zero-LLM: disable agentic retrieval (it replaces
+                    # precision-tuned fusion with crude heuristic expansions)
+                    agentic_max_rounds=0,
                 ),
                 math=MathConfig(
                     sheaf_contradiction_threshold=0.45,  # 768d threshold

package/src/superlocalmemory/core/embedding_worker.py CHANGED Viewed

@@ -156,4 +156,8 @@ def _respond(data: dict) -> None:
 if __name__ == "__main__":
-    _worker_main()
+    try:
+        _worker_main()
+    except KeyboardInterrupt:
+        # V3.3.13: Windows CI sends KeyboardInterrupt on test completion.
+        sys.exit(0)

package/src/superlocalmemory/core/embeddings.py CHANGED Viewed

@@ -50,7 +50,9 @@ class DimensionMismatchError(RuntimeError):
 _IDLE_TIMEOUT_SECONDS = 120  # 2 minutes — kill worker after idle
-_SUBPROCESS_RESPONSE_TIMEOUT = 120  # V3.3.2: 120s for ONNX cold start
+# V3.3.12: Configurable via SLM_EMBED_IDLE_TIMEOUT env var (seconds)
+_IDLE_TIMEOUT_SECONDS = int(os.environ.get("SLM_EMBED_IDLE_TIMEOUT", _IDLE_TIMEOUT_SECONDS))
+_SUBPROCESS_RESPONSE_TIMEOUT = 180  # V3.3.12: 180s (was 120s) — respawns on stressed systems need more time
 _WORKER_RECYCLE_AFTER = 1000  # Recycle worker after N requests (C++ fragmentation prevention)

package/src/superlocalmemory/core/engine.py CHANGED Viewed

@@ -79,6 +79,7 @@ class MemoryEngine:
         self._auto_linker = None
         self._graph_analyzer = None
         self._consolidation_engine = None
+        self._maintenance_scheduler = None
         self._hooks = HookRegistry()
     # -- Public properties (Phase 2+ access) --------------------------------
@@ -194,6 +195,17 @@ class MemoryEngine:
         # V3.3: Check for embedding model migration on mode switch
         self._check_embedding_migration()
+        # V3.3.13: Background maintenance scheduler (Langevin/Ebbinghaus/Sheaf)
+        if self._config.forgetting.enabled:
+            try:
+                from superlocalmemory.core.maintenance_scheduler import MaintenanceScheduler
+                self._maintenance_scheduler = MaintenanceScheduler(
+                    self._db, self._config, self._profile_id,
+                )
+                self._maintenance_scheduler.start()
+            except Exception as exc:
+                logger.debug("Maintenance scheduler init failed: %s", exc)
         self._initialized = True
         logger.info(
             "MemoryEngine initialized: mode=%s profile=%s",
@@ -306,6 +318,8 @@ class MemoryEngine:
     # -- Lifecycle ----------------------------------------------------------
     def close(self) -> None:
+        if self._maintenance_scheduler is not None:
+            self._maintenance_scheduler.stop()
         self._initialized = False
     @property

package/src/superlocalmemory/core/engine_wiring.py CHANGED Viewed

@@ -339,7 +339,7 @@ def _init_spreading_activation(
             SpreadingActivation,
             SpreadingActivationConfig,
         )
-        sa_config = SpreadingActivationConfig(enabled=False)
+        sa_config = SpreadingActivationConfig(enabled=True)
         return SpreadingActivation(
             db=db, vector_store=vector_store, config=sa_config,
         )
@@ -454,6 +454,21 @@ def init_retrieval(
         trust_scorer=trust_scorer,
     )
+    # V3.3.13: Ensure reranker warmup is in progress.
+    # The CrossEncoderReranker constructor starts background warmup, but
+    # callers can also call warmup_sync() to block until ready.
+    # Here we just log warmup status — benchmark scripts call warmup_sync() explicitly.
+    if reranker is not None:
+        import threading
+        def _log_warmup_status() -> None:
+            ready = reranker.warmup_sync(timeout=180)
+            if ready:
+                logger.info("Cross-encoder reranker warm and ready")
+            else:
+                logger.warning("Cross-encoder reranker warmup failed — recalls will use fallback scoring")
+        t = threading.Thread(target=_log_warmup_status, daemon=True, name="ce-init-warmup")
+        t.start()
     # Phase A: Register forgetting filter into the channel registry
     try:
         from superlocalmemory.retrieval.forgetting_filter import register_forgetting_filter

package/src/superlocalmemory/core/maintenance_scheduler.py ADDED Viewed

@@ -0,0 +1,94 @@
+# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
+# Licensed under the MIT License - see LICENSE file
+# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
+"""SuperLocalMemory V3 — Background Maintenance Scheduler.
+V3.3.13: Periodically triggers Langevin/Ebbinghaus/Sheaf maintenance
+so users don't need to call run_maintenance manually.
+Configurable interval via ForgettingConfig.scheduler_interval_minutes.
+Defaults to 30 min. Disabled during benchmarks (no config.forgetting.enabled).
+Part of Qualixar | Author: Varun Pratap Bhardwaj
+License: MIT
+"""
+from __future__ import annotations
+import logging
+import threading
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from superlocalmemory.core.config import SLMConfig
+    from superlocalmemory.storage.database import DatabaseManager
+logger = logging.getLogger(__name__)
+class MaintenanceScheduler:
+    """Background scheduler for periodic math maintenance.
+    Runs Langevin/Sheaf/Fisher maintenance at configurable intervals.
+    Thread-safe. Auto-stops on garbage collection or explicit stop().
+    """
+    def __init__(
+        self,
+        db: DatabaseManager,
+        config: SLMConfig,
+        profile_id: str = "default",
+    ) -> None:
+        self._db = db
+        self._config = config
+        self._profile_id = profile_id
+        self._timer: threading.Timer | None = None
+        self._running = False
+        self._interval = config.forgetting.scheduler_interval_minutes * 60.0
+    def start(self) -> None:
+        """Start the periodic scheduler. Idempotent."""
+        if self._running:
+            return
+        self._running = True
+        self._schedule_next()
+        logger.info(
+            "Maintenance scheduler started (interval=%dm)",
+            self._config.forgetting.scheduler_interval_minutes,
+        )
+    def stop(self) -> None:
+        """Stop the scheduler. Idempotent."""
+        self._running = False
+        if self._timer is not None:
+            self._timer.cancel()
+            self._timer = None
+        logger.info("Maintenance scheduler stopped")
+    def _schedule_next(self) -> None:
+        """Schedule the next maintenance run."""
+        if not self._running:
+            return
+        self._timer = threading.Timer(self._interval, self._run)
+        self._timer.daemon = True
+        self._timer.start()
+    def _run(self) -> None:
+        """Execute maintenance and schedule next run."""
+        if not self._running:
+            return
+        try:
+            from superlocalmemory.core.maintenance import run_maintenance
+            counts = run_maintenance(self._db, self._config, self._profile_id)
+            logger.info("Scheduled maintenance complete: %s", counts)
+        except Exception as exc:
+            logger.warning("Scheduled maintenance failed: %s", exc)
+        finally:
+            self._schedule_next()
+    def __del__(self) -> None:
+        try:
+            self.stop()
+        except Exception:
+            pass

package/src/superlocalmemory/core/recall_pipeline.py CHANGED Viewed

@@ -192,6 +192,30 @@ def run_recall(
         except Exception as exc:
             logger.debug("Access log batch store failed: %s", exc)
+    # V3.3.12: Wire BehavioralTracker.record_query() into live recall pipeline
+    try:
+        from superlocalmemory.learning.behavioral import BehavioralTracker
+        _tracker = BehavioralTracker(db)
+        _tracker.record_query(
+            profile_id=profile_id, query=query,
+            query_type=response.query_type,
+            result_count=len(response.results),
+        )
+    except Exception as exc:
+        logger.debug("Behavioral tracking: %s", exc)
+    # V3.3.12: Spaced repetition update on recall (Ebbinghaus on_access_event)
+    if response.results:
+        try:
+            from superlocalmemory.learning.forgetting_scheduler import ForgettingScheduler
+            from superlocalmemory.math.ebbinghaus import EbbinghausCurve
+            _ebbinghaus = EbbinghausCurve(config.forgetting)
+            _fsched = ForgettingScheduler(db, _ebbinghaus, config.forgetting)
+            for r in response.results[:10]:
+                _fsched.on_access_event(r.fact.fact_id, profile_id)
+        except Exception as exc:
+            logger.debug("Spaced repetition update: %s", exc)
     # Phase 3: Hebbian strengthening for co-accessed facts
     if auto_linker and response.results:
         try:

package/src/superlocalmemory/core/recall_worker.py CHANGED Viewed

@@ -187,10 +187,28 @@ def _handle_update_memory(fact_id: str, content: str, agent_id: str = "system")
     if not rows:
         return {"ok": False, "error": f"Memory {fact_id} not found"}
     old_content = dict(rows[0]).get("content", "")[:80]
-    engine._db.execute(
-        "UPDATE atomic_facts SET content = ? WHERE fact_id = ?",
-        (content, fact_id),
-    )
+    # V3.3.12: Re-embed updated content so semantic search + BM25 stay consistent.
+    # Previously only the text column was updated, leaving stale embeddings.
+    updates: dict = {"content": content}
+    if engine._embedder:
+        try:
+            new_emb = engine._embedder.embed(content)
+            if new_emb:
+                updates["embedding"] = new_emb
+                fm, fv = engine._embedder.compute_fisher_params(new_emb)
+                updates["fisher_mean"] = fm
+                updates["fisher_variance"] = fv
+        except Exception:
+            pass
+    engine._db.update_fact(fact_id, updates)
+    # Update BM25 index for the new content
+    if hasattr(engine, '_retrieval_engine') and engine._retrieval_engine:
+        bm25 = getattr(engine._retrieval_engine, '_bm25', None)
+        if bm25:
+            try:
+                bm25.add(fact_id, content, pid)
+            except Exception:
+                pass
     import logging as _logging
     _logging.getLogger("superlocalmemory.audit").info(
         "UPDATE fact_id=%s by agent=%s old=%s new=%s",

package/src/superlocalmemory/core/reranker_worker.py ADDED Viewed

@@ -0,0 +1,246 @@
+# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
+# Licensed under the MIT License - see LICENSE file
+# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
+"""Subprocess reranker worker — isolates PyTorch/ONNX from main process.
+Same pattern as embedding_worker.py. The main process stays at ~60 MB.
+All cross-encoder model memory lives in this worker subprocess.
+Protocol (JSON over stdin/stdout):
+  Request:  {"cmd": "rerank", "query": "...", "documents": ["...", ...]}
+  Response: {"ok": true, "scores": [0.95, 0.32, ...]}
+  Request:  {"cmd": "score", "query": "...", "document": "..."}
+  Response: {"ok": true, "score": 0.87}
+  Request:  {"cmd": "ping"}
+  Response: {"ok": true, "backend": "onnx", "model": "..."}
+  Request:  {"cmd": "quit"}
+  (worker exits)
+Part of Qualixar | Author: Varun Pratap Bhardwaj
+"""
+from __future__ import annotations
+import json
+import os
+import platform
+import signal
+import struct
+import sys
+import threading
+# Force CPU BEFORE any torch import
+os.environ["CUDA_VISIBLE_DEVICES"] = ""
+os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"
+os.environ["PYTORCH_MPS_MEM_LIMIT"] = "0"
+os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+os.environ["TORCH_DEVICE"] = "cpu"
+# SIGTERM bridge for Docker/systemd
+if sys.platform != "win32":
+    signal.signal(signal.SIGTERM, lambda *_: sys.exit(0))
+def _start_parent_watchdog() -> None:
+    """Monitor parent process — self-terminate if parent dies.
+    Prevents orphaned workers that consume 1+ GB each when the parent
+    process crashes, is killed, or exits without cleanup.
+    V3.3.7: Added after incident where ~30 orphaned workers consumed 33 GB.
+    """
+    parent_pid = os.getppid()
+    def _watch() -> None:
+        import time
+        while True:
+            time.sleep(5)
+            try:
+                os.kill(parent_pid, 0)  # Check if parent is alive (signal 0)
+            except OSError:
+                # Parent is dead — self-terminate
+                os._exit(0)
+    t = threading.Thread(target=_watch, daemon=True, name="parent-watchdog")
+    t.start()
+def _detect_onnx_variant() -> str:
+    """Auto-detect the best ONNX model variant for the current platform."""
+    arch = platform.machine().lower()
+    is_64bit = struct.calcsize("P") * 8 == 64
+    if sys.platform == "darwin" and arch in ("arm64", "aarch64"):
+        return "onnx/model_qint8_arm64.onnx"
+    if arch in ("x86_64", "amd64") and is_64bit:
+        return "onnx/model_quint8_avx2.onnx"
+    return "onnx/model.onnx"
+def _worker_main() -> None:
+    """Main loop: read JSON requests from stdin, write responses to stdout."""
+    _start_parent_watchdog()  # V3.3.7: self-terminate if parent dies
+    model = None
+    active_backend = ""
+    model_name = ""
+    for line in sys.stdin:
+        line = line.strip()
+        if not line:
+            continue
+        try:
+            req = json.loads(line)
+        except json.JSONDecodeError:
+            _respond({"ok": False, "error": "Invalid JSON"})
+            continue
+        cmd = req.get("cmd", "")
+        if cmd == "quit":
+            break
+        if cmd == "ping":
+            _respond({
+                "ok": True,
+                "loaded": model is not None,
+                "backend": active_backend,
+                "model": model_name,
+            })
+            continue
+        if cmd == "load":
+            name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-12-v2")
+            backend = req.get("backend", "onnx")
+            model, active_backend, model_name = _load_model(name, backend)
+            _respond({
+                "ok": model is not None,
+                "backend": active_backend,
+                "model": model_name,
+            })
+            continue
+        if cmd == "rerank":
+            query = req.get("query", "")
+            documents = req.get("documents", [])
+            if not query or not documents:
+                _respond({"ok": False, "error": "Missing query or documents"})
+                continue
+            if model is None:
+                # Auto-load with defaults
+                name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-12-v2")
+                backend = req.get("backend", "onnx")
+                model, active_backend, model_name = _load_model(name, backend)
+            if model is None:
+                _respond({"ok": False, "error": "Model load failed"})
+                continue
+            try:
+                pairs = [(query, doc) for doc in documents]
+                try:
+                    import torch
+                    with torch.inference_mode():
+                        scores = model.predict(pairs)
+                except ImportError:
+                    scores = model.predict(pairs)
+                _respond({
+                    "ok": True,
+                    "scores": [float(s) for s in scores],
+                })
+            except Exception as exc:
+                _respond({"ok": False, "error": str(exc)})
+            continue
+        if cmd == "score":
+            query = req.get("query", "")
+            document = req.get("document", "")
+            if not query or not document:
+                _respond({"ok": False, "error": "Missing query or document"})
+                continue
+            if model is None:
+                name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-12-v2")
+                backend = req.get("backend", "onnx")
+                model, active_backend, model_name = _load_model(name, backend)
+            if model is None:
+                _respond({"ok": False, "error": "Model load failed"})
+                continue
+            try:
+                try:
+                    import torch
+                    with torch.inference_mode():
+                        scores = model.predict([(query, document)])
+                except ImportError:
+                    scores = model.predict([(query, document)])
+                _respond({"ok": True, "score": float(scores[0])})
+            except Exception as exc:
+                _respond({"ok": False, "error": str(exc)})
+            continue
+        _respond({"ok": False, "error": f"Unknown command: {cmd}"})
+def _load_model(
+    name: str, backend: str,
+) -> tuple:
+    """Load cross-encoder model. Returns (model, backend_name, model_name).
+    V3.3.13: sentence-transformers 5.x+ supports backend='onnx' for
+    CrossEncoder. We use a 3-tier fallback chain:
+      1. ONNX + platform-quantized model (fastest, ~200MB, 2.4ms/pair)
+      2. ONNX + generic model (fast, auto-exported on first use)
+      3. PyTorch (always works, ~500MB, 6ms/pair)
+    Cross-platform:
+      Mac ARM64 → model_qint8_arm64.onnx
+      x86_64    → model_quint8_avx2.onnx
+      Fallback  → model.onnx (generic)
+    """
+    try:
+        from sentence_transformers import CrossEncoder
+        if backend == "onnx":
+            # Tier 1: Platform-specific quantized ONNX (fastest)
+            try:
+                onnx_file = _detect_onnx_variant()
+                m = CrossEncoder(
+                    name, backend="onnx",
+                    model_kwargs={"file_name": onnx_file},
+                )
+                return m, f"onnx-quantized({onnx_file})", name
+            except Exception:
+                pass
+            # Tier 2: Generic ONNX (auto-exported by optimum)
+            try:
+                m = CrossEncoder(name, backend="onnx")
+                return m, "onnx", name
+            except Exception:
+                pass
+        # Tier 3: PyTorch (always works, no ONNX dependency needed)
+        m = CrossEncoder(name)
+        return m, "pytorch", name
+    except ImportError:
+        return None, "", ""
+    except Exception:
+        return None, "", ""
+def _respond(data: dict) -> None:
+    """Write JSON response to stdout, flush immediately."""
+    sys.stdout.write(json.dumps(data) + "\n")
+    sys.stdout.flush()
+if __name__ == "__main__":
+    try:
+        _worker_main()
+    except KeyboardInterrupt:
+        # V3.3.13: Windows CI sends KeyboardInterrupt on test completion.
+        # Exit cleanly instead of printing a traceback that fails CI.
+        sys.exit(0)

package/src/superlocalmemory/core/store_pipeline.py CHANGED Viewed

@@ -170,13 +170,23 @@ def run_store(
     # V3.3.11: Also store raw content as a verbatim fact to preserve details
     # that fact extraction may abstract away (dates, names, specifics).
     # This ensures BM25 and semantic search can always find the original text.
+    # V3.3.12: Extract entities from verbatim content so entity channel + temporal
+    # channel can find it (was entities=[] which made 4/6 channels blind).
     if content.strip() and len(content.strip()) >= 20:
         import uuid
+        import re as _re
+        _verbatim_text = content.strip()
+        # Extract entities using the same regex as fact_extractor
+        _ent_re = _re.compile(r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3})\b")
+        _entity_set = {m.group(1) for m in _ent_re.finditer(_verbatim_text)}
+        # Also extract all-caps abbreviations (NYU, MIT, etc.) — dedup with first set
+        _entity_set |= {m.group(1) for m in _re.finditer(r'\b([A-Z]{2,})\b', _verbatim_text)}
+        _verbatim_entities = sorted(_entity_set)
         verbatim = AtomicFact(
             fact_id=uuid.uuid4().hex[:16],
-            content=content.strip(),
+            content=_verbatim_text,
             fact_type=FactType.EPISODIC,
-            entities=[],
+            entities=_verbatim_entities,
             session_id=session_id,
             observation_date=parsed_date,
             confidence=0.9,

package/src/superlocalmemory/encoding/fact_extractor.py CHANGED Viewed

@@ -84,7 +84,8 @@ _INTERVAL_RE = re.compile(
 )
 _ENTITY_RE = re.compile(
-    r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3})\b"  # Capitalized word sequences
+    r"\b([A-Z][a-z]+(?:\s[A-Z][a-z]+){0,3})\b"    # Capitalized word sequences
+    r"|\b([A-Z]{2,})\b"                              # ALL-CAPS abbreviations (NYU, MIT)
 )
 _QUOTED_RE = re.compile(r'"([^"]+)"')  # Quoted strings as entities
@@ -243,7 +244,7 @@ def _extract_entities(text: str) -> list[str]:
     # Capitalized word sequences (proper nouns)
     for match in _ENTITY_RE.finditer(text):
-        candidate = match.group(1).strip()
+        candidate = (match.group(1) or match.group(2) or "").strip()
         # Filter common English words that start sentences
         # Check first word of multi-word candidates against stop list
         _first_word = candidate.split()[0].lower() if candidate else ""
@@ -495,10 +496,17 @@ class FactExtractor:
     ) -> list[AtomicFact]:
         """Rule-based extraction: regex entities, keyword classification, scoring."""
         combined = "\n".join(turns)
-        sentences = _split_sentences(combined)
-        if not sentences:
-            # If no proper sentences, treat each turn as a sentence
-            sentences = [t.strip() for t in turns if len(t.strip()) >= 8]
+        raw_sentences = _split_sentences(combined)
+        if not raw_sentences:
+            raw_sentences = [t.strip() for t in turns if len(t.strip()) >= 8]
+        # V3.3.12: Sliding window of 2 sentences to preserve cross-sentence context.
+        # "She enrolled at NYU. Starting January 2024." → becomes one combined fact.
+        sentences = list(raw_sentences)  # Keep originals
+        for i in range(len(raw_sentences) - 1):
+            pair = raw_sentences[i].rstrip() + " " + raw_sentences[i + 1].lstrip()
+            if len(pair) <= 300:  # Only combine if not too long
+                sentences.append(pair)
         # Build entity frequency map for importance scoring
         entity_freq: dict[str, int] = {}
@@ -549,8 +557,8 @@ class FactExtractor:
             if importance < self._config.min_fact_confidence:
                 continue
-            # Determine speaker from turn position heuristic
-            speaker = self._infer_speaker(normalized, turns, speaker_a, speaker_b)
+            # V3.3.12: Speaker inference removed — result was never stored in AtomicFact.
+            # The speaker info is preserved in verbatim facts via [Speaker]: prefix.
             facts.append(AtomicFact(
                 fact_id=_new_id(),