npm - superlocalmemory - Versions diffs - 3.3.27 → 3.3.29 - Mend

superlocalmemory 3.3.27 → 3.3.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/ATTRIBUTION.md +1 -1
package/CHANGELOG.md +15 -0
package/README.md +5 -5
package/package.json +1 -1
package/pyproject.toml +1 -1
package/src/superlocalmemory/cli/commands.py +53 -16
package/src/superlocalmemory/cli/daemon.py +91 -0
package/src/superlocalmemory/core/embeddings.py +125 -0
package/src/superlocalmemory/mcp/tools_v33.py +15 -11

package/ATTRIBUTION.md CHANGED Viewed

@@ -46,7 +46,7 @@ SuperLocalMemory is backed by three peer-reviewed research papers:
 2. **Paper 2 — Information-Geometric Foundations** (arXiv:2603.14588)
    Fisher-Rao geodesic distance, cellular sheaf cohomology, Riemannian Langevin lifecycle dynamics.
-3. **Paper 3 — The Living Brain** (Zenodo: 10.5281/zenodo.19435120)
+3. **Paper 3 — The Living Brain** (arXiv:2604.04514)
    FRQAD mixed-precision metric, Ebbinghaus adaptive forgetting, 7-channel cognitive retrieval, memory parameterization, trust-weighted forgetting.
 ### Research Initiative

package/CHANGELOG.md CHANGED Viewed

@@ -16,6 +16,21 @@ SuperLocalMemory V3 - Intelligent local memory system for AI coding assistants.
 ---
+## [3.3.28] - 2026-04-07 — Stability Hotfix
+### Fixed
+- **Excessive memory usage during rapid file edits** — auto-observe now reuses a single background process instead of spawning one per edit. Rapid multi-file operations (parallel agents, branch switching, batch edits) no longer risk high memory usage.
+- **Observation debounce** — rapid-fire observations are batched and deduplicated within a short window, reducing redundant work.
+- **Memory-aware worker management** — new safety check skips heavy processing when system memory is low.
+### New Environment Variables
+| Variable | Default | Description |
+|----------|---------|-------------|
+| `SLM_OBSERVE_DEBOUNCE_SEC` | `3.0` | Observation batching window |
+| `SLM_MIN_AVAILABLE_MEMORY_GB` | `2.0` | Min free RAM for background processing |
+---
 ## [3.3.3] - 2026-04-01 — Langevin Awakening
 ### Fixed

package/README.md CHANGED Viewed

@@ -5,7 +5,7 @@
 <h1 align="center">SuperLocalMemory V3.3</h1>
 <p align="center"><strong>Every other AI forgets. Yours won't.</strong><br/><em>Infinite memory for Claude Code, Cursor, Windsurf & 17+ AI tools.</em></p>
 <p align="center"><code>v3.3.26</code> — Install once. Every session remembers the last. Automatically.</p>
-<p align="center"><strong>Backed by 3 peer-reviewed research papers</strong> · <a href="#research-papers">arXiv:2603.02240</a> · <a href="#research-papers">arXiv:2603.14588</a> · <a href="#research-papers">Paper 3 (submitted)</a></p>
+<p align="center"><strong>Backed by 3 peer-reviewed research papers</strong> · <a href="https://arxiv.org/abs/2603.02240">arXiv:2603.02240</a> · <a href="https://arxiv.org/abs/2603.14588">arXiv:2603.14588</a> · <a href="https://arxiv.org/abs/2604.04514">arXiv:2604.04514</a></p>
 <p align="center">
   <code>+16pp vs Mem0 (zero cloud)</code> &nbsp;·&nbsp; <code>85% Open-Domain (best of any system)</code> &nbsp;·&nbsp; <code>EU AI Act Ready</code>
@@ -441,7 +441,7 @@ SuperLocalMemory is backed by three peer-reviewed research papers covering trust
 ### Paper 3: The Living Brain (V3.3)
 > **SuperLocalMemory V3.3: The Living Brain — Biologically-Inspired Forgetting, Cognitive Quantization, and Multi-Channel Retrieval for Zero-LLM Agent Memory Systems**
 > Varun Pratap Bhardwaj (2026)
-> [Zenodo DOI: 10.5281/zenodo.19435120](https://zenodo.org/records/19435120) · arXiv ID pending
+> [arXiv:2604.04514](https://arxiv.org/abs/2604.04514) · [Zenodo DOI: 10.5281/zenodo.19435120](https://zenodo.org/records/19435120)
 ### Paper 2: Information-Geometric Foundations (V3)
 > **SuperLocalMemory V3: Information-Geometric Foundations for Zero-LLM Enterprise Agent Memory**
@@ -461,9 +461,9 @@ SuperLocalMemory is backed by three peer-reviewed research papers covering trust
          Forgetting, Cognitive Quantization, and Multi-Channel Retrieval
          for Zero-LLM Agent Memory Systems},
   author={Bhardwaj, Varun Pratap},
-  journal={Zenodo},
-  doi={10.5281/zenodo.19435120},
-  year={2026}
+  journal={arXiv preprint arXiv:2604.04514},
+  year={2026},
+  url={https://arxiv.org/abs/2604.04514}
 }
 @article{bhardwaj2026slmv3,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superlocalmemory",
-  "version": "3.3.27",
+  "version": "3.3.29",
   "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
   "keywords": [
     "ai-memory",

package/pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "superlocalmemory"
-version = "3.3.27"
+version = "3.3.29"
 description = "Information-geometric agent memory with mathematical guarantees"
 readme = "README.md"
 license = {text = "Elastic-2.0"}

package/src/superlocalmemory/cli/commands.py CHANGED Viewed

@@ -1554,11 +1554,14 @@ def cmd_session_context(args: Namespace) -> None:
 def cmd_observe(args: Namespace) -> None:
-    """Evaluate and auto-capture content from stdin or argument."""
+    """Evaluate and auto-capture content from stdin or argument.
+    V3.3.28: Routes through daemon to prevent embedding worker memory blast.
+    Previously each `slm observe` spawned its own MemoryEngine + embedding
+    worker (~1.4 GB each). With 20 parallel edits = 28+ GB = system crash.
+    Now uses the daemon's singleton engine (1 worker total).
+    """
     import sys
-    from superlocalmemory.hooks.auto_capture import AutoCapture
-    from superlocalmemory.core.config import SLMConfig
-    from superlocalmemory.core.engine import MemoryEngine
     content = getattr(args, "content", "") or ""
     if not content and not sys.stdin.isatty():
@@ -1568,22 +1571,56 @@ def cmd_observe(args: Namespace) -> None:
         print("No content to observe.")
         return
+    # V3.3.28: Route through daemon (singleton engine, single embedding worker).
+    # This is the P0 fix for the memory blast incident of April 7, 2026.
     try:
-        config = SLMConfig.load()
-        engine = MemoryEngine(config)
-        engine.initialize()
+        from superlocalmemory.cli.daemon import is_daemon_running, daemon_request, ensure_daemon
+        if is_daemon_running() or ensure_daemon():
+            result = daemon_request("POST", "/observe", {"content": content})
+            if result is not None:
+                if result.get("captured"):
+                    cat = result.get("category", "unknown")
+                    conf = result.get("confidence", 0)
+                    print(f"Auto-captured: {cat} (confidence: {conf:.2f}) (via daemon)")
+                else:
+                    reason = result.get("reason", "no patterns matched")
+                    print(f"Not captured: {reason}")
+                return
+    except Exception:
+        pass  # Fall through to direct engine
-        auto = AutoCapture(engine=engine)
-        decision = auto.evaluate(content)
+    # Fallback: direct engine (only if daemon unavailable).
+    # Acquires a system-wide file lock to prevent concurrent worker spawns.
+    try:
+        from superlocalmemory.hooks.auto_capture import AutoCapture
+        from superlocalmemory.core.config import SLMConfig
+        from superlocalmemory.core.engine import MemoryEngine
+        from superlocalmemory.core.embeddings import acquire_embedding_lock
+        if not acquire_embedding_lock():
+            logger.debug("observe: another embedding worker active, skipping")
+            print("Not captured: system busy (another embedding in progress)")
+            return
+        try:
+            config = SLMConfig.load()
+            engine = MemoryEngine(config)
+            engine.initialize()
-        if decision.capture:
-            stored = auto.capture(content, category=decision.category)
-            if stored:
-                print(f"Auto-captured: {decision.category} (confidence: {decision.confidence:.2f})")
+            auto = AutoCapture(engine=engine)
+            decision = auto.evaluate(content)
+            if decision.capture:
+                stored = auto.capture(content, category=decision.category)
+                if stored:
+                    print(f"Auto-captured: {decision.category} (confidence: {decision.confidence:.2f})")
+                else:
+                    print(f"Detected {decision.category} but store failed.")
             else:
-                print(f"Detected {decision.category} but store failed.")
-        else:
-            print(f"Not captured: {decision.reason}")
+                print(f"Not captured: {decision.reason}")
+        finally:
+            from superlocalmemory.core.embeddings import release_embedding_lock
+            release_embedding_lock()
     except Exception as exc:
         logger.debug("observe failed: %s", exc)

package/src/superlocalmemory/cli/daemon.py CHANGED Viewed

@@ -37,6 +37,7 @@ import sys
 import time
 from http.server import HTTPServer, BaseHTTPRequestHandler
 from pathlib import Path
+import threading
 from threading import Thread
 logger = logging.getLogger(__name__)
@@ -153,6 +154,73 @@ def stop_daemon() -> bool:
 _engine = None
 _last_activity = time.monotonic()
+# ---------------------------------------------------------------------------
+# V3.3.28: Observation debounce buffer.
+#
+# When 20+ file edits arrive in quick succession (from parallel AI agents,
+# git checkout, or batch sed), we buffer observations for _OBSERVE_DEBOUNCE_SEC
+# seconds and deduplicate by content hash. This reduces 20 observations → 1-3
+# batches, each processed by the singleton engine (1 embedding worker).
+# ---------------------------------------------------------------------------
+_OBSERVE_DEBOUNCE_SEC = float(os.environ.get("SLM_OBSERVE_DEBOUNCE_SEC", "3.0"))
+_observe_buffer: list[str] = []
+_observe_seen: set[str] = set()  # content hashes for dedup within window
+_observe_lock = threading.Lock()
+_observe_timer: threading.Timer | None = None
+def _flush_observe_buffer() -> None:
+    """Process all buffered observations as a single batch."""
+    global _observe_timer
+    with _observe_lock:
+        if not _observe_buffer:
+            return
+        batch = list(_observe_buffer)
+        _observe_buffer.clear()
+        _observe_seen.clear()
+        _observe_timer = None
+    # Process each unique observation (already deduped)
+    engine = _get_engine()
+    from superlocalmemory.hooks.auto_capture import AutoCapture
+    auto = AutoCapture(engine=engine)
+    for content in batch:
+        try:
+            decision = auto.evaluate(content)
+            if decision.capture:
+                auto.capture(content, category=decision.category)
+        except Exception:
+            pass  # Don't let one bad observation kill the batch
+    logger.info("Observe debounce: processed %d observations (from buffer)", len(batch))
+def _enqueue_observation(content: str) -> dict:
+    """Add an observation to the debounce buffer. Returns immediate response."""
+    global _observe_timer
+    import hashlib
+    content_hash = hashlib.md5(content.encode()).hexdigest()
+    with _observe_lock:
+        if content_hash in _observe_seen:
+            return {"captured": False, "reason": "duplicate within debounce window"}
+        _observe_seen.add(content_hash)
+        _observe_buffer.append(content)
+        buf_size = len(_observe_buffer)
+        # Reset debounce timer
+        if _observe_timer is not None:
+            _observe_timer.cancel()
+        _observe_timer = threading.Timer(_OBSERVE_DEBOUNCE_SEC, _flush_observe_buffer)
+        _observe_timer.daemon = True
+        _observe_timer.start()
+    return {"captured": True, "queued": True, "buffer_size": buf_size,
+            "debounce_sec": _OBSERVE_DEBOUNCE_SEC}
 def _get_engine():
     global _engine
@@ -276,6 +344,24 @@ class DaemonHandler(BaseHTTPRequestHandler):
                 self._send_json(500, {"error": str(exc)})
             return
+        if self.path == "/observe":
+            try:
+                body = self._read_body()
+                content = body.get("content", "")
+                if not content:
+                    self._send_json(400, {"error": "content required"})
+                    return
+                # V3.3.28: Debounced observation processing.
+                # Buffers observations for the debounce window (default 3s,
+                # set by SLM_OBSERVE_DEBOUNCE_SEC), deduplicates, processes as batch.
+                # Returns immediately — the actual capture happens asynchronously
+                # via the debounce timer, using the singleton engine.
+                result = _enqueue_observation(content)
+                self._send_json(200, result)
+            except Exception as exc:
+                self._send_json(500, {"error": str(exc)})
+            return
         if self.path == "/stop":
             self._send_json(200, {"status": "stopping"})
             Thread(target=_shutdown_server, daemon=True).start()
@@ -294,6 +380,11 @@ _server_start_time = time.monotonic()
 def _shutdown_server() -> None:
     global _engine, _server
+    # V3.3.28: Flush any buffered observations before shutdown
+    try:
+        _flush_observe_buffer()
+    except Exception:
+        pass
     time.sleep(0.5)
     if _engine is not None:
         try:

package/src/superlocalmemory/core/embeddings.py CHANGED Viewed

@@ -49,6 +49,66 @@ class DimensionMismatchError(RuntimeError):
     """Raised when the actual embedding dimension differs from config."""
+# ---------------------------------------------------------------------------
+# V3.3.28: System-wide concurrency guard for embedding workers.
+#
+# The memory blast incident (April 7, 2026) was caused by 20+ concurrent
+# `slm observe` CLI processes each spawning their own embedding_worker
+# subprocess (1.4 GB each). This exclusive file lock (fcntl.flock, Unix only)
+# allows a single holder at a time across processes that call
+# acquire_embedding_lock(); the lock functions below do not consult
+# _MAX_CONCURRENT_WORKERS.
+#
+# Primary defense: daemon routing (cmd_observe → daemon → singleton engine).
+# This lock is the secondary safety net for when the daemon isn't available.
+# ---------------------------------------------------------------------------
+_EMBEDDING_LOCK_FILE = Path.home() / ".superlocalmemory" / ".embedding.lock"
+_MAX_CONCURRENT_WORKERS = int(os.environ.get("SLM_MAX_EMBEDDING_WORKERS", 2))
+_embedding_lock_fd: int | None = None
+def acquire_embedding_lock(timeout: float = 5.0) -> bool:
+    """Acquire system-wide embedding worker lock.
+    Uses fcntl.flock on Unix. On Windows no lock is taken and the call always succeeds.
+    Returns True if the lock was acquired, or if locking is unavailable or fails
+    unexpectedly; returns False if the lock could not be obtained before the timeout.
+    """
+    global _embedding_lock_fd
+    if sys.platform == "win32":
+        return True  # No file locking on Windows — daemon routing is primary defense
+    import fcntl
+    _EMBEDDING_LOCK_FILE.parent.mkdir(parents=True, exist_ok=True)
+    try:
+        _embedding_lock_fd = os.open(str(_EMBEDDING_LOCK_FILE), os.O_CREAT | os.O_RDWR)
+        deadline = time.time() + timeout
+        while time.time() < deadline:
+            try:
+                fcntl.flock(_embedding_lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+                return True
+            except (BlockingIOError, OSError):
+                time.sleep(0.2)
+        # Timeout — another worker holds the lock
+        os.close(_embedding_lock_fd)
+        _embedding_lock_fd = None
+        return False
+    except Exception:
+        return True  # On error, allow through (don't block functionality)
+def release_embedding_lock() -> None:
+    """Release system-wide embedding worker lock."""
+    global _embedding_lock_fd
+    if _embedding_lock_fd is not None:
+        try:
+            import fcntl
+            fcntl.flock(_embedding_lock_fd, fcntl.LOCK_UN)
+            os.close(_embedding_lock_fd)
+        except Exception:
+            pass
+        _embedding_lock_fd = None
 _IDLE_TIMEOUT_SECONDS = 120  # 2 minutes — kill worker after idle
 # V3.3.12: Configurable via SLM_EMBED_IDLE_TIMEOUT env var (seconds)
 _IDLE_TIMEOUT_SECONDS = int(os.environ.get("SLM_EMBED_IDLE_TIMEOUT", _IDLE_TIMEOUT_SECONDS))
@@ -270,11 +330,76 @@ class EmbeddingService:
             raise error_container[0]
         return result_container[0] if result_container else ""
+    @staticmethod
+    def _check_memory_pressure() -> bool:
+        """Check if system has enough memory to spawn a worker.
+        V3.3.28: Prevents spawning embedding workers (1.4 GB each) when
+        the system is already under memory pressure. Returns True if safe.
+        """
+        min_available_gb = float(os.environ.get("SLM_MIN_AVAILABLE_MEMORY_GB", "2.0"))
+        try:
+            if sys.platform == "darwin":
+                # macOS: use vm_stat to get free + inactive pages
+                import subprocess as _sp
+                result = _sp.run(["vm_stat"], capture_output=True, text=True, timeout=5)
+                if result.returncode == 0:
+                    lines = result.stdout.split("\n")
+                    page_size = 16384  # default on Apple Silicon
+                    free_pages = 0
+                    for line in lines:
+                        if "page size of" in line:
+                            try:
+                                page_size = int(line.split()[-2])
+                            except (ValueError, IndexError):
+                                pass
+                        if "Pages free" in line or "Pages inactive" in line:
+                            try:
+                                free_pages += int(line.split()[-1].rstrip("."))
+                            except (ValueError, IndexError):
+                                pass
+                    available_gb = (free_pages * page_size) / (1024 ** 3)
+                    if available_gb < min_available_gb:
+                        logger.warning(
+                            "Low memory (%.1f GB available, need %.1f GB) — "
+                            "deferring embedding worker spawn",
+                            available_gb, min_available_gb,
+                        )
+                        return False
+            else:
+                # Linux/other: read MemAvailable from /proc/meminfo
+                try:
+                    with open("/proc/meminfo") as f:
+                        for line in f:
+                            if line.startswith("MemAvailable:"):
+                                available_kb = int(line.split()[1])
+                                available_gb = available_kb / (1024 * 1024)
+                                if available_gb < min_available_gb:
+                                    logger.warning(
+                                        "Low memory (%.1f GB available) — "
+                                        "deferring embedding worker spawn",
+                                        available_gb,
+                                    )
+                                    return False
+                                break
+                except FileNotFoundError:
+                    pass  # Not Linux, allow through
+        except Exception:
+            pass  # On error, allow through (don't block functionality)
+        return True
     def _ensure_worker(self) -> None:
         """Spawn worker subprocess if not running."""
         if self._worker_proc is not None and self._worker_proc.poll() is None:
             return
         self._worker_proc = None
+        # V3.3.28: Check memory pressure before spawning
+        if not self._check_memory_pressure():
+            logger.warning("Skipping embedding worker spawn due to memory pressure")
+            self._available = False
+            return
         worker_module = "superlocalmemory.core.embedding_worker"
         try:
             env = {

package/src/superlocalmemory/mcp/tools_v33.py CHANGED Viewed

@@ -76,15 +76,19 @@ def register_v33_tools(server, get_engine: Callable) -> None:
             )
             if dry_run:
-                # Dry run: compute retention stats without applying changes
-                from superlocalmemory.math.ebbinghaus import EbbinghausCurve as _EC
-                facts = engine._db.get_all_facts(pid)
+                rows = engine._db.execute(
+                    "SELECT lifecycle_zone, COUNT(*) as cnt "
+                    "FROM fact_retention WHERE profile_id = ? "
+                    "GROUP BY lifecycle_zone",
+                    (pid,),
+                )
                 zones = {"active": 0, "warm": 0, "cold": 0, "archive": 0, "forgotten": 0}
-                for f in facts:
-                    r = ebbinghaus.compute_retention(f.access_count or 0, f.importance or 0.5, 0, 0.0)
-                    zone = ebbinghaus.classify_zone(r)
-                    zones[zone] = zones.get(zone, 0) + 1
-                result = {"total": len(facts), "transitions": 0, "dry_run_zones": zones}
+                total = 0
+                for row in rows:
+                    r = dict(row)
+                    zones[r["lifecycle_zone"]] = int(r["cnt"])
+                    total += int(r["cnt"])
+                result = {"total": total, "transitions": 0, "dry_run_zones": zones}
             else:
                 result = scheduler.run_decay_cycle(pid, force=True)
@@ -399,9 +403,9 @@ def register_v33_tools(server, get_engine: Callable) -> None:
             # 3. Behavioral pattern mining
             try:
                 from superlocalmemory.learning.consolidation_worker import ConsolidationWorker
-                cw = ConsolidationWorker(engine._db, engine._config)
-                patterns = cw._generate_patterns(pid)
-                results["behavioral"] = {"patterns_mined": len(patterns)}
+                cw = ConsolidationWorker(engine._db.db_path, engine._db.db_path.parent / "learning.db",)
+                count = cw._generate_patterns(pid, False)
+                results["behavioral"] = {"patterns_mined": count}
             except Exception as exc:
                 results["behavioral"] = {"error": str(exc)}