npm - superlocalmemory - Versions diffs - 3.4.31 → 3.4.33 - Mend

superlocalmemory 3.4.31 → 3.4.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/CHANGELOG.md +56 -0
package/package.json +1 -1
package/pyproject.toml +1 -1
package/src/superlocalmemory/__init__.py +1 -1
package/src/superlocalmemory/core/embedding_worker.py +1 -1
package/src/superlocalmemory/core/recall_gate.py +36 -0
package/src/superlocalmemory/learning/bandit.py +18 -0
package/src/superlocalmemory/learning/reward_proxy.py +7 -2
package/src/superlocalmemory/mcp/_pool_adapter.py +16 -4
package/src/superlocalmemory/mcp/tools_core.py +7 -34
package/src/superlocalmemory/server/routes/memories.py +8 -1
package/src/superlocalmemory/server/unified_daemon.py +117 -5
package/src/superlocalmemory.egg-info/PKG-INFO +663 -0
package/src/superlocalmemory.egg-info/SOURCES.txt +448 -0
package/src/superlocalmemory.egg-info/dependency_links.txt +1 -0
package/src/superlocalmemory.egg-info/entry_points.txt +2 -0
package/src/superlocalmemory.egg-info/requires.txt +59 -0
package/src/superlocalmemory.egg-info/top_level.txt +1 -0

package/CHANGELOG.md CHANGED Viewed

@@ -10,6 +10,62 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ---
+## [3.4.33] - 2026-04-25
+Fix: daemon leaked SQLite connections to learning.db via bandit threadlocals.
+### Fixed
+- **Bandit threadlocal connection leak.** `reward_proxy.settle_stale_plays`
+  creates a `ContextualBandit` that opens a threadlocal connection via
+  `_conn_for`. When called from `asyncio.to_thread` (bandit_loops.py,
+  every 60 s), each thread-pool thread kept its connection open for the
+  process lifetime. Over 24 h this accumulated 12+ leaked file descriptors
+  and ~100 MB of wasted SQLite page-cache RAM. New
+  `bandit.close_threadlocal_conn()` function, called in the
+  `settle_stale_plays` finally block, ensures pool threads release their
+  connections immediately.
+- **Corrected embedding worker memory comment.** The `~200MB footprint`
+  note was written for `all-MiniLM-L6-v2`; the default model
+  `nomic-ai/nomic-embed-text-v1.5` uses ~1.1 GB via ONNX.
+---
+## [3.4.32] - 2026-04-24
+Fix: concurrent remembers no longer block recalls on the shared embedder.
+### Fixed
+- **Daemon `/remember` is now async by default.** Writes to the pending
+  queue in under 100 ms and returns a `pending_id`; a background thread
+  then drains the queue asynchronously. Previously, the synchronous
+  `engine.store()` on the FastAPI event loop could block `/search` and
+  `/health` for 30+ seconds while the single embedder worker processed a
+  large write. Under concurrent load the daemon could appear hung.
+- **Materializer yields to active recalls.** While any `/search` is in
+  flight the drainer sleeps between items, so user-initiated recalls
+  always get the embedder first.
+- **MCP remember tool simplified.** Writes to `pending.db` and returns;
+  the daemon's materializer completes the pipeline. Removes the
+  redundant in-process `pool.store` background task that previously
+  contended with `/search`.
+- **`pool_store` returns `["pending:<id>"]`** when the daemon is async,
+  keeping a stable identifier for callers without blocking on the
+  embedder.
+### Added
+- `?wait=true` query parameter on `POST /remember` for callers that
+  need synchronous behaviour and real `fact_ids` in the response.
+- `superlocalmemory.core.recall_gate` module — shared counter that lets
+  the materializer detect in-flight recalls and yield priority.
+### Migration notes
+- **No action required.** Existing clients continue to work; the
+  async response still carries `ok` but omits `count`. Scripts that
+  depended on `fact_ids` or `count` to validate the write should switch
+  to `pending_id` or pass `?wait=true` for the legacy behaviour.
+---
 ## [3.4.31] - 2026-04-24
 Dashboard truth, memory vs fact clarity, and self-cleaning pending queue.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "superlocalmemory",
-  "version": "3.4.31",
+  "version": "3.4.33",
   "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
   "keywords": [
     "ai-memory",

package/pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "superlocalmemory"
-version = "3.4.31"
+version = "3.4.33"
 description = "Information-geometric agent memory with mathematical guarantees"
 readme = "README.md"
 license = {text = "AGPL-3.0-or-later"}

package/src/superlocalmemory/__init__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """SuperLocalMemory — information-geometric agent memory."""
-__version__ = "3.4.31"
+__version__ = "3.4.33"

package/src/superlocalmemory/core/embedding_worker.py CHANGED Viewed

@@ -63,7 +63,7 @@ def _load_embedding_model(name: str) -> tuple:
     """
     from sentence_transformers import SentenceTransformer
-    # Tier 1: ONNX (stable memory, ~200MB footprint)
+    # Tier 1: ONNX (stable memory; ~1.1 GB for nomic-embed-text-v1.5)
     try:
         m = SentenceTransformer(name, backend="onnx", trust_remote_code=True)
         return m, "onnx"

package/src/superlocalmemory/core/recall_gate.py ADDED Viewed

@@ -0,0 +1,36 @@
+# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
+# Licensed under AGPL-3.0-or-later - see LICENSE file
+# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
+"""v3.4.32: Recall-in-flight counter used to give /search priority over the
+pending materializer.
+Every recall handler calls ``begin_recall()`` on entry and ``end_recall()``
+in a finally block. The pending-memory materializer thread polls
+``in_flight()`` and sleeps while any recall is active, so the shared
+embedder worker never serves a materialization ahead of a user-initiated
+recall.
+"""
+from __future__ import annotations
+import threading
+_lock = threading.Lock()
+_active = 0
+def begin_recall() -> None:
+    global _active
+    with _lock:
+        _active += 1
+def end_recall() -> None:
+    global _active
+    with _lock:
+        _active = max(0, _active - 1)
+def in_flight() -> int:
+    with _lock:
+        return _active

package/src/superlocalmemory/learning/bandit.py CHANGED Viewed

@@ -176,6 +176,23 @@ def _conn_for(db_path: Path) -> sqlite3.Connection:
     return conn
+def close_threadlocal_conn() -> None:
+    """Close the threadlocal bandit connection on the calling thread.
+    v3.4.33: background callers (asyncio.to_thread pool threads) MUST call
+    this after finishing bandit work.  Without it, each pool thread keeps a
+    leaked connection to learning.db for the process lifetime — observed as
+    12+ open file descriptors and ~100 MB wasted page-cache RAM.
+    """
+    if _holder.conn is not None:
+        try:
+            _holder.conn.close()
+        except sqlite3.Error:  # pragma: no cover
+            pass
+        _holder.conn = None
+        _holder.path = None
 def _now_iso() -> str:
     return datetime.now(timezone.utc).isoformat(timespec="seconds")
@@ -520,6 +537,7 @@ def retention_sweep(
 __all__ = (
     "BanditChoice",
     "ContextualBandit",
+    "close_threadlocal_conn",
     "compute_stratum",
     "current_time_bucket",
     "retention_sweep",

package/src/superlocalmemory/learning/reward_proxy.py CHANGED Viewed

@@ -317,8 +317,13 @@ def settle_stale_plays(
                 memory_conn.close()
             except sqlite3.Error:  # pragma: no cover
                 pass
-        # Don't close a caller-owned bandit instance.
-        _ = owns_bandit
+        # v3.4.33: close the threadlocal bandit connection so pool threads
+        # from asyncio.to_thread don't leak file descriptors to learning.db.
+        try:
+            from superlocalmemory.learning.bandit import close_threadlocal_conn
+            close_threadlocal_conn()
+        except Exception:  # pragma: no cover — defensive
+            pass
     return settled

package/src/superlocalmemory/mcp/_pool_adapter.py CHANGED Viewed

@@ -110,12 +110,24 @@ def pool_recall(query: str, limit: int = 10, **_: Any) -> PoolRecallResponse:
 def pool_store(content: str, metadata: dict | None = None) -> list[str]:
-    """Call pool.store and return the fact id list.
+    """Call pool.store and return fact id list (or pending tracker).
-    Raises :class:`PoolError` on worker death or any non-ok envelope.
+    v3.4.32: the daemon /remember endpoint is async by default — it
+    returns ``pending_id`` and queues the write. We surface this to
+    callers as ``["pending:<id>"]`` so they have a stable identifier
+    without blocking the remember on the embedder worker.
+    Legacy synchronous path (``?wait=true``) still returns real
+    ``fact_ids``. Worker death raises :class:`PoolError`.
     """
     raw = _pool().store(content=content, metadata=metadata or {})
     _unwrap_error(raw, "store")
-    if isinstance(raw, dict):
-        return list(raw.get("fact_ids", []))
+    if not isinstance(raw, dict):
+        return []
+    fact_ids = raw.get("fact_ids")
+    if fact_ids:
+        return list(fact_ids)
+    pending_id = raw.get("pending_id")
+    if pending_id is not None:
+        return [f"pending:{pending_id}"]
     return []

package/src/superlocalmemory/mcp/tools_core.py CHANGED Viewed

@@ -113,12 +113,11 @@ def register_core_tools(server, get_engine: Callable) -> None:
         """
         import asyncio
         try:
-            # V3.3.27: Store-first pattern — write to pending.db immediately
-            # (<100ms), then process through full pipeline in background.
-            # This eliminates the 30-40s blocking that Mode B users experience.
-            # Pending memories are auto-processed on next engine.initialize()
-            # or by the daemon's background loop.
-            from superlocalmemory.cli.pending_store import store_pending, mark_done
+            # v3.4.32: Store-first pattern. Write to pending.db and return
+            # immediately. The daemon's pending-materializer thread drains
+            # the queue with recall priority, so concurrent MCP remembers
+            # no longer contend with /search on the shared embedder.
+            from superlocalmemory.cli.pending_store import store_pending
             pending_id = store_pending(content, tags=tags, metadata={
                 "project": project,
@@ -127,39 +126,13 @@ def register_core_tools(server, get_engine: Callable) -> None:
                 "session_id": session_id,
             })
-            # Fire-and-forget: process in background thread
-            async def _process_in_background():
-                try:
-                    from superlocalmemory.core.worker_pool import WorkerPool
-                    pool = WorkerPool.shared()
-                    result = await asyncio.to_thread(
-                        pool.store, content, metadata={
-                            "tags": tags, "project": project,
-                            "importance": importance, "agent_id": agent_id,
-                            "session_id": session_id,
-                        },
-                    )
-                    if result.get("ok"):
-                        mark_done(pending_id)
-                        _emit_event("memory.created", {
-                            "content_preview": content[:80],
-                            "agent_id": agent_id,
-                            "fact_count": result.get("count", 0),
-                        }, source_agent=agent_id)
-                except Exception as _bg_exc:
-                    logger.warning(
-                        "Background store failed (pending_id=%s): %s",
-                        pending_id, _bg_exc,
-                    )
-            asyncio.create_task(_process_in_background())
             return {
                 "success": True,
                 "fact_ids": [f"pending:{pending_id}"],
                 "count": 1,
                 "pending": True,
-                "message": "Stored to pending — processing in background.",
+                "pending_id": pending_id,
+                "message": "Stored — facts will appear in the dashboard shortly.",
             }
         except Exception as exc:
             logger.exception("remember failed")

package/src/superlocalmemory/server/routes/memories.py CHANGED Viewed

@@ -398,7 +398,12 @@ async def get_graph(
 @router.post("/api/search")
 async def search_memories(request: Request, body: SearchRequest):
-    """Semantic search via subprocess worker pool (memory-isolated)."""
+    """Semantic search via subprocess worker pool (memory-isolated).
+    v3.4.32: marks recall in-flight so the pending materializer yields.
+    """
+    from superlocalmemory.core.recall_gate import begin_recall, end_recall
+    begin_recall()
     try:
         from superlocalmemory.core.worker_pool import WorkerPool
         pool = WorkerPool.shared()
@@ -435,6 +440,8 @@ async def search_memories(request: Request, body: SearchRequest):
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Search error: {str(e)}")
+    finally:
+        end_recall()
 @router.get("/api/clusters")

package/src/superlocalmemory/server/unified_daemon.py CHANGED Viewed

@@ -66,6 +66,20 @@ class ObserveRequest(BaseModel):
     content: str
+# ---------------------------------------------------------------------------
+# v3.4.32: Recall-priority gate for the pending materializer.
+# All /remember writes go to pending.db and return fast; a background
+# thread drains pending while yielding to any in-flight /search.
+# See ``superlocalmemory.core.recall_gate``.
+# ---------------------------------------------------------------------------
+from superlocalmemory.core.recall_gate import (
+    begin_recall as _begin_recall,
+    end_recall as _end_recall,
+    in_flight as _recalls_in_flight,
+)
 # ---------------------------------------------------------------------------
 # Observation debounce buffer (migrated from daemon.py)
 # ---------------------------------------------------------------------------
@@ -949,6 +963,8 @@ def _register_daemon_routes(application: FastAPI) -> None:
         if not effective_sid:
             import time as _t
             effective_sid = f"http:{int(_t.time() * 1000)}"
+        # v3.4.32: mark recall in-flight so the pending materializer pauses
+        _begin_recall()
         try:
             response = engine.recall(
                 search_query, limit=limit, session_id=effective_sid,
@@ -1006,18 +1022,47 @@ def _register_daemon_routes(application: FastAPI) -> None:
             }
         except Exception as exc:
             raise HTTPException(500, detail=str(exc))
+        finally:
+            _end_recall()
     @application.post("/remember")
-    async def remember(req: RememberRequest):
+    async def remember(req: RememberRequest, wait: bool = False):
+        """v3.4.32: Async by default — writes to pending.db, returns pending_id
+        in <100ms. Materializer thread drains at low priority, yielding to
+        /search. Pass ``?wait=true`` for legacy synchronous behavior (blocks
+        on the embedder until facts are written).
+        """
         _update_activity()
         engine = _get_engine_or_503()
+        if wait:
+            try:
+                metadata = {"tags": req.tags} if req.tags else {}
+                extra = getattr(req, "metadata", None)
+                if isinstance(extra, dict):
+                    metadata.update(extra)
+                fact_ids = engine.store(req.content, metadata=metadata)
+                return {"ok": True, "fact_ids": fact_ids, "count": len(fact_ids)}
+            except Exception as exc:
+                raise HTTPException(500, detail=str(exc))
         try:
-            metadata = {"tags": req.tags} if req.tags else {}
+            from superlocalmemory.cli.pending_store import store_pending
+            meta = {}
+            if req.tags:
+                meta["tags"] = req.tags
             extra = getattr(req, "metadata", None)
             if isinstance(extra, dict):
-                metadata.update(extra)
-            fact_ids = engine.store(req.content, metadata=metadata)
-            return {"ok": True, "fact_ids": fact_ids, "count": len(fact_ids)}
+                meta.update(extra)
+            pending_id = store_pending(
+                req.content, tags=req.tags or "", metadata=meta,
+            )
+            return {
+                "ok": True,
+                "pending_id": pending_id,
+                "status": "queued",
+                "note": "materialized async; pass ?wait=true for legacy sync",
+            }
         except Exception as exc:
             raise HTTPException(500, detail=str(exc))
@@ -1189,6 +1234,70 @@ def _start_memory_watchdog() -> None:
     logger.info("Memory watchdog started (limit: %d MB per worker)", MAX_WORKER_MB)
+_materializer_stop = threading.Event()
+_materializer_thread: threading.Thread | None = None
+def _start_pending_materializer() -> None:
+    """Background thread: drains pending.db, yields to active /search calls.
+    Poll loop:
+    1. Fetch up to 5 pending rows.
+    2. For each row: if any /search is in flight, sleep 500ms (yield priority).
+    3. Call engine.store(), mark_done or mark_failed.
+    4. Sleep 2s between polls when idle (empty queue).
+    """
+    global _materializer_thread
+    def _loop():
+        from superlocalmemory.cli.pending_store import (
+            get_pending, mark_done, mark_failed,
+        )
+        while not _materializer_stop.is_set():
+            try:
+                engine = _engine  # may be None briefly at startup
+                if engine is None:
+                    time.sleep(2.0)
+                    continue
+                pending = get_pending(limit=5)
+                if not pending:
+                    time.sleep(2.0)
+                    continue
+                for item in pending:
+                    if _materializer_stop.is_set():
+                        break
+                    # Yield to recalls: wait until none in flight
+                    waits = 0
+                    while _recalls_in_flight() > 0 and waits < 60:
+                        time.sleep(0.5)
+                        waits += 1
+                    try:
+                        import json as _json
+                        md_str = item.get("metadata") or "{}"
+                        try:
+                            md = _json.loads(md_str)
+                        except Exception:
+                            md = {}
+                        if item.get("tags"):
+                            md.setdefault("tags", item["tags"])
+                        engine.store(item["content"], metadata=md)
+                        mark_done(item["id"])
+                    except Exception as exc:
+                        logger.warning(
+                            "Pending %d failed: %s", item["id"], exc,
+                        )
+                        mark_failed(item["id"], str(exc))
+            except Exception as exc:
+                logger.warning("materializer loop error: %s", exc)
+                time.sleep(5.0)
+    _materializer_thread = threading.Thread(
+        target=_loop, daemon=True, name="pending-materializer",
+    )
+    _materializer_thread.start()
+    logger.info("Pending materializer started (recall-priority)")
 def start_server(port: int = _DEFAULT_PORT) -> None:
     """Start the unified daemon. Blocks until stopped."""
     global _start_time
@@ -1223,6 +1332,9 @@ def start_server(port: int = _DEFAULT_PORT) -> None:
     # v3.4.7: Start memory watchdog to prevent runaway workers
     _start_memory_watchdog()
+    # v3.4.32: Continuous pending-queue materializer with recall priority.
+    _start_pending_materializer()
     log_dir = Path.home() / ".superlocalmemory" / "logs"
     log_dir.mkdir(parents=True, exist_ok=True)