superlocalmemory 3.4.31 → 3.4.33

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -10,6 +10,62 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
10
10
 
11
11
  ---
12
12
 
13
+ ## [3.4.33] - 2026-04-25
14
+
15
+ Fix: daemon leaked SQLite connections to learning.db via bandit threadlocals.
16
+
17
+ ### Fixed
18
+ - **Bandit threadlocal connection leak.** `reward_proxy.settle_stale_plays`
19
+ creates a `ContextualBandit` that opens a threadlocal connection via
20
+ `_conn_for`. When called from `asyncio.to_thread` (bandit_loops.py,
21
+ every 60 s), each thread-pool thread kept its connection open for the
22
+ process lifetime. Over 24 h this accumulated 12+ leaked file descriptors
23
+ and ~100 MB of wasted SQLite page-cache RAM. New
24
+ `bandit.close_threadlocal_conn()` function, called in the
25
+ `settle_stale_plays` finally block, ensures pool threads release their
26
+ connections immediately.
27
+ - **Corrected embedding worker memory comment.** The `~200MB footprint`
28
+ note was written for `all-MiniLM-L6-v2`; the default model
29
+ `nomic-ai/nomic-embed-text-v1.5` uses ~1.1 GB via ONNX.
30
+
31
+ ---
32
+
33
+ ## [3.4.32] - 2026-04-24
34
+
35
+ Fix: concurrent remembers no longer block recalls on the shared embedder.
36
+
37
+ ### Fixed
38
+ - **Daemon `/remember` is now async by default.** Writes to the pending
39
+ queue in under 100 ms and returns a `pending_id`; a background thread
40
+ drains the queue. Previously, the synchronous
41
+ `engine.store()` on the FastAPI event loop could block `/search` and
42
+ `/health` for 30+ seconds while the single embedder worker processed a
43
+ large write. Under concurrent load the daemon could appear hung.
44
+ - **Materializer yields to active recalls.** While any `/search` is in
45
+ flight the drainer sleeps between items, so user-initiated recalls
46
+ always get the embedder first.
47
+ - **MCP remember tool simplified.** Writes to `pending.db` and returns;
48
+ the daemon's materializer completes the pipeline. Removes the
49
+ redundant in-process `pool.store` background task that previously
50
+ contended with `/search`.
51
+ - **`pool_store` returns `["pending:<id>"]`** when the daemon is async,
52
+ keeping a stable identifier for callers without blocking on the
53
+ embedder.
54
+
55
+ ### Added
56
+ - `?wait=true` query parameter on `POST /remember` for callers that
57
+ need synchronous behaviour and real `fact_ids` in the response.
58
+ - `superlocalmemory.core.recall_gate` module — shared counter that lets
59
+ the materializer detect in-flight recalls and yield priority.
60
+
61
+ ### Migration notes
62
+ - **No action required.** Existing clients continue to work; the
63
+ response shape is compatible (`ok`, `count` still present). Scripts
64
+ that depended on `fact_ids` to validate the write should switch to
65
+ `pending_id` or pass `?wait=true` to opt in to the legacy behaviour.
66
+
67
+ ---
68
+
13
69
  ## [3.4.31] - 2026-04-24
14
70
 
15
71
  Dashboard truth, memory vs fact clarity, and self-cleaning pending queue.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlocalmemory",
3
- "version": "3.4.31",
3
+ "version": "3.4.33",
4
4
  "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
5
5
  "keywords": [
6
6
  "ai-memory",
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "superlocalmemory"
3
- version = "3.4.31"
3
+ version = "3.4.33"
4
4
  description = "Information-geometric agent memory with mathematical guarantees"
5
5
  readme = "README.md"
6
6
  license = {text = "AGPL-3.0-or-later"}
@@ -1,3 +1,3 @@
1
1
  """SuperLocalMemory — information-geometric agent memory."""
2
2
 
3
- __version__ = "3.4.31"
3
+ __version__ = "3.4.33"
@@ -63,7 +63,7 @@ def _load_embedding_model(name: str) -> tuple:
63
63
  """
64
64
  from sentence_transformers import SentenceTransformer
65
65
 
66
- # Tier 1: ONNX (stable memory, ~200MB footprint)
66
+ # Tier 1: ONNX (stable memory; ~1.1 GB for nomic-embed-text-v1.5)
67
67
  try:
68
68
  m = SentenceTransformer(name, backend="onnx", trust_remote_code=True)
69
69
  return m, "onnx"
@@ -0,0 +1,36 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """v3.4.32: Recall-in-flight counter used to give /search priority over the
6
+ pending materializer.
7
+
8
+ Every recall handler calls ``begin_recall()`` on entry and ``end_recall()``
9
+ in a finally block. The pending-memory materializer thread polls
10
+ ``in_flight()`` and sleeps while any recall is active (for a bounded
11
+ time), so the shared embedder worker normally serves user-initiated
12
+ recalls ahead of materialization.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import threading
17
+
18
+ _lock = threading.Lock()
19
+ _active = 0
20
+
21
+
22
+ def begin_recall() -> None:
23
+ global _active
24
+ with _lock:
25
+ _active += 1
26
+
27
+
28
+ def end_recall() -> None:
29
+ global _active
30
+ with _lock:
31
+ _active = max(0, _active - 1)
32
+
33
+
34
+ def in_flight() -> int:
35
+ with _lock:
36
+ return _active
@@ -176,6 +176,23 @@ def _conn_for(db_path: Path) -> sqlite3.Connection:
176
176
  return conn
177
177
 
178
178
 
179
+ def close_threadlocal_conn() -> None:
180
+ """Close the threadlocal bandit connection on the calling thread.
181
+
182
+ v3.4.33: background callers (asyncio.to_thread pool threads) MUST call
183
+ this after finishing bandit work. Without it, each pool thread keeps a
184
+ leaked connection to learning.db for the process lifetime — observed as
185
+ 12+ open file descriptors and ~100 MB wasted page-cache RAM.
186
+ """
187
+ if _holder.conn is not None:
188
+ try:
189
+ _holder.conn.close()
190
+ except sqlite3.Error: # pragma: no cover
191
+ pass
192
+ _holder.conn = None
193
+ _holder.path = None
194
+
195
+
179
196
  def _now_iso() -> str:
180
197
  return datetime.now(timezone.utc).isoformat(timespec="seconds")
181
198
 
@@ -520,6 +537,7 @@ def retention_sweep(
520
537
  __all__ = (
521
538
  "BanditChoice",
522
539
  "ContextualBandit",
540
+ "close_threadlocal_conn",
523
541
  "compute_stratum",
524
542
  "current_time_bucket",
525
543
  "retention_sweep",
@@ -317,8 +317,13 @@ def settle_stale_plays(
317
317
  memory_conn.close()
318
318
  except sqlite3.Error: # pragma: no cover
319
319
  pass
320
- # Don't close a caller-owned bandit instance.
321
- _ = owns_bandit
320
+ # v3.4.33: close the threadlocal bandit connection so pool threads
321
+ # from asyncio.to_thread don't leak file descriptors to learning.db.
322
+ try:
323
+ from superlocalmemory.learning.bandit import close_threadlocal_conn
324
+ close_threadlocal_conn()
325
+ except Exception: # pragma: no cover — defensive
326
+ pass
322
327
 
323
328
  return settled
324
329
 
@@ -110,12 +110,24 @@ def pool_recall(query: str, limit: int = 10, **_: Any) -> PoolRecallResponse:
110
110
 
111
111
 
112
112
  def pool_store(content: str, metadata: dict | None = None) -> list[str]:
113
- """Call pool.store and return the fact id list.
113
+ """Call pool.store and return fact id list (or pending tracker).
114
114
 
115
- Raises :class:`PoolError` on worker death or any non-ok envelope.
115
+ v3.4.32: the daemon /remember endpoint is async by default — it
116
+ returns ``pending_id`` and queues the write. We surface this to
117
+ callers as ``["pending:<id>"]`` so they have a stable identifier
118
+ without blocking the remember on the embedder worker.
119
+
120
+ Legacy synchronous path (``?wait=true``) still returns real
121
+ ``fact_ids``. Worker death raises :class:`PoolError`.
116
122
  """
117
123
  raw = _pool().store(content=content, metadata=metadata or {})
118
124
  _unwrap_error(raw, "store")
119
- if isinstance(raw, dict):
120
- return list(raw.get("fact_ids", []))
125
+ if not isinstance(raw, dict):
126
+ return []
127
+ fact_ids = raw.get("fact_ids")
128
+ if fact_ids:
129
+ return list(fact_ids)
130
+ pending_id = raw.get("pending_id")
131
+ if pending_id is not None:
132
+ return [f"pending:{pending_id}"]
121
133
  return []
@@ -113,12 +113,11 @@ def register_core_tools(server, get_engine: Callable) -> None:
113
113
  """
114
114
  import asyncio
115
115
  try:
116
- # V3.3.27: Store-first pattern write to pending.db immediately
117
- # (<100ms), then process through full pipeline in background.
118
- # This eliminates the 30-40s blocking that Mode B users experience.
119
- # Pending memories are auto-processed on next engine.initialize()
120
- # or by the daemon's background loop.
121
- from superlocalmemory.cli.pending_store import store_pending, mark_done
116
+ # v3.4.32: Store-first pattern. Write to pending.db and return
117
+ # immediately. The daemon's pending-materializer thread drains
118
+ # the queue with recall priority, so concurrent MCP remembers
119
+ # no longer contend with /search on the shared embedder.
120
+ from superlocalmemory.cli.pending_store import store_pending
122
121
 
123
122
  pending_id = store_pending(content, tags=tags, metadata={
124
123
  "project": project,
@@ -127,39 +126,13 @@ def register_core_tools(server, get_engine: Callable) -> None:
127
126
  "session_id": session_id,
128
127
  })
129
128
 
130
- # Fire-and-forget: process in background thread
131
- async def _process_in_background():
132
- try:
133
- from superlocalmemory.core.worker_pool import WorkerPool
134
- pool = WorkerPool.shared()
135
- result = await asyncio.to_thread(
136
- pool.store, content, metadata={
137
- "tags": tags, "project": project,
138
- "importance": importance, "agent_id": agent_id,
139
- "session_id": session_id,
140
- },
141
- )
142
- if result.get("ok"):
143
- mark_done(pending_id)
144
- _emit_event("memory.created", {
145
- "content_preview": content[:80],
146
- "agent_id": agent_id,
147
- "fact_count": result.get("count", 0),
148
- }, source_agent=agent_id)
149
- except Exception as _bg_exc:
150
- logger.warning(
151
- "Background store failed (pending_id=%s): %s",
152
- pending_id, _bg_exc,
153
- )
154
-
155
- asyncio.create_task(_process_in_background())
156
-
157
129
  return {
158
130
  "success": True,
159
131
  "fact_ids": [f"pending:{pending_id}"],
160
132
  "count": 1,
161
133
  "pending": True,
162
- "message": "Stored to pending — processing in background.",
134
+ "pending_id": pending_id,
135
+ "message": "Stored — facts will appear in the dashboard shortly.",
163
136
  }
164
137
  except Exception as exc:
165
138
  logger.exception("remember failed")
@@ -398,7 +398,12 @@ async def get_graph(
398
398
 
399
399
  @router.post("/api/search")
400
400
  async def search_memories(request: Request, body: SearchRequest):
401
- """Semantic search via subprocess worker pool (memory-isolated)."""
401
+ """Semantic search via subprocess worker pool (memory-isolated).
402
+
403
+ v3.4.32: marks recall in-flight so the pending materializer yields.
404
+ """
405
+ from superlocalmemory.core.recall_gate import begin_recall, end_recall
406
+ begin_recall()
402
407
  try:
403
408
  from superlocalmemory.core.worker_pool import WorkerPool
404
409
  pool = WorkerPool.shared()
@@ -435,6 +440,8 @@ async def search_memories(request: Request, body: SearchRequest):
435
440
 
436
441
  except Exception as e:
437
442
  raise HTTPException(status_code=500, detail=f"Search error: {str(e)}")
443
+ finally:
444
+ end_recall()
438
445
 
439
446
 
440
447
  @router.get("/api/clusters")
@@ -66,6 +66,20 @@ class ObserveRequest(BaseModel):
66
66
  content: str
67
67
 
68
68
 
69
+ # ---------------------------------------------------------------------------
70
+ # v3.4.32: Recall-priority gate for the pending materializer.
71
+ # All /remember writes go to pending.db and return fast; a background
72
+ # thread drains pending while yielding to any in-flight /search.
73
+ # See ``superlocalmemory.core.recall_gate``.
74
+ # ---------------------------------------------------------------------------
75
+
76
+ from superlocalmemory.core.recall_gate import (
77
+ begin_recall as _begin_recall,
78
+ end_recall as _end_recall,
79
+ in_flight as _recalls_in_flight,
80
+ )
81
+
82
+
69
83
  # ---------------------------------------------------------------------------
70
84
  # Observation debounce buffer (migrated from daemon.py)
71
85
  # ---------------------------------------------------------------------------
@@ -949,6 +963,8 @@ def _register_daemon_routes(application: FastAPI) -> None:
949
963
  if not effective_sid:
950
964
  import time as _t
951
965
  effective_sid = f"http:{int(_t.time() * 1000)}"
966
+ # v3.4.32: mark recall in-flight so the pending materializer pauses
967
+ _begin_recall()
952
968
  try:
953
969
  response = engine.recall(
954
970
  search_query, limit=limit, session_id=effective_sid,
@@ -1006,18 +1022,47 @@ def _register_daemon_routes(application: FastAPI) -> None:
1006
1022
  }
1007
1023
  except Exception as exc:
1008
1024
  raise HTTPException(500, detail=str(exc))
1025
+ finally:
1026
+ _end_recall()
1009
1027
 
1010
1028
  @application.post("/remember")
1011
- async def remember(req: RememberRequest):
1029
+ async def remember(req: RememberRequest, wait: bool = False):
1030
+ """v3.4.32: Async by default — writes to pending.db, returns pending_id
1031
+ in <100ms. Materializer thread drains at low priority, yielding to
1032
+ /search. Pass ``?wait=true`` for legacy synchronous behavior (blocks
1033
+ on the embedder until facts are written).
1034
+ """
1012
1035
  _update_activity()
1013
1036
  engine = _get_engine_or_503()
1037
+
1038
+ if wait:
1039
+ try:
1040
+ metadata = {"tags": req.tags} if req.tags else {}
1041
+ extra = getattr(req, "metadata", None)
1042
+ if isinstance(extra, dict):
1043
+ metadata.update(extra)
1044
+ fact_ids = engine.store(req.content, metadata=metadata)
1045
+ return {"ok": True, "fact_ids": fact_ids, "count": len(fact_ids)}
1046
+ except Exception as exc:
1047
+ raise HTTPException(500, detail=str(exc))
1048
+
1014
1049
  try:
1015
- metadata = {"tags": req.tags} if req.tags else {}
1050
+ from superlocalmemory.cli.pending_store import store_pending
1051
+ meta = {}
1052
+ if req.tags:
1053
+ meta["tags"] = req.tags
1016
1054
  extra = getattr(req, "metadata", None)
1017
1055
  if isinstance(extra, dict):
1018
- metadata.update(extra)
1019
- fact_ids = engine.store(req.content, metadata=metadata)
1020
- return {"ok": True, "fact_ids": fact_ids, "count": len(fact_ids)}
1056
+ meta.update(extra)
1057
+ pending_id = store_pending(
1058
+ req.content, tags=req.tags or "", metadata=meta,
1059
+ )
1060
+ return {
1061
+ "ok": True,
1062
+ "pending_id": pending_id,
1063
+ "status": "queued",
1064
+ "note": "materialized async; pass ?wait=true for legacy sync",
1065
+ }
1021
1066
  except Exception as exc:
1022
1067
  raise HTTPException(500, detail=str(exc))
1023
1068
 
@@ -1189,6 +1234,70 @@ def _start_memory_watchdog() -> None:
1189
1234
  logger.info("Memory watchdog started (limit: %d MB per worker)", MAX_WORKER_MB)
1190
1235
 
1191
1236
 
1237
+ _materializer_stop = threading.Event()
1238
+ _materializer_thread: threading.Thread | None = None
1239
+
1240
+
1241
+ def _start_pending_materializer() -> None:
1242
+ """Background thread: drains pending.db, yields to active /search calls.
1243
+
1244
+ Poll loop:
1245
+ 1. Fetch up to 5 pending rows.
1246
+ 2. For each row: while any /search is in flight, sleep in 500ms steps (max 30s) to yield priority.
1247
+ 3. Call engine.store(), mark_done or mark_failed.
1248
+ 4. Sleep 2s between polls when idle (empty queue).
1249
+ """
1250
+ global _materializer_thread
1251
+
1252
+ def _loop():
1253
+ from superlocalmemory.cli.pending_store import (
1254
+ get_pending, mark_done, mark_failed,
1255
+ )
1256
+ while not _materializer_stop.is_set():
1257
+ try:
1258
+ engine = _engine # may be None briefly at startup
1259
+ if engine is None:
1260
+ time.sleep(2.0)
1261
+ continue
1262
+ pending = get_pending(limit=5)
1263
+ if not pending:
1264
+ time.sleep(2.0)
1265
+ continue
1266
+ for item in pending:
1267
+ if _materializer_stop.is_set():
1268
+ break
1269
+ # Yield to recalls: wait until none in flight
1270
+ waits = 0
1271
+ while _recalls_in_flight() > 0 and waits < 60:
1272
+ time.sleep(0.5)
1273
+ waits += 1
1274
+ try:
1275
+ import json as _json
1276
+ md_str = item.get("metadata") or "{}"
1277
+ try:
1278
+ md = _json.loads(md_str)
1279
+ except Exception:
1280
+ md = {}
1281
+ if item.get("tags"):
1282
+ md.setdefault("tags", item["tags"])
1283
+ engine.store(item["content"], metadata=md)
1284
+ mark_done(item["id"])
1285
+ except Exception as exc:
1286
+ logger.warning(
1287
+ "Pending %d failed: %s", item["id"], exc,
1288
+ )
1289
+ mark_failed(item["id"], str(exc))
1290
+ except Exception as exc:
1291
+ logger.warning("materializer loop error: %s", exc)
1292
+ time.sleep(5.0)
1293
+
1294
+ _materializer_thread = threading.Thread(
1295
+ target=_loop, daemon=True, name="pending-materializer",
1296
+ )
1297
+ _materializer_thread.start()
1298
+ logger.info("Pending materializer started (recall-priority)")
1299
+
1300
+
1192
1301
  def start_server(port: int = _DEFAULT_PORT) -> None:
1193
1302
  """Start the unified daemon. Blocks until stopped."""
1194
1303
  global _start_time
@@ -1223,6 +1332,9 @@ def start_server(port: int = _DEFAULT_PORT) -> None:
1223
1332
  # v3.4.7: Start memory watchdog to prevent runaway workers
1224
1333
  _start_memory_watchdog()
1225
1334
 
1335
+ # v3.4.32: Continuous pending-queue materializer with recall priority.
1336
+ _start_pending_materializer()
1337
+
1226
1338
  log_dir = Path.home() / ".superlocalmemory" / "logs"
1227
1339
  log_dir.mkdir(parents=True, exist_ok=True)
1228
1340