superlocalmemory 3.4.31 → 3.4.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -10,6 +10,42 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
10
10
 
11
11
  ---
12
12
 
13
+ ## [3.4.32] - 2026-04-24
14
+
15
+ Fix: concurrent remembers no longer block recalls on the shared embedder.
16
+
17
+ ### Fixed
18
+ - **Daemon `/remember` is now async by default.** Writes to the pending
19
+ queue in under 100 ms and returns a `pending_id`; a background thread
20
+ then drains the queue. Previously, the synchronous
21
+ `engine.store()` on the FastAPI event loop could block `/search` and
22
+ `/health` for 30+ seconds while the single embedder worker processed a
23
+ large write. Under concurrent load the daemon could appear hung.
24
+ - **Materializer yields to active recalls.** While any `/search` is in
25
+ flight the drainer sleeps between items, so user-initiated recalls
26
+ always get the embedder first.
27
+ - **MCP remember tool simplified.** Writes to `pending.db` and returns;
28
+ the daemon's materializer completes the pipeline. Removes the
29
+ redundant in-process `pool.store` background task that previously
30
+ contended with `/search`.
31
+ - **`pool_store` returns `["pending:<id>"]`** when the daemon is async,
32
+ keeping a stable identifier for callers without blocking on the
33
+ embedder.
34
+
35
+ ### Added
36
+ - `?wait=true` query parameter on `POST /remember` for callers that
37
+ need synchronous behaviour and real `fact_ids` in the response.
38
+ - `superlocalmemory.core.recall_gate` module — shared counter that lets
39
+ the materializer detect in-flight recalls and yield priority.
40
+
41
+ ### Migration notes
42
+ - **No action required.** Existing clients continue to work; the
43
+ response shape is compatible (`ok`, `count` still present). Scripts
44
+ that depended on `fact_ids` to validate the write should switch to
45
+ `pending_id` or pass `?wait=true` to opt in to the legacy behaviour.
46
+
47
+ ---
48
+
13
49
  ## [3.4.31] - 2026-04-24
14
50
 
15
51
  Dashboard truth, memory vs fact clarity, and self-cleaning pending queue.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "superlocalmemory",
3
- "version": "3.4.31",
3
+ "version": "3.4.32",
4
4
  "description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
5
5
  "keywords": [
6
6
  "ai-memory",
package/pyproject.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "superlocalmemory"
3
- version = "3.4.31"
3
+ version = "3.4.32"
4
4
  description = "Information-geometric agent memory with mathematical guarantees"
5
5
  readme = "README.md"
6
6
  license = {text = "AGPL-3.0-or-later"}
@@ -1,3 +1,3 @@
1
1
  """SuperLocalMemory — information-geometric agent memory."""
2
2
 
3
- __version__ = "3.4.31"
3
+ __version__ = "3.4.32"
@@ -0,0 +1,36 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under AGPL-3.0-or-later - see LICENSE file
3
+ # Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
4
+
5
+ """v3.4.32: Recall-in-flight counter used to give /search priority over the
6
+ pending materializer.
7
+
8
+ Every recall handler calls ``begin_recall()`` on entry and ``end_recall()``
9
+ in a finally block. The pending-memory materializer thread polls
10
+ ``in_flight()`` and sleeps while any recall is active, so the shared
11
+ embedder worker never serves a materialization ahead of a user-initiated
12
+ recall.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import threading
17
+
18
# Guards every read and write of ``_active``.
_lock = threading.Lock()
# Number of recalls that have begun and not yet ended.
_active = 0


def begin_recall() -> None:
    """Record that one more recall is in flight."""
    global _active
    with _lock:
        _active = _active + 1


def end_recall() -> None:
    """Record that a recall has finished.

    The counter is clamped at zero, so an ``end_recall()`` without a
    matching ``begin_recall()`` leaves it at zero rather than negative.
    """
    global _active
    with _lock:
        _active = _active - 1 if _active > 0 else 0


def in_flight() -> int:
    """Return the number of recalls currently in flight."""
    with _lock:
        current = _active
    return current
@@ -110,12 +110,24 @@ def pool_recall(query: str, limit: int = 10, **_: Any) -> PoolRecallResponse:
110
110
 
111
111
 
112
112
  def pool_store(content: str, metadata: dict | None = None) -> list[str]:
113
- """Call pool.store and return the fact id list.
113
+ """Call pool.store and return fact id list (or pending tracker).
114
114
 
115
- Raises :class:`PoolError` on worker death or any non-ok envelope.
115
+ v3.4.32: the daemon /remember endpoint is async by default — it
116
+ returns ``pending_id`` and queues the write. We surface this to
117
+ callers as ``["pending:<id>"]`` so they have a stable identifier
118
+ without blocking the remember on the embedder worker.
119
+
120
+ Legacy synchronous path (``?wait=true``) still returns real
121
+ ``fact_ids``. Worker death raises :class:`PoolError`.
116
122
  """
117
123
  raw = _pool().store(content=content, metadata=metadata or {})
118
124
  _unwrap_error(raw, "store")
119
- if isinstance(raw, dict):
120
- return list(raw.get("fact_ids", []))
125
+ if not isinstance(raw, dict):
126
+ return []
127
+ fact_ids = raw.get("fact_ids")
128
+ if fact_ids:
129
+ return list(fact_ids)
130
+ pending_id = raw.get("pending_id")
131
+ if pending_id is not None:
132
+ return [f"pending:{pending_id}"]
121
133
  return []
@@ -113,12 +113,11 @@ def register_core_tools(server, get_engine: Callable) -> None:
113
113
  """
114
114
  import asyncio
115
115
  try:
116
- # V3.3.27: Store-first pattern write to pending.db immediately
117
- # (<100ms), then process through full pipeline in background.
118
- # This eliminates the 30-40s blocking that Mode B users experience.
119
- # Pending memories are auto-processed on next engine.initialize()
120
- # or by the daemon's background loop.
121
- from superlocalmemory.cli.pending_store import store_pending, mark_done
116
+ # v3.4.32: Store-first pattern. Write to pending.db and return
117
+ # immediately. The daemon's pending-materializer thread drains
118
+ # the queue with recall priority, so concurrent MCP remembers
119
+ # no longer contend with /search on the shared embedder.
120
+ from superlocalmemory.cli.pending_store import store_pending
122
121
 
123
122
  pending_id = store_pending(content, tags=tags, metadata={
124
123
  "project": project,
@@ -127,39 +126,13 @@ def register_core_tools(server, get_engine: Callable) -> None:
127
126
  "session_id": session_id,
128
127
  })
129
128
 
130
- # Fire-and-forget: process in background thread
131
- async def _process_in_background():
132
- try:
133
- from superlocalmemory.core.worker_pool import WorkerPool
134
- pool = WorkerPool.shared()
135
- result = await asyncio.to_thread(
136
- pool.store, content, metadata={
137
- "tags": tags, "project": project,
138
- "importance": importance, "agent_id": agent_id,
139
- "session_id": session_id,
140
- },
141
- )
142
- if result.get("ok"):
143
- mark_done(pending_id)
144
- _emit_event("memory.created", {
145
- "content_preview": content[:80],
146
- "agent_id": agent_id,
147
- "fact_count": result.get("count", 0),
148
- }, source_agent=agent_id)
149
- except Exception as _bg_exc:
150
- logger.warning(
151
- "Background store failed (pending_id=%s): %s",
152
- pending_id, _bg_exc,
153
- )
154
-
155
- asyncio.create_task(_process_in_background())
156
-
157
129
  return {
158
130
  "success": True,
159
131
  "fact_ids": [f"pending:{pending_id}"],
160
132
  "count": 1,
161
133
  "pending": True,
162
- "message": "Stored to pending — processing in background.",
134
+ "pending_id": pending_id,
135
+ "message": "Stored — facts will appear in the dashboard shortly.",
163
136
  }
164
137
  except Exception as exc:
165
138
  logger.exception("remember failed")
@@ -398,7 +398,12 @@ async def get_graph(
398
398
 
399
399
  @router.post("/api/search")
400
400
  async def search_memories(request: Request, body: SearchRequest):
401
- """Semantic search via subprocess worker pool (memory-isolated)."""
401
+ """Semantic search via subprocess worker pool (memory-isolated).
402
+
403
+ v3.4.32: marks recall in-flight so the pending materializer yields.
404
+ """
405
+ from superlocalmemory.core.recall_gate import begin_recall, end_recall
406
+ begin_recall()
402
407
  try:
403
408
  from superlocalmemory.core.worker_pool import WorkerPool
404
409
  pool = WorkerPool.shared()
@@ -435,6 +440,8 @@ async def search_memories(request: Request, body: SearchRequest):
435
440
 
436
441
  except Exception as e:
437
442
  raise HTTPException(status_code=500, detail=f"Search error: {str(e)}")
443
+ finally:
444
+ end_recall()
438
445
 
439
446
 
440
447
  @router.get("/api/clusters")
@@ -66,6 +66,20 @@ class ObserveRequest(BaseModel):
66
66
  content: str
67
67
 
68
68
 
69
+ # ---------------------------------------------------------------------------
70
+ # v3.4.32: Recall-priority gate for the pending materializer.
71
+ # By default, /remember writes go to pending.db and return fast; a background
72
+ # thread drains pending while yielding to any in-flight /search.
73
+ # See ``superlocalmemory.core.recall_gate``.
74
+ # ---------------------------------------------------------------------------
75
+
76
+ from superlocalmemory.core.recall_gate import (
77
+ begin_recall as _begin_recall,
78
+ end_recall as _end_recall,
79
+ in_flight as _recalls_in_flight,
80
+ )
81
+
82
+
69
83
  # ---------------------------------------------------------------------------
70
84
  # Observation debounce buffer (migrated from daemon.py)
71
85
  # ---------------------------------------------------------------------------
@@ -949,6 +963,8 @@ def _register_daemon_routes(application: FastAPI) -> None:
949
963
  if not effective_sid:
950
964
  import time as _t
951
965
  effective_sid = f"http:{int(_t.time() * 1000)}"
966
+ # v3.4.32: mark recall in-flight so the pending materializer pauses
967
+ _begin_recall()
952
968
  try:
953
969
  response = engine.recall(
954
970
  search_query, limit=limit, session_id=effective_sid,
@@ -1006,18 +1022,47 @@ def _register_daemon_routes(application: FastAPI) -> None:
1006
1022
  }
1007
1023
  except Exception as exc:
1008
1024
  raise HTTPException(500, detail=str(exc))
1025
+ finally:
1026
+ _end_recall()
1009
1027
 
1010
1028
  @application.post("/remember")
1011
- async def remember(req: RememberRequest):
1029
+ async def remember(req: RememberRequest, wait: bool = False):
1030
+ """v3.4.32: Async by default — writes to pending.db, returns pending_id
1031
+ in <100ms. Materializer thread drains at low priority, yielding to
1032
+ /search. Pass ``?wait=true`` for legacy synchronous behavior (blocks
1033
+ on the embedder until facts are written).
1034
+ """
1012
1035
  _update_activity()
1013
1036
  engine = _get_engine_or_503()
1037
+
1038
+ if wait:
1039
+ try:
1040
+ metadata = {"tags": req.tags} if req.tags else {}
1041
+ extra = getattr(req, "metadata", None)
1042
+ if isinstance(extra, dict):
1043
+ metadata.update(extra)
1044
+ fact_ids = engine.store(req.content, metadata=metadata)
1045
+ return {"ok": True, "fact_ids": fact_ids, "count": len(fact_ids)}
1046
+ except Exception as exc:
1047
+ raise HTTPException(500, detail=str(exc))
1048
+
1014
1049
  try:
1015
- metadata = {"tags": req.tags} if req.tags else {}
1050
+ from superlocalmemory.cli.pending_store import store_pending
1051
+ meta = {}
1052
+ if req.tags:
1053
+ meta["tags"] = req.tags
1016
1054
  extra = getattr(req, "metadata", None)
1017
1055
  if isinstance(extra, dict):
1018
- metadata.update(extra)
1019
- fact_ids = engine.store(req.content, metadata=metadata)
1020
- return {"ok": True, "fact_ids": fact_ids, "count": len(fact_ids)}
1056
+ meta.update(extra)
1057
+ pending_id = store_pending(
1058
+ req.content, tags=req.tags or "", metadata=meta,
1059
+ )
1060
+ return {
1061
+ "ok": True,
1062
+ "pending_id": pending_id,
1063
+ "status": "queued",
1064
+ "note": "materialized async; pass ?wait=true for legacy sync",
1065
+ }
1021
1066
  except Exception as exc:
1022
1067
  raise HTTPException(500, detail=str(exc))
1023
1068
 
@@ -1189,6 +1234,70 @@ def _start_memory_watchdog() -> None:
1189
1234
  logger.info("Memory watchdog started (limit: %d MB per worker)", MAX_WORKER_MB)
1190
1235
 
1191
1236
 
1237
+ _materializer_stop = threading.Event()
1238
+ _materializer_thread: threading.Thread | None = None
1239
+
1240
+
1241
def _pending_item_metadata(item) -> dict:
    """Build the metadata dict handed to ``engine.store`` for one pending row.

    The row's ``metadata`` entry is decoded as JSON. Undecodable input, or
    JSON that is not an object (for example ``"null"`` or a list), yields
    an empty dict; a value that is already a dict is copied as-is. The
    row's ``tags`` entry is added under ``"tags"`` unless the metadata
    already has one.
    """
    import json

    raw = item.get("metadata") or "{}"
    if isinstance(raw, dict):
        md = dict(raw)
    else:
        try:
            md = json.loads(raw)
        except (TypeError, ValueError, RecursionError):
            # Best effort: a corrupt metadata blob must not lose the memory.
            md = {}
        if not isinstance(md, dict):
            md = {}
    if item.get("tags"):
        md.setdefault("tags", item["tags"])
    return md


def _start_pending_materializer() -> None:
    """Start the background thread that drains the pending queue.

    Does nothing if a materializer thread is already alive, so a repeated
    call cannot start a second drainer.

    Poll loop (runs until ``_materializer_stop`` is set):
      1. If the engine is not set yet, or the queue is empty, wait 2 s.
      2. Fetch up to 5 pending rows.
      3. Before each row, yield to recalls: while any recall is in flight,
         wait in 0.5 s steps, for at most 60 steps (30 s), then proceed.
      4. Call ``engine.store()``; then ``mark_done`` on success, or log a
         warning and ``mark_failed`` on error.
      5. On an unexpected loop error, log a warning and wait 5 s.

    All waits are on ``_materializer_stop``, so setting the event ends the
    loop without waiting out a sleep.
    """
    global _materializer_thread

    if _materializer_thread is not None and _materializer_thread.is_alive():
        return

    idle_poll_s = 2.0        # pause when the engine or the queue is empty
    error_backoff_s = 5.0    # pause after an unexpected loop error
    yield_step_s = 0.5       # pause per check while a recall is in flight
    max_yield_steps = 60     # cap on yielding per row (60 * 0.5 s = 30 s)

    def _loop():
        from superlocalmemory.cli.pending_store import (
            get_pending, mark_done, mark_failed,
        )
        stop = _materializer_stop
        while not stop.is_set():
            try:
                engine = _engine  # may be None briefly at startup
                if engine is None:
                    stop.wait(idle_poll_s)
                    continue
                pending = get_pending(limit=5)
                if not pending:
                    stop.wait(idle_poll_s)
                    continue
                for item in pending:
                    # Yield to recalls, but only up to the cap so a stuck
                    # counter cannot starve the queue forever.
                    waits = 0
                    while (
                        not stop.is_set()
                        and _recalls_in_flight() > 0
                        and waits < max_yield_steps
                    ):
                        stop.wait(yield_step_s)
                        waits += 1
                    if stop.is_set():
                        break
                    try:
                        engine.store(
                            item["content"],
                            metadata=_pending_item_metadata(item),
                        )
                        mark_done(item["id"])
                    except Exception as exc:
                        # %s rather than %d: the id's type is not known here.
                        logger.warning(
                            "Pending %s failed: %s", item["id"], exc,
                        )
                        mark_failed(item["id"], str(exc))
            except Exception as exc:
                logger.warning("materializer loop error: %s", exc)
                stop.wait(error_backoff_s)

    _materializer_thread = threading.Thread(
        target=_loop, daemon=True, name="pending-materializer",
    )
    _materializer_thread.start()
    logger.info("Pending materializer started (recall-priority)")
1299
+
1300
+
1192
1301
  def start_server(port: int = _DEFAULT_PORT) -> None:
1193
1302
  """Start the unified daemon. Blocks until stopped."""
1194
1303
  global _start_time
@@ -1223,6 +1332,9 @@ def start_server(port: int = _DEFAULT_PORT) -> None:
1223
1332
  # v3.4.7: Start memory watchdog to prevent runaway workers
1224
1333
  _start_memory_watchdog()
1225
1334
 
1335
+ # v3.4.32: Continuous pending-queue materializer with recall priority.
1336
+ _start_pending_materializer()
1337
+
1226
1338
  log_dir = Path.home() / ".superlocalmemory" / "logs"
1227
1339
  log_dir.mkdir(parents=True, exist_ok=True)
1228
1340