superlocalmemory 3.4.18 → 3.4.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md
CHANGED
|
@@ -10,6 +10,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
10
10
|
|
|
11
11
|
---
|
|
12
12
|
|
|
13
|
+
## [3.4.19] - 2026-04-17
|
|
14
|
+
|
|
15
|
+
### Fixed
|
|
16
|
+
- Recall cold-start eliminated. Embedding + reranker workers stay warm for 30 minutes by default instead of 2 minutes, so bursts of recalls no longer pay a 30-60 second model-load tax on every other query.
|
|
17
|
+
|
|
18
|
+
### New environment variables
|
|
19
|
+
- `SLM_EMBED_IDLE_TIMEOUT` — seconds to keep the embedding worker warm (default 1800). Set to 120 to restore pre-v3.4.19 behavior.
|
|
20
|
+
- `SLM_RERANKER_IDLE_TIMEOUT` — same, for the cross-encoder reranker (default 1800).
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
13
24
|
## [3.4.18] - 2026-04-17
|
|
14
25
|
|
|
15
26
|
### Fixed
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superlocalmemory",
|
|
3
|
-
"version": "3.4.18",
|
|
3
|
+
"version": "3.4.19",
|
|
4
4
|
"description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-memory",
|
package/pyproject.toml
CHANGED
|
@@ -140,8 +140,14 @@ def release_embedding_lock() -> None:
|
|
|
140
140
|
_embedding_lock_fd = None
|
|
141
141
|
|
|
142
142
|
|
|
143
|
-
_IDLE_TIMEOUT_SECONDS = 120
|
|
144
|
-
# V3.3.12: Configurable via SLM_EMBED_IDLE_TIMEOUT env var (seconds)
|
|
143
|
+
_IDLE_TIMEOUT_SECONDS = 1800 # 30 minutes — keep model warm across bursty use.
|
|
144
|
+
# V3.3.12: Configurable via SLM_EMBED_IDLE_TIMEOUT env var (seconds).
|
|
145
|
+
# V3.4.19: Bumped from 120 → 1800 to eliminate the 30-60s cold-start pain
|
|
146
|
+
# when the embedding worker was killed too aggressively. Safety: the
|
|
147
|
+
# per-embed RSS self-check (SLM_EMBED_WORKER_RSS_LIMIT_MB, 4GB default) and
|
|
148
|
+
# the daemon memory watchdog (unified_daemon.py, 4GB/60s) still cap any
|
|
149
|
+
# runaway. To restore the old aggressive policy without redeploying, set
|
|
150
|
+
# ``SLM_EMBED_IDLE_TIMEOUT=120`` and ``slm restart``.
|
|
145
151
|
_IDLE_TIMEOUT_SECONDS = int(os.environ.get("SLM_EMBED_IDLE_TIMEOUT", _IDLE_TIMEOUT_SECONDS))
|
|
146
152
|
# V3.3.21: Configurable response timeout — 180s default, but batch ingestion
|
|
147
153
|
# (2-turn chunks across 10 conversations) needs 600s+ to survive cold-start
|
|
@@ -51,8 +51,10 @@ _live_rerankers: set[weakref.ref] = set()
|
|
|
51
51
|
|
|
52
52
|
logger = logging.getLogger(__name__)
|
|
53
53
|
|
|
54
|
-
_IDLE_TIMEOUT_SECONDS = 120
|
|
55
|
-
# V3.3.12: Configurable via SLM_RERANKER_IDLE_TIMEOUT env var
|
|
54
|
+
_IDLE_TIMEOUT_SECONDS = 1800 # 30 min — keep cross-encoder warm for active sessions.
|
|
55
|
+
# V3.3.12: Configurable via SLM_RERANKER_IDLE_TIMEOUT env var.
|
|
56
|
+
# V3.4.19: Bumped from 120 → 1800 in lock-step with the embedding worker.
|
|
57
|
+
# Set ``SLM_RERANKER_IDLE_TIMEOUT=120`` + ``slm restart`` to revert.
|
|
56
58
|
_IDLE_TIMEOUT_SECONDS = int(os.environ.get("SLM_RERANKER_IDLE_TIMEOUT", _IDLE_TIMEOUT_SECONDS))
|
|
57
59
|
_SUBPROCESS_RESPONSE_TIMEOUT = 180 # V3.3.12: 180s (was 120s) for stressed system respawns
|
|
58
60
|
_WORKER_RECYCLE_AFTER = 500 # Recycle after N requests
|