superlocalmemory 3.3.12 → 3.3.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/pyproject.toml +2 -3
- package/src/superlocalmemory/core/config.py +2 -2
- package/src/superlocalmemory/core/embedding_worker.py +5 -1
- package/src/superlocalmemory/core/engine.py +14 -0
- package/src/superlocalmemory/core/engine_wiring.py +15 -0
- package/src/superlocalmemory/core/maintenance_scheduler.py +94 -0
- package/src/superlocalmemory/core/reranker_worker.py +33 -7
- package/src/superlocalmemory/encoding/graph_builder.py +7 -0
- package/src/superlocalmemory/retrieval/engine.py +8 -3
- package/src/superlocalmemory/retrieval/reranker.py +1 -1
- package/src/superlocalmemory/storage/database.py +11 -3
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superlocalmemory",
|
|
3
|
-
"version": "3.3.12",
|
|
3
|
+
"version": "3.3.14",
|
|
4
4
|
"description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-memory",
|
package/pyproject.toml
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "superlocalmemory"
|
|
3
|
-
version = "3.3.12"
|
|
3
|
+
version = "3.3.14"
|
|
4
4
|
description = "Information-geometric agent memory with mathematical guarantees"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = {text = "MIT"}
|
|
@@ -48,8 +48,7 @@ dependencies = [
|
|
|
48
48
|
|
|
49
49
|
[project.optional-dependencies]
|
|
50
50
|
search = [
|
|
51
|
-
"sentence-transformers>=
|
|
52
|
-
"sentence-transformers[onnx]>=4.0.0",
|
|
51
|
+
"sentence-transformers[onnx]>=5.0.0",
|
|
53
52
|
"einops>=0.8.2",
|
|
54
53
|
"torch>=2.2.0",
|
|
55
54
|
"scikit-learn>=1.3.0,<2.0.0",
|
|
@@ -154,7 +154,7 @@ class RetrievalConfig:
|
|
|
154
154
|
|
|
155
155
|
# Reranking (V3.3.2: ONNX backend enabled for all modes)
|
|
156
156
|
use_cross_encoder: bool = True
|
|
157
|
-
cross_encoder_model: str = "cross-encoder/ms-marco-MiniLM-L-
|
|
157
|
+
cross_encoder_model: str = "cross-encoder/ms-marco-MiniLM-L-12-v2"
|
|
158
158
|
cross_encoder_backend: str = "onnx" # "onnx" (~200MB) or "" (PyTorch, ~1.5GB)
|
|
159
159
|
|
|
160
160
|
# Agentic (Mode C only)
|
|
@@ -618,7 +618,7 @@ class SLMConfig:
|
|
|
618
618
|
# but NEVER override an explicit use_cross_encoder setting.
|
|
619
619
|
# The user's explicit choice always wins.
|
|
620
620
|
if "cross_encoder_backend" not in rt:
|
|
621
|
-
rt.setdefault("cross_encoder_model", "cross-encoder/ms-marco-MiniLM-L-
|
|
621
|
+
rt.setdefault("cross_encoder_model", "cross-encoder/ms-marco-MiniLM-L-12-v2")
|
|
622
622
|
rt["cross_encoder_backend"] = "onnx"
|
|
623
623
|
# Only auto-enable if user didn't explicitly set the field
|
|
624
624
|
rt.setdefault("use_cross_encoder", True)
|
|
@@ -79,6 +79,7 @@ class MemoryEngine:
|
|
|
79
79
|
self._auto_linker = None
|
|
80
80
|
self._graph_analyzer = None
|
|
81
81
|
self._consolidation_engine = None
|
|
82
|
+
self._maintenance_scheduler = None
|
|
82
83
|
self._hooks = HookRegistry()
|
|
83
84
|
|
|
84
85
|
# -- Public properties (Phase 2+ access) --------------------------------
|
|
@@ -194,6 +195,17 @@ class MemoryEngine:
|
|
|
194
195
|
# V3.3: Check for embedding model migration on mode switch
|
|
195
196
|
self._check_embedding_migration()
|
|
196
197
|
|
|
198
|
+
# V3.3.13: Background maintenance scheduler (Langevin/Ebbinghaus/Sheaf)
|
|
199
|
+
if self._config.forgetting.enabled:
|
|
200
|
+
try:
|
|
201
|
+
from superlocalmemory.core.maintenance_scheduler import MaintenanceScheduler
|
|
202
|
+
self._maintenance_scheduler = MaintenanceScheduler(
|
|
203
|
+
self._db, self._config, self._profile_id,
|
|
204
|
+
)
|
|
205
|
+
self._maintenance_scheduler.start()
|
|
206
|
+
except Exception as exc:
|
|
207
|
+
logger.debug("Maintenance scheduler init failed: %s", exc)
|
|
208
|
+
|
|
197
209
|
self._initialized = True
|
|
198
210
|
logger.info(
|
|
199
211
|
"MemoryEngine initialized: mode=%s profile=%s",
|
|
@@ -306,6 +318,8 @@ class MemoryEngine:
|
|
|
306
318
|
# -- Lifecycle ----------------------------------------------------------
|
|
307
319
|
|
|
308
320
|
def close(self) -> None:
|
|
321
|
+
if self._maintenance_scheduler is not None:
|
|
322
|
+
self._maintenance_scheduler.stop()
|
|
309
323
|
self._initialized = False
|
|
310
324
|
|
|
311
325
|
@property
|
|
@@ -454,6 +454,21 @@ def init_retrieval(
|
|
|
454
454
|
trust_scorer=trust_scorer,
|
|
455
455
|
)
|
|
456
456
|
|
|
457
|
+
# V3.3.13: Ensure reranker warmup is in progress.
|
|
458
|
+
# The CrossEncoderReranker constructor starts background warmup, but
|
|
459
|
+
# callers can also call warmup_sync() to block until ready.
|
|
460
|
+
# Here we just log warmup status — benchmark scripts call warmup_sync() explicitly.
|
|
461
|
+
if reranker is not None:
|
|
462
|
+
import threading
|
|
463
|
+
def _log_warmup_status() -> None:
|
|
464
|
+
ready = reranker.warmup_sync(timeout=180)
|
|
465
|
+
if ready:
|
|
466
|
+
logger.info("Cross-encoder reranker warm and ready")
|
|
467
|
+
else:
|
|
468
|
+
logger.warning("Cross-encoder reranker warmup failed — recalls will use fallback scoring")
|
|
469
|
+
t = threading.Thread(target=_log_warmup_status, daemon=True, name="ce-init-warmup")
|
|
470
|
+
t.start()
|
|
471
|
+
|
|
457
472
|
# Phase A: Register forgetting filter into the channel registry
|
|
458
473
|
try:
|
|
459
474
|
from superlocalmemory.retrieval.forgetting_filter import register_forgetting_filter
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
|
|
2
|
+
# Licensed under the MIT License - see LICENSE file
|
|
3
|
+
# Part of SuperLocalMemory V3 | https://qualixar.com | https://varunpratap.com
|
|
4
|
+
|
|
5
|
+
"""SuperLocalMemory V3 — Background Maintenance Scheduler.
|
|
6
|
+
|
|
7
|
+
V3.3.13: Periodically triggers Langevin/Ebbinghaus/Sheaf maintenance
|
|
8
|
+
so users don't need to call run_maintenance manually.
|
|
9
|
+
|
|
10
|
+
Configurable interval via ForgettingConfig.scheduler_interval_minutes.
|
|
11
|
+
Defaults to 30 min. Disabled during benchmarks (no config.forgetting.enabled).
|
|
12
|
+
|
|
13
|
+
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
14
|
+
License: MIT
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import logging
|
|
20
|
+
import threading
|
|
21
|
+
from typing import TYPE_CHECKING
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from superlocalmemory.core.config import SLMConfig
|
|
25
|
+
from superlocalmemory.storage.database import DatabaseManager
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class MaintenanceScheduler:
    """Background scheduler for periodic math maintenance.

    Arms a daemon ``threading.Timer`` that calls ``run_maintenance`` every
    ``config.forgetting.scheduler_interval_minutes`` minutes and re-arms
    itself after each run, whether or not that run succeeded.

    ``start()``, ``stop()`` and re-arming are serialised by an internal lock,
    and arming a timer always cancels the one it replaces, so a stop/start
    cycle that overlaps an in-flight run cannot leave two timer chains alive.

    Call ``stop()`` explicitly when done: a pending timer holds the bound
    ``_run`` callback and therefore keeps this object reachable, so
    ``__del__`` is only a best-effort fallback.
    """

    def __init__(
        self,
        db: DatabaseManager,
        config: SLMConfig,
        profile_id: str = "default",
    ) -> None:
        """Store collaborators and derive the interval; does not start a timer.

        Args:
            db: Database handle forwarded unchanged to ``run_maintenance``.
            config: Configuration; ``forgetting.scheduler_interval_minutes``
                is read here, and the whole object is forwarded to
                ``run_maintenance``.
            profile_id: Profile forwarded to ``run_maintenance``.
        """
        # Created first so stop() (and thus __del__) works even if a later
        # line of __init__ raises.
        self._lock = threading.Lock()
        self._timer: threading.Timer | None = None
        self._running = False
        self._db = db
        self._config = config
        self._profile_id = profile_id
        # threading.Timer takes seconds; the config value is in minutes.
        self._interval = config.forgetting.scheduler_interval_minutes * 60.0

    def start(self) -> None:
        """Start the periodic scheduler. Idempotent.

        A non-positive interval is refused (with a warning) because a timer
        with no delay would re-fire immediately and run maintenance in a
        continuous loop.
        """
        if self._interval <= 0:
            logger.warning(
                "Maintenance scheduler not started: non-positive interval (%s min)",
                self._config.forgetting.scheduler_interval_minutes,
            )
            return
        with self._lock:
            if self._running:
                return
            self._running = True
            try:
                self._arm_locked()
            except Exception:
                # Do not stay "running" without a timer, otherwise every
                # later start() would be a silent no-op.
                self._running = False
                raise
        logger.info(
            "Maintenance scheduler started (interval=%dm)",
            self._config.forgetting.scheduler_interval_minutes,
        )

    def stop(self) -> None:
        """Stop the scheduler and cancel any pending timer. Idempotent.

        A maintenance run that is already executing is not interrupted; it
        simply will not re-arm the timer when it finishes.
        """
        with self._lock:
            was_running = self._running
            self._running = False
            timer, self._timer = self._timer, None
        if timer is not None:
            timer.cancel()
        # Only report an actual state change, so repeated stop() calls and
        # __del__ on a never-started scheduler stay quiet.
        if was_running:
            logger.info("Maintenance scheduler stopped")

    def _schedule_next(self) -> None:
        """Schedule the next maintenance run if the scheduler is still running."""
        with self._lock:
            if self._running:
                self._arm_locked()

    def _arm_locked(self) -> None:
        """Replace the pending timer with a fresh one. Caller must hold the lock."""
        if self._timer is not None:
            # Harmless if that timer already fired; otherwise it prevents a
            # second chain after a stop()/start() cycle during a run.
            self._timer.cancel()
        timer = threading.Timer(self._interval, self._run)
        timer.daemon = True  # never block interpreter exit
        timer.start()
        self._timer = timer

    def _run(self) -> None:
        """Execute maintenance and schedule next run."""
        if not self._running:
            return
        try:
            # Imported lazily, at call time, as in the original implementation.
            from superlocalmemory.core.maintenance import run_maintenance
            counts = run_maintenance(self._db, self._config, self._profile_id)
            logger.info("Scheduled maintenance complete: %s", counts)
        except Exception as exc:
            # Best effort: a failed run must not end the schedule.
            logger.warning("Scheduled maintenance failed: %s", exc)
        finally:
            self._schedule_next()

    def __del__(self) -> None:
        # Best-effort cleanup; never raise from a finalizer.
        try:
            self.stop()
        except Exception:
            pass
|
|
@@ -115,7 +115,7 @@ def _worker_main() -> None:
|
|
|
115
115
|
continue
|
|
116
116
|
|
|
117
117
|
if cmd == "load":
|
|
118
|
-
name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-
|
|
118
|
+
name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-12-v2")
|
|
119
119
|
backend = req.get("backend", "onnx")
|
|
120
120
|
model, active_backend, model_name = _load_model(name, backend)
|
|
121
121
|
_respond({
|
|
@@ -133,7 +133,7 @@ def _worker_main() -> None:
|
|
|
133
133
|
continue
|
|
134
134
|
if model is None:
|
|
135
135
|
# Auto-load with defaults
|
|
136
|
-
name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-
|
|
136
|
+
name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-12-v2")
|
|
137
137
|
backend = req.get("backend", "onnx")
|
|
138
138
|
model, active_backend, model_name = _load_model(name, backend)
|
|
139
139
|
if model is None:
|
|
@@ -162,7 +162,7 @@ def _worker_main() -> None:
|
|
|
162
162
|
_respond({"ok": False, "error": "Missing query or document"})
|
|
163
163
|
continue
|
|
164
164
|
if model is None:
|
|
165
|
-
name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-
|
|
165
|
+
name = req.get("model_name", "cross-encoder/ms-marco-MiniLM-L-12-v2")
|
|
166
166
|
backend = req.get("backend", "onnx")
|
|
167
167
|
model, active_backend, model_name = _load_model(name, backend)
|
|
168
168
|
if model is None:
|
|
@@ -186,22 +186,43 @@ def _worker_main() -> None:
|
|
|
186
186
|
def _load_model(
|
|
187
187
|
name: str, backend: str,
|
|
188
188
|
) -> tuple:
|
|
189
|
-
"""Load cross-encoder model. Returns (model, backend_name, model_name).
|
|
189
|
+
"""Load cross-encoder model. Returns (model, backend_name, model_name).
|
|
190
|
+
|
|
191
|
+
V3.3.13: sentence-transformers 5.x+ supports backend='onnx' for
|
|
192
|
+
CrossEncoder. We use a 3-tier fallback chain:
|
|
193
|
+
|
|
194
|
+
1. ONNX + platform-quantized model (fastest, ~200MB, 2.4ms/pair)
|
|
195
|
+
2. ONNX + generic model (fast, auto-exported on first use)
|
|
196
|
+
3. PyTorch (always works, ~500MB, 6ms/pair)
|
|
197
|
+
|
|
198
|
+
Cross-platform:
|
|
199
|
+
Mac ARM64 → model_qint8_arm64.onnx
|
|
200
|
+
x86_64 → model_quint8_avx2.onnx
|
|
201
|
+
Fallback → model.onnx (generic)
|
|
202
|
+
"""
|
|
190
203
|
try:
|
|
191
204
|
from sentence_transformers import CrossEncoder
|
|
192
205
|
|
|
193
206
|
if backend == "onnx":
|
|
207
|
+
# Tier 1: Platform-specific quantized ONNX (fastest)
|
|
194
208
|
try:
|
|
195
209
|
onnx_file = _detect_onnx_variant()
|
|
196
210
|
m = CrossEncoder(
|
|
197
211
|
name, backend="onnx",
|
|
198
212
|
model_kwargs={"file_name": onnx_file},
|
|
199
213
|
)
|
|
214
|
+
return m, f"onnx-quantized({onnx_file})", name
|
|
215
|
+
except Exception:
|
|
216
|
+
pass
|
|
217
|
+
|
|
218
|
+
# Tier 2: Generic ONNX (auto-exported by optimum)
|
|
219
|
+
try:
|
|
220
|
+
m = CrossEncoder(name, backend="onnx")
|
|
200
221
|
return m, "onnx", name
|
|
201
222
|
except Exception:
|
|
202
|
-
# ONNX failed → try PyTorch
|
|
203
223
|
pass
|
|
204
|
-
|
|
224
|
+
|
|
225
|
+
# Tier 3: PyTorch (always works, no ONNX dependency needed)
|
|
205
226
|
m = CrossEncoder(name)
|
|
206
227
|
return m, "pytorch", name
|
|
207
228
|
except ImportError:
|
|
@@ -217,4 +238,9 @@ def _respond(data: dict) -> None:
|
|
|
217
238
|
|
|
218
239
|
|
|
219
240
|
if __name__ == "__main__":
|
|
220
|
-
|
|
241
|
+
try:
|
|
242
|
+
_worker_main()
|
|
243
|
+
except KeyboardInterrupt:
|
|
244
|
+
# V3.3.13: Windows CI sends KeyboardInterrupt on test completion.
|
|
245
|
+
# Exit cleanly instead of printing a traceback that fails CI.
|
|
246
|
+
sys.exit(0)
|
|
@@ -261,6 +261,9 @@ class GraphBuilder:
|
|
|
261
261
|
break
|
|
262
262
|
return edges
|
|
263
263
|
|
|
264
|
+
# V3.3.13: Cap causal edges per entity to prevent O(n²) explosion (same as entity/temporal).
|
|
265
|
+
_MAX_CAUSAL_EDGES_PER_ENTITY: int = 20
|
|
266
|
+
|
|
264
267
|
def _build_causal_edges(
|
|
265
268
|
self, new_fact: AtomicFact, profile_id: str,
|
|
266
269
|
) -> list[GraphEdge]:
|
|
@@ -273,7 +276,10 @@ class GraphBuilder:
|
|
|
273
276
|
edges: list[GraphEdge] = []
|
|
274
277
|
seen: set[str] = set()
|
|
275
278
|
for entity_id in new_fact.canonical_entities:
|
|
279
|
+
causal_edge_count = 0
|
|
276
280
|
for other in self._db.get_facts_by_entity(entity_id, profile_id):
|
|
281
|
+
if causal_edge_count >= self._MAX_CAUSAL_EDGES_PER_ENTITY:
|
|
282
|
+
break
|
|
277
283
|
if other.fact_id == new_fact.fact_id or other.fact_id in seen:
|
|
278
284
|
continue
|
|
279
285
|
if self._edge_exists(other.fact_id, new_fact.fact_id, EdgeType.CAUSAL, profile_id):
|
|
@@ -284,6 +290,7 @@ class GraphBuilder:
|
|
|
284
290
|
target_id=new_fact.fact_id, edge_type=EdgeType.CAUSAL,
|
|
285
291
|
weight=_CAUSAL_WEIGHT,
|
|
286
292
|
))
|
|
293
|
+
causal_edge_count += 1
|
|
287
294
|
return edges
|
|
288
295
|
|
|
289
296
|
# -- Helpers -----------------------------------------------------------
|
|
@@ -301,11 +301,16 @@ class RetrievalEngine:
|
|
|
301
301
|
def _load_facts(
|
|
302
302
|
self, fused: list[FusionResult], profile_id: str,
|
|
303
303
|
) -> dict[str, AtomicFact]:
|
|
304
|
-
|
|
304
|
+
"""Load facts by ID — targeted query, not full-table scan.
|
|
305
|
+
|
|
306
|
+
V3.3.13: Was loading ALL facts (O(n) memory) then filtering.
|
|
307
|
+
Now uses get_facts_by_ids() for O(k) where k = pool size (~60).
|
|
308
|
+
"""
|
|
309
|
+
needed = [fr.fact_id for fr in fused]
|
|
305
310
|
if not needed:
|
|
306
311
|
return {}
|
|
307
|
-
|
|
308
|
-
return {f.fact_id: f for f in
|
|
312
|
+
facts = self._db.get_facts_by_ids(needed, profile_id)
|
|
313
|
+
return {f.fact_id: f for f in facts}
|
|
309
314
|
|
|
310
315
|
# -- Cross-encoder rerank -----------------------------------------------
|
|
311
316
|
|
|
@@ -250,12 +250,20 @@ class DatabaseManager:
|
|
|
250
250
|
)
|
|
251
251
|
return [self._row_to_fact(r) for r in rows]
|
|
252
252
|
|
|
253
|
+
_MAX_FACTS_PER_ENTITY_LOOKUP: int = 100
|
|
254
|
+
|
|
253
255
|
def get_facts_by_entity(self, entity_id: str, profile_id: str) -> list[AtomicFact]:
|
|
254
|
-
"""Facts whose canonical_entities JSON array contains *entity_id*.
|
|
256
|
+
"""Facts whose canonical_entities JSON array contains *entity_id*.
|
|
257
|
+
|
|
258
|
+
V3.3.14: LIMIT to _MAX_FACTS_PER_ENTITY_LOOKUP (100) to prevent
|
|
259
|
+
unbounded memory growth during ingestion. Previously loaded ALL
|
|
260
|
+
facts for popular entities (500+) causing 17GB+ memory usage.
|
|
261
|
+
Ordered by created_at DESC so newest facts are always included.
|
|
262
|
+
"""
|
|
255
263
|
rows = self.execute(
|
|
256
264
|
"SELECT * FROM atomic_facts WHERE profile_id = ? AND canonical_entities_json LIKE ? "
|
|
257
|
-
"ORDER BY created_at DESC",
|
|
258
|
-
(profile_id, f'%"{entity_id}"%'),
|
|
265
|
+
"ORDER BY created_at DESC LIMIT ?",
|
|
266
|
+
(profile_id, f'%"{entity_id}"%', self._MAX_FACTS_PER_ENTITY_LOOKUP),
|
|
259
267
|
)
|
|
260
268
|
return [self._row_to_fact(r) for r in rows]
|
|
261
269
|
|