superlocalmemory 3.3.17 → 3.3.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "superlocalmemory",
|
|
3
|
-
"version": "3.3.17",
|
|
3
|
+
"version": "3.3.18",
|
|
4
4
|
"description": "Information-geometric agent memory with mathematical guarantees. 4-channel retrieval, Fisher-Rao similarity, zero-LLM mode, EU AI Act compliant. Works with Claude, Cursor, Windsurf, and 17+ AI tools.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"ai-memory",
|
package/pyproject.toml
CHANGED
|
@@ -155,7 +155,7 @@ class RetrievalConfig:
|
|
|
155
155
|
# Reranking (V3.3.2: ONNX backend enabled for all modes)
|
|
156
156
|
use_cross_encoder: bool = True
|
|
157
157
|
cross_encoder_model: str = "cross-encoder/ms-marco-MiniLM-L-12-v2"
|
|
158
|
-
cross_encoder_backend: str = "onnx"
|
|
158
|
+
cross_encoder_backend: str = "" # "" = PyTorch (~500MB stable), "onnx" = ONNX (leaks on ARM64 CoreML)
|
|
159
159
|
|
|
160
160
|
# Agentic (Mode C only)
|
|
161
161
|
agentic_max_rounds: int = 3
|
|
@@ -35,6 +35,8 @@ os.environ["PYTORCH_MPS_MEM_LIMIT"] = "0"
|
|
|
35
35
|
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
|
|
36
36
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
37
37
|
os.environ["TORCH_DEVICE"] = "cpu"
|
|
38
|
+
# V3.3.17: Disable CoreML EP for ONNX Runtime — uses 3-5GB on ARM64 Mac.
|
|
39
|
+
os.environ["ORT_DISABLE_COREML"] = "1"
|
|
38
40
|
|
|
39
41
|
# SIGTERM bridge: Docker/systemd send SIGTERM to stop processes.
|
|
40
42
|
# Without this, the worker ignores SIGTERM and becomes a zombie.
|
|
@@ -65,6 +67,34 @@ def _start_parent_watchdog() -> None:
|
|
|
65
67
|
t.start()
|
|
66
68
|
|
|
67
69
|
|
|
70
|
+
def _load_embedding_model(name: str) -> tuple:
|
|
71
|
+
"""Load embedding model. ONNX first (no memory leak), PyTorch fallback.
|
|
72
|
+
|
|
73
|
+
V3.3.17: PyTorch SentenceTransformer on ARM64 Mac leaks memory —
|
|
74
|
+
grows from 300MB to 17GB after ~200 encode calls. ONNX Runtime
|
|
75
|
+
has no such issue. Same approach as CrossEncoder ONNX migration.
|
|
76
|
+
|
|
77
|
+
Returns (model, backend_name) or (None, "").
|
|
78
|
+
"""
|
|
79
|
+
from sentence_transformers import SentenceTransformer
|
|
80
|
+
|
|
81
|
+
# Tier 1: ONNX (stable memory, ~200MB footprint)
|
|
82
|
+
try:
|
|
83
|
+
m = SentenceTransformer(name, backend="onnx", trust_remote_code=True)
|
|
84
|
+
return m, "onnx"
|
|
85
|
+
except Exception:
|
|
86
|
+
pass
|
|
87
|
+
|
|
88
|
+
# Tier 2: PyTorch CPU (stable at ~1.4GB after 100+ calls, verified)
|
|
89
|
+
try:
|
|
90
|
+
import torch
|
|
91
|
+
with torch.inference_mode():
|
|
92
|
+
m = SentenceTransformer(name, trust_remote_code=True, device="cpu")
|
|
93
|
+
return m, "pytorch"
|
|
94
|
+
except Exception:
|
|
95
|
+
return None, ""
|
|
96
|
+
|
|
97
|
+
|
|
68
98
|
def _worker_main() -> None:
|
|
69
99
|
"""Main loop: read JSON requests from stdin, write responses to stdout."""
|
|
70
100
|
_start_parent_watchdog() # V3.3.7: self-terminate if parent dies
|
|
@@ -97,18 +127,17 @@ def _worker_main() -> None:
|
|
|
97
127
|
if cmd == "load":
|
|
98
128
|
name = req.get("model_name", "nomic-ai/nomic-embed-text-v1.5")
|
|
99
129
|
expected_dim = req.get("dimension", 768)
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
model = SentenceTransformer(name, trust_remote_code=True, device="cpu")
|
|
130
|
+
model, active_backend = _load_embedding_model(name)
|
|
131
|
+
if model is not None:
|
|
103
132
|
dim = model.get_sentence_embedding_dimension()
|
|
104
133
|
if dim != expected_dim:
|
|
105
134
|
_respond({"ok": False, "error": f"Dimension mismatch: {dim} != {expected_dim}"})
|
|
106
135
|
model = None
|
|
107
136
|
continue
|
|
108
137
|
model_name = name
|
|
109
|
-
_respond({"ok": True, "dim": dim, "model": name})
|
|
110
|
-
|
|
111
|
-
_respond({"ok": False, "error":
|
|
138
|
+
_respond({"ok": True, "dim": dim, "model": name, "backend": active_backend})
|
|
139
|
+
else:
|
|
140
|
+
_respond({"ok": False, "error": "Model load failed"})
|
|
112
141
|
continue
|
|
113
142
|
|
|
114
143
|
if cmd == "embed":
|
|
@@ -117,26 +146,16 @@ def _worker_main() -> None:
|
|
|
117
146
|
_respond({"ok": False, "error": "No texts provided"})
|
|
118
147
|
continue
|
|
119
148
|
if model is None:
|
|
120
|
-
# Auto-load if not yet loaded
|
|
121
149
|
name = req.get("model_name", "nomic-ai/nomic-embed-text-v1.5")
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
from sentence_transformers import SentenceTransformer
|
|
125
|
-
model = SentenceTransformer(name, trust_remote_code=True, device="cpu")
|
|
150
|
+
model, active_backend = _load_embedding_model(name)
|
|
151
|
+
if model is not None:
|
|
126
152
|
dim = model.get_sentence_embedding_dimension()
|
|
127
153
|
model_name = name
|
|
128
|
-
|
|
129
|
-
_respond({"ok": False, "error":
|
|
154
|
+
else:
|
|
155
|
+
_respond({"ok": False, "error": "Model load failed"})
|
|
130
156
|
continue
|
|
131
157
|
try:
|
|
132
|
-
|
|
133
|
-
# which causes silent memory leaks over long-running sessions.
|
|
134
|
-
try:
|
|
135
|
-
import torch
|
|
136
|
-
with torch.inference_mode():
|
|
137
|
-
vecs = model.encode(texts, normalize_embeddings=True)
|
|
138
|
-
except ImportError:
|
|
139
|
-
vecs = model.encode(texts, normalize_embeddings=True)
|
|
158
|
+
vecs = model.encode(texts, normalize_embeddings=True)
|
|
140
159
|
if isinstance(vecs, np.ndarray) and vecs.ndim == 2:
|
|
141
160
|
result = [vecs[i].tolist() for i in range(vecs.shape[0])]
|
|
142
161
|
else:
|
|
@@ -40,6 +40,9 @@ os.environ["PYTORCH_MPS_MEM_LIMIT"] = "0"
|
|
|
40
40
|
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
|
|
41
41
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
42
42
|
os.environ["TORCH_DEVICE"] = "cpu"
|
|
43
|
+
# V3.3.17: Disable CoreML EP for ONNX Runtime. CoreML compiles execution
|
|
44
|
+
# plans that consume 3-5GB on ARM64 Mac. CPU EP is ~500MB and fast enough.
|
|
45
|
+
os.environ["ORT_DISABLE_COREML"] = "1"
|
|
43
46
|
|
|
44
47
|
# SIGTERM bridge for Docker/systemd
|
|
45
48
|
if sys.platform != "win32":
|