superlocalmemory 3.0.13 → 3.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +2 -1
- package/NOTICE +1 -1
- package/README.md +193 -191
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/src/superlocalmemory/cli/commands.py +2 -0
- package/src/superlocalmemory/core/embeddings.py +57 -2
- package/src/superlocalmemory/storage/schema.py +31 -4
- package/src/superlocalmemory/storage/v2_migrator.py +34 -8
- package/src/superlocalmemory.egg-info/PKG-INFO +194 -192
|
@@ -8,16 +8,29 @@ Thread-safe, dimension-validated embedding with Fisher variance computation.
|
|
|
8
8
|
Supports local (768-dim nomic) and cloud (3072-dim) models with EXPLICIT errors
|
|
9
9
|
on dimension mismatch — NEVER silently falls back to a different dimension.
|
|
10
10
|
|
|
11
|
+
Memory management: Forces CPU-only inference to prevent GPU memory accumulation.
|
|
12
|
+
Auto-unloads model after idle timeout to keep long-running MCP servers lean.
|
|
13
|
+
|
|
11
14
|
Part of Qualixar | Author: Varun Pratap Bhardwaj
|
|
12
15
|
"""
|
|
13
16
|
|
|
14
17
|
from __future__ import annotations
|
|
15
18
|
|
|
16
19
|
import logging
|
|
20
|
+
import os
|
|
17
21
|
import threading
|
|
18
22
|
import time
|
|
19
23
|
from typing import TYPE_CHECKING
|
|
20
24
|
|
|
25
|
+
# Force CPU before any torch/sentence-transformers import.
# On Apple Silicon, PyTorch defaults to Metal (MPS) which allocates 4-6 GB
# of GPU shader buffers that grow over time and never get released.
# On Windows/Linux with CUDA, similar GPU memory issues occur.
# CPU-only keeps footprint under 1 GB (vs 6+ GB with GPU).
#
# setdefault() is used throughout so a value exported by the user wins.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")
# NOTE: PYTORCH_MPS_HIGH_WATERMARK_RATIO is deliberately NOT forced to "0.0".
# In PyTorch's MPS allocator a ratio of 0.0 *disables* the upper allocation
# limit (PyTorch warns this "may cause system failure"), which is the
# opposite of bounding memory and would affect every MPS user in the process.
# Leaving the variable untouched keeps PyTorch's built-in safety limit.
os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")
|
|
33
|
+
|
|
21
34
|
import numpy as np
|
|
22
35
|
|
|
23
36
|
if TYPE_CHECKING:
|
|
@@ -44,11 +57,16 @@ class DimensionMismatchError(RuntimeError):
|
|
|
44
57
|
"""
|
|
45
58
|
|
|
46
59
|
|
|
60
|
+
_IDLE_TIMEOUT_SECONDS = 300 # 5 minutes — unload model after idle
|
|
61
|
+
|
|
62
|
+
|
|
47
63
|
class EmbeddingService:
|
|
48
64
|
"""Thread-safe embedding service with strict dimension validation.
|
|
49
65
|
|
|
50
66
|
Lazy-loads the underlying model on first embed call.
|
|
51
67
|
Validates every output dimension against the configured expectation.
|
|
68
|
+
Auto-unloads after 5 minutes idle to keep MCP server memory low.
|
|
69
|
+
Forces CPU-only inference to prevent GPU memory accumulation.
|
|
52
70
|
"""
|
|
53
71
|
|
|
54
72
|
def __init__(self, config: EmbeddingConfig) -> None:
|
|
@@ -57,6 +75,8 @@ class EmbeddingService:
|
|
|
57
75
|
self._lock = threading.Lock()
|
|
58
76
|
self._loaded = False
|
|
59
77
|
self._available = True # Set False if model can't load
|
|
78
|
+
self._last_used: float = 0.0
|
|
79
|
+
self._idle_timer: threading.Timer | None = None
|
|
60
80
|
|
|
61
81
|
@property
|
|
62
82
|
def is_available(self) -> bool:
|
|
@@ -65,6 +85,32 @@ class EmbeddingService:
|
|
|
65
85
|
self._ensure_loaded()
|
|
66
86
|
return self._available and self._model is not None
|
|
67
87
|
|
|
88
|
+
def unload(self) -> None:
    """Explicitly unload the model to free memory.

    Called automatically after idle timeout, or manually for cleanup.
    The model will lazy-reload on next embed call.

    When no model is currently held, only the loaded flag is reset: the
    garbage-collection pass and the log line are skipped, so a stale timer
    firing after a manual unload no longer reports a bogus unload.
    """
    import gc

    with self._lock:
        released = self._model is not None
        # Dropping the reference is sufficient; a separate ``del`` adds nothing.
        self._model = None
        # Cleared unconditionally, exactly as before.
        self._loaded = False

    if not released:
        return

    # Collect outside the lock so other threads waiting on it are not
    # stalled for the duration of a full collection.
    gc.collect()
    logger.info("EmbeddingService: model unloaded (idle timeout)")
|
|
102
|
+
|
|
103
|
+
def _reset_idle_timer(self) -> None:
    """Restart the idle-unload countdown and record the time of use.

    Any pending timer is cancelled and replaced by a fresh daemon timer
    that calls ``self.unload`` after ``_IDLE_TIMEOUT_SECONDS``.
    """
    # NOTE(review): no lock is taken here — confirm callers serialize calls,
    # otherwise two threads can each start a timer and orphan one of them.
    pending = self._idle_timer
    if pending is not None:
        pending.cancel()

    self._idle_timer = countdown = threading.Timer(
        _IDLE_TIMEOUT_SECONDS,
        self.unload,
    )
    # Daemon thread: a pending countdown must not keep the process alive.
    countdown.daemon = True
    countdown.start()

    self._last_used = time.time()
|
|
113
|
+
|
|
68
114
|
# ------------------------------------------------------------------
|
|
69
115
|
# Public API
|
|
70
116
|
# ------------------------------------------------------------------
|
|
@@ -91,6 +137,7 @@ class EmbeddingService:
|
|
|
91
137
|
return None
|
|
92
138
|
vec = self._encode_single(text)
|
|
93
139
|
self._validate_dimension(vec)
|
|
140
|
+
self._reset_idle_timer()
|
|
94
141
|
return vec.tolist()
|
|
95
142
|
|
|
96
143
|
def embed_batch(self, texts: list[str]) -> list[list[float]]:
|
|
@@ -115,6 +162,7 @@ class EmbeddingService:
|
|
|
115
162
|
vectors = self._encode_batch(texts)
|
|
116
163
|
for vec in vectors:
|
|
117
164
|
self._validate_dimension(vec)
|
|
165
|
+
self._reset_idle_timer()
|
|
118
166
|
return [v.tolist() for v in vectors]
|
|
119
167
|
|
|
120
168
|
def compute_fisher_params(
|
|
@@ -185,7 +233,13 @@ class EmbeddingService:
|
|
|
185
233
|
self._loaded = True
|
|
186
234
|
|
|
187
235
|
def _load_local_model(self) -> None:
|
|
188
|
-
"""Load sentence-transformers model for local embedding.
|
|
236
|
+
"""Load sentence-transformers model for local embedding.
|
|
237
|
+
|
|
238
|
+
Forces CPU device to prevent GPU memory accumulation:
|
|
239
|
+
- Apple Silicon MPS: allocates 4-6 GB Metal shader buffers
|
|
240
|
+
- NVIDIA CUDA: allocates GPU VRAM that never releases
|
|
241
|
+
- CPU-only: stable ~880 MB footprint, no growth over time
|
|
242
|
+
"""
|
|
189
243
|
try:
|
|
190
244
|
from sentence_transformers import SentenceTransformer
|
|
191
245
|
except ImportError:
|
|
@@ -199,6 +253,7 @@ class EmbeddingService:
|
|
|
199
253
|
return
|
|
200
254
|
model = SentenceTransformer(
|
|
201
255
|
self._config.model_name, trust_remote_code=True,
|
|
256
|
+
device="cpu",
|
|
202
257
|
)
|
|
203
258
|
actual_dim = model.get_sentence_embedding_dimension()
|
|
204
259
|
if actual_dim != self._config.dimension:
|
|
@@ -208,7 +263,7 @@ class EmbeddingService:
|
|
|
208
263
|
)
|
|
209
264
|
self._model = model
|
|
210
265
|
logger.info(
|
|
211
|
-
"EmbeddingService: local model loaded (%s, %d-dim)",
|
|
266
|
+
"EmbeddingService: local model loaded (%s, %d-dim, device=cpu)",
|
|
212
267
|
self._config.model_name,
|
|
213
268
|
actual_dim,
|
|
214
269
|
)
|
|
@@ -216,6 +216,28 @@ CREATE INDEX IF NOT EXISTS idx_facts_interval
|
|
|
216
216
|
# FTS5 virtual table on atomic_facts for full-text search
|
|
217
217
|
# ---------------------------------------------------------------------------
|
|
218
218
|
|
|
219
|
+
_SQL_V2_MIGRATION_CLEANUP: Final[str] = """
|
|
220
|
+
-- Clean up stale V2 triggers that fire on active tables but reference
|
|
221
|
+
-- renamed backup FTS tables. The V2→V3 migration renames tables via
|
|
222
|
+
-- ALTER TABLE RENAME, which auto-updates trigger bodies to reference
|
|
223
|
+
-- _v2_bak_* tables but leaves FTS5 delete-command column names stale.
|
|
224
|
+
-- This causes: "table _v2_bak_*_fts has no column named *_fts"
|
|
225
|
+
|
|
226
|
+
-- Drop V2-era triggers on memories table (memories_ai/ad/au)
|
|
227
|
+
DROP TRIGGER IF EXISTS memories_ai;
|
|
228
|
+
DROP TRIGGER IF EXISTS memories_ad;
|
|
229
|
+
DROP TRIGGER IF EXISTS memories_au;
|
|
230
|
+
|
|
231
|
+
-- Drop stale V3 triggers (may have been corrupted by V2 rename)
|
|
232
|
+
DROP TRIGGER IF EXISTS atomic_facts_fts_insert;
|
|
233
|
+
DROP TRIGGER IF EXISTS atomic_facts_fts_delete;
|
|
234
|
+
DROP TRIGGER IF EXISTS atomic_facts_fts_update;
|
|
235
|
+
|
|
236
|
+
-- Drop renamed V2 backup FTS virtual tables (and their shadow tables)
|
|
237
|
+
DROP TABLE IF EXISTS "_v2_bak_atomic_facts_fts";
|
|
238
|
+
DROP TABLE IF EXISTS "_v2_bak_memories_fts";
|
|
239
|
+
"""
|
|
240
|
+
|
|
219
241
|
_SQL_ATOMIC_FACTS_FTS: Final[str] = """
|
|
220
242
|
CREATE VIRTUAL TABLE IF NOT EXISTS atomic_facts_fts
|
|
221
243
|
USING fts5(
|
|
@@ -226,8 +248,11 @@ CREATE VIRTUAL TABLE IF NOT EXISTS atomic_facts_fts
|
|
|
226
248
|
);
|
|
227
249
|
|
|
228
250
|
-- Triggers to keep FTS in sync with atomic_facts.
|
|
251
|
+
-- Always DROP+CREATE (not IF NOT EXISTS) to replace any stale triggers
|
|
252
|
+
-- left by V2 migration.
|
|
253
|
+
|
|
229
254
|
-- INSERT trigger
|
|
230
|
-
CREATE TRIGGER
|
|
255
|
+
CREATE TRIGGER atomic_facts_fts_insert
|
|
231
256
|
AFTER INSERT ON atomic_facts
|
|
232
257
|
BEGIN
|
|
233
258
|
INSERT INTO atomic_facts_fts (rowid, fact_id, content)
|
|
@@ -235,7 +260,7 @@ BEGIN
|
|
|
235
260
|
END;
|
|
236
261
|
|
|
237
262
|
-- DELETE trigger
|
|
238
|
-
CREATE TRIGGER
|
|
263
|
+
CREATE TRIGGER atomic_facts_fts_delete
|
|
239
264
|
AFTER DELETE ON atomic_facts
|
|
240
265
|
BEGIN
|
|
241
266
|
INSERT INTO atomic_facts_fts (atomic_facts_fts, rowid, fact_id, content)
|
|
@@ -243,7 +268,7 @@ BEGIN
|
|
|
243
268
|
END;
|
|
244
269
|
|
|
245
270
|
-- UPDATE trigger
|
|
246
|
-
CREATE TRIGGER
|
|
271
|
+
CREATE TRIGGER atomic_facts_fts_update
|
|
247
272
|
AFTER UPDATE OF content ON atomic_facts
|
|
248
273
|
BEGIN
|
|
249
274
|
INSERT INTO atomic_facts_fts (atomic_facts_fts, rowid, fact_id, content)
|
|
@@ -655,7 +680,9 @@ _DDL_ORDERED: Final[tuple[str, ...]] = (
|
|
|
655
680
|
_SQL_COMPLIANCE_AUDIT,
|
|
656
681
|
_SQL_BM25_TOKENS,
|
|
657
682
|
_SQL_CONFIG,
|
|
658
|
-
#
|
|
683
|
+
# V2 migration cleanup — drop stale triggers/FTS before recreating
|
|
684
|
+
_SQL_V2_MIGRATION_CLEANUP,
|
|
685
|
+
# FTS5 must come after atomic_facts (content table) AND after cleanup
|
|
659
686
|
_SQL_ATOMIC_FACTS_FTS,
|
|
660
687
|
)
|
|
661
688
|
|
|
@@ -151,20 +151,33 @@ class V2Migrator:
|
|
|
151
151
|
self._backup_db = self._v3_base / BACKUP_NAME
|
|
152
152
|
|
|
153
153
|
def detect_v2(self) -> bool:
    """Check if a V2 installation exists.

    Returns:
        False if the V2 base path (.claude-memory) is a symlink, which
        marks an already-migrated install; otherwise True only when the
        V2 database path is an existing regular file.
    """
    if self._v2_base.is_symlink():
        return False
    # is_file() is already False for a missing path, so a separate
    # exists() check would only add a second stat call.
    return self._v2_db.is_file()
|
|
156
161
|
|
|
157
162
|
def is_already_migrated(self) -> bool:
|
|
158
|
-
"""Check if migration has already been performed.
|
|
163
|
+
"""Check if migration has already been performed.
|
|
164
|
+
|
|
165
|
+
Detects migration by:
|
|
166
|
+
1. .claude-memory is a symlink to .superlocalmemory (definitive)
|
|
167
|
+
2. V3 schema tables exist in the V3 database
|
|
168
|
+
"""
|
|
169
|
+
if self._v2_base.is_symlink():
|
|
170
|
+
return True
|
|
159
171
|
if not self._v3_db.exists():
|
|
160
172
|
return False
|
|
161
173
|
try:
|
|
162
174
|
conn = sqlite3.connect(str(self._v3_db))
|
|
163
175
|
try:
|
|
164
|
-
tables =
|
|
176
|
+
tables = {r[0] for r in conn.execute(
|
|
165
177
|
"SELECT name FROM sqlite_master WHERE type='table'"
|
|
166
|
-
).fetchall()
|
|
167
|
-
|
|
178
|
+
).fetchall()}
|
|
179
|
+
# Check for actual V3 schema tables (not old migration markers)
|
|
180
|
+
return "atomic_facts" in tables and "canonical_entities" in tables
|
|
168
181
|
finally:
|
|
169
182
|
conn.close()
|
|
170
183
|
except Exception:
|
|
@@ -217,12 +230,12 @@ class V2Migrator:
|
|
|
217
230
|
|
|
218
231
|
Returns dict with migration stats.
|
|
219
232
|
"""
|
|
220
|
-
if not self.detect_v2():
|
|
221
|
-
return {"success": False, "error": "No V2 installation found"}
|
|
222
|
-
|
|
223
233
|
if self.is_already_migrated():
|
|
224
234
|
return {"success": True, "message": "Already migrated"}
|
|
225
235
|
|
|
236
|
+
if not self.detect_v2():
|
|
237
|
+
return {"success": False, "error": "No V2 installation found"}
|
|
238
|
+
|
|
226
239
|
stats = {"steps": []}
|
|
227
240
|
|
|
228
241
|
try:
|
|
@@ -268,6 +281,19 @@ class V2Migrator:
|
|
|
268
281
|
# Disable FK enforcement for migrated DBs (V2 schema is incompatible)
|
|
269
282
|
conn.execute("PRAGMA foreign_keys=OFF")
|
|
270
283
|
|
|
284
|
+
# Drop ALL triggers before renaming tables.
|
|
285
|
+
# ALTER TABLE RENAME auto-updates trigger bodies but corrupts
|
|
286
|
+
# FTS5 delete-command column names, causing:
|
|
287
|
+
# "table _v2_bak_*_fts has no column named *_fts"
|
|
288
|
+
v2_triggers = [r[0] for r in conn.execute(
|
|
289
|
+
"SELECT name FROM sqlite_master WHERE type='trigger'"
|
|
290
|
+
).fetchall()]
|
|
291
|
+
for trigger in v2_triggers:
|
|
292
|
+
try:
|
|
293
|
+
conn.execute(f'DROP TRIGGER IF EXISTS "{trigger}"')
|
|
294
|
+
except Exception:
|
|
295
|
+
pass
|
|
296
|
+
|
|
271
297
|
# Rename ALL tables with incompatible schemas (V2 + old alpha)
|
|
272
298
|
# User data is in 'memories' table (already upgraded above)
|
|
273
299
|
# Everything else is computed/derived and will be recreated by V3
|