superlocalmemory 3.0.13 → 3.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,16 +8,29 @@ Thread-safe, dimension-validated embedding with Fisher variance computation.
8
8
  Supports local (768-dim nomic) and cloud (3072-dim) models with EXPLICIT errors
9
9
  on dimension mismatch — NEVER silently falls back to a different dimension.
10
10
 
11
+ Memory management: Forces CPU-only inference to prevent GPU memory accumulation.
12
+ Auto-unloads model after idle timeout to keep long-running MCP servers lean.
13
+
11
14
  Part of Qualixar | Author: Varun Pratap Bhardwaj
12
15
  """
13
16
 
14
17
  from __future__ import annotations
15
18
 
16
19
  import logging
20
+ import os
17
21
  import threading
18
22
  import time
19
23
  from typing import TYPE_CHECKING
20
24
 
25
# Force CPU before any torch/sentence-transformers import.
# On Apple Silicon, PyTorch defaults to Metal (MPS) which allocates 4-6 GB
# of GPU shader buffers that grow over time and never get released.
# On Windows/Linux with CUDA, similar GPU memory issues occur.
# CPU-only keeps footprint under 1 GB (vs 6+ GB with GPU).
#
# setdefault() only writes a value when the variable is not already set,
# so a value exported by the user before startup takes precedence.
# NOTE(review): these are process-wide environment changes made at import
# time, so any other torch user in the same process sees them too —
# confirm that is intended.
# NOTE(review): PyTorch documents PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 as
# removing the MPS allocator's upper limit rather than disabling MPS;
# verify this value does what the comment above expects.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")
os.environ.setdefault("PYTORCH_MPS_HIGH_WATERMARK_RATIO", "0.0")
os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")
33
+
21
34
  import numpy as np
22
35
 
23
36
  if TYPE_CHECKING:
@@ -44,11 +57,16 @@ class DimensionMismatchError(RuntimeError):
44
57
  """
45
58
 
46
59
 
60
# Delay, in seconds, passed to the threading.Timer that calls
# EmbeddingService.unload() once the service has gone unused.
_IDLE_TIMEOUT_SECONDS = 300  # 5 minutes — unload model after idle
61
+
62
+
47
63
  class EmbeddingService:
48
64
  """Thread-safe embedding service with strict dimension validation.
49
65
 
50
66
  Lazy-loads the underlying model on first embed call.
51
67
  Validates every output dimension against the configured expectation.
68
+ Auto-unloads after 5 minutes idle to keep MCP server memory low.
69
+ Forces CPU-only inference to prevent GPU memory accumulation.
52
70
  """
53
71
 
54
72
  def __init__(self, config: EmbeddingConfig) -> None:
@@ -57,6 +75,8 @@ class EmbeddingService:
57
75
  self._lock = threading.Lock()
58
76
  self._loaded = False
59
77
  self._available = True # Set False if model can't load
78
+ self._last_used: float = 0.0
79
+ self._idle_timer: threading.Timer | None = None
60
80
 
61
81
  @property
62
82
  def is_available(self) -> bool:
@@ -65,6 +85,32 @@ class EmbeddingService:
65
85
  self._ensure_loaded()
66
86
  return self._available and self._model is not None
67
87
 
88
+ def unload(self) -> None:
89
+ """Explicitly unload the model to free memory.
90
+
91
+ Called automatically after idle timeout, or manually for cleanup.
92
+ The model will lazy-reload on next embed call.
93
+ """
94
+ with self._lock:
95
+ if self._model is not None:
96
+ del self._model
97
+ self._model = None
98
+ self._loaded = False
99
+ import gc
100
+ gc.collect()
101
+ logger.info("EmbeddingService: model unloaded (idle timeout)")
102
+
103
def _reset_idle_timer(self) -> None:
    """Re-arm the idle-unload countdown and record the time of use.

    Cancels the timer left over from the previous call (if any), starts a
    new daemon ``threading.Timer`` that will call :meth:`unload` after
    ``_IDLE_TIMEOUT_SECONDS``, and stamps ``_last_used`` with the current
    wall-clock time.
    """
    previous = self._idle_timer
    if previous is not None:
        previous.cancel()

    self._idle_timer = countdown = threading.Timer(
        _IDLE_TIMEOUT_SECONDS, self.unload,
    )
    # Daemon thread: a pending countdown must never keep the process alive.
    countdown.daemon = True
    countdown.start()

    self._last_used = time.time()
113
+
68
114
  # ------------------------------------------------------------------
69
115
  # Public API
70
116
  # ------------------------------------------------------------------
@@ -91,6 +137,7 @@ class EmbeddingService:
91
137
  return None
92
138
  vec = self._encode_single(text)
93
139
  self._validate_dimension(vec)
140
+ self._reset_idle_timer()
94
141
  return vec.tolist()
95
142
 
96
143
  def embed_batch(self, texts: list[str]) -> list[list[float]]:
@@ -115,6 +162,7 @@ class EmbeddingService:
115
162
  vectors = self._encode_batch(texts)
116
163
  for vec in vectors:
117
164
  self._validate_dimension(vec)
165
+ self._reset_idle_timer()
118
166
  return [v.tolist() for v in vectors]
119
167
 
120
168
  def compute_fisher_params(
@@ -185,7 +233,13 @@ class EmbeddingService:
185
233
  self._loaded = True
186
234
 
187
235
  def _load_local_model(self) -> None:
188
- """Load sentence-transformers model for local embedding."""
236
+ """Load sentence-transformers model for local embedding.
237
+
238
+ Forces CPU device to prevent GPU memory accumulation:
239
+ - Apple Silicon MPS: allocates 4-6 GB Metal shader buffers
240
+ - NVIDIA CUDA: allocates GPU VRAM that never releases
241
+ - CPU-only: stable ~880 MB footprint, no growth over time
242
+ """
189
243
  try:
190
244
  from sentence_transformers import SentenceTransformer
191
245
  except ImportError:
@@ -199,6 +253,7 @@ class EmbeddingService:
199
253
  return
200
254
  model = SentenceTransformer(
201
255
  self._config.model_name, trust_remote_code=True,
256
+ device="cpu",
202
257
  )
203
258
  actual_dim = model.get_sentence_embedding_dimension()
204
259
  if actual_dim != self._config.dimension:
@@ -208,7 +263,7 @@ class EmbeddingService:
208
263
  )
209
264
  self._model = model
210
265
  logger.info(
211
- "EmbeddingService: local model loaded (%s, %d-dim)",
266
+ "EmbeddingService: local model loaded (%s, %d-dim, device=cpu)",
212
267
  self._config.model_name,
213
268
  actual_dim,
214
269
  )
@@ -216,6 +216,28 @@ CREATE INDEX IF NOT EXISTS idx_facts_interval
216
216
  # FTS5 virtual table on atomic_facts for full-text search
217
217
  # ---------------------------------------------------------------------------
218
218
 
219
# Cleanup DDL that _DDL_ORDERED lists immediately before
# _SQL_ATOMIC_FACTS_FTS: it drops the triggers and backup FTS tables named
# below so the FTS DDL that follows can create its triggers afresh.
# Every statement uses IF EXISTS, so it is safe to run on a database that
# never went through the V2 migration.
_SQL_V2_MIGRATION_CLEANUP: Final[str] = """
-- Clean up stale V2 triggers that fire on active tables but reference
-- renamed backup FTS tables. The V2→V3 migration renames tables via
-- ALTER TABLE RENAME, which auto-updates trigger bodies to reference
-- _v2_bak_* tables but leaves FTS5 delete-command column names stale.
-- This causes: "table _v2_bak_*_fts has no column named *_fts"

-- Drop V2-era triggers on memories table (memories_ai/ad/au)
DROP TRIGGER IF EXISTS memories_ai;
DROP TRIGGER IF EXISTS memories_ad;
DROP TRIGGER IF EXISTS memories_au;

-- Drop stale V3 triggers (may have been corrupted by V2 rename)
DROP TRIGGER IF EXISTS atomic_facts_fts_insert;
DROP TRIGGER IF EXISTS atomic_facts_fts_delete;
DROP TRIGGER IF EXISTS atomic_facts_fts_update;

-- Drop renamed V2 backup FTS virtual tables (and their shadow tables)
DROP TABLE IF EXISTS "_v2_bak_atomic_facts_fts";
DROP TABLE IF EXISTS "_v2_bak_memories_fts";
"""
240
+
219
241
  _SQL_ATOMIC_FACTS_FTS: Final[str] = """
220
242
  CREATE VIRTUAL TABLE IF NOT EXISTS atomic_facts_fts
221
243
  USING fts5(
@@ -226,8 +248,11 @@ CREATE VIRTUAL TABLE IF NOT EXISTS atomic_facts_fts
226
248
  );
227
249
 
228
250
  -- Triggers to keep FTS in sync with atomic_facts.
251
+ -- Always DROP+CREATE (not IF NOT EXISTS) to replace any stale triggers
252
+ -- left by V2 migration.
253
+
229
254
  -- INSERT trigger
230
- CREATE TRIGGER IF NOT EXISTS atomic_facts_fts_insert
255
+ CREATE TRIGGER atomic_facts_fts_insert
231
256
  AFTER INSERT ON atomic_facts
232
257
  BEGIN
233
258
  INSERT INTO atomic_facts_fts (rowid, fact_id, content)
@@ -235,7 +260,7 @@ BEGIN
235
260
  END;
236
261
 
237
262
  -- DELETE trigger
238
- CREATE TRIGGER IF NOT EXISTS atomic_facts_fts_delete
263
+ CREATE TRIGGER atomic_facts_fts_delete
239
264
  AFTER DELETE ON atomic_facts
240
265
  BEGIN
241
266
  INSERT INTO atomic_facts_fts (atomic_facts_fts, rowid, fact_id, content)
@@ -243,7 +268,7 @@ BEGIN
243
268
  END;
244
269
 
245
270
  -- UPDATE trigger
246
- CREATE TRIGGER IF NOT EXISTS atomic_facts_fts_update
271
+ CREATE TRIGGER atomic_facts_fts_update
247
272
  AFTER UPDATE OF content ON atomic_facts
248
273
  BEGIN
249
274
  INSERT INTO atomic_facts_fts (atomic_facts_fts, rowid, fact_id, content)
@@ -655,7 +680,9 @@ _DDL_ORDERED: Final[tuple[str, ...]] = (
655
680
  _SQL_COMPLIANCE_AUDIT,
656
681
  _SQL_BM25_TOKENS,
657
682
  _SQL_CONFIG,
658
- # FTS5 must come after atomic_facts (content table)
683
+ # V2 migration cleanup: drop stale triggers/FTS before recreating
684
+ _SQL_V2_MIGRATION_CLEANUP,
685
+ # FTS5 must come after atomic_facts (content table) AND after cleanup
659
686
  _SQL_ATOMIC_FACTS_FTS,
660
687
  )
661
688
 
@@ -151,20 +151,33 @@ class V2Migrator:
151
151
  self._backup_db = self._v3_base / BACKUP_NAME
152
152
 
153
153
  def detect_v2(self) -> bool:
154
- """Check if a V2 installation exists."""
154
+ """Check if a V2 installation exists.
155
+
156
+ Returns False if .claude-memory is a symlink (already migrated).
157
+ """
158
+ if self._v2_base.is_symlink():
159
+ return False
155
160
  return self._v2_db.exists() and self._v2_db.is_file()
156
161
 
157
162
  def is_already_migrated(self) -> bool:
158
- """Check if migration has already been performed."""
163
+ """Check if migration has already been performed.
164
+
165
+ Detects migration by:
166
+ 1. .claude-memory is a symlink to .superlocalmemory (definitive)
167
+ 2. V3 schema tables exist in the V3 database
168
+ """
169
+ if self._v2_base.is_symlink():
170
+ return True
159
171
  if not self._v3_db.exists():
160
172
  return False
161
173
  try:
162
174
  conn = sqlite3.connect(str(self._v3_db))
163
175
  try:
164
- tables = [r[0] for r in conn.execute(
176
+ tables = {r[0] for r in conn.execute(
165
177
  "SELECT name FROM sqlite_master WHERE type='table'"
166
- ).fetchall()]
167
- return "semantic_facts" in tables and "v3_config" in tables
178
+ ).fetchall()}
179
+ # Check for actual V3 schema tables (not old migration markers)
180
+ return "atomic_facts" in tables and "canonical_entities" in tables
168
181
  finally:
169
182
  conn.close()
170
183
  except Exception:
@@ -217,12 +230,12 @@ class V2Migrator:
217
230
 
218
231
  Returns dict with migration stats.
219
232
  """
220
- if not self.detect_v2():
221
- return {"success": False, "error": "No V2 installation found"}
222
-
223
233
  if self.is_already_migrated():
224
234
  return {"success": True, "message": "Already migrated"}
225
235
 
236
+ if not self.detect_v2():
237
+ return {"success": False, "error": "No V2 installation found"}
238
+
226
239
  stats = {"steps": []}
227
240
 
228
241
  try:
@@ -268,6 +281,19 @@ class V2Migrator:
268
281
  # Disable FK enforcement for migrated DBs (V2 schema is incompatible)
269
282
  conn.execute("PRAGMA foreign_keys=OFF")
270
283
 
284
+ # Drop ALL triggers before renaming tables.
285
+ # ALTER TABLE RENAME auto-updates trigger bodies but corrupts
286
+ # FTS5 delete-command column names, causing:
287
+ # "table _v2_bak_*_fts has no column named *_fts"
288
+ v2_triggers = [r[0] for r in conn.execute(
289
+ "SELECT name FROM sqlite_master WHERE type='trigger'"
290
+ ).fetchall()]
291
+ for trigger in v2_triggers:
292
+ try:
293
+ conn.execute(f'DROP TRIGGER IF EXISTS "{trigger}"')
294
+ except Exception:
295
+ pass
296
+
271
297
  # Rename ALL tables with incompatible schemas (V2 + old alpha)
272
298
  # User data is in 'memories' table (already upgraded above)
273
299
  # Everything else is computed/derived and will be recreated by V3