npm - superlocalmemory - Versions diffs - 3.0.13 → 3.0.15 - Mend

superlocalmemory 3.0.13 → 3.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/LICENSE +2 -1
package/NOTICE +1 -1
package/README.md +193 -191
package/package.json +1 -1
package/pyproject.toml +1 -1
package/src/superlocalmemory/cli/commands.py +2 -0
package/src/superlocalmemory/core/embeddings.py +57 -2
package/src/superlocalmemory/storage/schema.py +31 -4
package/src/superlocalmemory/storage/v2_migrator.py +34 -8
package/src/superlocalmemory.egg-info/PKG-INFO +194 -192

package/src/superlocalmemory/core/embeddings.py CHANGED

@@ -8,16 +8,29 @@ Thread-safe, dimension-validated embedding with Fisher variance computation.
 Supports local (768-dim nomic) and cloud (3072-dim) models with EXPLICIT errors
 on dimension mismatch — NEVER silently falls back to a different dimension.
+Memory management: Forces CPU-only inference to prevent GPU memory accumulation.
+Auto-unloads model after idle timeout to keep long-running MCP servers lean.
 Part of Qualixar | Author: Varun Pratap Bhardwaj
 """
 from __future__ import annotations
 import logging
+import os
 import threading
 import time
 from typing import TYPE_CHECKING
+# Force CPU before any torch/sentence-transformers import.
+# On Apple Silicon, PyTorch defaults to Metal (MPS) which allocates 4-6 GB
+# of GPU shader buffers that grow over time and never get released.
+# On Windows/Linux with CUDA, similar GPU memory issues occur.
+# CPU-only keeps footprint under 1 GB (vs 6+ GB with GPU).
+os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")
+os.environ.setdefault("PYTORCH_MPS_HIGH_WATERMARK_RATIO", "0.0")
+os.environ.setdefault("PYTORCH_ENABLE_MPS_FALLBACK", "1")
 import numpy as np
 if TYPE_CHECKING:
@@ -44,11 +57,16 @@ class DimensionMismatchError(RuntimeError):
     """
+_IDLE_TIMEOUT_SECONDS = 300  # 5 minutes — unload model after idle
 class EmbeddingService:
     """Thread-safe embedding service with strict dimension validation.
     Lazy-loads the underlying model on first embed call.
     Validates every output dimension against the configured expectation.
+    Auto-unloads after 5 minutes idle to keep MCP server memory low.
+    Forces CPU-only inference to prevent GPU memory accumulation.
     """
     def __init__(self, config: EmbeddingConfig) -> None:
@@ -57,6 +75,8 @@ class EmbeddingService:
         self._lock = threading.Lock()
         self._loaded = False
         self._available = True  # Set False if model can't load
+        self._last_used: float = 0.0
+        self._idle_timer: threading.Timer | None = None
     @property
     def is_available(self) -> bool:
@@ -65,6 +85,32 @@ class EmbeddingService:
             self._ensure_loaded()
         return self._available and self._model is not None
+    def unload(self) -> None:
+        """Explicitly unload the model to free memory.
+        Called automatically after idle timeout, or manually for cleanup.
+        The model will lazy-reload on next embed call.
+        """
+        with self._lock:
+            if self._model is not None:
+                del self._model
+                self._model = None
+                self._loaded = False
+                import gc
+                gc.collect()
+                logger.info("EmbeddingService: model unloaded (idle timeout)")
+    def _reset_idle_timer(self) -> None:
+        """Reset the idle unload timer after each use."""
+        if self._idle_timer is not None:
+            self._idle_timer.cancel()
+        self._idle_timer = threading.Timer(
+            _IDLE_TIMEOUT_SECONDS, self.unload,
+        )
+        self._idle_timer.daemon = True
+        self._idle_timer.start()
+        self._last_used = time.time()
     # ------------------------------------------------------------------
     # Public API
     # ------------------------------------------------------------------
@@ -91,6 +137,7 @@ class EmbeddingService:
             return None
         vec = self._encode_single(text)
         self._validate_dimension(vec)
+        self._reset_idle_timer()
         return vec.tolist()
     def embed_batch(self, texts: list[str]) -> list[list[float]]:
@@ -115,6 +162,7 @@ class EmbeddingService:
         vectors = self._encode_batch(texts)
         for vec in vectors:
             self._validate_dimension(vec)
+        self._reset_idle_timer()
         return [v.tolist() for v in vectors]
     def compute_fisher_params(
@@ -185,7 +233,13 @@ class EmbeddingService:
             self._loaded = True
     def _load_local_model(self) -> None:
-        """Load sentence-transformers model for local embedding."""
+        """Load sentence-transformers model for local embedding.
+        Forces CPU device to prevent GPU memory accumulation:
+        - Apple Silicon MPS: allocates 4-6 GB Metal shader buffers
+        - NVIDIA CUDA: allocates GPU VRAM that never releases
+        - CPU-only: stable ~880 MB footprint, no growth over time
+        """
         try:
             from sentence_transformers import SentenceTransformer
         except ImportError:
@@ -199,6 +253,7 @@ class EmbeddingService:
             return
         model = SentenceTransformer(
             self._config.model_name, trust_remote_code=True,
+            device="cpu",
         )
         actual_dim = model.get_sentence_embedding_dimension()
         if actual_dim != self._config.dimension:
@@ -208,7 +263,7 @@ class EmbeddingService:
             )
         self._model = model
         logger.info(
-            "EmbeddingService: local model loaded (%s, %d-dim)",
+            "EmbeddingService: local model loaded (%s, %d-dim, device=cpu)",
             self._config.model_name,
             actual_dim,
         )

package/src/superlocalmemory/storage/schema.py CHANGED

@@ -216,6 +216,28 @@ CREATE INDEX IF NOT EXISTS idx_facts_interval
 # FTS5 virtual table on atomic_facts for full-text search
 # ---------------------------------------------------------------------------
+_SQL_V2_MIGRATION_CLEANUP: Final[str] = """
+-- Clean up stale V2 triggers that fire on active tables but reference
+-- renamed backup FTS tables. The V2→V3 migration renames tables via
+-- ALTER TABLE RENAME, which auto-updates trigger bodies to reference
+-- _v2_bak_* tables but leaves FTS5 delete-command column names stale.
+-- This causes: "table _v2_bak_*_fts has no column named *_fts"
+-- Drop V2-era triggers on memories table (memories_ai/ad/au)
+DROP TRIGGER IF EXISTS memories_ai;
+DROP TRIGGER IF EXISTS memories_ad;
+DROP TRIGGER IF EXISTS memories_au;
+-- Drop stale V3 triggers (may have been corrupted by V2 rename)
+DROP TRIGGER IF EXISTS atomic_facts_fts_insert;
+DROP TRIGGER IF EXISTS atomic_facts_fts_delete;
+DROP TRIGGER IF EXISTS atomic_facts_fts_update;
+-- Drop renamed V2 backup FTS virtual tables (and their shadow tables)
+DROP TABLE IF EXISTS "_v2_bak_atomic_facts_fts";
+DROP TABLE IF EXISTS "_v2_bak_memories_fts";
+"""
 _SQL_ATOMIC_FACTS_FTS: Final[str] = """
 CREATE VIRTUAL TABLE IF NOT EXISTS atomic_facts_fts
     USING fts5(
@@ -226,8 +248,11 @@ CREATE VIRTUAL TABLE IF NOT EXISTS atomic_facts_fts
     );
 -- Triggers to keep FTS in sync with atomic_facts.
+-- Always DROP+CREATE (not IF NOT EXISTS) to replace any stale triggers
+-- left by V2 migration.
 -- INSERT trigger
-CREATE TRIGGER IF NOT EXISTS atomic_facts_fts_insert
+CREATE TRIGGER atomic_facts_fts_insert
     AFTER INSERT ON atomic_facts
 BEGIN
     INSERT INTO atomic_facts_fts (rowid, fact_id, content)
@@ -235,7 +260,7 @@ BEGIN
 END;
 -- DELETE trigger
-CREATE TRIGGER IF NOT EXISTS atomic_facts_fts_delete
+CREATE TRIGGER atomic_facts_fts_delete
     AFTER DELETE ON atomic_facts
 BEGIN
     INSERT INTO atomic_facts_fts (atomic_facts_fts, rowid, fact_id, content)
@@ -243,7 +268,7 @@ BEGIN
 END;
 -- UPDATE trigger
-CREATE TRIGGER IF NOT EXISTS atomic_facts_fts_update
+CREATE TRIGGER atomic_facts_fts_update
     AFTER UPDATE OF content ON atomic_facts
 BEGIN
     INSERT INTO atomic_facts_fts (atomic_facts_fts, rowid, fact_id, content)
@@ -655,7 +680,9 @@ _DDL_ORDERED: Final[tuple[str, ...]] = (
     _SQL_COMPLIANCE_AUDIT,
     _SQL_BM25_TOKENS,
     _SQL_CONFIG,
-    # FTS5 must come after atomic_facts (content table)
+    # V2 migration cleanup — drop stale triggers/FTS before recreating
+    _SQL_V2_MIGRATION_CLEANUP,
+    # FTS5 must come after atomic_facts (content table) AND after cleanup
     _SQL_ATOMIC_FACTS_FTS,
 )

package/src/superlocalmemory/storage/v2_migrator.py CHANGED

@@ -151,20 +151,33 @@ class V2Migrator:
         self._backup_db = self._v3_base / BACKUP_NAME
     def detect_v2(self) -> bool:
-        """Check if a V2 installation exists."""
+        """Check if a V2 installation exists.
+        Returns False if .claude-memory is a symlink (already migrated).
+        """
+        if self._v2_base.is_symlink():
+            return False
         return self._v2_db.exists() and self._v2_db.is_file()
     def is_already_migrated(self) -> bool:
-        """Check if migration has already been performed."""
+        """Check if migration has already been performed.
+        Detects migration by:
+        1. .claude-memory is a symlink to .superlocalmemory (definitive)
+        2. V3 schema tables exist in the V3 database
+        """
+        if self._v2_base.is_symlink():
+            return True
         if not self._v3_db.exists():
             return False
         try:
             conn = sqlite3.connect(str(self._v3_db))
             try:
-                tables = [r[0] for r in conn.execute(
+                tables = {r[0] for r in conn.execute(
                     "SELECT name FROM sqlite_master WHERE type='table'"
-                ).fetchall()]
-                return "semantic_facts" in tables and "v3_config" in tables
+                ).fetchall()}
+                # Check for actual V3 schema tables (not old migration markers)
+                return "atomic_facts" in tables and "canonical_entities" in tables
             finally:
                 conn.close()
         except Exception:
@@ -217,12 +230,12 @@ class V2Migrator:
         Returns dict with migration stats.
         """
-        if not self.detect_v2():
-            return {"success": False, "error": "No V2 installation found"}
         if self.is_already_migrated():
             return {"success": True, "message": "Already migrated"}
+        if not self.detect_v2():
+            return {"success": False, "error": "No V2 installation found"}
         stats = {"steps": []}
         try:
@@ -268,6 +281,19 @@ class V2Migrator:
             # Disable FK enforcement for migrated DBs (V2 schema is incompatible)
             conn.execute("PRAGMA foreign_keys=OFF")
+            # Drop ALL triggers before renaming tables.
+            # ALTER TABLE RENAME auto-updates trigger bodies but corrupts
+            # FTS5 delete-command column names, causing:
+            #   "table _v2_bak_*_fts has no column named *_fts"
+            v2_triggers = [r[0] for r in conn.execute(
+                "SELECT name FROM sqlite_master WHERE type='trigger'"
+            ).fetchall()]
+            for trigger in v2_triggers:
+                try:
+                    conn.execute(f'DROP TRIGGER IF EXISTS "{trigger}"')
+                except Exception:
+                    pass
             # Rename ALL tables with incompatible schemas (V2 + old alpha)
             # User data is in 'memories' table (already upgraded above)
             # Everything else is computed/derived and will be recreated by V3