npm - specmem-hardwicksoftware - Versions diffs - 3.5.99 → 3.6.1 - Mend

specmem-hardwicksoftware 3.5.99 → 3.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

package/bin/specmem-statusbar.cjs +154 -298
package/claude-hooks/agent-loading-hook.js +8 -4
package/claude-hooks/team-comms-enforcer.cjs +109 -92
package/dist/config/embeddingTimeouts.js +4 -4
package/dist/database.js +52 -6
package/dist/db/bigBrainMigrations.js +7 -6
package/dist/db/memoryDrilldown.sql +1 -1
package/dist/db/projectSchemaInit.sql +21 -0
package/dist/index.js +238 -13
package/dist/installer/firstRun.js +2 -2
package/dist/mcp/embeddingServerManager.js +225 -7
package/dist/mcp/healthMonitor.js +165 -32
package/dist/mcp/tools/embeddingControl.js +31 -0
package/dist/mcp/tools/teamComms.js +16 -0
package/dist/mcp/watcherIntegration.js +50 -7
package/dist/services/CameraZoomSearch.js +62 -5
package/dist/services/DimensionService.js +73 -6
package/dist/services/EmbeddingQueue.js +64 -0
package/dist/services/MemoryDrilldown.js +19 -12
package/dist/tools/goofy/findCodePointers.js +11 -7
package/dist/tools/goofy/findWhatISaid.js +145 -53
package/dist/utils/qoms.js +187 -4
package/dist/watcher/changeHandler.js +54 -4
package/dist/watcher/fileWatcher.js +121 -1
package/dist/watcher/index.js +75 -31
package/dist/watcher/syncChecker.js +248 -63
package/embedding-sandbox/__pycache__/frankenstein-embeddings.cpython-313.pyc +0 -0
package/embedding-sandbox/frankenstein-embeddings.py +175 -64
package/package.json +1 -1

package/embedding-sandbox/frankenstein-embeddings.py CHANGED

@@ -2034,6 +2034,10 @@ class FrankensteinEmbeddings:
         # THREAD SAFETY: Lock for model loading to prevent race conditions
         self._model_lock = threading.Lock()
+        # Health status flag: reflects whether model is loaded and functional
+        # Set to False on load failure, True on successful load + health check
+        self._model_healthy = True
         # ═══════════════════════════════════════════════════════════════════
         # OPT-6: LAZY LOADING - Don't load model until first request
         # ═══════════════════════════════════════════════════════════════════
@@ -2142,43 +2146,79 @@ class FrankensteinEmbeddings:
         Uses double-checked locking pattern to avoid lock contention when
         model is already loaded.
+        Retries with exponential backoff on failure (Issue #17 fix).
+        Configurable via:
+        - SPECMEM_MODEL_RELOAD_RETRIES (default 3)
+        - SPECMEM_MODEL_RELOAD_DELAY_MS (default 1000) - base delay in ms
+        Raises RuntimeError if all retries fail, ensuring callers get an
+        explicit error instead of silent failure.
         """
-        # Fast path: model already loaded (no lock needed)
-        if self.model is not None:
+        # Fast path: model already loaded and healthy (no lock needed)
+        if self.model is not None and getattr(self, '_model_healthy', True):
             return
+        max_retries = int(os.environ.get('SPECMEM_MODEL_RELOAD_RETRIES', '3'))
+        base_delay_ms = int(os.environ.get('SPECMEM_MODEL_RELOAD_DELAY_MS', '1000'))
         # Slow path: need to load model (with lock)
         with self._model_lock:
             # Double-check inside lock (another thread may have loaded it)
-            if self.model is not None:
+            if self.model is not None and getattr(self, '_model_healthy', True):
                 return
-            print(f"🔄 Lazy-loading model: {self.base_model} ({_BEST_ONNX_FILE})", file=sys.stderr)
-            start = time.time()
-            try:
-                # NOTE: backend='onnx' is REQUIRED for model_kwargs file_name to work
-                self.model = SentenceTransformer(
-                    self.base_model,
-                    device='cpu',
-                    backend='onnx',
-                    cache_folder=str(self.cache_dir),
-                    model_kwargs={"file_name": _BEST_ONNX_FILE}
-                )
-                load_time = (time.time() - start) * 1000
-                print(f"✅ Model loaded in {load_time:.0f}ms - ready to embed!", file=sys.stderr)
+            last_error = None
+            for attempt in range(1, max_retries + 1):
+                print(f"[MODEL-RELOAD] Loading model: {self.base_model} ({_BEST_ONNX_FILE}) (attempt {attempt}/{max_retries})", file=sys.stderr)
+                start = time.time()
+                try:
+                    # NOTE: backend='onnx' is REQUIRED for model_kwargs file_name to work
+                    self.model = SentenceTransformer(
+                        self.base_model,
+                        device='cpu',
+                        backend='onnx',
+                        cache_folder=str(self.cache_dir),
+                        model_kwargs={"file_name": _BEST_ONNX_FILE}
+                    )
+                    load_time = (time.time() - start) * 1000
-                # Update native dims if we didn't know them
-                actual_dims = self.model.get_sentence_embedding_dimension()
-                if self.dim_config.native_dims != actual_dims:
-                    print(f"   Native dims updated: {self.dim_config.native_dims} -> {actual_dims}", file=sys.stderr)
-                    self.dim_config.native_dims = actual_dims
+                    # Verify the model actually works by doing a test encode
+                    test_embedding = self.model.encode("health check", show_progress_bar=False)
+                    if test_embedding is None or len(test_embedding) == 0:
+                        raise RuntimeError("Model loaded but produced empty embedding on health check")
-            except Exception as e:
-                print(f"❌ Model loading failed: {e}", file=sys.stderr)
-                raise
+                    self._model_healthy = True
+                    print(f"[MODEL-RELOAD] Model loaded and verified in {load_time:.0f}ms (attempt {attempt}) - ready to embed!", file=sys.stderr)
-            # Update last request time so idle monitor resets
-            self.last_request_time = time.time()
+                    # Update native dims if we didn't know them
+                    actual_dims = self.model.get_sentence_embedding_dimension()
+                    if self.dim_config.native_dims != actual_dims:
+                        print(f"   Native dims updated: {self.dim_config.native_dims} -> {actual_dims}", file=sys.stderr)
+                        self.dim_config.native_dims = actual_dims
+                    # Update last request time so idle monitor resets
+                    self.last_request_time = time.time()
+                    return  # Success
+                except Exception as e:
+                    last_error = e
+                    self.model = None
+                    self._model_healthy = False
+                    print(f"[MODEL-RELOAD] Attempt {attempt}/{max_retries} failed: {e}", file=sys.stderr)
+                    if attempt < max_retries:
+                        # Exponential backoff: base_delay * 2^(attempt-1)
+                        # e.g., with 1000ms base: 1s, 2s, 4s
+                        delay_seconds = (base_delay_ms / 1000.0) * (2 ** (attempt - 1))
+                        print(f"[MODEL-RELOAD] Retrying in {delay_seconds:.1f}s...", file=sys.stderr)
+                        time.sleep(delay_seconds)
+            # All retries exhausted
+            self._model_healthy = False
+            error_msg = f"Model reload failed after {max_retries} attempts. Last error: {last_error}"
+            print(f"[MODEL-RELOAD] FATAL: {error_msg}", file=sys.stderr)
+            raise RuntimeError(error_msg)
     def _query_database_dimension(self) -> int:
         """
@@ -2503,7 +2543,8 @@ class FrankensteinEmbeddings:
             'ram_usage_mb': round(self.ram_guard.get_ram_usage_mb(), 1),
             'ram_limit_mb': self.ram_guard.MAX_RAM_MB,
             'throttling_enabled': self.enable_throttling,
-            'model_loaded': self.model is not None
+            'model_loaded': self.model is not None,
+            'model_healthy': getattr(self, '_model_healthy', True)
         }
         # Add low-resource optimization stats
@@ -2564,11 +2605,17 @@ class EmbeddingServer:
         self.shutdown_requested = False
         # KYS (Keep Yourself Safe) watchdog - two-way health check
-        # If MCP server doesn't send "kys" heartbeat within 90 seconds, we suicide
+        # If MCP server doesn't send "kys" heartbeat within timeout, take action
         # This prevents orphan embedding servers when MCP crashes
-        # Grace period increased to handle startup delays and heavy operations
+        # Timeout and mode are configurable via environment variables
         self.last_kys_time = time.time()
-        self.kys_timeout = 90  # 25 seconds from MCP + 65 second grace period
+        self.kys_timeout = int(os.environ.get('SPECMEM_KYS_TIMEOUT_SECONDS', '600'))
+        # KYS mode: "kill" = process exit (old behavior), "unload" = release model but keep socket,
+        # "standby" = keep everything loaded and just idle
+        self.kys_mode = os.environ.get('SPECMEM_KYS_MODE', 'unload').lower()
+        if self.kys_mode not in ('kill', 'unload', 'standby'):
+            print(f"[KYS] Invalid SPECMEM_KYS_MODE '{self.kys_mode}', defaulting to 'unload'", file=sys.stderr)
+            self.kys_mode = 'unload'
         # QQMS v2 - enhanced queue with FIFO + ACK (takes precedence if provided)
         self.qqms_v2 = qqms_v2
@@ -2800,10 +2847,15 @@ class EmbeddingServer:
         KYS (Keep Yourself Safe) Watchdog - Two-way health check system.
         The MCP server sends {"type": "kys", "text": "kurt cobain t minus 25"} every 25 seconds.
-        If we don't receive this heartbeat within 30 seconds (25 + 5 grace), we commit suicide.
+        If we don't receive this heartbeat within the configured timeout, we take action.
         This prevents orphan embedding servers when MCP crashes or is killed.
         Without this, crashed MCP leaves zombie embedding servers consuming RAM/CPU forever.
+        Modes (SPECMEM_KYS_MODE):
+        - "kill":    Process exit (original behavior)
+        - "unload":  Release ONNX model from memory but keep socket listener alive (default)
+        - "standby": Keep everything loaded, just idle
         """
         def is_claude_alive_for_project():
             """Check if any Claude/node process is running for this project directory."""
@@ -2827,6 +2879,28 @@ class EmbeddingServer:
             except Exception:
                 return False  # Assume dead if we can't check
+        def _kys_unload_model():
+            """Unload the model to free RAM but keep the socket listener alive.
+            On next request, _ensure_model_loaded() will reload it."""
+            try:
+                if hasattr(self.embedder, 'model') and self.embedder.model is not None:
+                    del self.embedder.model
+                    self.embedder.model = None
+                    import gc
+                    gc.collect()
+                    try:
+                        import torch
+                        if torch.cuda.is_available():
+                            torch.cuda.empty_cache()
+                    except Exception:
+                        pass
+                    print(f"[KYS-UNLOAD] Model released from memory. Socket still listening.", file=sys.stderr)
+                    print(f"[KYS-UNLOAD] Model will reload on next embedding request.", file=sys.stderr)
+                else:
+                    print(f"[KYS-UNLOAD] Model already unloaded, nothing to do.", file=sys.stderr)
+            except Exception as e:
+                print(f"[KYS-UNLOAD] Error unloading model: {e}", file=sys.stderr)
         def watchdog():
             # STARTUP GRACE PERIOD: Don't enforce KYS for first 60 seconds
             # This allows MCP server to fully initialize (can take 50-60+ seconds)
@@ -2849,49 +2923,79 @@ class EmbeddingServer:
                 if is_claude_alive_for_project():
                     # Claude is alive! Don't kill even without heartbeat
                     if time_since_kys > self.kys_timeout and int(time_since_kys) % 120 < 10:
-                        print(f"ℹ️ KYS: No heartbeat for {time_since_kys:.0f}s but Claude process detected - staying alive", file=sys.stderr)
+                        print(f"[KYS] No heartbeat for {time_since_kys:.0f}s but Claude process detected - staying alive", file=sys.stderr)
                     continue
-                # NEW LOGIC: Only kill if BOTH conditions are true:
-                # 1. No heartbeat for kys_timeout (90s)
+                # Only take action if BOTH conditions are true:
+                # 1. No heartbeat for kys_timeout
                 # 2. No embedding activity for activity_grace_period (5 min)
-                # This prevents killing active servers just because heartbeat stopped
+                # This prevents acting on active servers just because heartbeat stopped
                 if time_since_kys > self.kys_timeout:
                     if time_since_activity < activity_grace_period:
-                        # Recent activity - don't kill, just warn once per minute
+                        # Recent activity - don't act, just warn once per minute
                         if int(time_since_kys) % 60 < 10:
-                            print(f"⚠️ KYS: No heartbeat for {time_since_kys:.0f}s but recent activity ({time_since_activity:.0f}s ago) - staying alive", file=sys.stderr)
+                            print(f"[KYS] No heartbeat for {time_since_kys:.0f}s but recent activity ({time_since_activity:.0f}s ago) - staying alive", file=sys.stderr)
                         continue
-                    print(f"", file=sys.stderr)
-                    print(f"💀 KYS WATCHDOG TRIGGERED", file=sys.stderr)
-                    print(f"   No heartbeat from MCP in {time_since_kys:.0f}s (timeout: {self.kys_timeout}s)", file=sys.stderr)
-                    print(f"   No embedding activity for {time_since_activity:.0f}s (grace: {activity_grace_period}s)", file=sys.stderr)
-                    print(f"   MCP server likely crashed - committing suicide to prevent zombie", file=sys.stderr)
-                    print(f"   'kurt cobain t minus 0'", file=sys.stderr)
-                    print(f"", file=sys.stderr)
+                    # --- KYS MODE DISPATCH ---
+                    if self.kys_mode == 'standby':
+                        # STANDBY MODE: Keep everything loaded, just log and continue
+                        if int(time_since_kys) % 120 < 10:
+                            print(f"[KYS-STANDBY] No heartbeat for {time_since_kys:.0f}s, no activity for {time_since_activity:.0f}s - idling in standby mode", file=sys.stderr)
+                        continue
-                    # Write death reason file so clients know to auto-respawn
-                    try:
-                        death_reason_path = os.path.join(os.path.dirname(self.socket_path), 'embedding-death-reason.txt')
-                        with open(death_reason_path, 'w') as f:
-                            f.write(f"kys\n{time.time()}\nNo heartbeat ({time_since_kys:.0f}s) AND no activity ({time_since_activity:.0f}s)")
-                        print(f"   📝 Death reason written to {death_reason_path}", file=sys.stderr)
-                    except Exception as e:
-                        print(f"   ⚠️ Failed to write death reason: {e}", file=sys.stderr)
+                    elif self.kys_mode == 'unload':
+                        # UNLOAD MODE: Release model from memory but keep socket alive
+                        # Only unload once - check if model is still loaded
+                        if hasattr(self.embedder, 'model') and self.embedder.model is not None:
+                            print(f"", file=sys.stderr)
+                            print(f"[KYS-UNLOAD] WATCHDOG TRIGGERED (mode=unload)", file=sys.stderr)
+                            print(f"   No heartbeat from MCP in {time_since_kys:.0f}s (timeout: {self.kys_timeout}s)", file=sys.stderr)
+                            print(f"   No embedding activity for {time_since_activity:.0f}s (grace: {activity_grace_period}s)", file=sys.stderr)
+                            print(f"   Unloading model to free RAM - socket stays alive for reconnection", file=sys.stderr)
+                            _kys_unload_model()
+                            # Write status file so clients know state
+                            try:
+                                death_reason_path = os.path.join(os.path.dirname(self.socket_path), 'embedding-death-reason.txt')
+                                with open(death_reason_path, 'w') as f:
+                                    f.write(f"kys-unload\n{time.time()}\nModel unloaded after no heartbeat ({time_since_kys:.0f}s) AND no activity ({time_since_activity:.0f}s). Socket still alive.")
+                            except Exception as e:
+                                print(f"   [KYS-UNLOAD] Failed to write status file: {e}", file=sys.stderr)
+                        # Don't exit - keep looping. Model will reload on next request.
+                        continue
-                    # Set shutdown flag and force exit
-                    self.shutdown_requested = True
+                    else:
+                        # KILL MODE (original behavior): Process exit
+                        print(f"", file=sys.stderr)
+                        print(f"[KYS-KILL] WATCHDOG TRIGGERED (mode=kill)", file=sys.stderr)
+                        print(f"   No heartbeat from MCP in {time_since_kys:.0f}s (timeout: {self.kys_timeout}s)", file=sys.stderr)
+                        print(f"   No embedding activity for {time_since_activity:.0f}s (grace: {activity_grace_period}s)", file=sys.stderr)
+                        print(f"   MCP server likely crashed - committing suicide to prevent zombie", file=sys.stderr)
+                        print(f"   'kurt cobain t minus 0'", file=sys.stderr)
+                        print(f"", file=sys.stderr)
+                        # Write death reason file so clients know to auto-respawn
+                        try:
+                            death_reason_path = os.path.join(os.path.dirname(self.socket_path), 'embedding-death-reason.txt')
+                            with open(death_reason_path, 'w') as f:
+                                f.write(f"kys\n{time.time()}\nNo heartbeat ({time_since_kys:.0f}s) AND no activity ({time_since_activity:.0f}s)")
+                            print(f"   Death reason written to {death_reason_path}", file=sys.stderr)
+                        except Exception as e:
+                            print(f"   Failed to write death reason: {e}", file=sys.stderr)
+                        # Set shutdown flag and force exit
+                        self.shutdown_requested = True
-                    # Give a moment for cleanup
-                    time.sleep(1)
+                        # Give a moment for cleanup
+                        time.sleep(1)
-                    # Force exit - os._exit bypasses finally blocks for immediate death
-                    os._exit(0)
+                        # Force exit - os._exit bypasses finally blocks for immediate death
+                        os._exit(0)
         thread = threading.Thread(target=watchdog, daemon=True)
         thread.start()
-        print(f"   🛡️  KYS Watchdog: ENABLED (suicide if no heartbeat in {self.kys_timeout}s)", file=sys.stderr)
+        print(f"   KYS Watchdog: ENABLED (mode={self.kys_mode}, timeout={self.kys_timeout}s)", file=sys.stderr)
     def _process_codebase_files(self, batch_size: int = 200, limit: int = 0, project_path: str = None) -> Dict:
         """
@@ -3277,11 +3381,13 @@ class EmbeddingServer:
             # Fast readiness check - just returns model loading state
             # Used by specmem-init for event-based startup instead of timeouts
             model_loaded = self.embedder.model is not None
+            model_healthy = getattr(self.embedder, '_model_healthy', True)
             return {
-                'ready': model_loaded,
+                'ready': model_loaded and model_healthy,
                 'model_loaded': model_loaded,
+                'model_healthy': model_healthy,
                 'lazy_loading': self.embedder.low_resource_config.lazy_loading,
-                'status': 'ready' if model_loaded else 'loading'
+                'status': 'ready' if (model_loaded and model_healthy) else ('error' if not model_healthy else 'loading')
             }
         elif req_type == 'kys':
             # KYS (Keep Yourself Safe) heartbeat from MCP server
@@ -3292,6 +3398,9 @@ class EmbeddingServer:
                 'status': 'alive',
                 'ack': 'kurt cobain t minus reset',
                 'timeout_remaining': self.kys_timeout,
+                'kys_mode': self.kys_mode,
+                'model_loaded': self.embedder.model is not None,
+                'model_healthy': getattr(self.embedder, '_model_healthy', True),
                 'project': PROJECT_DIR_NAME
             }
         elif req_type == 'get_dimension':
@@ -3323,10 +3432,12 @@ class EmbeddingServer:
         # Stats request (or health check)
         if request.get('stats'):
             model_loaded = self.embedder.model is not None
+            model_healthy = getattr(self.embedder, '_model_healthy', True)
             stats_response = {
-                'status': 'healthy',  # For health check compatibility
-                'ready': model_loaded,  # For event-based startup polling
-                'model_loaded': model_loaded,  # Explicit model loading state
+                'status': 'healthy' if model_healthy else 'degraded',
+                'ready': model_loaded and model_healthy,
+                'model_loaded': model_loaded,
+                'model_healthy': model_healthy,
                 'stats': self.embedder.get_stats(),
                 'model': 'frankenstein-v5-dynamic',
                 'project': PROJECT_DIR_NAME,

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "specmem-hardwicksoftware",
-  "version": "3.5.99",
+  "version": "3.6.1",
   "type": "module",
   "description": "Persistent memory system for coding sessions - semantic search with pgvector, token compression, team coordination, file watching. Needs root: installs system-wide hooks, manages docker/PostgreSQL, writes global configs, handles screen sessions. justcalljon.pro",
   "main": "dist/index.js",