npm - code-graph-context - Versions diffs - 2.10.2 → 2.10.4 - Mend

code-graph-context 2.10.2 → 2.10.4

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (3)

package/dist/core/embeddings/embedding-sidecar.js +24 -1
package/package.json +1 -1
package/sidecar/embedding_server.py +71 -17

package/dist/core/embeddings/embedding-sidecar.js CHANGED Viewed

@@ -22,6 +22,7 @@ export class EmbeddingSidecar {
     config;
     _dimensions = null;
     stopping = false;
+    _exitHandler = null;
     constructor(config = {}) {
         this.config = { ...DEFAULT_CONFIG, ...config };
     }
@@ -74,12 +75,15 @@ export class EmbeddingSidecar {
         console.error(`[embedding-sidecar] Starting on ${this.baseUrl} (python: ${python}, model: ${this.config.model})`);
         this.process = spawn(python, ['-m', 'uvicorn', 'embedding_server:app', '--host', this.config.host, '--port', String(this.config.port)], {
             cwd: sidecarDir,
-            stdio: ['ignore', 'pipe', 'pipe'],
+            // stdin='pipe' so the child detects parent death when the pipe breaks
+            stdio: ['pipe', 'pipe', 'pipe'],
             env: {
                 ...process.env,
                 EMBEDDING_MODEL: this.config.model,
             },
         });
+        // Store pid for synchronous cleanup on exit
+        const childPid = this.process.pid;
         // Forward stderr for visibility (model loading progress, errors)
         this.process.stderr?.on('data', (data) => {
             const line = data.toString().trim();
@@ -95,6 +99,21 @@ export class EmbeddingSidecar {
             }
             this.cleanup();
         });
+        // Synchronous kill on parent exit — this is the only guaranteed cleanup
+        // when the Node process dies unexpectedly (SIGKILL, crash, etc.)
+        if (childPid) {
+            const exitHandler = () => {
+                try {
+                    process.kill(childPid, 'SIGKILL');
+                }
+                catch {
+                    // Process already dead — ignore
+                }
+            };
+            process.on('exit', exitHandler);
+            // Store handler so we can remove it when the sidecar stops normally
+            this._exitHandler = exitHandler;
+        }
         // Poll until healthy
         await this.waitForHealthy();
     }
@@ -233,6 +252,10 @@ export class EmbeddingSidecar {
         this.cleanup();
     }
     cleanup() {
+        if (this._exitHandler) {
+            process.removeListener('exit', this._exitHandler);
+            this._exitHandler = null;
+        }
         this.process = null;
         this.readyPromise = null;
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "code-graph-context",
-  "version": "2.10.2",
+  "version": "2.10.4",
   "description": "MCP server that builds code graphs to provide rich context to LLMs",
   "type": "module",
   "homepage": "https://github.com/drewdrewH/code-graph-context#readme",

package/sidecar/embedding_server.py CHANGED Viewed

@@ -9,6 +9,8 @@ import os
 import sys
 import signal
 import logging
+import threading
+import time
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
@@ -20,6 +22,8 @@ logging.basicConfig(
 )
 logger = logging.getLogger("embedding-sidecar")
+logger.info(f"Sidecar process starting (pid={os.getpid()})")
 app = FastAPI(title="code-graph-context embedding sidecar")
 model = None
@@ -46,14 +50,19 @@ def load_model():
         device = "mps" if torch.backends.mps.is_available() else "cpu"
         logger.info(f"Loading {model_name} on {device}...")
+        logger.info(f"PyTorch version: {torch.__version__}, MPS available: {torch.backends.mps.is_available()}")
         model = SentenceTransformer(model_name, device=device)
+        logger.info(f"Model loaded into memory, running warmup...")
         # Warm up with a test embedding
-        test = model.encode(["warmup"], show_progress_bar=False)
+        with torch.no_grad():
+            test = model.encode(["warmup"], show_progress_bar=False)
         dims = len(test[0])
-        logger.info(f"Model loaded: {dims} dimensions, device={device}")
+        logger.info(f"Warmup complete: {dims} dimensions, device={device}")
+        logger.info(f"Sidecar ready (pid={os.getpid()})")
     except Exception as e:
-        logger.error(f"Failed to load model: {e}")
+        logger.error(f"Failed to load model: {e}", exc_info=True)
         raise
@@ -78,16 +87,21 @@ async def embed(req: EmbedRequest):
     if not req.texts:
         return EmbedResponse(embeddings=[], dimensions=0, model=model_name)
+    logger.info(f"Embed request: {len(req.texts)} texts, batch_size={req.batch_size}")
+    start = time.time()
     try:
         embeddings = _encode_with_oom_fallback(req.texts, req.batch_size)
         dims = len(embeddings[0])
+        elapsed = time.time() - start
+        logger.info(f"Embed complete: {len(embeddings)} embeddings in {elapsed:.2f}s")
         return EmbedResponse(
             embeddings=embeddings,
             dimensions=dims,
             model=model_name,
         )
     except Exception as e:
-        logger.error(f"Embedding error: {e}")
+        logger.error(f"Embedding error: {e}", exc_info=True)
         raise HTTPException(status_code=500, detail=str(e))
@@ -99,12 +113,16 @@ def _encode_with_oom_fallback(texts: list[str], batch_size: int) -> list[list[fl
     import torch
     try:
-        result = model.encode(
-            texts,
-            batch_size=batch_size,
-            show_progress_bar=False,
-            normalize_embeddings=True,
-        )
+        with torch.no_grad():
+            result = model.encode(
+                texts,
+                batch_size=batch_size,
+                show_progress_bar=False,
+                normalize_embeddings=True,
+            )
+        # Free intermediate tensors after each request
+        if hasattr(torch.mps, "empty_cache"):
+            torch.mps.empty_cache()
         return result.tolist()
     except (torch.mps.OutOfMemoryError, RuntimeError) as e:
         if "out of memory" not in str(e).lower():
@@ -120,28 +138,64 @@ def _encode_with_oom_fallback(texts: list[str], batch_size: int) -> list[list[fl
         # Fall back to CPU for this request
         original_device = model.device
         model.to("cpu")
+        logger.info("Model moved to CPU for fallback encoding")
         try:
             # Use smaller batches on CPU
             cpu_batch = min(batch_size, 4)
-            result = model.encode(
-                texts,
-                batch_size=cpu_batch,
-                show_progress_bar=False,
-                normalize_embeddings=True,
-            )
+            with torch.no_grad():
+                result = model.encode(
+                    texts,
+                    batch_size=cpu_batch,
+                    show_progress_bar=False,
+                    normalize_embeddings=True,
+                )
+            logger.info(f"CPU fallback encoding complete ({len(texts)} texts)")
             return result.tolist()
         finally:
             # Move back to MPS for future requests
             try:
                 model.to(original_device)
+                logger.info(f"Model moved back to {original_device}")
             except Exception:
                 logger.warning("Could not move model back to MPS, staying on CPU")
 def handle_signal(sig, _frame):
-    logger.info(f"Received signal {sig}, shutting down")
+    logger.info(f"Received signal {sig}, shutting down (pid={os.getpid()})")
     sys.exit(0)
 signal.signal(signal.SIGTERM, handle_signal)
+def _watch_stdin():
+    """
+    Watch stdin for EOF — when the parent Node.js process dies (any reason),
+    the pipe breaks and stdin closes. This is our most reliable way to detect
+    parent death and self-terminate instead of becoming an orphan.
+    """
+    def _watcher():
+        logger.info("Stdin watcher thread started")
+        try:
+            # Blocks until stdin is closed (parent died)
+            while True:
+                data = sys.stdin.read(1)
+                if not data:
+                    # EOF — parent closed the pipe
+                    break
+        except Exception as e:
+            logger.info(f"Stdin watcher exception: {e}")
+        logger.info("Parent process died (stdin closed), shutting down")
+        os._exit(0)
+    t = threading.Thread(target=_watcher, daemon=True)
+    t.start()
+# Only watch stdin if it's a pipe (not a TTY) — avoids issues when run manually
+if not sys.stdin.isatty():
+    _watch_stdin()
+else:
+    logger.info("Running in terminal mode, stdin watcher disabled")