code-graph-context 2.10.2 → 2.10.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -22,6 +22,7 @@ export class EmbeddingSidecar {
22
22
  config;
23
23
  _dimensions = null;
24
24
  stopping = false;
25
+ _exitHandler = null;
25
26
  constructor(config = {}) {
26
27
  this.config = { ...DEFAULT_CONFIG, ...config };
27
28
  }
@@ -74,12 +75,15 @@ export class EmbeddingSidecar {
74
75
  console.error(`[embedding-sidecar] Starting on ${this.baseUrl} (python: ${python}, model: ${this.config.model})`);
75
76
  this.process = spawn(python, ['-m', 'uvicorn', 'embedding_server:app', '--host', this.config.host, '--port', String(this.config.port)], {
76
77
  cwd: sidecarDir,
77
- stdio: ['ignore', 'pipe', 'pipe'],
78
+ // stdin='pipe' so the child detects parent death when the pipe breaks
79
+ stdio: ['pipe', 'pipe', 'pipe'],
78
80
  env: {
79
81
  ...process.env,
80
82
  EMBEDDING_MODEL: this.config.model,
81
83
  },
82
84
  });
85
+ // Store pid for synchronous cleanup on exit
86
+ const childPid = this.process.pid;
83
87
  // Forward stderr for visibility (model loading progress, errors)
84
88
  this.process.stderr?.on('data', (data) => {
85
89
  const line = data.toString().trim();
@@ -95,6 +99,21 @@ export class EmbeddingSidecar {
95
99
  }
96
100
  this.cleanup();
97
101
  });
102
+ // Synchronous kill on parent exit — covers process.exit() and normal event-loop drain.
103
+ // 'exit' listeners do NOT run when Node is killed by a signal (e.g. SIGKILL); the child's stdin-EOF watcher covers that case.
104
+ if (childPid) {
105
+ const exitHandler = () => {
106
+ try {
107
+ process.kill(childPid, 'SIGKILL');
108
+ }
109
+ catch {
110
+ // Process already dead — ignore
111
+ }
112
+ };
113
+ process.on('exit', exitHandler);
114
+ // Store handler so we can remove it when the sidecar stops normally
115
+ this._exitHandler = exitHandler;
116
+ }
98
117
  // Poll until healthy
99
118
  await this.waitForHealthy();
100
119
  }
@@ -233,6 +252,10 @@ export class EmbeddingSidecar {
233
252
  this.cleanup();
234
253
  }
235
254
  cleanup() {
255
+ if (this._exitHandler) {
256
+ process.removeListener('exit', this._exitHandler);
257
+ this._exitHandler = null;
258
+ }
236
259
  this.process = null;
237
260
  this.readyPromise = null;
238
261
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "code-graph-context",
3
- "version": "2.10.2",
3
+ "version": "2.10.4",
4
4
  "description": "MCP server that builds code graphs to provide rich context to LLMs",
5
5
  "type": "module",
6
6
  "homepage": "https://github.com/drewdrewH/code-graph-context#readme",
@@ -9,6 +9,8 @@ import os
9
9
  import sys
10
10
  import signal
11
11
  import logging
12
+ import threading
13
+ import time
12
14
 
13
15
  from fastapi import FastAPI, HTTPException
14
16
  from pydantic import BaseModel
@@ -20,6 +22,8 @@ logging.basicConfig(
20
22
  )
21
23
  logger = logging.getLogger("embedding-sidecar")
22
24
 
25
+ logger.info(f"Sidecar process starting (pid={os.getpid()})")
26
+
23
27
  app = FastAPI(title="code-graph-context embedding sidecar")
24
28
 
25
29
  model = None
@@ -46,14 +50,19 @@ def load_model():
46
50
 
47
51
  device = "mps" if torch.backends.mps.is_available() else "cpu"
48
52
  logger.info(f"Loading {model_name} on {device}...")
53
+ logger.info(f"PyTorch version: {torch.__version__}, MPS available: {torch.backends.mps.is_available()}")
54
+
49
55
  model = SentenceTransformer(model_name, device=device)
56
+ logger.info(f"Model loaded into memory, running warmup...")
50
57
 
51
58
  # Warm up with a test embedding
52
- test = model.encode(["warmup"], show_progress_bar=False)
59
+ with torch.no_grad():
60
+ test = model.encode(["warmup"], show_progress_bar=False)
53
61
  dims = len(test[0])
54
- logger.info(f"Model loaded: {dims} dimensions, device={device}")
62
+ logger.info(f"Warmup complete: {dims} dimensions, device={device}")
63
+ logger.info(f"Sidecar ready (pid={os.getpid()})")
55
64
  except Exception as e:
56
- logger.error(f"Failed to load model: {e}")
65
+ logger.error(f"Failed to load model: {e}", exc_info=True)
57
66
  raise
58
67
 
59
68
 
@@ -78,16 +87,21 @@ async def embed(req: EmbedRequest):
78
87
  if not req.texts:
79
88
  return EmbedResponse(embeddings=[], dimensions=0, model=model_name)
80
89
 
90
+ logger.info(f"Embed request: {len(req.texts)} texts, batch_size={req.batch_size}")
91
+ start = time.time()
92
+
81
93
  try:
82
94
  embeddings = _encode_with_oom_fallback(req.texts, req.batch_size)
83
95
  dims = len(embeddings[0])
96
+ elapsed = time.time() - start
97
+ logger.info(f"Embed complete: {len(embeddings)} embeddings in {elapsed:.2f}s")
84
98
  return EmbedResponse(
85
99
  embeddings=embeddings,
86
100
  dimensions=dims,
87
101
  model=model_name,
88
102
  )
89
103
  except Exception as e:
90
- logger.error(f"Embedding error: {e}")
104
+ logger.error(f"Embedding error: {e}", exc_info=True)
91
105
  raise HTTPException(status_code=500, detail=str(e))
92
106
 
93
107
 
@@ -99,12 +113,16 @@ def _encode_with_oom_fallback(texts: list[str], batch_size: int) -> list[list[fl
99
113
  import torch
100
114
 
101
115
  try:
102
- result = model.encode(
103
- texts,
104
- batch_size=batch_size,
105
- show_progress_bar=False,
106
- normalize_embeddings=True,
107
- )
116
+ with torch.no_grad():
117
+ result = model.encode(
118
+ texts,
119
+ batch_size=batch_size,
120
+ show_progress_bar=False,
121
+ normalize_embeddings=True,
122
+ )
123
+ # Free intermediate tensors after each request
124
+ if hasattr(torch.mps, "empty_cache"):
125
+ torch.mps.empty_cache()
108
126
  return result.tolist()
109
127
  except (torch.mps.OutOfMemoryError, RuntimeError) as e:
110
128
  if "out of memory" not in str(e).lower():
@@ -120,28 +138,64 @@ def _encode_with_oom_fallback(texts: list[str], batch_size: int) -> list[list[fl
120
138
  # Fall back to CPU for this request
121
139
  original_device = model.device
122
140
  model.to("cpu")
141
+ logger.info("Model moved to CPU for fallback encoding")
123
142
 
124
143
  try:
125
144
  # Use smaller batches on CPU
126
145
  cpu_batch = min(batch_size, 4)
127
- result = model.encode(
128
- texts,
129
- batch_size=cpu_batch,
130
- show_progress_bar=False,
131
- normalize_embeddings=True,
132
- )
146
+ with torch.no_grad():
147
+ result = model.encode(
148
+ texts,
149
+ batch_size=cpu_batch,
150
+ show_progress_bar=False,
151
+ normalize_embeddings=True,
152
+ )
153
+ logger.info(f"CPU fallback encoding complete ({len(texts)} texts)")
133
154
  return result.tolist()
134
155
  finally:
135
156
  # Move back to MPS for future requests
136
157
  try:
137
158
  model.to(original_device)
159
+ logger.info(f"Model moved back to {original_device}")
138
160
  except Exception:
139
161
  logger.warning("Could not move model back to MPS, staying on CPU")
140
162
 
141
163
 
142
164
  def handle_signal(sig, _frame):
143
- logger.info(f"Received signal {sig}, shutting down")
165
+ logger.info(f"Received signal {sig}, shutting down (pid={os.getpid()})")
144
166
  sys.exit(0)
145
167
 
146
168
 
147
169
  signal.signal(signal.SIGTERM, handle_signal)
170
+
171
+
172
+ def _watch_stdin():
173
+ """
174
+ Watch stdin for EOF — when the parent Node.js process dies (any reason),
175
+ the pipe breaks and stdin closes. This is our most reliable way to detect
176
+ parent death and self-terminate instead of becoming an orphan.
177
+ """
178
+
179
+ def _watcher():
180
+ logger.info("Stdin watcher thread started")
181
+ try:
182
+ # Blocks until stdin is closed (parent died)
183
+ while True:
184
+ data = sys.stdin.read(1)
185
+ if not data:
186
+ # EOF — parent closed the pipe
187
+ break
188
+ except Exception as e:
189
+ logger.info(f"Stdin watcher exception: {e}")
190
+ logger.info("Parent process died (stdin closed), shutting down")
191
+ os._exit(0)
192
+
193
+ t = threading.Thread(target=_watcher, daemon=True)
194
+ t.start()
195
+
196
+
197
+ # Only watch stdin when it is not a TTY (a pipe or other redirect) — avoids issues when run manually
198
+ if not sys.stdin.isatty():
199
+ _watch_stdin()
200
+ else:
201
+ logger.info("Running in terminal mode, stdin watcher disabled")