code-graph-context 2.10.2 → 2.10.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
[file header missing in this diff rendering — hunk content is the JavaScript sidecar module containing `EmbeddingSidecar`; confirm exact path against the package]
@@ -22,6 +22,7 @@ export class EmbeddingSidecar {
22
22
  config;
23
23
  _dimensions = null;
24
24
  stopping = false;
25
+ _exitHandler = null;
25
26
  constructor(config = {}) {
26
27
  this.config = { ...DEFAULT_CONFIG, ...config };
27
28
  }
@@ -74,12 +75,15 @@ export class EmbeddingSidecar {
74
75
  console.error(`[embedding-sidecar] Starting on ${this.baseUrl} (python: ${python}, model: ${this.config.model})`);
75
76
  this.process = spawn(python, ['-m', 'uvicorn', 'embedding_server:app', '--host', this.config.host, '--port', String(this.config.port)], {
76
77
  cwd: sidecarDir,
77
- stdio: ['ignore', 'pipe', 'pipe'],
78
+ // stdin='pipe' so the child detects parent death when the pipe breaks
79
+ stdio: ['pipe', 'pipe', 'pipe'],
78
80
  env: {
79
81
  ...process.env,
80
82
  EMBEDDING_MODEL: this.config.model,
81
83
  },
82
84
  });
85
+ // Store pid for synchronous cleanup on exit
86
+ const childPid = this.process.pid;
83
87
  // Forward stderr for visibility (model loading progress, errors)
84
88
  this.process.stderr?.on('data', (data) => {
85
89
  const line = data.toString().trim();
@@ -95,6 +99,21 @@ export class EmbeddingSidecar {
95
99
  }
96
100
  this.cleanup();
97
101
  });
102
+ // Synchronous kill on parent exit — this is the only guaranteed cleanup
103
+ // when the Node process dies unexpectedly (SIGKILL, crash, etc.)
104
+ if (childPid) {
105
+ const exitHandler = () => {
106
+ try {
107
+ process.kill(childPid, 'SIGKILL');
108
+ }
109
+ catch {
110
+ // Process already dead — ignore
111
+ }
112
+ };
113
+ process.on('exit', exitHandler);
114
+ // Store handler so we can remove it when the sidecar stops normally
115
+ this._exitHandler = exitHandler;
116
+ }
98
117
  // Poll until healthy
99
118
  await this.waitForHealthy();
100
119
  }
@@ -233,6 +252,10 @@ export class EmbeddingSidecar {
233
252
  this.cleanup();
234
253
  }
235
254
  cleanup() {
255
+ if (this._exitHandler) {
256
+ process.removeListener('exit', this._exitHandler);
257
+ this._exitHandler = null;
258
+ }
236
259
  this.process = null;
237
260
  this.readyPromise = null;
238
261
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "code-graph-context",
3
- "version": "2.10.2",
3
+ "version": "2.10.3",
4
4
  "description": "MCP server that builds code graphs to provide rich context to LLMs",
5
5
  "type": "module",
6
6
  "homepage": "https://github.com/drewdrewH/code-graph-context#readme",
[file header missing in this diff rendering — the following Python hunks do not belong to package.json; they are from the embedding server module (presumably embedding_server.py, per the `embedding_server:app` uvicorn target spawned above — confirm exact path against the package)]
@@ -49,7 +49,8 @@ def load_model():
49
49
  model = SentenceTransformer(model_name, device=device)
50
50
 
51
51
  # Warm up with a test embedding
52
- test = model.encode(["warmup"], show_progress_bar=False)
52
+ with torch.no_grad():
53
+ test = model.encode(["warmup"], show_progress_bar=False)
53
54
  dims = len(test[0])
54
55
  logger.info(f"Model loaded: {dims} dimensions, device={device}")
55
56
  except Exception as e:
@@ -99,12 +100,16 @@ def _encode_with_oom_fallback(texts: list[str], batch_size: int) -> list[list[fl
99
100
  import torch
100
101
 
101
102
  try:
102
- result = model.encode(
103
- texts,
104
- batch_size=batch_size,
105
- show_progress_bar=False,
106
- normalize_embeddings=True,
107
- )
103
+ with torch.no_grad():
104
+ result = model.encode(
105
+ texts,
106
+ batch_size=batch_size,
107
+ show_progress_bar=False,
108
+ normalize_embeddings=True,
109
+ )
110
+ # Free intermediate tensors after each request
111
+ if hasattr(torch.mps, "empty_cache"):
112
+ torch.mps.empty_cache()
108
113
  return result.tolist()
109
114
  except (torch.mps.OutOfMemoryError, RuntimeError) as e:
110
115
  if "out of memory" not in str(e).lower():
@@ -124,12 +129,13 @@ def _encode_with_oom_fallback(texts: list[str], batch_size: int) -> list[list[fl
124
129
  try:
125
130
  # Use smaller batches on CPU
126
131
  cpu_batch = min(batch_size, 4)
127
- result = model.encode(
128
- texts,
129
- batch_size=cpu_batch,
130
- show_progress_bar=False,
131
- normalize_embeddings=True,
132
- )
132
+ with torch.no_grad():
133
+ result = model.encode(
134
+ texts,
135
+ batch_size=cpu_batch,
136
+ show_progress_bar=False,
137
+ normalize_embeddings=True,
138
+ )
133
139
  return result.tolist()
134
140
  finally:
135
141
  # Move back to MPS for future requests
@@ -145,3 +151,27 @@ def handle_signal(sig, _frame):
145
151
 
146
152
 
147
153
  signal.signal(signal.SIGTERM, handle_signal)
154
+
155
+
156
+ def _watch_stdin():
157
+ """
158
+ Watch stdin for EOF — when the parent Node.js process dies (any reason),
159
+ the pipe breaks and stdin closes. This is our most reliable way to detect
160
+ parent death and self-terminate instead of becoming an orphan.
161
+ """
162
+ import threading
163
+
164
+ def _watcher():
165
+ try:
166
+ # Blocks until stdin is closed (parent died)
167
+ sys.stdin.read()
168
+ except Exception:
169
+ pass
170
+ logger.info("Parent process died (stdin closed), shutting down")
171
+ os._exit(0)
172
+
173
+ t = threading.Thread(target=_watcher, daemon=True)
174
+ t.start()
175
+
176
+
177
+ _watch_stdin()