code-graph-context 2.10.2 → 2.10.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -22,6 +22,7 @@ export class EmbeddingSidecar {
|
|
|
22
22
|
config;
|
|
23
23
|
_dimensions = null;
|
|
24
24
|
stopping = false;
|
|
25
|
+
_exitHandler = null;
|
|
25
26
|
constructor(config = {}) {
|
|
26
27
|
this.config = { ...DEFAULT_CONFIG, ...config };
|
|
27
28
|
}
|
|
@@ -74,12 +75,15 @@ export class EmbeddingSidecar {
|
|
|
74
75
|
console.error(`[embedding-sidecar] Starting on ${this.baseUrl} (python: ${python}, model: ${this.config.model})`);
|
|
75
76
|
this.process = spawn(python, ['-m', 'uvicorn', 'embedding_server:app', '--host', this.config.host, '--port', String(this.config.port)], {
|
|
76
77
|
cwd: sidecarDir,
|
|
77
|
-
|
|
78
|
+
// stdin='pipe' so the child detects parent death when the pipe breaks
|
|
79
|
+
stdio: ['pipe', 'pipe', 'pipe'],
|
|
78
80
|
env: {
|
|
79
81
|
...process.env,
|
|
80
82
|
EMBEDDING_MODEL: this.config.model,
|
|
81
83
|
},
|
|
82
84
|
});
|
|
85
|
+
// Store pid for synchronous cleanup on exit
|
|
86
|
+
const childPid = this.process.pid;
|
|
83
87
|
// Forward stderr for visibility (model loading progress, errors)
|
|
84
88
|
this.process.stderr?.on('data', (data) => {
|
|
85
89
|
const line = data.toString().trim();
|
|
@@ -95,6 +99,21 @@ export class EmbeddingSidecar {
|
|
|
95
99
|
}
|
|
96
100
|
this.cleanup();
|
|
97
101
|
});
|
|
102
|
+
// Best-effort synchronous kill on parent exit. Node emits 'exit' when it exits on its own
|
|
103
|
+
// (process.exit(), empty event loop, uncaught exception) but NOT on SIGKILL or a hard crash.
|
|
104
|
+
if (childPid) {
|
|
105
|
+
const exitHandler = () => {
|
|
106
|
+
try {
|
|
107
|
+
process.kill(childPid, 'SIGKILL');
|
|
108
|
+
}
|
|
109
|
+
catch {
|
|
110
|
+
// Process already dead — ignore
|
|
111
|
+
}
|
|
112
|
+
};
|
|
113
|
+
process.on('exit', exitHandler);
|
|
114
|
+
// Store handler so we can remove it when the sidecar stops normally
|
|
115
|
+
this._exitHandler = exitHandler;
|
|
116
|
+
}
|
|
98
117
|
// Poll until healthy
|
|
99
118
|
await this.waitForHealthy();
|
|
100
119
|
}
|
|
@@ -233,6 +252,10 @@ export class EmbeddingSidecar {
|
|
|
233
252
|
this.cleanup();
|
|
234
253
|
}
|
|
235
254
|
cleanup() {
|
|
255
|
+
if (this._exitHandler) {
|
|
256
|
+
process.removeListener('exit', this._exitHandler);
|
|
257
|
+
this._exitHandler = null;
|
|
258
|
+
}
|
|
236
259
|
this.process = null;
|
|
237
260
|
this.readyPromise = null;
|
|
238
261
|
}
|
package/package.json
CHANGED
|
@@ -9,6 +9,8 @@ import os
|
|
|
9
9
|
import sys
|
|
10
10
|
import signal
|
|
11
11
|
import logging
|
|
12
|
+
import threading
|
|
13
|
+
import time
|
|
12
14
|
|
|
13
15
|
from fastapi import FastAPI, HTTPException
|
|
14
16
|
from pydantic import BaseModel
|
|
@@ -20,6 +22,8 @@ logging.basicConfig(
|
|
|
20
22
|
)
|
|
21
23
|
logger = logging.getLogger("embedding-sidecar")
|
|
22
24
|
|
|
25
|
+
logger.info(f"Sidecar process starting (pid={os.getpid()})")
|
|
26
|
+
|
|
23
27
|
app = FastAPI(title="code-graph-context embedding sidecar")
|
|
24
28
|
|
|
25
29
|
model = None
|
|
@@ -46,14 +50,19 @@ def load_model():
|
|
|
46
50
|
|
|
47
51
|
device = "mps" if torch.backends.mps.is_available() else "cpu"
|
|
48
52
|
logger.info(f"Loading {model_name} on {device}...")
|
|
53
|
+
logger.info(f"PyTorch version: {torch.__version__}, MPS available: {torch.backends.mps.is_available()}")
|
|
54
|
+
|
|
49
55
|
model = SentenceTransformer(model_name, device=device)
|
|
56
|
+
logger.info(f"Model loaded into memory, running warmup...")
|
|
50
57
|
|
|
51
58
|
# Warm up with a test embedding
|
|
52
|
-
|
|
59
|
+
with torch.no_grad():
|
|
60
|
+
test = model.encode(["warmup"], show_progress_bar=False)
|
|
53
61
|
dims = len(test[0])
|
|
54
|
-
logger.info(f"
|
|
62
|
+
logger.info(f"Warmup complete: {dims} dimensions, device={device}")
|
|
63
|
+
logger.info(f"Sidecar ready (pid={os.getpid()})")
|
|
55
64
|
except Exception as e:
|
|
56
|
-
logger.error(f"Failed to load model: {e}")
|
|
65
|
+
logger.error(f"Failed to load model: {e}", exc_info=True)
|
|
57
66
|
raise
|
|
58
67
|
|
|
59
68
|
|
|
@@ -78,16 +87,21 @@ async def embed(req: EmbedRequest):
|
|
|
78
87
|
if not req.texts:
|
|
79
88
|
return EmbedResponse(embeddings=[], dimensions=0, model=model_name)
|
|
80
89
|
|
|
90
|
+
logger.info(f"Embed request: {len(req.texts)} texts, batch_size={req.batch_size}")
|
|
91
|
+
start = time.time()
|
|
92
|
+
|
|
81
93
|
try:
|
|
82
94
|
embeddings = _encode_with_oom_fallback(req.texts, req.batch_size)
|
|
83
95
|
dims = len(embeddings[0])
|
|
96
|
+
elapsed = time.time() - start
|
|
97
|
+
logger.info(f"Embed complete: {len(embeddings)} embeddings in {elapsed:.2f}s")
|
|
84
98
|
return EmbedResponse(
|
|
85
99
|
embeddings=embeddings,
|
|
86
100
|
dimensions=dims,
|
|
87
101
|
model=model_name,
|
|
88
102
|
)
|
|
89
103
|
except Exception as e:
|
|
90
|
-
logger.error(f"Embedding error: {e}")
|
|
104
|
+
logger.error(f"Embedding error: {e}", exc_info=True)
|
|
91
105
|
raise HTTPException(status_code=500, detail=str(e))
|
|
92
106
|
|
|
93
107
|
|
|
@@ -99,12 +113,16 @@ def _encode_with_oom_fallback(texts: list[str], batch_size: int) -> list[list[fl
|
|
|
99
113
|
import torch
|
|
100
114
|
|
|
101
115
|
try:
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
116
|
+
with torch.no_grad():
|
|
117
|
+
result = model.encode(
|
|
118
|
+
texts,
|
|
119
|
+
batch_size=batch_size,
|
|
120
|
+
show_progress_bar=False,
|
|
121
|
+
normalize_embeddings=True,
|
|
122
|
+
)
|
|
123
|
+
# Free intermediate tensors after each request
|
|
124
|
+
if hasattr(torch.mps, "empty_cache"):
|
|
125
|
+
torch.mps.empty_cache()
|
|
108
126
|
return result.tolist()
|
|
109
127
|
except (torch.mps.OutOfMemoryError, RuntimeError) as e:
|
|
110
128
|
if "out of memory" not in str(e).lower():
|
|
@@ -120,28 +138,64 @@ def _encode_with_oom_fallback(texts: list[str], batch_size: int) -> list[list[fl
|
|
|
120
138
|
# Fall back to CPU for this request
|
|
121
139
|
original_device = model.device
|
|
122
140
|
model.to("cpu")
|
|
141
|
+
logger.info("Model moved to CPU for fallback encoding")
|
|
123
142
|
|
|
124
143
|
try:
|
|
125
144
|
# Use smaller batches on CPU
|
|
126
145
|
cpu_batch = min(batch_size, 4)
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
146
|
+
with torch.no_grad():
|
|
147
|
+
result = model.encode(
|
|
148
|
+
texts,
|
|
149
|
+
batch_size=cpu_batch,
|
|
150
|
+
show_progress_bar=False,
|
|
151
|
+
normalize_embeddings=True,
|
|
152
|
+
)
|
|
153
|
+
logger.info(f"CPU fallback encoding complete ({len(texts)} texts)")
|
|
133
154
|
return result.tolist()
|
|
134
155
|
finally:
|
|
135
156
|
# Move back to MPS for future requests
|
|
136
157
|
try:
|
|
137
158
|
model.to(original_device)
|
|
159
|
+
logger.info(f"Model moved back to {original_device}")
|
|
138
160
|
except Exception:
|
|
139
161
|
logger.warning("Could not move model back to MPS, staying on CPU")
|
|
140
162
|
|
|
141
163
|
|
|
142
164
|
def handle_signal(sig, _frame):
    """Log the received signal together with this process's pid, then exit.

    Args:
        sig: Signal number passed in by the ``signal`` module.
        _frame: Current stack frame supplied by the ``signal`` module (unused).

    Raises:
        SystemExit: Always, with exit status 0.
    """
    message = f"Received signal {sig}, shutting down (pid={os.getpid()})"
    logger.info(message)
    raise SystemExit(0)
|
|
145
167
|
|
|
146
168
|
|
|
147
169
|
signal.signal(signal.SIGTERM, handle_signal)
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def _watch_stdin():
    """
    Start a daemon thread that terminates this process once stdin hits EOF.

    When the parent process dies (for any reason) its end of the stdin pipe is
    closed, so a blocking read on our end returns EOF. The watcher then calls
    ``os._exit(0)`` so the sidecar does not linger as an orphan.

    Any exception raised while reading is logged and is also treated as the
    end of the watch, i.e. the process still exits.
    """

    def _watcher():
        logger.info("Stdin watcher thread started")
        try:
            fd = sys.stdin.fileno()
            # Read the raw descriptor instead of the text-mode ``sys.stdin``:
            # the bytes are discarded, so there is nothing to decode (stray
            # non-UTF-8 input cannot raise), and the thread never blocks
            # inside the buffered stdin object while the interpreter is
            # shutting down. ``os.read`` returns b"" only at EOF.
            while os.read(fd, 4096):
                pass
        except Exception as e:
            logger.info(f"Stdin watcher exception: {e}")
        logger.info("Parent process died (stdin closed), shutting down")
        # Hard exit: skip interpreter cleanup so a busy main thread cannot
        # delay or block termination.
        os._exit(0)

    t = threading.Thread(target=_watcher, daemon=True)
    t.start()
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _stdin_is_pipe():
    """Return True when stdin is a pipe or socket, i.e. a channel held by a parent.

    A plain ``not isatty()`` test is too broad: stdin redirected from
    ``/dev/null`` or a regular file is not a TTY either, but it yields EOF
    immediately, which the watcher would misread as "parent died".
    Sockets are accepted as well as FIFOs because the parent may implement
    its stdio "pipe" with a socket pair (NOTE(review): believed to be the case
    for Node/libuv on POSIX — confirm).
    """
    import stat  # local import: only needed for this one-time startup check

    try:
        mode = os.fstat(sys.stdin.fileno()).st_mode
    except (AttributeError, OSError, ValueError):
        # stdin is None, closed, or not backed by a real file descriptor.
        return False
    return stat.S_ISFIFO(mode) or stat.S_ISSOCK(mode)


# Only watch stdin if it's a pipe — avoids issues when run manually in a
# terminal or with stdin redirected from a file or /dev/null.
if _stdin_is_pipe():
    _watch_stdin()
elif sys.stdin is not None and not sys.stdin.closed and sys.stdin.isatty():
    logger.info("Running in terminal mode, stdin watcher disabled")
else:
    logger.info("Stdin is not a pipe, stdin watcher disabled")
|