code-graph-context 2.10.2 → 2.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -22,6 +22,7 @@ export class EmbeddingSidecar {
|
|
|
22
22
|
config;
|
|
23
23
|
_dimensions = null;
|
|
24
24
|
stopping = false;
|
|
25
|
+
_exitHandler = null;
|
|
25
26
|
constructor(config = {}) {
|
|
26
27
|
this.config = { ...DEFAULT_CONFIG, ...config };
|
|
27
28
|
}
|
|
@@ -74,12 +75,15 @@ export class EmbeddingSidecar {
|
|
|
74
75
|
console.error(`[embedding-sidecar] Starting on ${this.baseUrl} (python: ${python}, model: ${this.config.model})`);
|
|
75
76
|
this.process = spawn(python, ['-m', 'uvicorn', 'embedding_server:app', '--host', this.config.host, '--port', String(this.config.port)], {
|
|
76
77
|
cwd: sidecarDir,
|
|
77
|
-
|
|
78
|
+
// stdin='pipe' so the child detects parent death when the pipe breaks
|
|
79
|
+
stdio: ['pipe', 'pipe', 'pipe'],
|
|
78
80
|
env: {
|
|
79
81
|
...process.env,
|
|
80
82
|
EMBEDDING_MODEL: this.config.model,
|
|
81
83
|
},
|
|
82
84
|
});
|
|
85
|
+
// Store pid for synchronous cleanup on exit
|
|
86
|
+
const childPid = this.process.pid;
|
|
83
87
|
// Forward stderr for visibility (model loading progress, errors)
|
|
84
88
|
this.process.stderr?.on('data', (data) => {
|
|
85
89
|
const line = data.toString().trim();
|
|
@@ -95,6 +99,21 @@ export class EmbeddingSidecar {
|
|
|
95
99
|
}
|
|
96
100
|
this.cleanup();
|
|
97
101
|
});
|
|
102
|
+
// Synchronous kill on parent exit — best-effort cleanup that runs when the Node
|
|
103
|
+
// process exits via process.exit(), normal completion, or an uncaught exception; 'exit' handlers do not run on SIGKILL or a hard crash.
|
|
104
|
+
if (childPid) {
|
|
105
|
+
const exitHandler = () => {
|
|
106
|
+
try {
|
|
107
|
+
process.kill(childPid, 'SIGKILL');
|
|
108
|
+
}
|
|
109
|
+
catch {
|
|
110
|
+
// Process already dead — ignore
|
|
111
|
+
}
|
|
112
|
+
};
|
|
113
|
+
process.on('exit', exitHandler);
|
|
114
|
+
// Store handler so we can remove it when the sidecar stops normally
|
|
115
|
+
this._exitHandler = exitHandler;
|
|
116
|
+
}
|
|
98
117
|
// Poll until healthy
|
|
99
118
|
await this.waitForHealthy();
|
|
100
119
|
}
|
|
@@ -233,6 +252,10 @@ export class EmbeddingSidecar {
|
|
|
233
252
|
this.cleanup();
|
|
234
253
|
}
|
|
235
254
|
cleanup() {
|
|
255
|
+
if (this._exitHandler) {
|
|
256
|
+
process.removeListener('exit', this._exitHandler);
|
|
257
|
+
this._exitHandler = null;
|
|
258
|
+
}
|
|
236
259
|
this.process = null;
|
|
237
260
|
this.readyPromise = null;
|
|
238
261
|
}
|
package/package.json
CHANGED
|
@@ -49,7 +49,8 @@ def load_model():
|
|
|
49
49
|
model = SentenceTransformer(model_name, device=device)
|
|
50
50
|
|
|
51
51
|
# Warm up with a test embedding
|
|
52
|
-
|
|
52
|
+
with torch.no_grad():
|
|
53
|
+
test = model.encode(["warmup"], show_progress_bar=False)
|
|
53
54
|
dims = len(test[0])
|
|
54
55
|
logger.info(f"Model loaded: {dims} dimensions, device={device}")
|
|
55
56
|
except Exception as e:
|
|
@@ -99,12 +100,16 @@ def _encode_with_oom_fallback(texts: list[str], batch_size: int) -> list[list[fl
|
|
|
99
100
|
import torch
|
|
100
101
|
|
|
101
102
|
try:
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
103
|
+
with torch.no_grad():
|
|
104
|
+
result = model.encode(
|
|
105
|
+
texts,
|
|
106
|
+
batch_size=batch_size,
|
|
107
|
+
show_progress_bar=False,
|
|
108
|
+
normalize_embeddings=True,
|
|
109
|
+
)
|
|
110
|
+
# Free intermediate tensors after each request
|
|
111
|
+
if hasattr(torch.mps, "empty_cache"):
|
|
112
|
+
torch.mps.empty_cache()
|
|
108
113
|
return result.tolist()
|
|
109
114
|
except (torch.mps.OutOfMemoryError, RuntimeError) as e:
|
|
110
115
|
if "out of memory" not in str(e).lower():
|
|
@@ -124,12 +129,13 @@ def _encode_with_oom_fallback(texts: list[str], batch_size: int) -> list[list[fl
|
|
|
124
129
|
try:
|
|
125
130
|
# Use smaller batches on CPU
|
|
126
131
|
cpu_batch = min(batch_size, 4)
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
132
|
+
with torch.no_grad():
|
|
133
|
+
result = model.encode(
|
|
134
|
+
texts,
|
|
135
|
+
batch_size=cpu_batch,
|
|
136
|
+
show_progress_bar=False,
|
|
137
|
+
normalize_embeddings=True,
|
|
138
|
+
)
|
|
133
139
|
return result.tolist()
|
|
134
140
|
finally:
|
|
135
141
|
# Move back to MPS for future requests
|
|
@@ -145,3 +151,27 @@ def handle_signal(sig, _frame):
|
|
|
145
151
|
|
|
146
152
|
|
|
147
153
|
signal.signal(signal.SIGTERM, handle_signal)
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _watch_stdin():
|
|
157
|
+
"""
|
|
158
|
+
Watch stdin for EOF — when the parent Node.js process dies (any reason),
|
|
159
|
+
the pipe breaks and stdin closes. This is our most reliable way to detect
|
|
160
|
+
parent death and self-terminate instead of becoming an orphan.
|
|
161
|
+
"""
|
|
162
|
+
import threading
|
|
163
|
+
|
|
164
|
+
def _watcher():
|
|
165
|
+
try:
|
|
166
|
+
# Blocks until stdin is closed (parent died)
|
|
167
|
+
sys.stdin.read()
|
|
168
|
+
except Exception:
|
|
169
|
+
pass
|
|
170
|
+
logger.info("Parent process died (stdin closed), shutting down")
|
|
171
|
+
os._exit(0)
|
|
172
|
+
|
|
173
|
+
t = threading.Thread(target=_watcher, daemon=True)
|
|
174
|
+
t.start()
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
_watch_stdin()
|