code-graph-context 2.10.3 → 2.10.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/sidecar/embedding_server.py +33 -9
package/sidecar/embedding_server.py
CHANGED
|
@@ -9,6 +9,8 @@ import os
|
|
|
9
9
|
import sys
|
|
10
10
|
import signal
|
|
11
11
|
import logging
|
|
12
|
+
import threading
|
|
13
|
+
import time
|
|
12
14
|
|
|
13
15
|
from fastapi import FastAPI, HTTPException
|
|
14
16
|
from pydantic import BaseModel
|
|
@@ -20,6 +22,8 @@ logging.basicConfig(
|
|
|
20
22
|
)
|
|
21
23
|
logger = logging.getLogger("embedding-sidecar")
|
|
22
24
|
|
|
25
|
+
logger.info(f"Sidecar process starting (pid={os.getpid()})")
|
|
26
|
+
|
|
23
27
|
app = FastAPI(title="code-graph-context embedding sidecar")
|
|
24
28
|
|
|
25
29
|
model = None
|
|
@@ -46,15 +50,19 @@ def load_model():
|
|
|
46
50
|
|
|
47
51
|
device = "mps" if torch.backends.mps.is_available() else "cpu"
|
|
48
52
|
logger.info(f"Loading {model_name} on {device}...")
|
|
53
|
+
logger.info(f"PyTorch version: {torch.__version__}, MPS available: {torch.backends.mps.is_available()}")
|
|
54
|
+
|
|
49
55
|
model = SentenceTransformer(model_name, device=device)
|
|
56
|
+
logger.info(f"Model loaded into memory, running warmup...")
|
|
50
57
|
|
|
51
58
|
# Warm up with a test embedding
|
|
52
59
|
with torch.no_grad():
|
|
53
60
|
test = model.encode(["warmup"], show_progress_bar=False)
|
|
54
61
|
dims = len(test[0])
|
|
55
|
-
logger.info(f"
|
|
62
|
+
logger.info(f"Warmup complete: {dims} dimensions, device={device}")
|
|
63
|
+
logger.info(f"Sidecar ready (pid={os.getpid()})")
|
|
56
64
|
except Exception as e:
|
|
57
|
-
logger.error(f"Failed to load model: {e}")
|
|
65
|
+
logger.error(f"Failed to load model: {e}", exc_info=True)
|
|
58
66
|
raise
|
|
59
67
|
|
|
60
68
|
|
|
@@ -79,16 +87,21 @@ async def embed(req: EmbedRequest):
|
|
|
79
87
|
if not req.texts:
|
|
80
88
|
return EmbedResponse(embeddings=[], dimensions=0, model=model_name)
|
|
81
89
|
|
|
90
|
+
logger.info(f"Embed request: {len(req.texts)} texts, batch_size={req.batch_size}")
|
|
91
|
+
start = time.time()
|
|
92
|
+
|
|
82
93
|
try:
|
|
83
94
|
embeddings = _encode_with_oom_fallback(req.texts, req.batch_size)
|
|
84
95
|
dims = len(embeddings[0])
|
|
96
|
+
elapsed = time.time() - start
|
|
97
|
+
logger.info(f"Embed complete: {len(embeddings)} embeddings in {elapsed:.2f}s")
|
|
85
98
|
return EmbedResponse(
|
|
86
99
|
embeddings=embeddings,
|
|
87
100
|
dimensions=dims,
|
|
88
101
|
model=model_name,
|
|
89
102
|
)
|
|
90
103
|
except Exception as e:
|
|
91
|
-
logger.error(f"Embedding error: {e}")
|
|
104
|
+
logger.error(f"Embedding error: {e}", exc_info=True)
|
|
92
105
|
raise HTTPException(status_code=500, detail=str(e))
|
|
93
106
|
|
|
94
107
|
|
|
@@ -125,6 +138,7 @@ def _encode_with_oom_fallback(texts: list[str], batch_size: int) -> list[list[fl
|
|
|
125
138
|
# Fall back to CPU for this request
|
|
126
139
|
original_device = model.device
|
|
127
140
|
model.to("cpu")
|
|
141
|
+
logger.info("Model moved to CPU for fallback encoding")
|
|
128
142
|
|
|
129
143
|
try:
|
|
130
144
|
# Use smaller batches on CPU
|
|
@@ -136,17 +150,19 @@ def _encode_with_oom_fallback(texts: list[str], batch_size: int) -> list[list[fl
|
|
|
136
150
|
show_progress_bar=False,
|
|
137
151
|
normalize_embeddings=True,
|
|
138
152
|
)
|
|
153
|
+
logger.info(f"CPU fallback encoding complete ({len(texts)} texts)")
|
|
139
154
|
return result.tolist()
|
|
140
155
|
finally:
|
|
141
156
|
# Move back to MPS for future requests
|
|
142
157
|
try:
|
|
143
158
|
model.to(original_device)
|
|
159
|
+
logger.info(f"Model moved back to {original_device}")
|
|
144
160
|
except Exception:
|
|
145
161
|
logger.warning("Could not move model back to MPS, staying on CPU")
|
|
146
162
|
|
|
147
163
|
|
|
148
164
|
def handle_signal(sig, _frame):
|
|
149
|
-
logger.info(f"Received signal {sig}, shutting down")
|
|
165
|
+
logger.info(f"Received signal {sig}, shutting down (pid={os.getpid()})")
|
|
150
166
|
sys.exit(0)
|
|
151
167
|
|
|
152
168
|
|
|
@@ -159,14 +175,18 @@ def _watch_stdin():
|
|
|
159
175
|
the pipe breaks and stdin closes. This is our most reliable way to detect
|
|
160
176
|
parent death and self-terminate instead of becoming an orphan.
|
|
161
177
|
"""
|
|
162
|
-
import threading
|
|
163
178
|
|
|
164
179
|
def _watcher():
|
|
180
|
+
logger.info("Stdin watcher thread started")
|
|
165
181
|
try:
|
|
166
182
|
# Blocks until stdin is closed (parent died)
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
183
|
+
while True:
|
|
184
|
+
data = sys.stdin.read(1)
|
|
185
|
+
if not data:
|
|
186
|
+
# EOF — parent closed the pipe
|
|
187
|
+
break
|
|
188
|
+
except Exception as e:
|
|
189
|
+
logger.info(f"Stdin watcher exception: {e}")
|
|
170
190
|
logger.info("Parent process died (stdin closed), shutting down")
|
|
171
191
|
os._exit(0)
|
|
172
192
|
|
|
@@ -174,4 +194,8 @@ def _watch_stdin():
|
|
|
174
194
|
t.start()
|
|
175
195
|
|
|
176
196
|
|
|
177
|
-
|
|
197
|
+
# Only watch stdin if it's a pipe (not a TTY) — avoids issues when run manually
|
|
198
|
+
if not sys.stdin.isatty():
|
|
199
|
+
_watch_stdin()
|
|
200
|
+
else:
|
|
201
|
+
logger.info("Running in terminal mode, stdin watcher disabled")
|