code-graph-context 2.10.4 → 2.10.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -191,6 +191,7 @@ If you prefer to edit the config files directly:
|
|
|
191
191
|
| `NEO4J_PASSWORD` | No | `PASSWORD` | Neo4j password |
|
|
192
192
|
| `EMBEDDING_MODEL` | No | `Qodo/Qodo-Embed-1-1.5B` | Local embedding model (see [Embedding Configuration](#embedding-configuration)) |
|
|
193
193
|
| `EMBEDDING_SIDECAR_PORT` | No | `8787` | Port for local embedding server |
|
|
194
|
+
| `EMBEDDING_FULL_PRECISION` | No | `false` | Set `true` to run the local model in float32 instead of the default float16 (uses ~2x memory) |
|
|
194
195
|
| `OPENAI_ENABLED` | No | `false` | Set `true` to use OpenAI instead of local |
|
|
195
196
|
| `OPENAI_API_KEY` | No* | - | Required when `OPENAI_ENABLED=true` |
|
|
196
197
|
|
|
@@ -535,20 +536,24 @@ This enables queries like "find all hooks that use context" while maintaining AS
|
|
|
535
536
|
|
|
536
537
|
Local embeddings are the default — **no API key needed**. The Python sidecar starts automatically on first use and runs a local model for high-quality code embeddings.
|
|
537
538
|
|
|
539
|
+
The sidecar uses **float16 (half precision)** by default, which halves memory usage with no meaningful quality loss. It also shuts down automatically after 3 minutes of inactivity to free memory, and restarts lazily on the next embedding request (a restart takes ~15-20s).
|
|
540
|
+
|
|
541
|
+
> **Full precision mode:** If you have 32+ GB RAM and want float32, set `EMBEDDING_FULL_PRECISION=true`.
|
|
542
|
+
|
|
538
543
|
### Available Models
|
|
539
544
|
|
|
540
545
|
Set via the `EMBEDDING_MODEL` environment variable:
|
|
541
546
|
|
|
542
|
-
| Model | Dimensions | RAM | Quality | Best For |
|
|
547
|
+
| Model | Dimensions | RAM (fp16) | Quality | Best For |
|
|
543
548
|
|-------|-----------|-----|---------|----------|
|
|
544
|
-
| `Qodo/Qodo-Embed-1-1.5B` (default) | 1536 | ~
|
|
545
|
-
| `BAAI/bge-base-en-v1.5` | 768 | ~
|
|
546
|
-
| `sentence-transformers/all-MiniLM-L6-v2` | 384 | ~
|
|
547
|
-
| `nomic-ai/nomic-embed-text-v1.5` | 768 | ~
|
|
548
|
-
| `sentence-transformers/all-mpnet-base-v2` | 768 | ~
|
|
549
|
-
| `BAAI/bge-small-en-v1.5` | 384 | ~
|
|
550
|
-
|
|
551
|
-
**Example:** Use a lightweight model on a
|
|
549
|
+
| `Qodo/Qodo-Embed-1-1.5B` (default) | 1536 | ~4.5 GB | Best | Default, works on 16 GB machines |
|
|
550
|
+
| `BAAI/bge-base-en-v1.5` | 768 | ~250 MB | Good | General purpose, low RAM |
|
|
551
|
+
| `sentence-transformers/all-MiniLM-L6-v2` | 384 | ~100 MB | OK | Minimal RAM, fast |
|
|
552
|
+
| `nomic-ai/nomic-embed-text-v1.5` | 768 | ~300 MB | Good | Code + prose mixed |
|
|
553
|
+
| `sentence-transformers/all-mpnet-base-v2` | 768 | ~250 MB | Good | Balanced quality/speed |
|
|
554
|
+
| `BAAI/bge-small-en-v1.5` | 384 | ~65 MB | OK | Smallest footprint |
|
|
555
|
+
|
|
556
|
+
**Example:** Use a lightweight model on a low-memory machine:
|
|
552
557
|
```bash
|
|
553
558
|
claude mcp add --scope user code-graph-context \
|
|
554
559
|
-e EMBEDDING_MODEL=BAAI/bge-base-en-v1.5 \
|
|
@@ -15,6 +15,7 @@ const DEFAULT_CONFIG = {
|
|
|
15
15
|
model: process.env.EMBEDDING_MODEL ?? 'Qodo/Qodo-Embed-1-1.5B',
|
|
16
16
|
startupTimeoutMs: 120_000, // 2 min — first run downloads the model
|
|
17
17
|
requestTimeoutMs: 60_000,
|
|
18
|
+
idleTimeoutMs: 180_000, // 3 min — auto-shutdown after no requests
|
|
18
19
|
};
|
|
19
20
|
export class EmbeddingSidecar {
|
|
20
21
|
process = null;
|
|
@@ -23,6 +24,7 @@ export class EmbeddingSidecar {
|
|
|
23
24
|
_dimensions = null;
|
|
24
25
|
stopping = false;
|
|
25
26
|
_exitHandler = null;
|
|
27
|
+
_idleTimer = null;
|
|
26
28
|
constructor(config = {}) {
|
|
27
29
|
this.config = { ...DEFAULT_CONFIG, ...config };
|
|
28
30
|
}
|
|
@@ -205,6 +207,7 @@ export class EmbeddingSidecar {
|
|
|
205
207
|
const data = (await res.json());
|
|
206
208
|
if (data.dimensions)
|
|
207
209
|
this._dimensions = data.dimensions;
|
|
210
|
+
this.resetIdleTimer();
|
|
208
211
|
return data.embeddings;
|
|
209
212
|
}
|
|
210
213
|
catch (err) {
|
|
@@ -251,7 +254,21 @@ export class EmbeddingSidecar {
|
|
|
251
254
|
}
|
|
252
255
|
this.cleanup();
|
|
253
256
|
}
|
|
257
|
+
resetIdleTimer() {
|
|
258
|
+
if (this._idleTimer)
|
|
259
|
+
clearTimeout(this._idleTimer);
|
|
260
|
+
this._idleTimer = setTimeout(() => {
|
|
261
|
+
console.error(`[embedding-sidecar] Idle for ${this.config.idleTimeoutMs / 1000}s, shutting down to free memory`);
|
|
262
|
+
this.stop();
|
|
263
|
+
}, this.config.idleTimeoutMs);
|
|
264
|
+
// Don't let the timer prevent Node from exiting
|
|
265
|
+
this._idleTimer.unref();
|
|
266
|
+
}
|
|
254
267
|
cleanup() {
|
|
268
|
+
if (this._idleTimer) {
|
|
269
|
+
clearTimeout(this._idleTimer);
|
|
270
|
+
this._idleTimer = null;
|
|
271
|
+
}
|
|
255
272
|
if (this._exitHandler) {
|
|
256
273
|
process.removeListener('exit', this._exitHandler);
|
|
257
274
|
this._exitHandler = null;
|
|
@@ -10,6 +10,7 @@ import { fileURLToPath } from 'url';
|
|
|
10
10
|
import { Worker } from 'worker_threads';
|
|
11
11
|
import { z } from 'zod';
|
|
12
12
|
import { CORE_TYPESCRIPT_SCHEMA } from '../../core/config/schema.js';
|
|
13
|
+
import { stopEmbeddingSidecar } from '../../core/embeddings/embedding-sidecar.js';
|
|
13
14
|
import { EmbeddingsService } from '../../core/embeddings/embeddings.service.js';
|
|
14
15
|
import { ParserFactory } from '../../core/parsers/parser-factory.js';
|
|
15
16
|
import { detectChangedFiles } from '../../core/utils/file-change-detection.js';
|
|
@@ -165,6 +166,7 @@ export const createParseTypescriptProjectTool = (server) => {
|
|
|
165
166
|
const job = jobManager.getJob(jobId);
|
|
166
167
|
if (job && job.status === 'running') {
|
|
167
168
|
jobManager.failJob(jobId, `Worker timed out after ${PARSING.workerTimeoutMs / 60000} minutes`);
|
|
169
|
+
await stopEmbeddingSidecar();
|
|
168
170
|
await terminateWorker('timeout');
|
|
169
171
|
}
|
|
170
172
|
}, PARSING.workerTimeoutMs);
|
|
@@ -183,6 +185,7 @@ export const createParseTypescriptProjectTool = (server) => {
|
|
|
183
185
|
clearTimeout(timeoutId);
|
|
184
186
|
jobManager.failJob(jobId, msg.error);
|
|
185
187
|
debugLog('Async parsing failed', { jobId, error: msg.error });
|
|
188
|
+
stopEmbeddingSidecar();
|
|
186
189
|
terminateWorker('error');
|
|
187
190
|
}
|
|
188
191
|
});
|
|
@@ -191,6 +194,7 @@ export const createParseTypescriptProjectTool = (server) => {
|
|
|
191
194
|
clearTimeout(timeoutId);
|
|
192
195
|
jobManager.failJob(jobId, err.message ?? String(err));
|
|
193
196
|
console.error('Worker thread error:', err);
|
|
197
|
+
stopEmbeddingSidecar();
|
|
194
198
|
terminateWorker('worker-error');
|
|
195
199
|
});
|
|
196
200
|
// Handle worker exit
|
|
@@ -371,12 +375,16 @@ export const createParseTypescriptProjectTool = (server) => {
|
|
|
371
375
|
edgeCount: 0,
|
|
372
376
|
});
|
|
373
377
|
await debugLog('Project status updated to failed', { projectId: finalProjectId });
|
|
378
|
+
// Stop sidecar to free memory (restarts lazily on next embed request)
|
|
379
|
+
await stopEmbeddingSidecar();
|
|
374
380
|
return createSuccessResponse(formatParsePartialSuccess(nodes.length, edges.length, outputPath, neo4jError.message));
|
|
375
381
|
}
|
|
376
382
|
}
|
|
377
383
|
catch (error) {
|
|
378
384
|
console.error('Parse tool error:', error);
|
|
379
385
|
await debugLog('Parse tool error', { projectPath, tsconfigPath, error });
|
|
386
|
+
// Stop sidecar to free memory (restarts lazily on next embed request)
|
|
387
|
+
await stopEmbeddingSidecar();
|
|
380
388
|
return createErrorResponse(error);
|
|
381
389
|
}
|
|
382
390
|
});
|
package/package.json
CHANGED
|
@@ -52,8 +52,14 @@ def load_model():
|
|
|
52
52
|
logger.info(f"Loading {model_name} on {device}...")
|
|
53
53
|
logger.info(f"PyTorch version: {torch.__version__}, MPS available: {torch.backends.mps.is_available()}")
|
|
54
54
|
|
|
55
|
+
use_half = os.environ.get("EMBEDDING_FULL_PRECISION", "").lower() != "true"
|
|
55
56
|
model = SentenceTransformer(model_name, device=device)
|
|
56
|
-
|
|
57
|
+
if use_half:
|
|
58
|
+
model.half()
|
|
59
|
+
logger.info(f"Model loaded in float16 (half precision)")
|
|
60
|
+
else:
|
|
61
|
+
logger.info(f"Model loaded in float32 (full precision)")
|
|
62
|
+
logger.info(f"Running warmup...")
|
|
57
63
|
|
|
58
64
|
# Warm up with a test embedding
|
|
59
65
|
with torch.no_grad():
|