code-graph-context 2.10.4 → 2.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -10
- package/dist/cli/cli.js +1 -1
- package/dist/core/embeddings/embedding-sidecar.js +18 -1
- package/dist/core/embeddings/embeddings.service.js +3 -2
- package/dist/mcp/tools/parse-typescript-project.tool.js +8 -0
- package/package.json +1 -1
- package/sidecar/embedding_server.py +9 -3
- package/sidecar/requirements.txt +1 -0
package/README.md
CHANGED
|
@@ -189,8 +189,9 @@ If you prefer to edit the config files directly:
|
|
|
189
189
|
| `NEO4J_URI` | No | `bolt://localhost:7687` | Neo4j connection URI |
|
|
190
190
|
| `NEO4J_USER` | No | `neo4j` | Neo4j username |
|
|
191
191
|
| `NEO4J_PASSWORD` | No | `PASSWORD` | Neo4j password |
|
|
192
|
-
| `EMBEDDING_MODEL` | No | `
|
|
192
|
+
| `EMBEDDING_MODEL` | No | `Qwen/Qwen3-Embedding-0.6B` | Local embedding model (see [Embedding Configuration](#embedding-configuration)) |
|
|
193
193
|
| `EMBEDDING_SIDECAR_PORT` | No | `8787` | Port for local embedding server |
|
|
194
|
+
| `EMBEDDING_HALF_PRECISION` | No | `false` | Set `true` for float16 (uses ~0.5x memory) |
|
|
194
195
|
| `OPENAI_ENABLED` | No | `false` | Set `true` to use OpenAI instead of local |
|
|
195
196
|
| `OPENAI_API_KEY` | No* | - | Required when `OPENAI_ENABLED=true` |
|
|
196
197
|
|
|
@@ -535,20 +536,25 @@ This enables queries like "find all hooks that use context" while maintaining AS
|
|
|
535
536
|
|
|
536
537
|
Local embeddings are the default — **no API key needed**. The Python sidecar starts automatically on first use and runs a local model for high-quality code embeddings.
|
|
537
538
|
|
|
539
|
+
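The sidecar loads the model in float32 (full precision) by default. Setting `EMBEDDING_HALF_PRECISION=true` switches it to **float16 (half precision)**, which roughly halves memory usage at a small cost in accuracy; the RAM figures in the model table below assume float16. The sidecar also auto-shuts down after 3 minutes of inactivity to free memory, and restarts lazily when needed (~15-20s).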
|
|
540
|
+
|
|
541
|
+
> **Half precision mode:** To reduce memory usage at the cost of some accuracy, set `EMBEDDING_HALF_PRECISION=true`.
|
|
542
|
+
|
|
538
543
|
### Available Models
|
|
539
544
|
|
|
540
545
|
Set via the `EMBEDDING_MODEL` environment variable:
|
|
541
546
|
|
|
542
|
-
| Model | Dimensions | RAM | Quality | Best For |
|
|
547
|
+
| Model | Dimensions | RAM (fp16) | Quality | Best For |
|
|
543
548
|
|-------|-----------|-----|---------|----------|
|
|
544
|
-
| `
|
|
545
|
-
| `
|
|
546
|
-
| `
|
|
547
|
-
| `
|
|
548
|
-
| `
|
|
549
|
-
| `
|
|
550
|
-
|
|
551
|
-
|
|
549
|
+
| `Qwen/Qwen3-Embedding-0.6B` (default) | 1024 | ~1.2 GB | Best | Default, code-aware, MTEB-Code #1 |
|
|
550
|
+
| `Qodo/Qodo-Embed-1-1.5B` | 1536 | ~4.5 GB | Great | Machines with 32+ GB RAM |
|
|
551
|
+
| `BAAI/bge-base-en-v1.5` | 768 | ~250 MB | Good | General purpose, low RAM |
|
|
552
|
+
| `sentence-transformers/all-MiniLM-L6-v2` | 384 | ~100 MB | OK | Minimal RAM, fast |
|
|
553
|
+
| `nomic-ai/nomic-embed-text-v1.5` | 768 | ~300 MB | Good | Code + prose mixed |
|
|
554
|
+
| `sentence-transformers/all-mpnet-base-v2` | 768 | ~250 MB | Good | Balanced quality/speed |
|
|
555
|
+
| `BAAI/bge-small-en-v1.5` | 384 | ~65 MB | OK | Smallest footprint |
|
|
556
|
+
|
|
557
|
+
**Example:** Use a lightweight model on a low-memory machine:
|
|
552
558
|
```bash
|
|
553
559
|
claude mcp add --scope user code-graph-context \
|
|
554
560
|
-e EMBEDDING_MODEL=BAAI/bge-base-en-v1.5 \
|
package/dist/cli/cli.js
CHANGED
|
@@ -251,7 +251,7 @@ const setupSidecar = async () => {
|
|
|
251
251
|
return;
|
|
252
252
|
}
|
|
253
253
|
// Pre-download the embedding model so first real use is fast
|
|
254
|
-
const modelName = process.env.EMBEDDING_MODEL ?? '
|
|
254
|
+
const modelName = process.env.EMBEDDING_MODEL ?? 'Qwen/Qwen3-Embedding-0.6B';
|
|
255
255
|
await preDownloadModel(sidecarDir, python, modelName);
|
|
256
256
|
};
|
|
257
257
|
/**
|
|
@@ -12,9 +12,10 @@ const __dirname = dirname(__filename);
|
|
|
12
12
|
const DEFAULT_CONFIG = {
|
|
13
13
|
port: parseInt(process.env.EMBEDDING_SIDECAR_PORT ?? '', 10) || 8787,
|
|
14
14
|
host: '127.0.0.1',
|
|
15
|
-
model: process.env.EMBEDDING_MODEL ?? '
|
|
15
|
+
model: process.env.EMBEDDING_MODEL ?? 'Qwen/Qwen3-Embedding-0.6B',
|
|
16
16
|
startupTimeoutMs: 120_000, // 2 min — first run downloads the model
|
|
17
17
|
requestTimeoutMs: 60_000,
|
|
18
|
+
idleTimeoutMs: 180_000, // 3 min — auto-shutdown after no requests
|
|
18
19
|
};
|
|
19
20
|
export class EmbeddingSidecar {
|
|
20
21
|
process = null;
|
|
@@ -23,6 +24,7 @@ export class EmbeddingSidecar {
|
|
|
23
24
|
_dimensions = null;
|
|
24
25
|
stopping = false;
|
|
25
26
|
_exitHandler = null;
|
|
27
|
+
_idleTimer = null;
|
|
26
28
|
constructor(config = {}) {
|
|
27
29
|
this.config = { ...DEFAULT_CONFIG, ...config };
|
|
28
30
|
}
|
|
@@ -205,6 +207,7 @@ export class EmbeddingSidecar {
|
|
|
205
207
|
const data = (await res.json());
|
|
206
208
|
if (data.dimensions)
|
|
207
209
|
this._dimensions = data.dimensions;
|
|
210
|
+
this.resetIdleTimer();
|
|
208
211
|
return data.embeddings;
|
|
209
212
|
}
|
|
210
213
|
catch (err) {
|
|
@@ -251,7 +254,21 @@ export class EmbeddingSidecar {
|
|
|
251
254
|
}
|
|
252
255
|
this.cleanup();
|
|
253
256
|
}
|
|
257
|
+
resetIdleTimer() {
|
|
258
|
+
if (this._idleTimer)
|
|
259
|
+
clearTimeout(this._idleTimer);
|
|
260
|
+
this._idleTimer = setTimeout(() => {
|
|
261
|
+
console.error(`[embedding-sidecar] Idle for ${this.config.idleTimeoutMs / 1000}s, shutting down to free memory`);
|
|
262
|
+
this.stop();
|
|
263
|
+
}, this.config.idleTimeoutMs);
|
|
264
|
+
// Don't let the timer prevent Node from exiting
|
|
265
|
+
this._idleTimer.unref();
|
|
266
|
+
}
|
|
254
267
|
cleanup() {
|
|
268
|
+
if (this._idleTimer) {
|
|
269
|
+
clearTimeout(this._idleTimer);
|
|
270
|
+
this._idleTimer = null;
|
|
271
|
+
}
|
|
255
272
|
if (this._exitHandler) {
|
|
256
273
|
process.removeListener('exit', this._exitHandler);
|
|
257
274
|
this._exitHandler = null;
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* and get the right implementation based on OPENAI_ENABLED.
|
|
6
6
|
*
|
|
7
7
|
* OPENAI_ENABLED=true → OpenAI text-embedding-3-large (requires OPENAI_API_KEY)
|
|
8
|
-
* default → Local Python sidecar with
|
|
8
|
+
* default → Local Python sidecar with Qwen3-Embedding-0.6B
|
|
9
9
|
*/
|
|
10
10
|
import { LocalEmbeddingsService } from './local-embeddings.service.js';
|
|
11
11
|
import { OpenAIEmbeddingsService } from './openai-embeddings.service.js';
|
|
@@ -24,6 +24,7 @@ export const EMBEDDING_DIMENSIONS = {
|
|
|
24
24
|
'text-embedding-3-large': 3072,
|
|
25
25
|
'text-embedding-3-small': 1536,
|
|
26
26
|
// Local models (via sidecar)
|
|
27
|
+
'Qwen/Qwen3-Embedding-0.6B': 1024,
|
|
27
28
|
'Qodo/Qodo-Embed-1-1.5B': 1536,
|
|
28
29
|
'sentence-transformers/all-MiniLM-L6-v2': 384,
|
|
29
30
|
'sentence-transformers/all-mpnet-base-v2': 768,
|
|
@@ -45,7 +46,7 @@ export const getEmbeddingDimensions = () => {
|
|
|
45
46
|
const model = process.env.OPENAI_EMBEDDING_MODEL ?? 'text-embedding-3-large';
|
|
46
47
|
return EMBEDDING_DIMENSIONS[model] ?? 3072;
|
|
47
48
|
}
|
|
48
|
-
const model = process.env.EMBEDDING_MODEL ?? '
|
|
49
|
+
const model = process.env.EMBEDDING_MODEL ?? 'Qwen/Qwen3-Embedding-0.6B';
|
|
49
50
|
return EMBEDDING_DIMENSIONS[model] ?? 1536;
|
|
50
51
|
};
|
|
51
52
|
/**
|
|
@@ -10,6 +10,7 @@ import { fileURLToPath } from 'url';
|
|
|
10
10
|
import { Worker } from 'worker_threads';
|
|
11
11
|
import { z } from 'zod';
|
|
12
12
|
import { CORE_TYPESCRIPT_SCHEMA } from '../../core/config/schema.js';
|
|
13
|
+
import { stopEmbeddingSidecar } from '../../core/embeddings/embedding-sidecar.js';
|
|
13
14
|
import { EmbeddingsService } from '../../core/embeddings/embeddings.service.js';
|
|
14
15
|
import { ParserFactory } from '../../core/parsers/parser-factory.js';
|
|
15
16
|
import { detectChangedFiles } from '../../core/utils/file-change-detection.js';
|
|
@@ -165,6 +166,7 @@ export const createParseTypescriptProjectTool = (server) => {
|
|
|
165
166
|
const job = jobManager.getJob(jobId);
|
|
166
167
|
if (job && job.status === 'running') {
|
|
167
168
|
jobManager.failJob(jobId, `Worker timed out after ${PARSING.workerTimeoutMs / 60000} minutes`);
|
|
169
|
+
await stopEmbeddingSidecar();
|
|
168
170
|
await terminateWorker('timeout');
|
|
169
171
|
}
|
|
170
172
|
}, PARSING.workerTimeoutMs);
|
|
@@ -183,6 +185,7 @@ export const createParseTypescriptProjectTool = (server) => {
|
|
|
183
185
|
clearTimeout(timeoutId);
|
|
184
186
|
jobManager.failJob(jobId, msg.error);
|
|
185
187
|
debugLog('Async parsing failed', { jobId, error: msg.error });
|
|
188
|
+
stopEmbeddingSidecar();
|
|
186
189
|
terminateWorker('error');
|
|
187
190
|
}
|
|
188
191
|
});
|
|
@@ -191,6 +194,7 @@ export const createParseTypescriptProjectTool = (server) => {
|
|
|
191
194
|
clearTimeout(timeoutId);
|
|
192
195
|
jobManager.failJob(jobId, err.message ?? String(err));
|
|
193
196
|
console.error('Worker thread error:', err);
|
|
197
|
+
stopEmbeddingSidecar();
|
|
194
198
|
terminateWorker('worker-error');
|
|
195
199
|
});
|
|
196
200
|
// Handle worker exit
|
|
@@ -371,12 +375,16 @@ export const createParseTypescriptProjectTool = (server) => {
|
|
|
371
375
|
edgeCount: 0,
|
|
372
376
|
});
|
|
373
377
|
await debugLog('Project status updated to failed', { projectId: finalProjectId });
|
|
378
|
+
// Stop sidecar to free memory (restarts lazily on next embed request)
|
|
379
|
+
await stopEmbeddingSidecar();
|
|
374
380
|
return createSuccessResponse(formatParsePartialSuccess(nodes.length, edges.length, outputPath, neo4jError.message));
|
|
375
381
|
}
|
|
376
382
|
}
|
|
377
383
|
catch (error) {
|
|
378
384
|
console.error('Parse tool error:', error);
|
|
379
385
|
await debugLog('Parse tool error', { projectPath, tsconfigPath, error });
|
|
386
|
+
// Stop sidecar to free memory (restarts lazily on next embed request)
|
|
387
|
+
await stopEmbeddingSidecar();
|
|
380
388
|
return createErrorResponse(error);
|
|
381
389
|
}
|
|
382
390
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"""
|
|
2
2
|
Local embedding server for code-graph-context.
|
|
3
|
-
Uses
|
|
3
|
+
Uses Qwen3-Embedding-0.6B for high-quality code embeddings without OpenAI dependency.
|
|
4
4
|
Runs as a sidecar process managed by the Node.js MCP server.
|
|
5
5
|
"""
|
|
6
6
|
|
|
@@ -27,7 +27,7 @@ logger.info(f"Sidecar process starting (pid={os.getpid()})")
|
|
|
27
27
|
app = FastAPI(title="code-graph-context embedding sidecar")
|
|
28
28
|
|
|
29
29
|
model = None
|
|
30
|
-
model_name = os.environ.get("EMBEDDING_MODEL", "
|
|
30
|
+
model_name = os.environ.get("EMBEDDING_MODEL", "Qwen/Qwen3-Embedding-0.6B")
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
class EmbedRequest(BaseModel):
|
|
@@ -52,8 +52,14 @@ def load_model():
|
|
|
52
52
|
logger.info(f"Loading {model_name} on {device}...")
|
|
53
53
|
logger.info(f"PyTorch version: {torch.__version__}, MPS available: {torch.backends.mps.is_available()}")
|
|
54
54
|
|
|
55
|
+
use_half = os.environ.get("EMBEDDING_HALF_PRECISION", "").lower() == "true"
|
|
55
56
|
model = SentenceTransformer(model_name, device=device)
|
|
56
|
-
|
|
57
|
+
if use_half:
|
|
58
|
+
model.half()
|
|
59
|
+
logger.info(f"Model loaded in float16 (half precision)")
|
|
60
|
+
else:
|
|
61
|
+
logger.info(f"Model loaded in float32 (full precision)")
|
|
62
|
+
logger.info(f"Running warmup...")
|
|
57
63
|
|
|
58
64
|
# Warm up with a test embedding
|
|
59
65
|
with torch.no_grad():
|
package/sidecar/requirements.txt
CHANGED