code-graph-context 2.10.4 → 2.10.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -191,6 +191,7 @@ If you prefer to edit the config files directly:
191
191
  | `NEO4J_PASSWORD` | No | `PASSWORD` | Neo4j password |
192
192
  | `EMBEDDING_MODEL` | No | `Qodo/Qodo-Embed-1-1.5B` | Local embedding model (see [Embedding Configuration](#embedding-configuration)) |
193
193
  | `EMBEDDING_SIDECAR_PORT` | No | `8787` | Port for local embedding server |
194
+ | `EMBEDDING_FULL_PRECISION` | No | `false` | Set `true` to load the local model in float32 instead of the default float16 (uses ~2x memory) |
194
195
  | `OPENAI_ENABLED` | No | `false` | Set `true` to use OpenAI instead of local |
195
196
  | `OPENAI_API_KEY` | No* | - | Required when `OPENAI_ENABLED=true` |
196
197
 
@@ -535,20 +536,24 @@ This enables queries like "find all hooks that use context" while maintaining AS
535
536
 
536
537
  Local embeddings are the default — **no API key needed**. The Python sidecar starts automatically on first use and runs a local model for high-quality code embeddings.
537
538
 
539
+ The sidecar uses **float16 (half precision)** by default, which halves memory usage compared with float32 with no meaningful quality loss. It also shuts down automatically after 3 minutes of inactivity to free memory, and restarts lazily on the next embedding request (the restart takes ~15-20 s).
540
+
541
+ > **Full precision mode:** If you have 32+ GB of RAM and want float32 (roughly twice the memory of the default float16), set `EMBEDDING_FULL_PRECISION=true`.
542
+
538
543
  ### Available Models
539
544
 
540
545
  Set via the `EMBEDDING_MODEL` environment variable:
541
546
 
542
- | Model | Dimensions | RAM | Quality | Best For |
547
+ | Model | Dimensions | RAM (fp16) | Quality | Best For |
543
548
  |-------|-----------|-----|---------|----------|
544
- | `Qodo/Qodo-Embed-1-1.5B` (default) | 1536 | ~9 GB | Best | Machines with 32+ GB RAM |
545
- | `BAAI/bge-base-en-v1.5` | 768 | ~500 MB | Good | General purpose, low RAM |
546
- | `sentence-transformers/all-MiniLM-L6-v2` | 384 | ~200 MB | OK | Minimal RAM, fast |
547
- | `nomic-ai/nomic-embed-text-v1.5` | 768 | ~600 MB | Good | Code + prose mixed |
548
- | `sentence-transformers/all-mpnet-base-v2` | 768 | ~500 MB | Good | Balanced quality/speed |
549
- | `BAAI/bge-small-en-v1.5` | 384 | ~130 MB | OK | Smallest footprint |
550
-
551
- **Example:** Use a lightweight model on a 16GB machine:
549
+ | `Qodo/Qodo-Embed-1-1.5B` (default) | 1536 | ~4.5 GB | Best | Default; works on 16 GB machines |
550
+ | `BAAI/bge-base-en-v1.5` | 768 | ~250 MB | Good | General purpose, low RAM |
551
+ | `sentence-transformers/all-MiniLM-L6-v2` | 384 | ~100 MB | OK | Minimal RAM, fast |
552
+ | `nomic-ai/nomic-embed-text-v1.5` | 768 | ~300 MB | Good | Code + prose mixed |
553
+ | `sentence-transformers/all-mpnet-base-v2` | 768 | ~250 MB | Good | Balanced quality/speed |
554
+ | `BAAI/bge-small-en-v1.5` | 384 | ~65 MB | OK | Smallest footprint |
555
+
556
+ **Example:** Use a lightweight model on a low-memory machine:
552
557
  ```bash
553
558
  claude mcp add --scope user code-graph-context \
554
559
  -e EMBEDDING_MODEL=BAAI/bge-base-en-v1.5 \
@@ -15,6 +15,7 @@ const DEFAULT_CONFIG = {
15
15
  model: process.env.EMBEDDING_MODEL ?? 'Qodo/Qodo-Embed-1-1.5B',
16
16
  startupTimeoutMs: 120_000, // 2 min — first run downloads the model
17
17
  requestTimeoutMs: 60_000,
18
+ idleTimeoutMs: 180_000, // 3 min — auto-shutdown after no requests
18
19
  };
19
20
  export class EmbeddingSidecar {
20
21
  process = null;
@@ -23,6 +24,7 @@ export class EmbeddingSidecar {
23
24
  _dimensions = null;
24
25
  stopping = false;
25
26
  _exitHandler = null;
27
+ _idleTimer = null;
26
28
  constructor(config = {}) {
27
29
  this.config = { ...DEFAULT_CONFIG, ...config };
28
30
  }
@@ -205,6 +207,7 @@ export class EmbeddingSidecar {
205
207
  const data = (await res.json());
206
208
  if (data.dimensions)
207
209
  this._dimensions = data.dimensions;
210
+ this.resetIdleTimer();
208
211
  return data.embeddings;
209
212
  }
210
213
  catch (err) {
@@ -251,7 +254,21 @@ export class EmbeddingSidecar {
251
254
  }
252
255
  this.cleanup();
253
256
  }
257
+ resetIdleTimer() {
258
+ if (this._idleTimer)
259
+ clearTimeout(this._idleTimer);
260
+ this._idleTimer = setTimeout(() => {
261
+ console.error(`[embedding-sidecar] Idle for ${this.config.idleTimeoutMs / 1000}s, shutting down to free memory`);
262
+ this.stop();
263
+ }, this.config.idleTimeoutMs);
264
+ // Don't let the timer prevent Node from exiting
265
+ this._idleTimer.unref();
266
+ }
254
267
  cleanup() {
268
+ if (this._idleTimer) {
269
+ clearTimeout(this._idleTimer);
270
+ this._idleTimer = null;
271
+ }
255
272
  if (this._exitHandler) {
256
273
  process.removeListener('exit', this._exitHandler);
257
274
  this._exitHandler = null;
@@ -10,6 +10,7 @@ import { fileURLToPath } from 'url';
10
10
  import { Worker } from 'worker_threads';
11
11
  import { z } from 'zod';
12
12
  import { CORE_TYPESCRIPT_SCHEMA } from '../../core/config/schema.js';
13
+ import { stopEmbeddingSidecar } from '../../core/embeddings/embedding-sidecar.js';
13
14
  import { EmbeddingsService } from '../../core/embeddings/embeddings.service.js';
14
15
  import { ParserFactory } from '../../core/parsers/parser-factory.js';
15
16
  import { detectChangedFiles } from '../../core/utils/file-change-detection.js';
@@ -165,6 +166,7 @@ export const createParseTypescriptProjectTool = (server) => {
165
166
  const job = jobManager.getJob(jobId);
166
167
  if (job && job.status === 'running') {
167
168
  jobManager.failJob(jobId, `Worker timed out after ${PARSING.workerTimeoutMs / 60000} minutes`);
169
+ await stopEmbeddingSidecar();
168
170
  await terminateWorker('timeout');
169
171
  }
170
172
  }, PARSING.workerTimeoutMs);
@@ -183,6 +185,7 @@ export const createParseTypescriptProjectTool = (server) => {
183
185
  clearTimeout(timeoutId);
184
186
  jobManager.failJob(jobId, msg.error);
185
187
  debugLog('Async parsing failed', { jobId, error: msg.error });
188
+ stopEmbeddingSidecar();
186
189
  terminateWorker('error');
187
190
  }
188
191
  });
@@ -191,6 +194,7 @@ export const createParseTypescriptProjectTool = (server) => {
191
194
  clearTimeout(timeoutId);
192
195
  jobManager.failJob(jobId, err.message ?? String(err));
193
196
  console.error('Worker thread error:', err);
197
+ stopEmbeddingSidecar();
194
198
  terminateWorker('worker-error');
195
199
  });
196
200
  // Handle worker exit
@@ -371,12 +375,16 @@ export const createParseTypescriptProjectTool = (server) => {
371
375
  edgeCount: 0,
372
376
  });
373
377
  await debugLog('Project status updated to failed', { projectId: finalProjectId });
378
+ // Stop sidecar to free memory (restarts lazily on next embed request)
379
+ await stopEmbeddingSidecar();
374
380
  return createSuccessResponse(formatParsePartialSuccess(nodes.length, edges.length, outputPath, neo4jError.message));
375
381
  }
376
382
  }
377
383
  catch (error) {
378
384
  console.error('Parse tool error:', error);
379
385
  await debugLog('Parse tool error', { projectPath, tsconfigPath, error });
386
+ // Stop sidecar to free memory (restarts lazily on next embed request)
387
+ await stopEmbeddingSidecar();
380
388
  return createErrorResponse(error);
381
389
  }
382
390
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "code-graph-context",
3
- "version": "2.10.4",
3
+ "version": "2.10.5",
4
4
  "description": "MCP server that builds code graphs to provide rich context to LLMs",
5
5
  "type": "module",
6
6
  "homepage": "https://github.com/drewdrewH/code-graph-context#readme",
@@ -52,8 +52,14 @@ def load_model():
52
52
  logger.info(f"Loading {model_name} on {device}...")
53
53
  logger.info(f"PyTorch version: {torch.__version__}, MPS available: {torch.backends.mps.is_available()}")
54
54
 
55
+ use_half = os.environ.get("EMBEDDING_FULL_PRECISION", "").lower() != "true"
55
56
  model = SentenceTransformer(model_name, device=device)
56
- logger.info(f"Model loaded into memory, running warmup...")
57
+ if use_half:
58
+ model.half()
59
+ logger.info(f"Model loaded in float16 (half precision)")
60
+ else:
61
+ logger.info(f"Model loaded in float32 (full precision)")
62
+ logger.info(f"Running warmup...")
57
63
 
58
64
  # Warm up with a test embedding
59
65
  with torch.no_grad():