code-graph-context 2.13.0 → 2.13.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,10 @@ import { debugLog } from '../../mcp/utils.js';
 import { getEmbeddingSidecar } from './embedding-sidecar.js';
 const BATCH_CONFIG = {
     maxBatchSize: parseInt(process.env.EMBEDDING_BATCH_SIZE ?? '', 10) || 8,
+    // Max texts per HTTP request to the sidecar. Keeps memory bounded when
+    // multiple parallel workers call embedTextsInBatches concurrently.
+    // The sidecar still handles GPU batching internally via batch_size.
+    httpBatchLimit: parseInt(process.env.EMBEDDING_HTTP_BATCH_LIMIT ?? '', 10) || 50,
 };
 export class LocalEmbeddingsService {
     async embedText(text) {
@@ -22,22 +26,30 @@ export class LocalEmbeddingsService {
     async embedTextsInBatches(texts, _batchSize) {
         if (texts.length === 0)
             return [];
-        // GPU batch size controls how many texts the model processes at once (memory-bound).
-        // We send ALL texts in a single HTTP request and let the sidecar handle GPU batching
-        // internally via model.encode(batch_size=N). This eliminates HTTP round-trip overhead.
         const gpuBatchSize = BATCH_CONFIG.maxBatchSize;
-        const gpuBatches = Math.ceil(texts.length / gpuBatchSize);
-        console.error(`[embedding] Sending ${texts.length} texts in 1 request (gpu_batch_size=${gpuBatchSize}, ~${gpuBatches} GPU batches)`);
-        await debugLog('Batch embedding started', { provider: 'local', textCount: texts.length, gpuBatchSize });
+        const httpLimit = BATCH_CONFIG.httpBatchLimit;
+        const httpBatches = Math.ceil(texts.length / httpLimit);
+        const gpuBatchesPerRequest = Math.ceil(httpLimit / gpuBatchSize);
+        console.error(`[embedding] ${texts.length} texts → ${httpBatches} HTTP requests (http_limit=${httpLimit}, gpu_batch_size=${gpuBatchSize}, ~${gpuBatchesPerRequest} GPU batches/req)`);
+        await debugLog('Batch embedding started', { provider: 'local', textCount: texts.length, gpuBatchSize, httpLimit, httpBatches });
         const sidecar = getEmbeddingSidecar();
-        try {
-            const results = await sidecar.embed(texts, gpuBatchSize);
-            return results;
-        }
-        catch (error) {
-            const msg = error instanceof Error ? error.message : String(error);
-            console.error(`[embedding] FAILED (${texts.length} texts, gpuBatchSize=${gpuBatchSize}): ${msg}`);
-            throw error;
+        const allResults = [];
+        for (let i = 0; i < texts.length; i += httpLimit) {
+            const batch = texts.slice(i, i + httpLimit);
+            const batchNum = Math.floor(i / httpLimit) + 1;
+            try {
+                const results = await sidecar.embed(batch, gpuBatchSize);
+                allResults.push(...results);
+                if (httpBatches > 1) {
+                    console.error(`[embedding] HTTP batch ${batchNum}/${httpBatches}: ${batch.length} texts embedded`);
+                }
+            }
+            catch (error) {
+                const msg = error instanceof Error ? error.message : String(error);
+                console.error(`[embedding] FAILED HTTP batch ${batchNum}/${httpBatches} (${batch.length} texts, gpuBatchSize=${gpuBatchSize}): ${msg}`);
+                throw error;
+            }
         }
+        return allResults;
     }
 }
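
In short: 2.13.0 sent every text to the sidecar in one HTTP request and relied on the sidecar's internal batch_size for GPU batching; 2.13.1 caps each request at httpBatchLimit texts and loops. A minimal sketch of the request/batch arithmetic behind the new log line — the 500-text workload is hypothetical, the defaults come from BATCH_CONFIG above:

    // Same env-parsing pattern as the package: an unset or non-numeric value
    // parses to NaN (and an explicit "0" is falsy), so both fall back to the default.
    const httpLimit = parseInt(process.env.EMBEDDING_HTTP_BATCH_LIMIT ?? '', 10) || 50;
    const gpuBatchSize = parseInt(process.env.EMBEDDING_BATCH_SIZE ?? '', 10) || 8;

    const totalTexts = 500;                                            // hypothetical workload
    const httpBatches = Math.ceil(totalTexts / httpLimit);             // 10 HTTP requests
    const gpuBatchesPerRequest = Math.ceil(httpLimit / gpuBatchSize);  // ceil(50/8) = 7 GPU batches per request
    console.log(`${totalTexts} texts → ${httpBatches} HTTP requests (~${gpuBatchesPerRequest} GPU batches/req)`);

With the defaults, a 500-text job that previously travelled as a single 500-text payload now goes out as ten 50-text requests, bounding per-request memory when several workers call embedTextsInBatches in parallel, while GPU batching behavior is unchanged.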
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "code-graph-context",
-  "version": "2.13.0",
+  "version": "2.13.1",
   "description": "MCP server that builds code graphs to provide rich context to LLMs",
   "type": "module",
   "homepage": "https://github.com/drewdrewH/code-graph-context#readme",