code-graph-context 2.13.0 → 2.13.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -7,6 +7,10 @@ import { debugLog } from '../../mcp/utils.js';
|
|
|
7
7
|
import { getEmbeddingSidecar } from './embedding-sidecar.js';
|
|
8
8
|
const BATCH_CONFIG = {
|
|
9
9
|
maxBatchSize: parseInt(process.env.EMBEDDING_BATCH_SIZE ?? '', 10) || 8,
|
|
10
|
+
// Max texts per HTTP request to the sidecar. Keeps memory bounded when
|
|
11
|
+
// multiple parallel workers call embedTextsInBatches concurrently.
|
|
12
|
+
// The sidecar still handles GPU batching internally via batch_size.
|
|
13
|
+
httpBatchLimit: parseInt(process.env.EMBEDDING_HTTP_BATCH_LIMIT ?? '', 10) || 50,
|
|
10
14
|
};
|
|
11
15
|
export class LocalEmbeddingsService {
|
|
12
16
|
async embedText(text) {
|
|
@@ -22,22 +26,30 @@ export class LocalEmbeddingsService {
|
|
|
22
26
|
async embedTextsInBatches(texts, _batchSize) {
|
|
23
27
|
if (texts.length === 0)
|
|
24
28
|
return [];
|
|
25
|
-
// GPU batch size controls how many texts the model processes at once (memory-bound).
|
|
26
|
-
// We send ALL texts in a single HTTP request and let the sidecar handle GPU batching
|
|
27
|
-
// internally via model.encode(batch_size=N). This eliminates HTTP round-trip overhead.
|
|
28
29
|
const gpuBatchSize = BATCH_CONFIG.maxBatchSize;
|
|
29
|
-
const
|
|
30
|
-
|
|
31
|
-
|
|
30
|
+
const httpLimit = BATCH_CONFIG.httpBatchLimit;
|
|
31
|
+
const httpBatches = Math.ceil(texts.length / httpLimit);
|
|
32
|
+
const gpuBatchesPerRequest = Math.ceil(httpLimit / gpuBatchSize);
|
|
33
|
+
console.error(`[embedding] ${texts.length} texts → ${httpBatches} HTTP requests (http_limit=${httpLimit}, gpu_batch_size=${gpuBatchSize}, ~${gpuBatchesPerRequest} GPU batches/req)`);
|
|
34
|
+
await debugLog('Batch embedding started', { provider: 'local', textCount: texts.length, gpuBatchSize, httpLimit, httpBatches });
|
|
32
35
|
const sidecar = getEmbeddingSidecar();
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
36
|
+
const allResults = [];
|
|
37
|
+
for (let i = 0; i < texts.length; i += httpLimit) {
|
|
38
|
+
const batch = texts.slice(i, i + httpLimit);
|
|
39
|
+
const batchNum = Math.floor(i / httpLimit) + 1;
|
|
40
|
+
try {
|
|
41
|
+
const results = await sidecar.embed(batch, gpuBatchSize);
|
|
42
|
+
allResults.push(...results);
|
|
43
|
+
if (httpBatches > 1) {
|
|
44
|
+
console.error(`[embedding] HTTP batch ${batchNum}/${httpBatches}: ${batch.length} texts embedded`);
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
catch (error) {
|
|
48
|
+
const msg = error instanceof Error ? error.message : String(error);
|
|
49
|
+
console.error(`[embedding] FAILED HTTP batch ${batchNum}/${httpBatches} (${batch.length} texts, gpuBatchSize=${gpuBatchSize}): ${msg}`);
|
|
50
|
+
throw error;
|
|
51
|
+
}
|
|
41
52
|
}
|
|
53
|
+
return allResults;
|
|
42
54
|
}
|
|
43
55
|
}
|
package/package.json
CHANGED