@zuvia-software-solutions/code-mapper 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -159,24 +159,34 @@ export const embedBatch = async (texts) => {
|
|
|
159
159
|
return [];
|
|
160
160
|
if (!ready)
|
|
161
161
|
await initEmbedder();
|
|
162
|
-
//
|
|
163
|
-
//
|
|
164
|
-
|
|
162
|
+
// Batch at Node level to keep stdin/stdout JSON messages manageable.
|
|
163
|
+
// Python does internal length-tiered batching within each chunk.
|
|
164
|
+
// 500 texts per chunk balances IPC overhead vs pipe buffer limits.
|
|
165
|
+
const CHUNK_SIZE = 500;
|
|
166
|
+
const allResults = [];
|
|
167
|
+
const totalChunks = Math.ceil(texts.length / CHUNK_SIZE);
|
|
165
168
|
const t0 = Date.now();
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
169
|
+
console.error(`Code Mapper: embedBatch ${texts.length} texts in ${totalChunks} chunk(s)...`);
|
|
170
|
+
for (let i = 0; i < texts.length; i += CHUNK_SIZE) {
|
|
171
|
+
const chunk = texts.slice(i, i + CHUNK_SIZE);
|
|
172
|
+
const result = await sendAndReceive({
|
|
173
|
+
texts: chunk,
|
|
174
|
+
task: 'nl2code',
|
|
175
|
+
type: 'passage',
|
|
176
|
+
dims: DEFAULT_EMBEDDING_CONFIG.dimensions,
|
|
177
|
+
});
|
|
178
|
+
if (result.error)
|
|
179
|
+
throw new Error(`Batch embedding failed: ${result.error}`);
|
|
180
|
+
if (!result.embeddings || !Array.isArray(result.embeddings)) {
|
|
181
|
+
throw new Error(`Batch embedding returned invalid response: ${JSON.stringify(result).slice(0, 200)}`);
|
|
182
|
+
}
|
|
183
|
+
for (const e of result.embeddings) {
|
|
184
|
+
allResults.push(new Float32Array(e));
|
|
185
|
+
}
|
|
176
186
|
}
|
|
177
187
|
const elapsed = Date.now() - t0;
|
|
178
|
-
console.error(`Code Mapper: embedBatch complete — ${
|
|
179
|
-
return
|
|
188
|
+
console.error(`Code Mapper: embedBatch complete — ${allResults.length} embeddings in ${elapsed}ms`);
|
|
189
|
+
return allResults;
|
|
180
190
|
};
|
|
181
191
|
/**
|
|
182
192
|
* Embed a query text for semantic search (cached, uses "query" prompt type)
|
|
@@ -59,11 +59,16 @@ const processParsingWithWorkers = async (graph, files, symbolTable, _astCache, w
|
|
|
59
59
|
...(sym.parameterTypes !== undefined ? { parameterTypes: sym.parameterTypes } : {}),
|
|
60
60
|
});
|
|
61
61
|
}
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
62
|
+
for (const item of result.imports)
|
|
63
|
+
allImports.push(item);
|
|
64
|
+
for (const item of result.calls)
|
|
65
|
+
allCalls.push(item);
|
|
66
|
+
for (const item of result.heritage)
|
|
67
|
+
allHeritage.push(item);
|
|
68
|
+
for (const item of result.routes)
|
|
69
|
+
allRoutes.push(item);
|
|
70
|
+
for (const item of result.constructorBindings)
|
|
71
|
+
allConstructorBindings.push(item);
|
|
67
72
|
}
|
|
68
73
|
// Merge and log skipped languages
|
|
69
74
|
const skippedLanguages = new Map();
|
|
@@ -201,9 +201,12 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
|
|
|
201
201
|
});
|
|
202
202
|
}, repoPath, importCtx);
|
|
203
203
|
// COLLECT calls for deferred resolution (don't resolve yet — callee may be in later chunk)
|
|
204
|
-
|
|
204
|
+
// Use loop instead of spread to avoid stack overflow on large codebases (100K+ calls)
|
|
205
|
+
for (const call of chunkWorkerData.calls)
|
|
206
|
+
allExtractedCalls.push(call);
|
|
205
207
|
if (chunkWorkerData.constructorBindings) {
|
|
206
|
-
|
|
208
|
+
for (const cb of chunkWorkerData.constructorBindings)
|
|
209
|
+
allConstructorBindings.push(cb);
|
|
207
210
|
}
|
|
208
211
|
// Heritage + Routes can resolve per-chunk (class-level, usually same-file)
|
|
209
212
|
await Promise.all([
|
|
@@ -1072,15 +1072,24 @@ let accumulated = {
|
|
|
1072
1072
|
imports: [], calls: [], heritage: [], routes: [], constructorBindings: [], skippedLanguages: {}, fileCount: 0,
|
|
1073
1073
|
};
|
|
1074
1074
|
let cumulativeProcessed = 0;
|
|
1075
|
+
/** Append src arrays into target without spread (avoids stack overflow on large codebases) */
|
|
1075
1076
|
const mergeResult = (target, src) => {
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1077
|
+
for (const item of src.nodes)
|
|
1078
|
+
target.nodes.push(item);
|
|
1079
|
+
for (const item of src.relationships)
|
|
1080
|
+
target.relationships.push(item);
|
|
1081
|
+
for (const item of src.symbols)
|
|
1082
|
+
target.symbols.push(item);
|
|
1083
|
+
for (const item of src.imports)
|
|
1084
|
+
target.imports.push(item);
|
|
1085
|
+
for (const item of src.calls)
|
|
1086
|
+
target.calls.push(item);
|
|
1087
|
+
for (const item of src.heritage)
|
|
1088
|
+
target.heritage.push(item);
|
|
1089
|
+
for (const item of src.routes)
|
|
1090
|
+
target.routes.push(item);
|
|
1091
|
+
for (const item of src.constructorBindings)
|
|
1092
|
+
target.constructorBindings.push(item);
|
|
1084
1093
|
for (const [lang, count] of Object.entries(src.skippedLanguages)) {
|
|
1085
1094
|
target.skippedLanguages[lang] = (target.skippedLanguages[lang] || 0) + count;
|
|
1086
1095
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@zuvia-software-solutions/code-mapper",
|
|
3
|
-
"version": "2.0.1",
|
|
3
|
+
"version": "2.1.0",
|
|
4
4
|
"description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
|
|
5
5
|
"author": "Abhigyan Patwari",
|
|
6
6
|
"license": "PolyForm-Noncommercial-1.0.0",
|