npm - @zuvia-software-solutions/code-mapper - Versions diffs - 2.0.1 → 2.1.0 - Mend

@zuvia-software-solutions/code-mapper 2.0.1 → 2.1.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/core/embeddings/embedder.js +25 -15
package/dist/core/ingestion/parsing-processor.js +10 -5
package/dist/core/ingestion/pipeline.js +5 -2
package/dist/core/ingestion/workers/parse-worker.js +17 -8
package/package.json +1 -1

package/dist/core/embeddings/embedder.js CHANGED Viewed

@@ -159,24 +159,34 @@ export const embedBatch = async (texts) => {
         return [];
     if (!ready)
         await initEmbedder();
-    // Send all texts to Python in one call — Python does optimal length-tiered
-    // batching internally for Metal GPU. No need to double-batch at the Node level.
-    console.error(`Code Mapper: embedBatch sending ${texts.length} texts to MLX...`);
+    // Batch at Node level to keep stdin/stdout JSON messages manageable.
+    // Python does internal length-tiered batching within each chunk.
+    // 500 texts per chunk balances IPC overhead vs pipe buffer limits.
+    const CHUNK_SIZE = 500;
+    const allResults = [];
+    const totalChunks = Math.ceil(texts.length / CHUNK_SIZE);
     const t0 = Date.now();
-    const result = await sendAndReceive({
-        texts,
-        task: 'nl2code',
-        type: 'passage',
-        dims: DEFAULT_EMBEDDING_CONFIG.dimensions,
-    });
-    if (result.error)
-        throw new Error(`Batch embedding failed: ${result.error}`);
-    if (!result.embeddings || !Array.isArray(result.embeddings)) {
-        throw new Error(`Batch embedding returned invalid response: ${JSON.stringify(result).slice(0, 200)}`);
+    console.error(`Code Mapper: embedBatch ${texts.length} texts in ${totalChunks} chunk(s)...`);
+    for (let i = 0; i < texts.length; i += CHUNK_SIZE) {
+        const chunk = texts.slice(i, i + CHUNK_SIZE);
+        const result = await sendAndReceive({
+            texts: chunk,
+            task: 'nl2code',
+            type: 'passage',
+            dims: DEFAULT_EMBEDDING_CONFIG.dimensions,
+        });
+        if (result.error)
+            throw new Error(`Batch embedding failed: ${result.error}`);
+        if (!result.embeddings || !Array.isArray(result.embeddings)) {
+            throw new Error(`Batch embedding returned invalid response: ${JSON.stringify(result).slice(0, 200)}`);
+        }
+        for (const e of result.embeddings) {
+            allResults.push(new Float32Array(e));
+        }
     }
     const elapsed = Date.now() - t0;
-    console.error(`Code Mapper: embedBatch complete — ${result.embeddings.length} embeddings in ${elapsed}ms (${result.ms ?? '?'}ms inference)`);
-    return result.embeddings.map((e) => new Float32Array(e));
+    console.error(`Code Mapper: embedBatch complete — ${allResults.length} embeddings in ${elapsed}ms`);
+    return allResults;
 };
 /**
  * Embed a query text for semantic search (cached, uses "query" prompt type)

package/dist/core/ingestion/parsing-processor.js CHANGED Viewed

@@ -59,11 +59,16 @@ const processParsingWithWorkers = async (graph, files, symbolTable, _astCache, w
                 ...(sym.parameterTypes !== undefined ? { parameterTypes: sym.parameterTypes } : {}),
             });
         }
-        allImports.push(...result.imports);
-        allCalls.push(...result.calls);
-        allHeritage.push(...result.heritage);
-        allRoutes.push(...result.routes);
-        allConstructorBindings.push(...result.constructorBindings);
+        for (const item of result.imports)
+            allImports.push(item);
+        for (const item of result.calls)
+            allCalls.push(item);
+        for (const item of result.heritage)
+            allHeritage.push(item);
+        for (const item of result.routes)
+            allRoutes.push(item);
+        for (const item of result.constructorBindings)
+            allConstructorBindings.push(item);
     }
     // Merge and log skipped languages
     const skippedLanguages = new Map();

package/dist/core/ingestion/pipeline.js CHANGED Viewed

@@ -201,9 +201,12 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
                         });
                     }, repoPath, importCtx);
                     // COLLECT calls for deferred resolution (don't resolve yet — callee may be in later chunk)
-                    allExtractedCalls.push(...chunkWorkerData.calls);
+                    // Use loop instead of spread to avoid stack overflow on large codebases (100K+ calls)
+                    for (const call of chunkWorkerData.calls)
+                        allExtractedCalls.push(call);
                     if (chunkWorkerData.constructorBindings) {
-                        allConstructorBindings.push(...chunkWorkerData.constructorBindings);
+                        for (const cb of chunkWorkerData.constructorBindings)
+                            allConstructorBindings.push(cb);
                     }
                     // Heritage + Routes can resolve per-chunk (class-level, usually same-file)
                     await Promise.all([

package/dist/core/ingestion/workers/parse-worker.js CHANGED Viewed

@@ -1072,15 +1072,24 @@ let accumulated = {
     imports: [], calls: [], heritage: [], routes: [], constructorBindings: [], skippedLanguages: {}, fileCount: 0,
 };
 let cumulativeProcessed = 0;
+/** Append src arrays into target without spread (avoids stack overflow on large codebases) */
 const mergeResult = (target, src) => {
-    target.nodes.push(...src.nodes);
-    target.relationships.push(...src.relationships);
-    target.symbols.push(...src.symbols);
-    target.imports.push(...src.imports);
-    target.calls.push(...src.calls);
-    target.heritage.push(...src.heritage);
-    target.routes.push(...src.routes);
-    target.constructorBindings.push(...src.constructorBindings);
+    for (const item of src.nodes)
+        target.nodes.push(item);
+    for (const item of src.relationships)
+        target.relationships.push(item);
+    for (const item of src.symbols)
+        target.symbols.push(item);
+    for (const item of src.imports)
+        target.imports.push(item);
+    for (const item of src.calls)
+        target.calls.push(item);
+    for (const item of src.heritage)
+        target.heritage.push(item);
+    for (const item of src.routes)
+        target.routes.push(item);
+    for (const item of src.constructorBindings)
+        target.constructorBindings.push(item);
     for (const [lang, count] of Object.entries(src.skippedLanguages)) {
         target.skippedLanguages[lang] = (target.skippedLanguages[lang] || 0) + count;
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@zuvia-software-solutions/code-mapper",
-  "version": "2.0.1",
+  "version": "2.1.0",
   "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
   "author": "Abhigyan Patwari",
   "license": "PolyForm-Noncommercial-1.0.0",