@zuvia-software-solutions/code-mapper 2.3.9 → 2.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -298,22 +298,34 @@ export const analyzeCommand = async (inputPath, options) => {
298
298
  closeDb(dbPath);
299
299
  // Run Python embedder in batch mode — reads from SQLite, embeds, writes back.
300
300
  // Zero IPC overhead: ~3x faster than Node↔Python JSON streaming.
301
- const { execFile } = await import('child_process');
301
+ const { spawn: spawnChild } = await import('child_process');
302
302
  const { fileURLToPath } = await import('url');
303
303
  const mlxScript = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..', 'models', 'mlx-embedder.py');
304
304
  await new Promise((resolve, reject) => {
305
- const proc = execFile('python3', [mlxScript, 'batch', dbPath, '--dims', '256', '--max-tokens', '2048'], {
306
- maxBuffer: 10 * 1024 * 1024,
307
- timeout: 600_000, // 10 min max for huge codebases
308
- }, (err, _stdout, stderr) => {
309
- if (err) {
310
- console.error(stderr || '');
311
- reject(new Error(`Embedding failed: ${err.message}`));
305
+ // Use spawn (not execFile) — no internal buffer limit, streams only.
306
+ // execFile buffers all stdout in memory which causes OOM/kill on large codebases.
307
+ const proc = spawnChild('python3', [mlxScript, 'batch', dbPath, '--dims', '256', '--max-tokens', '2048'], {
308
+ stdio: ['ignore', 'pipe', 'pipe'],
309
+ });
310
+ let stderrBuf = '';
311
+ proc.stderr?.on('data', (chunk) => {
312
+ stderrBuf += chunk.toString();
313
+ // Keep only last 10KB of stderr for error reporting
314
+ if (stderrBuf.length > 10240)
315
+ stderrBuf = stderrBuf.slice(-10240);
316
+ });
317
+ proc.on('close', (code) => {
318
+ if (code !== 0) {
319
+ console.error(stderrBuf);
320
+ reject(new Error(`Embedding failed: python3 exited with code ${code}`));
312
321
  }
313
322
  else {
314
323
  resolve();
315
324
  }
316
325
  });
326
+ proc.on('error', (err) => {
327
+ reject(new Error(`Embedding failed: ${err.message}`));
328
+ });
317
329
  // Stream progress from Python's JSON lines on stdout
318
330
  let lineBuf = '';
319
331
  proc.stdout?.on('data', (chunk) => {
@@ -753,20 +753,7 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
753
753
  }
754
754
  if (eligible.length === 0)
755
755
  return results;
756
- // Group calls by file — process one file at a time so tsgo only needs
757
- // one file hot in memory. LSP is sequential over stdio, so concurrent
758
- // requests just create a queue that causes timeouts.
759
- const byFile = new Map();
760
- for (const call of eligible) {
761
- let list = byFile.get(call.filePath);
762
- if (!list) {
763
- list = [];
764
- byFile.set(call.filePath, list);
765
- }
766
- list.push(call);
767
- }
768
756
  // Built-in receiver names that resolve to external types, not project code.
769
- // tsgo always fails on these — skip them to avoid wasted LSP round-trips.
770
757
  const BUILTIN_RECEIVERS = new Set([
771
758
  'console', 'Math', 'JSON', 'Object', 'Array', 'String', 'Number', 'Boolean',
772
759
  'Date', 'RegExp', 'Error', 'Promise', 'Map', 'Set', 'WeakMap', 'WeakSet',
@@ -779,13 +766,10 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
779
766
  // Pre-filter calls where tsgo won't add value:
780
767
  // A. Free-form calls with unambiguous name — heuristic resolves perfectly
781
768
  // B. Member calls on built-in receivers — tsgo always fails on these
782
- // Note: member calls with known receiver types are NOT skipped — tsgo provides
783
- // compiler-verified 0.99 confidence that the heuristic can't match.
784
769
  const tsgoEligible = [];
785
770
  let skippedUnambiguous = 0;
786
771
  let skippedBuiltin = 0;
787
772
  for (const call of eligible) {
788
- // A. Free-form, unique name match
789
773
  if (call.callForm === 'free' || call.callForm === undefined) {
790
774
  const resolved = ctx.resolve(call.calledName, call.filePath);
791
775
  if (resolved && resolved.candidates.length === 1) {
@@ -793,7 +777,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
793
777
  continue;
794
778
  }
795
779
  }
796
- // B. Built-in receiver — tsgo resolves to node_modules/lib.d.ts, never project code
797
780
  if (call.callForm === 'member' && call.receiverName && BUILTIN_RECEIVERS.has(call.receiverName)) {
798
781
  skippedBuiltin++;
799
782
  continue;
@@ -812,10 +795,7 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
812
795
  }
813
796
  const t0 = Date.now();
814
797
  const skippedTotal = skippedUnambiguous + skippedBuiltin;
815
- // Adaptive parallelism based on three constraints:
816
- // 1. CPU: 75% of cores — parsing workers are done, leave 25% for Node.js event loop + OS
817
- // 2. Memory: each tsgo loads the full project (~500MB estimate) — cap by free system memory
818
- // 3. Workload: at least 50 files per process to amortize ~0.5s startup cost
798
+ // Adaptive parallelism
819
799
  const osModule = await import('os');
820
800
  const cpuCount = osModule.cpus().length;
821
801
  const freeMemGB = osModule.freemem() / (1024 * 1024 * 1024);
@@ -826,12 +806,9 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
826
806
  if (process.env['CODE_MAPPER_VERBOSE']) {
827
807
  console.error(`Code Mapper: tsgo resolving ${tsgoEligible.length} calls across ${tsgoByFile.size} files with ${actualWorkers} process${actualWorkers > 1 ? 'es' : ''} (skipped ${skippedTotal}: ${skippedUnambiguous} unambiguous, ${skippedBuiltin} builtin)...`);
828
808
  }
829
- // Dynamic dispatch: shared queue, each process grabs the next file when done.
830
- // Naturally self-balancing — fast processes get more work, zero idle time.
831
- // Sort heaviest files first so they're assigned early (avoids tail latency).
809
+ // Dynamic dispatch: shared queue sorted by call count descending
832
810
  const fileEntries = [...tsgoByFile.entries()];
833
811
  fileEntries.sort((a, b) => b[1].length - a[1].length);
834
- // Shared progress counter and file queue (single-threaded, no mutex needed)
835
812
  let totalFilesProcessed = 0;
836
813
  let nextFileIdx = 0;
837
814
  const tsgoTotalFiles = tsgoByFile.size;
@@ -840,7 +817,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
840
817
  return null;
841
818
  return fileEntries[nextFileIdx++];
842
819
  };
843
- /** Resolve files from the shared queue using a single tsgo service */
844
820
  const resolveWorker = async (service) => {
845
821
  const sliceResults = new Map();
846
822
  let sliceResolved = 0;
@@ -865,7 +841,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
865
841
  sliceFailed++;
866
842
  continue;
867
843
  }
868
- // Match by exact startLine, then by range containment
869
844
  let bestMatch;
870
845
  for (const sym of targetSymbols) {
871
846
  const node = graph.getNode(toNodeId(sym.nodeId));
@@ -908,8 +883,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
908
883
  sliceFailed++;
909
884
  }
910
885
  }
911
- // Close file after all its calls are resolved — frees tsgo memory,
912
- // prevents progressive slowdown as the type graph grows
913
886
  service.notifyFileDeleted(absFilePath);
914
887
  }
915
888
  return { resolved: sliceResolved, failed: sliceFailed, results: sliceResults };
@@ -917,7 +890,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
917
890
  let resolved = 0;
918
891
  let failed = 0;
919
892
  if (actualWorkers === 1) {
920
- // Single process — use the existing service (already started)
921
893
  const outcome = await resolveWorker(tsgoService);
922
894
  resolved = outcome.resolved;
923
895
  failed = outcome.failed;
@@ -925,10 +897,8 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
925
897
  results.set(k, v);
926
898
  }
927
899
  else {
928
- // Parallel — spawn extra services, all pull from shared queue
929
900
  const extraServices = [];
930
901
  try {
931
- // Start extra tsgo processes in parallel
932
902
  const startPromises = [];
933
903
  for (let i = 1; i < actualWorkers; i++) {
934
904
  startPromises.push((async () => {
@@ -943,11 +913,9 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
943
913
  if (svc)
944
914
  extraServices.push(svc);
945
915
  }
946
- // Build final service list: original + extras that started successfully
947
916
  const services = [tsgoService, ...extraServices];
948
917
  if (process.env['CODE_MAPPER_VERBOSE'])
949
918
  console.error(`Code Mapper: ${services.length} tsgo processes ready, resolving with dynamic dispatch...`);
950
- // All workers pull from the shared queue — naturally self-balancing
951
919
  const outcomes = await Promise.all(services.map(svc => resolveWorker(svc)));
952
920
  for (const outcome of outcomes) {
953
921
  resolved += outcome.resolved;
@@ -957,7 +925,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
957
925
  }
958
926
  }
959
927
  finally {
960
- // Stop extra services (the original is stopped by the caller)
961
928
  for (const svc of extraServices)
962
929
  svc.stop();
963
930
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@zuvia-software-solutions/code-mapper",
3
- "version": "2.3.9",
3
+ "version": "2.3.10",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",