npm - @zuvia-software-solutions/code-mapper - Versions diffs - 2.3.8 → 2.3.10 - Mend

@zuvia-software-solutions/code-mapper 2.3.8 → 2.3.10

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/cli/analyze.js +54 -11
package/dist/core/ingestion/call-processor.d.ts +1 -1
package/dist/core/ingestion/call-processor.js +27 -59
package/dist/core/ingestion/pipeline.js +3 -3
package/dist/core/semantic/tsgo-service.js +3 -3
package/dist/types/pipeline.d.ts +1 -0
package/package.json +1 -1

package/dist/cli/analyze.js CHANGED Viewed

@@ -136,10 +136,12 @@ export const analyzeCommand = async (inputPath, options) => {
     const t0Global = Date.now();
     const cpuStart = process.cpuUsage();
     let peakRssMB = 0;
-    // Phase timing tracker — records wall time and RSS for each phase
+    // Phase timing tracker — records wall time, RSS, file count, and worker count per phase
     const phaseTimes = [];
     let currentPhaseName = 'init';
     let currentPhaseStart = Date.now();
+    let currentPhaseFiles = 0;
+    let currentPhaseWorkers = 0;
     const recordPhase = (nextPhase) => {
         const now = Date.now();
         const elapsed = now - currentPhaseStart;
@@ -148,10 +150,14 @@ export const analyzeCommand = async (inputPath, options) => {
                 name: currentPhaseName,
                 ms: elapsed,
                 rssMB: Math.round(process.memoryUsage.rss() / (1024 * 1024)),
+                ...(currentPhaseFiles > 0 ? { fileCount: currentPhaseFiles } : {}),
+                ...(currentPhaseWorkers > 0 ? { workerCount: currentPhaseWorkers } : {}),
             });
         }
         currentPhaseName = nextPhase;
         currentPhaseStart = now;
+        currentPhaseFiles = 0;
+        currentPhaseWorkers = 0;
     };
     // Live resource stats for the progress bar
     const cpuCount = os.cpus().length;
@@ -222,7 +228,23 @@ export const analyzeCommand = async (inputPath, options) => {
         const baseLabel = PHASE_LABELS[progress.phase] || progress.phase;
         let phaseLabel = baseLabel;
         if (progress.stats && progress.stats.totalFiles > 0) {
-            phaseLabel += ` (${progress.stats.filesProcessed.toLocaleString()}/${progress.stats.totalFiles.toLocaleString()})`;
+            const current = progress.stats.filesProcessed;
+            const total = progress.stats.totalFiles;
+            // Track peak file count and worker count for the summary
+            currentPhaseFiles = Math.max(currentPhaseFiles, total);
+            if (progress.stats.workerCount)
+                currentPhaseWorkers = Math.max(currentPhaseWorkers, progress.stats.workerCount);
+            phaseLabel += ` (${current.toLocaleString()}/${total.toLocaleString()})`;
+            // Show rate (files/s) after 1s
+            const elapsedSec = (Date.now() - phaseStart) / 1000;
+            if (elapsedSec >= 1 && current > 0) {
+                const rate = Math.round(current / elapsedSec);
+                phaseLabel += ` ${rate}/s`;
+            }
+            // Show worker/process count if available
+            if (progress.stats.workerCount && progress.stats.workerCount > 1) {
+                phaseLabel += ` [${progress.stats.workerCount}p]`;
+            }
         }
         const scaled = Math.round(progress.percent * 0.6);
         updateBar(scaled, phaseLabel, baseLabel);
@@ -276,22 +298,34 @@ export const analyzeCommand = async (inputPath, options) => {
         closeDb(dbPath);
         // Run Python embedder in batch mode — reads from SQLite, embeds, writes back.
         // Zero IPC overhead: ~3x faster than Node↔Python JSON streaming.
-        const { execFile } = await import('child_process');
+        const { spawn: spawnChild } = await import('child_process');
         const { fileURLToPath } = await import('url');
         const mlxScript = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..', 'models', 'mlx-embedder.py');
         await new Promise((resolve, reject) => {
-            const proc = execFile('python3', [mlxScript, 'batch', dbPath, '--dims', '256', '--max-tokens', '2048'], {
-                maxBuffer: 10 * 1024 * 1024,
-                timeout: 600_000, // 10 min max for huge codebases
-            }, (err, _stdout, stderr) => {
-                if (err) {
-                    console.error(stderr || '');
-                    reject(new Error(`Embedding failed: ${err.message}`));
+            // Use spawn (not execFile) — no internal buffer limit, streams only.
+            // execFile buffers all stdout in memory which causes OOM/kill on large codebases.
+            const proc = spawnChild('python3', [mlxScript, 'batch', dbPath, '--dims', '256', '--max-tokens', '2048'], {
+                stdio: ['ignore', 'pipe', 'pipe'],
+            });
+            let stderrBuf = '';
+            proc.stderr?.on('data', (chunk) => {
+                stderrBuf += chunk.toString();
+                // Keep only last 10KB of stderr for error reporting
+                if (stderrBuf.length > 10240)
+                    stderrBuf = stderrBuf.slice(-10240);
+            });
+            proc.on('close', (code) => {
+                if (code !== 0) {
+                    console.error(stderrBuf);
+                    reject(new Error(`Embedding failed: python3 exited with code ${code}`));
                 }
                 else {
                     resolve();
                 }
             });
+            proc.on('error', (err) => {
+                reject(new Error(`Embedding failed: ${err.message}`));
+            });
             // Stream progress from Python's JSON lines on stdout
             let lineBuf = '';
             proc.stdout?.on('data', (chunk) => {
@@ -433,7 +467,16 @@ export const analyzeCommand = async (inputPath, options) => {
         const pct = Math.round((phase.ms / totalMs) * 100);
         const name = PHASE_DISPLAY_NAMES[phase.name] || phase.name;
         const bar = pct >= 2 ? ' ' + '█'.repeat(Math.max(1, Math.round(pct / 3))) : '';
-        console.log(`    ${name.padEnd(22)} ${sec.padStart(6)}s  ${String(pct).padStart(3)}%  ${phase.rssMB}MB${bar}`);
+        // Build extra stats: rate + workers
+        let extra = '';
+        if (phase.fileCount && phase.ms > 0) {
+            const rate = Math.round(phase.fileCount / (phase.ms / 1000));
+            extra += `  ${phase.fileCount.toLocaleString()} files (${rate}/s)`;
+        }
+        if (phase.workerCount && phase.workerCount > 1) {
+            extra += `  [${phase.workerCount}p]`;
+        }
+        console.log(`    ${name.padEnd(22)} ${sec.padStart(6)}s  ${String(pct).padStart(3)}%  ${phase.rssMB}MB${bar}${extra}`);
     }
     console.log(`    ${'─'.repeat(50)}`);
     console.log(`    ${'Total'.padEnd(22)} ${totalTime.padStart(6)}s  100%  ${peakRssMB}MB peak`);

package/dist/core/ingestion/call-processor.d.ts CHANGED Viewed

@@ -10,7 +10,7 @@ export declare const processCalls: (graph: KnowledgeGraph, files: {
 }[], astCache: ASTCache, ctx: ResolutionContext, onProgress?: (current: number, total: number) => void) => Promise<ExtractedHeritage[]>;
 export declare const extractReturnTypeName: (raw: string, depth?: number) => string | undefined;
 /** Resolve pre-extracted call sites from workers (no AST parsing needed) */
-export declare const processCallsFromExtracted: (graph: KnowledgeGraph, extractedCalls: ExtractedCall[], ctx: ResolutionContext, onProgress?: (current: number, total: number) => void, constructorBindings?: FileConstructorBindings[], tsgoService?: TsgoService | null, repoPath?: string) => Promise<void>;
+export declare const processCallsFromExtracted: (graph: KnowledgeGraph, extractedCalls: ExtractedCall[], ctx: ResolutionContext, onProgress?: (current: number, total: number, workerCount?: number) => void, constructorBindings?: FileConstructorBindings[], tsgoService?: TsgoService | null, repoPath?: string) => Promise<void>;
 /** Resolve pre-extracted Laravel routes to CALLS edges from route files to controller methods */
 export declare const processRoutesFromExtracted: (graph: KnowledgeGraph, extractedRoutes: ExtractedRoute[], ctx: ResolutionContext, onProgress?: (current: number, total: number) => void) => Promise<void>;
 /**

package/dist/core/ingestion/call-processor.js CHANGED Viewed

@@ -753,20 +753,7 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
     }
     if (eligible.length === 0)
         return results;
-    // Group calls by file — process one file at a time so tsgo only needs
-    // one file hot in memory. LSP is sequential over stdio, so concurrent
-    // requests just create a queue that causes timeouts.
-    const byFile = new Map();
-    for (const call of eligible) {
-        let list = byFile.get(call.filePath);
-        if (!list) {
-            list = [];
-            byFile.set(call.filePath, list);
-        }
-        list.push(call);
-    }
     // Built-in receiver names that resolve to external types, not project code.
-    // tsgo always fails on these — skip them to avoid wasted LSP round-trips.
     const BUILTIN_RECEIVERS = new Set([
         'console', 'Math', 'JSON', 'Object', 'Array', 'String', 'Number', 'Boolean',
         'Date', 'RegExp', 'Error', 'Promise', 'Map', 'Set', 'WeakMap', 'WeakSet',
@@ -778,14 +765,11 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
     ]);
     // Pre-filter calls where tsgo won't add value:
     // A. Free-form calls with unambiguous name — heuristic resolves perfectly
-    // B. Member calls with known receiver type AND unambiguous method — heuristic handles
-    // C. Member calls on built-in receivers — tsgo always fails on these
+    // B. Member calls on built-in receivers — tsgo always fails on these
     const tsgoEligible = [];
     let skippedUnambiguous = 0;
-    const skippedKnownType = 0;
     let skippedBuiltin = 0;
     for (const call of eligible) {
-        // A. Free-form, unique name match
         if (call.callForm === 'free' || call.callForm === undefined) {
             const resolved = ctx.resolve(call.calledName, call.filePath);
             if (resolved && resolved.candidates.length === 1) {
@@ -793,7 +777,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
                 continue;
             }
         }
-        // B. Built-in receiver — tsgo resolves to node_modules/lib.d.ts, never project code
         if (call.callForm === 'member' && call.receiverName && BUILTIN_RECEIVERS.has(call.receiverName)) {
             skippedBuiltin++;
             continue;
@@ -811,11 +794,8 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
         list.push(call);
     }
     const t0 = Date.now();
-    const skippedTotal = skippedUnambiguous + skippedKnownType + skippedBuiltin;
-    // Adaptive parallelism based on three constraints:
-    // 1. CPU: 75% of cores — parsing workers are done, leave 25% for Node.js event loop + OS
-    // 2. Memory: each tsgo loads the full project (~500MB estimate) — cap by free system memory
-    // 3. Workload: at least 50 files per process to amortize ~0.5s startup cost
+    const skippedTotal = skippedUnambiguous + skippedBuiltin;
+    // Adaptive parallelism
     const osModule = await import('os');
     const cpuCount = osModule.cpus().length;
     const freeMemGB = osModule.freemem() / (1024 * 1024 * 1024);
@@ -823,25 +803,30 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
     const maxByMemory = Math.max(1, Math.floor(freeMemGB / 0.5));
     const maxByWorkload = Math.max(1, Math.floor(tsgoByFile.size / 50));
     const actualWorkers = Math.min(maxByCpu, maxByMemory, maxByWorkload);
-    console.error(`Code Mapper: tsgo resolving ${tsgoEligible.length} calls across ${tsgoByFile.size} files with ${actualWorkers} process${actualWorkers > 1 ? 'es' : ''} (skipped ${skippedTotal}: ${skippedUnambiguous} unambiguous, ${skippedKnownType} known-type, ${skippedBuiltin} builtin)...`);
-    // Split files round-robin across workers for balanced distribution
-    const fileEntries = [...tsgoByFile.entries()];
-    const workerSlices = Array.from({ length: actualWorkers }, () => []);
-    for (let i = 0; i < fileEntries.length; i++) {
-        workerSlices[i % actualWorkers].push(fileEntries[i]);
+    if (process.env['CODE_MAPPER_VERBOSE']) {
+        console.error(`Code Mapper: tsgo resolving ${tsgoEligible.length} calls across ${tsgoByFile.size} files with ${actualWorkers} process${actualWorkers > 1 ? 'es' : ''} (skipped ${skippedTotal}: ${skippedUnambiguous} unambiguous, ${skippedBuiltin} builtin)...`);
     }
-    // Shared progress counter
+    // Dynamic dispatch: shared queue sorted by call count descending
+    const fileEntries = [...tsgoByFile.entries()];
+    fileEntries.sort((a, b) => b[1].length - a[1].length);
     let totalFilesProcessed = 0;
+    let nextFileIdx = 0;
     const tsgoTotalFiles = tsgoByFile.size;
-    /** Resolve a slice of files using a single tsgo service */
-    const resolveSlice = async (service, slice) => {
+    const getNextFile = () => {
+        if (nextFileIdx >= fileEntries.length)
+            return null;
+        return fileEntries[nextFileIdx++];
+    };
+    const resolveWorker = async (service) => {
         const sliceResults = new Map();
         let sliceResolved = 0;
         let sliceFailed = 0;
-        for (const [filePath, calls] of slice) {
+        let entry;
+        while ((entry = getNextFile()) !== null) {
+            const [filePath, calls] = entry;
             totalFilesProcessed++;
             if (totalFilesProcessed % 25 === 0) {
-                onProgress?.(totalFilesProcessed, tsgoTotalFiles);
+                onProgress?.(totalFilesProcessed, tsgoTotalFiles, actualWorkers);
             }
             const absFilePath = path.resolve(repoPath, filePath);
             for (const call of calls) {
@@ -856,7 +841,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
                         sliceFailed++;
                         continue;
                     }
-                    // Match by exact startLine, then by range containment
                     let bestMatch;
                     for (const sym of targetSymbols) {
                         const node = graph.getNode(toNodeId(sym.nodeId));
@@ -899,24 +883,22 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
                     sliceFailed++;
                 }
             }
+            service.notifyFileDeleted(absFilePath);
         }
         return { resolved: sliceResolved, failed: sliceFailed, results: sliceResults };
     };
     let resolved = 0;
     let failed = 0;
     if (actualWorkers === 1) {
-        // Single process — use the existing service (already started)
-        const outcome = await resolveSlice(tsgoService, fileEntries);
+        const outcome = await resolveWorker(tsgoService);
         resolved = outcome.resolved;
         failed = outcome.failed;
         for (const [k, v] of outcome.results)
             results.set(k, v);
     }
     else {
-        // Parallel — spawn extra services, keep the original for slice 0
         const extraServices = [];
         try {
-            // Start extra tsgo processes in parallel
             const startPromises = [];
             for (let i = 1; i < actualWorkers; i++) {
                 startPromises.push((async () => {
@@ -931,24 +913,10 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
                 if (svc)
                     extraServices.push(svc);
             }
-            // Build final service list: original + extras that started successfully
             const services = [tsgoService, ...extraServices];
-            const activeSlices = workerSlices.slice(0, services.length);
-            // If some services failed to start, redistribute their slices
-            if (services.length < actualWorkers) {
-                for (let i = services.length; i < actualWorkers; i++) {
-                    const orphanSlice = workerSlices[i];
-                    if (orphanSlice) {
-                        // Distribute orphan files round-robin across active services
-                        for (let j = 0; j < orphanSlice.length; j++) {
-                            activeSlices[j % services.length].push(orphanSlice[j]);
-                        }
-                    }
-                }
-            }
-            console.error(`Code Mapper: ${services.length} tsgo processes ready, resolving in parallel...`);
-            // Resolve all slices in parallel
-            const outcomes = await Promise.all(activeSlices.map((slice, i) => resolveSlice(services[i], slice)));
+            if (process.env['CODE_MAPPER_VERBOSE'])
+                console.error(`Code Mapper: ${services.length} tsgo processes ready, resolving with dynamic dispatch...`);
+            const outcomes = await Promise.all(services.map(svc => resolveWorker(svc)));
             for (const outcome of outcomes) {
                 resolved += outcome.resolved;
                 failed += outcome.failed;
@@ -957,13 +925,13 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
             }
         }
         finally {
-            // Stop extra services (the original is stopped by the caller)
             for (const svc of extraServices)
                 svc.stop();
         }
     }
     const elapsed = Date.now() - t0;
-    console.error(`Code Mapper: tsgo resolved ${resolved}/${eligible.length} calls in ${elapsed}ms (${failed} unresolvable, ${actualWorkers} process${actualWorkers > 1 ? 'es' : ''})`);
+    if (process.env['CODE_MAPPER_VERBOSE'])
+        console.error(`Code Mapper: tsgo resolved ${resolved}/${eligible.length} calls in ${elapsed}ms (${failed} unresolvable, ${actualWorkers} process${actualWorkers > 1 ? 'es' : ''})`);
     return results;
 }
 /** Generic method names that produce false edges when receiver type is unknown (worker-extracted path) */
@@ -1009,7 +977,7 @@ export const processCallsFromExtracted = async (graph, extractedCalls, ctx, onPr
     for (const [filePath, calls] of byFile) {
         filesProcessed++;
         if (filesProcessed % 25 === 0) {
-            onProgress?.(filesProcessed, totalFiles);
+            onProgress?.(filesProcessed, totalFiles, 1);
             await yieldToEventLoop();
         }
         ctx.enableCache(filePath);

package/dist/core/ingestion/pipeline.js CHANGED Viewed

@@ -192,7 +192,7 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
                         percent: Math.round(parsingProgress),
                         message: `Parsing chunk ${chunkIdx + 1}/${numChunks}...`,
                         detail: filePath,
-                        stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
+                        stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount, ...(workerPool ? { workerCount: workerPool.size } : {}) },
                     });
                 }, workerPool);
                 const parseMs = Date.now() - parseStart;
@@ -297,13 +297,13 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
                 }
             }
             try {
-                await processCallsFromExtracted(graph, allExtractedCalls, ctx, (current, total) => {
+                await processCallsFromExtracted(graph, allExtractedCalls, ctx, (current, total, workerCount) => {
                     const callPercent = 70 + Math.round((current / Math.max(total, 1)) * 12);
                     onProgress({
                         phase: 'calls',
                         percent: callPercent,
                         message: `Resolving calls: ${current}/${total} files...`,
-                        stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
+                        stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount, ...(workerCount ? { workerCount } : {}) },
                     });
                 }, allConstructorBindings.length > 0 ? allConstructorBindings : undefined, tsgoService, repoPath);
             }

package/dist/core/semantic/tsgo-service.js CHANGED Viewed

@@ -258,10 +258,10 @@ export class TsgoService {
             this.process.stderr.on('data', (chunk) => {
                 const msg = chunk.toString().trim();
                 if (msg)
-                    console.error(`[tsgo-service] stderr: ${msg}`);
+                    verbose('stderr:', msg);
             });
             this.process.on('exit', (code, signal) => {
-                console.error(`[tsgo-service] process exited (code=${code}, signal=${signal})`);
+                verbose(`process exited (code=${code}, signal=${signal})`);
                 this.ready = false;
                 this.process = null;
             });
@@ -284,7 +284,7 @@ export class TsgoService {
             // Send initialized notification
             this.send({ jsonrpc: '2.0', method: 'initialized', params: {} });
             this.ready = true;
-            console.error('Code Mapper: tsgo LSP ready (semantic resolution enabled)');
+            verbose('LSP ready');
             return true;
         }
         catch (err) {

package/dist/types/pipeline.d.ts CHANGED Viewed

@@ -13,6 +13,7 @@ export interface PipelineProgress {
         filesProcessed: number;
         totalFiles: number;
         nodesCreated: number;
+        workerCount?: number;
     };
 }
 export interface PipelineResult {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@zuvia-software-solutions/code-mapper",
-  "version": "2.3.8",
+  "version": "2.3.10",
   "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
   "author": "Abhigyan Patwari",
   "license": "PolyForm-Noncommercial-1.0.0",