@zuvia-software-solutions/code-mapper 2.3.8 → 2.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -136,10 +136,12 @@ export const analyzeCommand = async (inputPath, options) => {
136
136
  const t0Global = Date.now();
137
137
  const cpuStart = process.cpuUsage();
138
138
  let peakRssMB = 0;
139
- // Phase timing tracker — records wall time and RSS for each phase
139
+ // Phase timing tracker — records wall time, RSS, file count, and worker count per phase
140
140
  const phaseTimes = [];
141
141
  let currentPhaseName = 'init';
142
142
  let currentPhaseStart = Date.now();
143
+ let currentPhaseFiles = 0;
144
+ let currentPhaseWorkers = 0;
143
145
  const recordPhase = (nextPhase) => {
144
146
  const now = Date.now();
145
147
  const elapsed = now - currentPhaseStart;
@@ -148,10 +150,14 @@ export const analyzeCommand = async (inputPath, options) => {
148
150
  name: currentPhaseName,
149
151
  ms: elapsed,
150
152
  rssMB: Math.round(process.memoryUsage.rss() / (1024 * 1024)),
153
+ ...(currentPhaseFiles > 0 ? { fileCount: currentPhaseFiles } : {}),
154
+ ...(currentPhaseWorkers > 0 ? { workerCount: currentPhaseWorkers } : {}),
151
155
  });
152
156
  }
153
157
  currentPhaseName = nextPhase;
154
158
  currentPhaseStart = now;
159
+ currentPhaseFiles = 0;
160
+ currentPhaseWorkers = 0;
155
161
  };
156
162
  // Live resource stats for the progress bar
157
163
  const cpuCount = os.cpus().length;
@@ -222,7 +228,23 @@ export const analyzeCommand = async (inputPath, options) => {
222
228
  const baseLabel = PHASE_LABELS[progress.phase] || progress.phase;
223
229
  let phaseLabel = baseLabel;
224
230
  if (progress.stats && progress.stats.totalFiles > 0) {
225
- phaseLabel += ` (${progress.stats.filesProcessed.toLocaleString()}/${progress.stats.totalFiles.toLocaleString()})`;
231
+ const current = progress.stats.filesProcessed;
232
+ const total = progress.stats.totalFiles;
233
+ // Track peak file count and worker count for the summary
234
+ currentPhaseFiles = Math.max(currentPhaseFiles, total);
235
+ if (progress.stats.workerCount)
236
+ currentPhaseWorkers = Math.max(currentPhaseWorkers, progress.stats.workerCount);
237
+ phaseLabel += ` (${current.toLocaleString()}/${total.toLocaleString()})`;
238
+ // Show rate (files/s) after 1s
239
+ const elapsedSec = (Date.now() - phaseStart) / 1000;
240
+ if (elapsedSec >= 1 && current > 0) {
241
+ const rate = Math.round(current / elapsedSec);
242
+ phaseLabel += ` ${rate}/s`;
243
+ }
244
+ // Show worker/process count if available
245
+ if (progress.stats.workerCount && progress.stats.workerCount > 1) {
246
+ phaseLabel += ` [${progress.stats.workerCount}p]`;
247
+ }
226
248
  }
227
249
  const scaled = Math.round(progress.percent * 0.6);
228
250
  updateBar(scaled, phaseLabel, baseLabel);
@@ -276,22 +298,34 @@ export const analyzeCommand = async (inputPath, options) => {
276
298
  closeDb(dbPath);
277
299
  // Run Python embedder in batch mode — reads from SQLite, embeds, writes back.
278
300
  // Zero IPC overhead: ~3x faster than Node↔Python JSON streaming.
279
- const { execFile } = await import('child_process');
301
+ const { spawn: spawnChild } = await import('child_process');
280
302
  const { fileURLToPath } = await import('url');
281
303
  const mlxScript = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..', 'models', 'mlx-embedder.py');
282
304
  await new Promise((resolve, reject) => {
283
- const proc = execFile('python3', [mlxScript, 'batch', dbPath, '--dims', '256', '--max-tokens', '2048'], {
284
- maxBuffer: 10 * 1024 * 1024,
285
- timeout: 600_000, // 10 min max for huge codebases
286
- }, (err, _stdout, stderr) => {
287
- if (err) {
288
- console.error(stderr || '');
289
- reject(new Error(`Embedding failed: ${err.message}`));
305
+ // Use spawn (not execFile) — no internal buffer limit, streams only.
306
+ // execFile buffers all stdout in memory which causes OOM/kill on large codebases.
307
+ const proc = spawnChild('python3', [mlxScript, 'batch', dbPath, '--dims', '256', '--max-tokens', '2048'], {
308
+ stdio: ['ignore', 'pipe', 'pipe'],
309
+ });
310
+ let stderrBuf = '';
311
+ proc.stderr?.on('data', (chunk) => {
312
+ stderrBuf += chunk.toString();
313
+ // Keep only last 10KB of stderr for error reporting
314
+ if (stderrBuf.length > 10240)
315
+ stderrBuf = stderrBuf.slice(-10240);
316
+ });
317
+ proc.on('close', (code) => {
318
+ if (code !== 0) {
319
+ console.error(stderrBuf);
320
+ reject(new Error(`Embedding failed: python3 exited with code ${code}`));
290
321
  }
291
322
  else {
292
323
  resolve();
293
324
  }
294
325
  });
326
+ proc.on('error', (err) => {
327
+ reject(new Error(`Embedding failed: ${err.message}`));
328
+ });
295
329
  // Stream progress from Python's JSON lines on stdout
296
330
  let lineBuf = '';
297
331
  proc.stdout?.on('data', (chunk) => {
@@ -433,7 +467,16 @@ export const analyzeCommand = async (inputPath, options) => {
433
467
  const pct = Math.round((phase.ms / totalMs) * 100);
434
468
  const name = PHASE_DISPLAY_NAMES[phase.name] || phase.name;
435
469
  const bar = pct >= 2 ? ' ' + '█'.repeat(Math.max(1, Math.round(pct / 3))) : '';
436
- console.log(` ${name.padEnd(22)} ${sec.padStart(6)}s ${String(pct).padStart(3)}% ${phase.rssMB}MB${bar}`);
470
+ // Build extra stats: rate + workers
471
+ let extra = '';
472
+ if (phase.fileCount && phase.ms > 0) {
473
+ const rate = Math.round(phase.fileCount / (phase.ms / 1000));
474
+ extra += ` ${phase.fileCount.toLocaleString()} files (${rate}/s)`;
475
+ }
476
+ if (phase.workerCount && phase.workerCount > 1) {
477
+ extra += ` [${phase.workerCount}p]`;
478
+ }
479
+ console.log(` ${name.padEnd(22)} ${sec.padStart(6)}s ${String(pct).padStart(3)}% ${phase.rssMB}MB${bar}${extra}`);
437
480
  }
438
481
  console.log(` ${'─'.repeat(50)}`);
439
482
  console.log(` ${'Total'.padEnd(22)} ${totalTime.padStart(6)}s 100% ${peakRssMB}MB peak`);
@@ -10,7 +10,7 @@ export declare const processCalls: (graph: KnowledgeGraph, files: {
10
10
  }[], astCache: ASTCache, ctx: ResolutionContext, onProgress?: (current: number, total: number) => void) => Promise<ExtractedHeritage[]>;
11
11
  export declare const extractReturnTypeName: (raw: string, depth?: number) => string | undefined;
12
12
  /** Resolve pre-extracted call sites from workers (no AST parsing needed) */
13
- export declare const processCallsFromExtracted: (graph: KnowledgeGraph, extractedCalls: ExtractedCall[], ctx: ResolutionContext, onProgress?: (current: number, total: number) => void, constructorBindings?: FileConstructorBindings[], tsgoService?: TsgoService | null, repoPath?: string) => Promise<void>;
13
+ export declare const processCallsFromExtracted: (graph: KnowledgeGraph, extractedCalls: ExtractedCall[], ctx: ResolutionContext, onProgress?: (current: number, total: number, workerCount?: number) => void, constructorBindings?: FileConstructorBindings[], tsgoService?: TsgoService | null, repoPath?: string) => Promise<void>;
14
14
  /** Resolve pre-extracted Laravel routes to CALLS edges from route files to controller methods */
15
15
  export declare const processRoutesFromExtracted: (graph: KnowledgeGraph, extractedRoutes: ExtractedRoute[], ctx: ResolutionContext, onProgress?: (current: number, total: number) => void) => Promise<void>;
16
16
  /**
@@ -753,20 +753,7 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
753
753
  }
754
754
  if (eligible.length === 0)
755
755
  return results;
756
- // Group calls by file — process one file at a time so tsgo only needs
757
- // one file hot in memory. LSP is sequential over stdio, so concurrent
758
- // requests just create a queue that causes timeouts.
759
- const byFile = new Map();
760
- for (const call of eligible) {
761
- let list = byFile.get(call.filePath);
762
- if (!list) {
763
- list = [];
764
- byFile.set(call.filePath, list);
765
- }
766
- list.push(call);
767
- }
768
756
  // Built-in receiver names that resolve to external types, not project code.
769
- // tsgo always fails on these — skip them to avoid wasted LSP round-trips.
770
757
  const BUILTIN_RECEIVERS = new Set([
771
758
  'console', 'Math', 'JSON', 'Object', 'Array', 'String', 'Number', 'Boolean',
772
759
  'Date', 'RegExp', 'Error', 'Promise', 'Map', 'Set', 'WeakMap', 'WeakSet',
@@ -778,14 +765,11 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
778
765
  ]);
779
766
  // Pre-filter calls where tsgo won't add value:
780
767
  // A. Free-form calls with unambiguous name — heuristic resolves perfectly
781
- // B. Member calls with known receiver type AND unambiguous method — heuristic handles
782
- // C. Member calls on built-in receivers — tsgo always fails on these
768
+ // B. Member calls on built-in receivers — tsgo always fails on these
783
769
  const tsgoEligible = [];
784
770
  let skippedUnambiguous = 0;
785
- const skippedKnownType = 0;
786
771
  let skippedBuiltin = 0;
787
772
  for (const call of eligible) {
788
- // A. Free-form, unique name match
789
773
  if (call.callForm === 'free' || call.callForm === undefined) {
790
774
  const resolved = ctx.resolve(call.calledName, call.filePath);
791
775
  if (resolved && resolved.candidates.length === 1) {
@@ -793,7 +777,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
793
777
  continue;
794
778
  }
795
779
  }
796
- // B. Built-in receiver — tsgo resolves to node_modules/lib.d.ts, never project code
797
780
  if (call.callForm === 'member' && call.receiverName && BUILTIN_RECEIVERS.has(call.receiverName)) {
798
781
  skippedBuiltin++;
799
782
  continue;
@@ -811,11 +794,8 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
811
794
  list.push(call);
812
795
  }
813
796
  const t0 = Date.now();
814
- const skippedTotal = skippedUnambiguous + skippedKnownType + skippedBuiltin;
815
- // Adaptive parallelism based on three constraints:
816
- // 1. CPU: 75% of cores — parsing workers are done, leave 25% for Node.js event loop + OS
817
- // 2. Memory: each tsgo loads the full project (~500MB estimate) — cap by free system memory
818
- // 3. Workload: at least 50 files per process to amortize ~0.5s startup cost
797
+ const skippedTotal = skippedUnambiguous + skippedBuiltin;
798
+ // Adaptive parallelism
819
799
  const osModule = await import('os');
820
800
  const cpuCount = osModule.cpus().length;
821
801
  const freeMemGB = osModule.freemem() / (1024 * 1024 * 1024);
@@ -823,25 +803,30 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
823
803
  const maxByMemory = Math.max(1, Math.floor(freeMemGB / 0.5));
824
804
  const maxByWorkload = Math.max(1, Math.floor(tsgoByFile.size / 50));
825
805
  const actualWorkers = Math.min(maxByCpu, maxByMemory, maxByWorkload);
826
- console.error(`Code Mapper: tsgo resolving ${tsgoEligible.length} calls across ${tsgoByFile.size} files with ${actualWorkers} process${actualWorkers > 1 ? 'es' : ''} (skipped ${skippedTotal}: ${skippedUnambiguous} unambiguous, ${skippedKnownType} known-type, ${skippedBuiltin} builtin)...`);
827
- // Split files round-robin across workers for balanced distribution
828
- const fileEntries = [...tsgoByFile.entries()];
829
- const workerSlices = Array.from({ length: actualWorkers }, () => []);
830
- for (let i = 0; i < fileEntries.length; i++) {
831
- workerSlices[i % actualWorkers].push(fileEntries[i]);
806
+ if (process.env['CODE_MAPPER_VERBOSE']) {
807
+ console.error(`Code Mapper: tsgo resolving ${tsgoEligible.length} calls across ${tsgoByFile.size} files with ${actualWorkers} process${actualWorkers > 1 ? 'es' : ''} (skipped ${skippedTotal}: ${skippedUnambiguous} unambiguous, ${skippedBuiltin} builtin)...`);
832
808
  }
833
- // Shared progress counter
809
+ // Dynamic dispatch: shared queue sorted by call count descending
810
+ const fileEntries = [...tsgoByFile.entries()];
811
+ fileEntries.sort((a, b) => b[1].length - a[1].length);
834
812
  let totalFilesProcessed = 0;
813
+ let nextFileIdx = 0;
835
814
  const tsgoTotalFiles = tsgoByFile.size;
836
- /** Resolve a slice of files using a single tsgo service */
837
- const resolveSlice = async (service, slice) => {
815
+ const getNextFile = () => {
816
+ if (nextFileIdx >= fileEntries.length)
817
+ return null;
818
+ return fileEntries[nextFileIdx++];
819
+ };
820
+ const resolveWorker = async (service) => {
838
821
  const sliceResults = new Map();
839
822
  let sliceResolved = 0;
840
823
  let sliceFailed = 0;
841
- for (const [filePath, calls] of slice) {
824
+ let entry;
825
+ while ((entry = getNextFile()) !== null) {
826
+ const [filePath, calls] = entry;
842
827
  totalFilesProcessed++;
843
828
  if (totalFilesProcessed % 25 === 0) {
844
- onProgress?.(totalFilesProcessed, tsgoTotalFiles);
829
+ onProgress?.(totalFilesProcessed, tsgoTotalFiles, actualWorkers);
845
830
  }
846
831
  const absFilePath = path.resolve(repoPath, filePath);
847
832
  for (const call of calls) {
@@ -856,7 +841,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
856
841
  sliceFailed++;
857
842
  continue;
858
843
  }
859
- // Match by exact startLine, then by range containment
860
844
  let bestMatch;
861
845
  for (const sym of targetSymbols) {
862
846
  const node = graph.getNode(toNodeId(sym.nodeId));
@@ -899,24 +883,22 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
899
883
  sliceFailed++;
900
884
  }
901
885
  }
886
+ service.notifyFileDeleted(absFilePath);
902
887
  }
903
888
  return { resolved: sliceResolved, failed: sliceFailed, results: sliceResults };
904
889
  };
905
890
  let resolved = 0;
906
891
  let failed = 0;
907
892
  if (actualWorkers === 1) {
908
- // Single process — use the existing service (already started)
909
- const outcome = await resolveSlice(tsgoService, fileEntries);
893
+ const outcome = await resolveWorker(tsgoService);
910
894
  resolved = outcome.resolved;
911
895
  failed = outcome.failed;
912
896
  for (const [k, v] of outcome.results)
913
897
  results.set(k, v);
914
898
  }
915
899
  else {
916
- // Parallel — spawn extra services, keep the original for slice 0
917
900
  const extraServices = [];
918
901
  try {
919
- // Start extra tsgo processes in parallel
920
902
  const startPromises = [];
921
903
  for (let i = 1; i < actualWorkers; i++) {
922
904
  startPromises.push((async () => {
@@ -931,24 +913,10 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
931
913
  if (svc)
932
914
  extraServices.push(svc);
933
915
  }
934
- // Build final service list: original + extras that started successfully
935
916
  const services = [tsgoService, ...extraServices];
936
- const activeSlices = workerSlices.slice(0, services.length);
937
- // If some services failed to start, redistribute their slices
938
- if (services.length < actualWorkers) {
939
- for (let i = services.length; i < actualWorkers; i++) {
940
- const orphanSlice = workerSlices[i];
941
- if (orphanSlice) {
942
- // Distribute orphan files round-robin across active services
943
- for (let j = 0; j < orphanSlice.length; j++) {
944
- activeSlices[j % services.length].push(orphanSlice[j]);
945
- }
946
- }
947
- }
948
- }
949
- console.error(`Code Mapper: ${services.length} tsgo processes ready, resolving in parallel...`);
950
- // Resolve all slices in parallel
951
- const outcomes = await Promise.all(activeSlices.map((slice, i) => resolveSlice(services[i], slice)));
917
+ if (process.env['CODE_MAPPER_VERBOSE'])
918
+ console.error(`Code Mapper: ${services.length} tsgo processes ready, resolving with dynamic dispatch...`);
919
+ const outcomes = await Promise.all(services.map(svc => resolveWorker(svc)));
952
920
  for (const outcome of outcomes) {
953
921
  resolved += outcome.resolved;
954
922
  failed += outcome.failed;
@@ -957,13 +925,13 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
957
925
  }
958
926
  }
959
927
  finally {
960
- // Stop extra services (the original is stopped by the caller)
961
928
  for (const svc of extraServices)
962
929
  svc.stop();
963
930
  }
964
931
  }
965
932
  const elapsed = Date.now() - t0;
966
- console.error(`Code Mapper: tsgo resolved ${resolved}/${eligible.length} calls in ${elapsed}ms (${failed} unresolvable, ${actualWorkers} process${actualWorkers > 1 ? 'es' : ''})`);
933
+ if (process.env['CODE_MAPPER_VERBOSE'])
934
+ console.error(`Code Mapper: tsgo resolved ${resolved}/${eligible.length} calls in ${elapsed}ms (${failed} unresolvable, ${actualWorkers} process${actualWorkers > 1 ? 'es' : ''})`);
967
935
  return results;
968
936
  }
969
937
  /** Generic method names that produce false edges when receiver type is unknown (worker-extracted path) */
@@ -1009,7 +977,7 @@ export const processCallsFromExtracted = async (graph, extractedCalls, ctx, onPr
1009
977
  for (const [filePath, calls] of byFile) {
1010
978
  filesProcessed++;
1011
979
  if (filesProcessed % 25 === 0) {
1012
- onProgress?.(filesProcessed, totalFiles);
980
+ onProgress?.(filesProcessed, totalFiles, 1);
1013
981
  await yieldToEventLoop();
1014
982
  }
1015
983
  ctx.enableCache(filePath);
@@ -192,7 +192,7 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
192
192
  percent: Math.round(parsingProgress),
193
193
  message: `Parsing chunk ${chunkIdx + 1}/${numChunks}...`,
194
194
  detail: filePath,
195
- stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
195
+ stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount, ...(workerPool ? { workerCount: workerPool.size } : {}) },
196
196
  });
197
197
  }, workerPool);
198
198
  const parseMs = Date.now() - parseStart;
@@ -297,13 +297,13 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
297
297
  }
298
298
  }
299
299
  try {
300
- await processCallsFromExtracted(graph, allExtractedCalls, ctx, (current, total) => {
300
+ await processCallsFromExtracted(graph, allExtractedCalls, ctx, (current, total, workerCount) => {
301
301
  const callPercent = 70 + Math.round((current / Math.max(total, 1)) * 12);
302
302
  onProgress({
303
303
  phase: 'calls',
304
304
  percent: callPercent,
305
305
  message: `Resolving calls: ${current}/${total} files...`,
306
- stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
306
+ stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount, ...(workerCount ? { workerCount } : {}) },
307
307
  });
308
308
  }, allConstructorBindings.length > 0 ? allConstructorBindings : undefined, tsgoService, repoPath);
309
309
  }
@@ -258,10 +258,10 @@ export class TsgoService {
258
258
  this.process.stderr.on('data', (chunk) => {
259
259
  const msg = chunk.toString().trim();
260
260
  if (msg)
261
- console.error(`[tsgo-service] stderr: ${msg}`);
261
+ verbose('stderr:', msg);
262
262
  });
263
263
  this.process.on('exit', (code, signal) => {
264
- console.error(`[tsgo-service] process exited (code=${code}, signal=${signal})`);
264
+ verbose(`process exited (code=${code}, signal=${signal})`);
265
265
  this.ready = false;
266
266
  this.process = null;
267
267
  });
@@ -284,7 +284,7 @@ export class TsgoService {
284
284
  // Send initialized notification
285
285
  this.send({ jsonrpc: '2.0', method: 'initialized', params: {} });
286
286
  this.ready = true;
287
- console.error('Code Mapper: tsgo LSP ready (semantic resolution enabled)');
287
+ verbose('LSP ready');
288
288
  return true;
289
289
  }
290
290
  catch (err) {
@@ -13,6 +13,7 @@ export interface PipelineProgress {
13
13
  filesProcessed: number;
14
14
  totalFiles: number;
15
15
  nodesCreated: number;
16
+ workerCount?: number;
16
17
  };
17
18
  }
18
19
  export interface PipelineResult {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@zuvia-software-solutions/code-mapper",
3
- "version": "2.3.8",
3
+ "version": "2.3.10",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",