@zuvia-software-solutions/code-mapper 2.3.2 → 2.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,7 @@
1
1
  // code-mapper/src/cli/analyze.ts
2
2
  /** @file analyze.ts @description Indexes a repository, builds the knowledge graph, and stores it in .code-mapper/ */
3
3
  import path from 'path';
4
+ import os from 'os';
4
5
  import { execFileSync } from 'child_process';
5
6
  import v8 from 'v8';
6
7
  import cliProgress from 'cli-progress';
@@ -93,7 +94,8 @@ export const analyzeCommand = async (inputPath, options) => {
93
94
  }
94
95
  // Single progress bar for the entire pipeline
95
96
  const bar = new cliProgress.SingleBar({
96
- format: ' {bar} {percentage}% | {phase}',
97
+ // \x1b[K at end clears to EOL so shorter redraws don't leave trailing characters
98
+ format: ' {bar} {percentage}% | {phase} | {resources}\x1b[K',
97
99
  barCompleteChar: '\u2588',
98
100
  barIncompleteChar: '\u2591',
99
101
  hideCursor: true,
@@ -131,6 +133,37 @@ export const analyzeCommand = async (inputPath, options) => {
131
133
  console.log = barLog;
132
134
  console.warn = barLog;
133
135
  console.error = barLog;
136
+ const t0Global = Date.now();
137
+ const cpuStart = process.cpuUsage();
138
+ let peakRssMB = 0;
139
+ // Phase timing tracker — records wall time and RSS for each phase
140
+ const phaseTimes = [];
141
+ let currentPhaseName = 'init';
142
+ let currentPhaseStart = Date.now();
143
+ const recordPhase = (nextPhase) => {
144
+ const now = Date.now();
145
+ const elapsed = now - currentPhaseStart;
146
+ if (elapsed > 0) {
147
+ phaseTimes.push({
148
+ name: currentPhaseName,
149
+ ms: elapsed,
150
+ rssMB: Math.round(process.memoryUsage.rss() / (1024 * 1024)),
151
+ });
152
+ }
153
+ currentPhaseName = nextPhase;
154
+ currentPhaseStart = now;
155
+ };
156
+ // Live resource stats for the progress bar
157
+ const cpuCount = os.cpus().length;
158
+ const getResourceStats = () => {
159
+ const rssMB = Math.round(process.memoryUsage.rss() / (1024 * 1024));
160
+ if (rssMB > peakRssMB)
161
+ peakRssMB = rssMB;
162
+ const cpuDelta = process.cpuUsage(cpuStart);
163
+ const wallMs = Date.now() - t0Global || 1;
164
+ const cpuPct = Math.round(((cpuDelta.user + cpuDelta.system) / 1e3) / wallMs * 100);
165
+ return `${rssMB}MB | CPU ${cpuPct}%`;
166
+ };
134
167
  // Track elapsed time per phase — both updateBar and the interval use
135
168
  // the same format so they don't flicker against each other
136
169
  let lastPhaseLabel = 'Initializing...';
@@ -143,17 +176,16 @@ export const analyzeCommand = async (inputPath, options) => {
143
176
  }
144
177
  const elapsed = Math.round((Date.now() - phaseStart) / 1000);
145
178
  const display = elapsed >= 3 ? `${phaseLabel} (${elapsed}s)` : phaseLabel;
146
- bar.update(value, { phase: display });
179
+ bar.update(value, { phase: display, resources: getResourceStats() });
147
180
  };
148
181
  // Tick elapsed seconds for phases with infrequent progress callbacks
149
182
  // (e.g. CSV streaming, FTS indexing) — uses the same display format as updateBar
150
183
  const elapsedTimer = setInterval(() => {
151
184
  const elapsed = Math.round((Date.now() - phaseStart) / 1000);
152
185
  if (elapsed >= 3) {
153
- bar.update({ phase: `${lastPhaseLabel} (${elapsed}s)` });
186
+ bar.update({ phase: `${lastPhaseLabel} (${elapsed}s)`, resources: getResourceStats() });
154
187
  }
155
188
  }, 1000);
156
- const t0Global = Date.now();
157
189
  // Cache embeddings from existing index before rebuild
158
190
  let cachedEmbeddingNodeIds = new Set();
159
191
  let cachedEmbeddings = [];
@@ -180,15 +212,24 @@ export const analyzeCommand = async (inputPath, options) => {
180
212
  }
181
213
  }
182
214
  // Phase 1: Full Pipeline (0-60%)
215
+ let lastPipelinePhase = '';
183
216
  const pipelineResult = await runPipelineFromRepo(repoPath, (progress) => {
184
- const phaseLabel = PHASE_LABELS[progress.phase] || progress.phase;
217
+ if (progress.phase !== lastPipelinePhase) {
218
+ recordPhase(progress.phase);
219
+ lastPipelinePhase = progress.phase;
220
+ }
221
+ let phaseLabel = PHASE_LABELS[progress.phase] || progress.phase;
222
+ if (progress.stats && progress.stats.totalFiles > 0 &&
223
+ (progress.phase === 'parsing' || progress.phase === 'extracting')) {
224
+ phaseLabel += ` (${progress.stats.filesProcessed.toLocaleString()}/${progress.stats.totalFiles.toLocaleString()})`;
225
+ }
185
226
  const scaled = Math.round(progress.percent * 0.6);
186
227
  updateBar(scaled, phaseLabel);
187
228
  }, options?.tsgo === false ? { tsgo: false } : {});
188
229
  // Phase 2: SQLite (60-85%)
230
+ recordPhase('sqlite');
189
231
  updateBar(60, 'Loading into database...');
190
232
  // Reset the database (delete and recreate)
191
- const t0Db = Date.now();
192
233
  let db = resetDb(dbPath);
193
234
  let dbMsgCount = 0;
194
235
  const dbResult = loadGraphToDb(db, pipelineResult.graph, pipelineResult.repoPath, (msg) => {
@@ -196,20 +237,21 @@ export const analyzeCommand = async (inputPath, options) => {
196
237
  const progress = Math.min(84, 60 + Math.round((dbMsgCount / (dbMsgCount + 10)) * 24));
197
238
  updateBar(progress, msg);
198
239
  });
199
- const dbTime = ((Date.now() - t0Db) / 1000).toFixed(1);
200
240
  const dbWarnings = dbResult.warnings;
201
241
  // Phase 2.5: HTTP route stitching (post-DB-load, needs content field)
242
+ recordPhase('routes');
202
243
  stitchRoutes(db);
203
244
  // Phase 2.6: Populate searchText for BM25 concept matching
204
245
  // Uses first comment + callers + module — must run after edges are loaded
246
+ recordPhase('search-text');
205
247
  updateBar(84, 'Building search index...');
206
248
  populateSearchText(db);
207
249
  // Phase 3: FTS (85-90%)
208
250
  // FTS5 is auto-created by schema triggers — no manual index creation needed
251
+ recordPhase('fts');
209
252
  updateBar(85, 'Search indexes ready');
210
- const t0Fts = Date.now();
211
- const ftsTime = ((Date.now() - t0Fts) / 1000).toFixed(1);
212
253
  // Phase 3.5: Re-insert cached embeddings
254
+ recordPhase('restore-embeddings');
213
255
  if (cachedEmbeddings.length > 0) {
214
256
  updateBar(88, `Restoring ${cachedEmbeddings.length} cached embeddings...`);
215
257
  const EMBED_BATCH = 200;
@@ -226,15 +268,9 @@ export const analyzeCommand = async (inputPath, options) => {
226
268
  }
227
269
  // Phase 4: Embeddings (90-98%)
228
270
  const stats = getStats(db);
229
- let embeddingTime = '0.0';
230
- let embeddingSkipped = true;
231
- let embeddingSkipReason = 'off (use --no-embeddings to skip)';
232
271
  if (options?.embeddings) {
233
- embeddingSkipped = false;
234
- }
235
- if (!embeddingSkipped) {
272
+ recordPhase('embeddings');
236
273
  updateBar(90, 'Generating embeddings...');
237
- const t0Emb = Date.now();
238
274
  // Close DB so Python can write to it
239
275
  closeDb(dbPath);
240
276
  // Run Python embedder in batch mode — reads from SQLite, embeds, writes back.
@@ -295,9 +331,9 @@ export const analyzeCommand = async (inputPath, options) => {
295
331
  });
296
332
  // Reopen DB after Python is done
297
333
  db = openDb(dbPath);
298
- embeddingTime = ((Date.now() - t0Emb) / 1000).toFixed(1);
299
334
  }
300
335
  // Phase 5: Finalize (98-100%)
336
+ recordPhase('finalize');
301
337
  updateBar(98, 'Saving metadata...');
302
338
  // Count embeddings in the index (cached + newly generated) for metadata
303
339
  const embeddingCount = countEmbeddings(db);
@@ -338,19 +374,26 @@ export const analyzeCommand = async (inputPath, options) => {
338
374
  ...(processCount !== undefined ? { processes: processCount } : {}),
339
375
  });
340
376
  closeDb(dbPath);
377
+ recordPhase('done'); // close the last phase
341
378
  const totalTime = ((Date.now() - t0Global) / 1000).toFixed(1);
342
379
  clearInterval(elapsedTimer);
343
380
  process.removeListener('SIGINT', sigintHandler);
344
381
  console.log = origLog;
345
382
  console.warn = origWarn;
346
383
  console.error = origError;
347
- bar.update(100, { phase: 'Done' });
384
+ bar.update(100, { phase: 'Done', resources: '' });
348
385
  bar.stop();
386
+ // Clear any leftover characters from the progress bar line
387
+ process.stdout.write('\x1b[2K');
349
388
  // Summary
350
389
  const embeddingsCached = cachedEmbeddings.length > 0;
351
390
  console.log(`\n Repository indexed successfully (${totalTime}s)${embeddingsCached ? ` [${cachedEmbeddings.length} embeddings cached]` : ''}\n`);
352
391
  console.log(` ${stats.nodes.toLocaleString()} nodes | ${stats.edges.toLocaleString()} edges | ${pipelineResult.communityResult?.stats.totalCommunities || 0} clusters | ${pipelineResult.processResult?.stats.totalProcesses || 0} flows`);
353
- console.log(` SQLite ${dbTime}s | FTS ${ftsTime}s | Embeddings ${embeddingSkipped ? embeddingSkipReason : embeddingTime + 's'}`);
392
+ // Resource usage
393
+ const cpuEnd = process.cpuUsage(cpuStart);
394
+ const wallMs = Date.now() - t0Global || 1;
395
+ const cpuPct = Math.round(((cpuEnd.user + cpuEnd.system) / 1e3) / wallMs * 100);
396
+ console.log(` Memory: peak ${peakRssMB}MB RSS | CPU: ${cpuPct}% (${cpuCount} cores)`);
354
397
  console.log(` tsgo: ${pipelineResult.tsgoEnabled ? 'enabled (compiler-verified call resolution)' : 'disabled — install @typescript/native-preview for higher accuracy'}`);
355
398
  console.log(` ${repoPath}`);
356
399
  if (aiContext.files.length > 0) {
@@ -360,6 +403,39 @@ export const analyzeCommand = async (inputPath, options) => {
360
403
  if (dbWarnings.length > 0) {
361
404
  console.log(` Note: ${dbWarnings.length} warnings during graph load`);
362
405
  }
406
+ // Detailed performance breakdown
407
+ const totalMs = phaseTimes.reduce((s, p) => s + p.ms, 0) || 1;
408
+ const PHASE_DISPLAY_NAMES = {
409
+ init: 'Init',
410
+ extracting: 'Scanning files',
411
+ structure: 'Building structure',
412
+ parsing: 'Parsing & resolving',
413
+ imports: 'Resolving imports',
414
+ calls: 'Tracing calls',
415
+ heritage: 'Extracting inheritance',
416
+ communities: 'Detecting communities',
417
+ processes: 'Detecting processes',
418
+ enriching: 'Enriching clusters',
419
+ complete: 'Pipeline complete',
420
+ sqlite: 'SQLite load',
421
+ routes: 'Route stitching',
422
+ 'search-text': 'Search text',
423
+ fts: 'FTS indexing',
424
+ 'restore-embeddings': 'Restore embeddings',
425
+ embeddings: 'Embeddings (MLX)',
426
+ finalize: 'Finalize & context',
427
+ done: 'Done',
428
+ };
429
+ console.log('\n Phase breakdown:');
430
+ for (const phase of phaseTimes) {
431
+ const sec = (phase.ms / 1000).toFixed(1);
432
+ const pct = Math.round((phase.ms / totalMs) * 100);
433
+ const name = PHASE_DISPLAY_NAMES[phase.name] || phase.name;
434
+ const bar = pct >= 2 ? ' ' + '█'.repeat(Math.max(1, Math.round(pct / 3))) : '';
435
+ console.log(` ${name.padEnd(22)} ${sec.padStart(6)}s ${String(pct).padStart(3)}% ${phase.rssMB}MB${bar}`);
436
+ }
437
+ console.log(` ${'─'.repeat(50)}`);
438
+ console.log(` ${'Total'.padEnd(22)} ${totalTime.padStart(6)}s 100% ${peakRssMB}MB peak`);
363
439
  try {
364
440
  await fs.access(getGlobalRegistryPath());
365
441
  }
package/dist/cli/index.js CHANGED
File without changes
@@ -739,7 +739,7 @@ function isTypeScriptOrJavaScript(filePath) {
739
739
  *
740
740
  * Call key format: "sourceId\0calledName\0callLine" — unique per call site.
741
741
  */
742
- async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPath) {
742
+ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPath, onProgress) {
743
743
  const results = new Map();
744
744
  // Collect eligible calls (TS/JS files with line+column info)
745
745
  const eligible = [];
@@ -764,13 +764,46 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
764
764
  }
765
765
  list.push(call);
766
766
  }
767
+ // Pre-filter: skip free-form calls ONLY when the function name is unambiguous
768
+ // in the symbol table. Heuristic resolves unique names perfectly.
769
+ // Ambiguous names (multiple symbols with same name) need tsgo for disambiguation.
770
+ const tsgoEligible = [];
771
+ let skippedHeuristic = 0;
772
+ for (const call of eligible) {
773
+ if (call.callForm === 'free' || call.callForm === undefined) {
774
+ const resolved = ctx.resolve(call.calledName, call.filePath);
775
+ // Unique match — heuristic handles this at high confidence
776
+ if (resolved && resolved.candidates.length === 1) {
777
+ skippedHeuristic++;
778
+ continue;
779
+ }
780
+ }
781
+ tsgoEligible.push(call);
782
+ }
783
+ // Regroup filtered calls by file
784
+ const tsgoByFile = new Map();
785
+ for (const call of tsgoEligible) {
786
+ let list = tsgoByFile.get(call.filePath);
787
+ if (!list) {
788
+ list = [];
789
+ tsgoByFile.set(call.filePath, list);
790
+ }
791
+ list.push(call);
792
+ }
767
793
  let resolved = 0;
768
794
  let failed = 0;
769
795
  const t0 = Date.now();
770
- console.error(`Code Mapper: tsgo resolving ${eligible.length} calls across ${byFile.size} files...`);
771
- for (const [filePath, calls] of byFile) {
796
+ console.error(`Code Mapper: tsgo resolving ${tsgoEligible.length} calls across ${tsgoByFile.size} files (skipped ${skippedHeuristic} heuristic-resolvable)...`);
797
+ let tsgoFilesProcessed = 0;
798
+ const tsgoTotalFiles = tsgoByFile.size;
799
+ for (const [filePath, calls] of tsgoByFile) {
800
+ tsgoFilesProcessed++;
801
+ if (tsgoFilesProcessed % 25 === 0) {
802
+ onProgress?.(tsgoFilesProcessed, tsgoTotalFiles);
803
+ await yieldToEventLoop();
804
+ }
772
805
  const absFilePath = path.resolve(repoPath, filePath);
773
- // Resolve all calls in this file sequentially
806
+ // Sequential LSP requests tsgo processes over stdio, concurrent floods cause hangs
774
807
  for (const call of calls) {
775
808
  try {
776
809
  const def = await tsgoService.resolveDefinition(absFilePath, call.callLine - 1, call.callColumn);
@@ -806,10 +839,7 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
806
839
  }
807
840
  }
808
841
  if (bestMatch) {
809
- // Drop self-referencing tsgo edges: these come from property access
810
- // on parameters (req.params, res.json) that tsgo resolves back to
811
- // the enclosing function's definition. Legitimate recursion is captured
812
- // by the heuristic path (free-form calls to the function's own name).
842
+ // Drop self-referencing tsgo edges
813
843
  if (bestMatch.nodeId === call.sourceId) {
814
844
  failed++;
815
845
  continue;
@@ -871,13 +901,13 @@ export const processCallsFromExtracted = async (graph, extractedCalls, ctx, onPr
871
901
  // Batch pre-resolve via tsgo LSP (highest confidence, TS/JS only)
872
902
  let tsgoResolved;
873
903
  if (tsgoService?.isReady() && repoPath) {
874
- tsgoResolved = await batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPath);
904
+ tsgoResolved = await batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPath, onProgress);
875
905
  }
876
906
  const totalFiles = byFile.size;
877
907
  let filesProcessed = 0;
878
908
  for (const [filePath, calls] of byFile) {
879
909
  filesProcessed++;
880
- if (filesProcessed % 100 === 0) {
910
+ if (filesProcessed % 25 === 0) {
881
911
  onProgress?.(filesProcessed, totalFiles);
882
912
  await yieldToEventLoop();
883
913
  }
@@ -4,7 +4,10 @@ import fs from 'fs/promises';
4
4
  import path from 'path';
5
5
  import { glob } from 'glob';
6
6
  import { createIgnoreFilter } from '../../config/ignore-service.js';
7
- const READ_CONCURRENCY = 32;
7
+ // Stat is metadata-only (no I/O), can be highly concurrent
8
+ const STAT_CONCURRENCY = 256;
9
+ // File reads move actual data, keep bounded to avoid fd exhaustion
10
+ const READ_CONCURRENCY = 64;
8
11
  /** Scan repository: stat files to get paths + sizes, no content loaded (~10MB for 100K files) */
9
12
  export const walkRepositoryPaths = async (repoPath, onProgress) => {
10
13
  const ignoreFilter = await createIgnoreFilter(repoPath);
@@ -16,8 +19,8 @@ export const walkRepositoryPaths = async (repoPath, onProgress) => {
16
19
  });
17
20
  const entries = [];
18
21
  let processed = 0;
19
- for (let start = 0; start < filtered.length; start += READ_CONCURRENCY) {
20
- const batch = filtered.slice(start, start + READ_CONCURRENCY);
22
+ for (let start = 0; start < filtered.length; start += STAT_CONCURRENCY) {
23
+ const batch = filtered.slice(start, start + STAT_CONCURRENCY);
21
24
  const results = await Promise.allSettled(batch.map(async (relativePath) => {
22
25
  const fullPath = path.join(repoPath, relativePath);
23
26
  const stat = await fs.stat(fullPath);
@@ -23,10 +23,14 @@ const processParsingWithWorkers = async (graph, files, symbolTable, _astCache, w
23
23
  if (parseableFiles.length === 0)
24
24
  return { imports: [], calls: [], heritage: [], routes: [], constructorBindings: [] };
25
25
  const total = files.length;
26
- // Dispatch to worker pool
27
- const chunkResults = await workerPool.dispatch(parseableFiles, (filesProcessed) => {
26
+ // Dispatch to worker pool with size-balanced distribution
27
+ const { results: chunkResults, failures } = await workerPool.dispatch(parseableFiles, (filesProcessed) => {
28
28
  onFileProgress?.(Math.min(filesProcessed, total), total, 'Parsing...');
29
- });
29
+ }, (item) => item.content.length);
30
+ // Log worker failures (don't throw — partial results are still valuable)
31
+ for (const failure of failures) {
32
+ console.error(` Worker failure (partial results preserved): ${failure.message}`);
33
+ }
30
34
  // Merge worker results into graph and symbol table
31
35
  const allImports = [];
32
36
  const allCalls = [];
@@ -21,7 +21,10 @@ import { fileURLToPath, pathToFileURL } from 'node:url';
21
21
  import { memoryGuard } from '../../lib/memory-guard.js';
22
22
  import { toNodeId, toEdgeId } from '../db/schema.js';
23
23
  import { getTsgoService } from '../semantic/tsgo-service.js';
24
- const isDev = process.env['NODE_ENV'] === 'development';
24
+ const verbose = (...args) => {
25
+ if (process.env['CODE_MAPPER_VERBOSE'])
26
+ console.error(...args);
27
+ };
25
28
  // Default chunk budget — used when memory is plentiful.
26
29
  // Under memory pressure, adaptiveBatchSize() shrinks this automatically.
27
30
  const DEFAULT_CHUNK_BYTE_BUDGET = 50 * 1024 * 1024;
@@ -100,7 +103,7 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
100
103
  if (totalParseable === 0) {
101
104
  onProgress({
102
105
  phase: 'parsing',
103
- percent: 82,
106
+ percent: 70,
104
107
  message: 'No parseable files found — skipping parsing phase',
105
108
  stats: { filesProcessed: 0, totalFiles: 0, nodesCreated: graph.nodeCount },
106
109
  });
@@ -122,9 +125,9 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
122
125
  if (currentChunk.length > 0)
123
126
  chunks.push(currentChunk);
124
127
  const numChunks = chunks.length;
125
- if (isDev) {
128
+ {
126
129
  const totalMB = parseableScanned.reduce((s, f) => s + f.size, 0) / (1024 * 1024);
127
- console.log(`📂 Scan: ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${Math.round(chunkBudget / (1024 * 1024))}MB budget (${memoryGuard.summary()})`);
130
+ verbose(`[parse] ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${Math.round(chunkBudget / (1024 * 1024))}MB budget (${memoryGuard.summary()})`);
128
131
  }
129
132
  onProgress({
130
133
  phase: 'parsing',
@@ -148,8 +151,7 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
148
151
  workerPool = createWorkerPool(workerUrl);
149
152
  }
150
153
  catch (err) {
151
- if (isDev)
152
- console.warn('Worker pool creation failed, using sequential fallback:', err.message);
154
+ console.error('[parse] worker pool creation failed, using sequential fallback:', err.message);
153
155
  }
154
156
  let filesParsedSoFar = 0;
155
157
  // AST cache sized for one chunk (used by sequential fallback for import/call/heritage)
@@ -171,15 +173,20 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
171
173
  const chunkPaths = chunks[chunkIdx];
172
174
  if (!chunkPaths)
173
175
  continue;
176
+ const chunkStart = Date.now();
174
177
  // Read content for this chunk
175
178
  const chunkContents = await readFileContents(repoPath, chunkPaths);
176
179
  const chunkFiles = chunkPaths
177
180
  .filter(p => chunkContents.has(p))
178
181
  .map(p => ({ path: p, content: chunkContents.get(p) }));
182
+ const readMs = Date.now() - chunkStart;
183
+ const chunkMB = chunkFiles.reduce((s, f) => s + f.content.length, 0) / (1024 * 1024);
184
+ verbose(`[parse] chunk ${chunkIdx + 1}/${numChunks}: ${chunkFiles.length} files (${chunkMB.toFixed(1)}MB), read ${readMs}ms`);
179
185
  // Parse chunk (workers or sequential fallback)
186
+ const parseStart = Date.now();
180
187
  const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, (current, _total, filePath) => {
181
188
  const globalCurrent = filesParsedSoFar + current;
182
- const parsingProgress = 20 + ((globalCurrent / totalParseable) * 62);
189
+ const parsingProgress = 20 + ((globalCurrent / totalParseable) * 50);
183
190
  onProgress({
184
191
  phase: 'parsing',
185
192
  percent: Math.round(parsingProgress),
@@ -188,7 +195,9 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
188
195
  stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
189
196
  });
190
197
  }, workerPool);
191
- const chunkBasePercent = 20 + ((filesParsedSoFar / totalParseable) * 62);
198
+ const parseMs = Date.now() - parseStart;
199
+ verbose(`[parse] chunk ${chunkIdx + 1}/${numChunks}: parsed ${parseMs}ms (${memoryGuard.summary()})`);
200
+ const chunkBasePercent = 20 + ((filesParsedSoFar / totalParseable) * 50);
192
201
  if (chunkWorkerData) {
193
202
  // Resolve imports per-chunk (file-level, doesn't need full symbol table)
194
203
  await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, ctx, (current, total) => {
@@ -235,14 +244,14 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
235
244
  sequentialChunkPaths.push(chunkPaths);
236
245
  }
237
246
  filesParsedSoFar += chunkFiles.length;
247
+ const totalChunkMs = Date.now() - chunkStart;
248
+ verbose(`[parse] chunk ${chunkIdx + 1}/${numChunks}: total ${totalChunkMs}ms, ${filesParsedSoFar}/${totalParseable} files done`);
238
249
  // Clear AST cache between chunks to free memory; chunk locals go out of scope for GC
239
250
  astCache.clear();
240
251
  // Attempt GC between chunks if under memory pressure
241
252
  if (memoryGuard.isUnderPressure()) {
242
253
  memoryGuard.tryGC();
243
- if (isDev) {
244
- console.log(`⚠️ Memory pressure after chunk ${chunkIdx + 1}: ${memoryGuard.summary()}`);
245
- }
254
+ verbose(`[parse] memory pressure after chunk ${chunkIdx + 1}: ${memoryGuard.summary()}`);
246
255
  }
247
256
  }
248
257
  }
@@ -265,10 +274,11 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
265
274
  }
266
275
  let tsgoWasUsed = false;
267
276
  // Phase B: Resolve ALL deferred calls now that every symbol is registered
277
+ // Progress range: 70-82% (advancing, not fixed)
268
278
  if (allExtractedCalls.length > 0) {
269
279
  onProgress({
270
- phase: 'parsing',
271
- percent: 82,
280
+ phase: 'calls',
281
+ percent: 70,
272
282
  message: `Resolving ${allExtractedCalls.length} calls across all files...`,
273
283
  stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
274
284
  });
@@ -288,9 +298,10 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
288
298
  }
289
299
  try {
290
300
  await processCallsFromExtracted(graph, allExtractedCalls, ctx, (current, total) => {
301
+ const callPercent = 70 + Math.round((current / Math.max(total, 1)) * 12);
291
302
  onProgress({
292
- phase: 'parsing',
293
- percent: 82,
303
+ phase: 'calls',
304
+ percent: callPercent,
294
305
  message: `Resolving calls: ${current}/${total} files...`,
295
306
  stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
296
307
  });
@@ -301,12 +312,11 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
301
312
  tsgoService?.stop();
302
313
  }
303
314
  }
304
- // Log resolution cache stats in dev mode
305
- if (isDev) {
315
+ {
306
316
  const rcStats = ctx.getStats();
307
317
  const total = rcStats.cacheHits + rcStats.cacheMisses;
308
318
  const hitRate = total > 0 ? ((rcStats.cacheHits / total) * 100).toFixed(1) : '0';
309
- console.log(`🔍 Resolution cache: ${rcStats.cacheHits} hits, ${rcStats.cacheMisses} misses (${hitRate}% hit rate)`);
319
+ verbose(`[resolve] cache: ${rcStats.cacheHits} hits, ${rcStats.cacheMisses} misses (${hitRate}% hit rate)`);
310
320
  }
311
321
  // Free import resolution context (~94MB+ for large repos)
312
322
  allPathObjects.length = 0;
@@ -318,24 +328,24 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
318
328
  createDependsOnEdges(graph, ctx),
319
329
  createProvidesEdges(graph, ctx),
320
330
  ]);
321
- if (isDev && (diEdgeCount > 0 || providesEdgeCount > 0)) {
322
- console.log(`💉 DI: ${diEdgeCount} DEPENDS_ON edges, ${providesEdgeCount} PROVIDES edges`);
331
+ if (diEdgeCount > 0 || providesEdgeCount > 0) {
332
+ verbose(`[resolve] DI: ${diEdgeCount} DEPENDS_ON, ${providesEdgeCount} PROVIDES edges`);
323
333
  }
324
334
  // Phase 4.5a2: Interface dispatch — connect callers of interfaces to implementations
325
335
  const ifaceEdges = await resolveInterfaceDispatches(graph, ctx);
326
- if (isDev && ifaceEdges > 0) {
327
- console.log(`🔌 Interface dispatch: ${ifaceEdges} implementation edges`);
336
+ if (ifaceEdges > 0) {
337
+ verbose(`[resolve] interface dispatch: ${ifaceEdges} implementation edges`);
328
338
  }
329
339
  // Phase 4.5b: Method Resolution Order
330
340
  onProgress({
331
341
  phase: 'parsing',
332
- percent: 81,
342
+ percent: 82,
333
343
  message: 'Computing method resolution order...',
334
344
  stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
335
345
  });
336
346
  const mroResult = computeMRO(graph);
337
- if (isDev && mroResult.entries.length > 0) {
338
- console.log(`🔀 MRO: ${mroResult.entries.length} classes analyzed, ${mroResult.ambiguityCount} ambiguities found, ${mroResult.overrideEdges} OVERRIDES edges`);
347
+ if (mroResult.entries.length > 0) {
348
+ verbose(`[resolve] MRO: ${mroResult.entries.length} classes, ${mroResult.ambiguityCount} ambiguities, ${mroResult.overrideEdges} OVERRIDES edges`);
339
349
  }
340
350
  // Phase 5: Communities
341
351
  onProgress({
@@ -353,9 +363,7 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
353
363
  stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
354
364
  });
355
365
  });
356
- if (isDev) {
357
- console.log(`🏘️ Community detection: ${communityResult.stats.totalCommunities} communities found (modularity: ${communityResult.stats.modularity.toFixed(3)})`);
358
- }
366
+ verbose(`[community] ${communityResult.stats.totalCommunities} communities (modularity: ${communityResult.stats.modularity.toFixed(3)})`);
359
367
  communityResult.communities.forEach(comm => {
360
368
  graph.addNode({
361
369
  id: toNodeId(comm.id),
@@ -399,9 +407,7 @@ export const runPipelineFromRepo = async (repoPath, onProgress, opts) => {
399
407
  stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
400
408
  });
401
409
  }, { maxProcesses: dynamicMaxProcesses, minSteps: 3 });
402
- if (isDev) {
403
- console.log(`🔄 Process detection: ${processResult.stats.totalProcesses} processes found (${processResult.stats.crossCommunityCount} cross-community)`);
404
- }
410
+ verbose(`[process] ${processResult.stats.totalProcesses} processes (${processResult.stats.crossCommunityCount} cross-community)`);
405
411
  processResult.processes.forEach(proc => {
406
412
  graph.addNode({
407
413
  id: toNodeId(proc.id),
@@ -174,7 +174,7 @@ const processBatch = (files, onProgress) => {
174
174
  }
175
175
  let totalProcessed = 0;
176
176
  let lastReported = 0;
177
- const PROGRESS_INTERVAL = 100; // report every 100 files
177
+ const PROGRESS_INTERVAL = 25; // report every 25 files — resets the sub-batch timer
178
178
  const onFileProcessed = onProgress ? () => {
179
179
  totalProcessed++;
180
180
  if (totalProcessed - lastReported >= PROGRESS_INTERVAL) {
@@ -1,11 +1,16 @@
1
1
  /** @file Generic worker thread pool with sub-batch streaming for bounded memory usage */
2
2
  export interface WorkerPool {
3
3
  /**
4
- * Dispatch items across workers with sub-batch streaming
5
- * @param items - Items to process (split into chunks, one per worker)
4
+ * Dispatch items across workers with sub-batch streaming.
5
+ * Uses Promise.allSettled so one worker failure doesn't discard other workers' results.
6
+ * @param items - Items to process (split across workers using size-balanced round-robin)
6
7
  * @param onProgress - Optional callback for progress reporting
8
+ * @param getItemSize - Optional function to extract item size for balanced dispatch
7
9
  */
8
- dispatch<TInput, TResult>(items: TInput[], onProgress?: (filesProcessed: number) => void): Promise<TResult[]>;
10
+ dispatch<TInput, TResult>(items: TInput[], onProgress?: (filesProcessed: number) => void, getItemSize?: (item: TInput) => number): Promise<{
11
+ results: TResult[];
12
+ failures: Error[];
13
+ }>;
9
14
  /** Terminate all workers (must be called when done) */
10
15
  terminate(): Promise<void>;
11
16
  /** Number of workers in the pool */
@@ -5,9 +5,40 @@ import os from 'node:os';
5
5
  import fs from 'node:fs';
6
6
  import { fileURLToPath } from 'node:url';
7
7
  // Max files per postMessage to keep structured-clone memory bounded
8
- const SUB_BATCH_SIZE = 1500;
9
- // Per sub-batch timeout — large codebases with big files need more time
10
- const SUB_BATCH_TIMEOUT_MS = 120_000;
8
+ const SUB_BATCH_SIZE = 500;
9
+ // Base sub-batch timeout — extended proportionally to file count
10
+ const BASE_TIMEOUT_MS = 120_000;
11
+ // Per-file timeout extension (200ms per file in the sub-batch)
12
+ const PER_FILE_TIMEOUT_MS = 200;
13
+ /** Compute proportional timeout: max(base, fileCount * perFile) */
14
+ const computeTimeout = (fileCount) => Math.max(BASE_TIMEOUT_MS, fileCount * PER_FILE_TIMEOUT_MS);
15
+ /**
16
+ * Distribute items across N buckets using size-balanced round-robin (LPT heuristic).
17
+ * Items are sorted by size descending, then assigned to the bucket with the smallest total.
18
+ * This minimizes the makespan of the heaviest bucket.
19
+ */
20
+ const sizeBalancedDistribute = (items, bucketCount, getSize) => {
21
+ if (bucketCount <= 0)
22
+ return [];
23
+ if (items.length === 0)
24
+ return Array.from({ length: bucketCount }, () => []);
25
+ // Sort indices by size descending
26
+ const indices = items.map((_, i) => i);
27
+ indices.sort((a, b) => getSize(items[b]) - getSize(items[a]));
28
+ const buckets = Array.from({ length: bucketCount }, () => []);
29
+ const bucketSizes = new Array(bucketCount).fill(0);
30
+ for (const idx of indices) {
31
+ // Find the bucket with the smallest total size
32
+ let minBucket = 0;
33
+ for (let b = 1; b < bucketCount; b++) {
34
+ if (bucketSizes[b] < bucketSizes[minBucket])
35
+ minBucket = b;
36
+ }
37
+ buckets[minBucket].push(items[idx]);
38
+ bucketSizes[minBucket] += getSize(items[idx]);
39
+ }
40
+ return buckets;
41
+ };
11
42
  /** Create a pool of worker threads */
12
43
  export const createWorkerPool = (workerUrl, poolSize) => {
13
44
  // Validate worker script exists before spawning to prevent MODULE_NOT_FOUND crashes
@@ -20,13 +51,20 @@ export const createWorkerPool = (workerUrl, poolSize) => {
20
51
  for (let i = 0; i < size; i++) {
21
52
  workers.push(new Worker(workerUrl));
22
53
  }
23
- const dispatch = (items, onProgress) => {
54
+ const dispatch = (items, onProgress, getItemSize) => {
24
55
  if (items.length === 0)
25
- return Promise.resolve([]);
26
- const chunkSize = Math.ceil(items.length / size);
27
- const chunks = [];
28
- for (let i = 0; i < items.length; i += chunkSize) {
29
- chunks.push(items.slice(i, i + chunkSize));
56
+ return Promise.resolve({ results: [], failures: [] });
57
+ // Size-balanced dispatch when size function provided, otherwise equal split
58
+ let chunks;
59
+ if (getItemSize) {
60
+ chunks = sizeBalancedDistribute(items, Math.min(size, items.length), getItemSize);
61
+ }
62
+ else {
63
+ const chunkSize = Math.ceil(items.length / size);
64
+ chunks = [];
65
+ for (let i = 0; i < items.length; i += chunkSize) {
66
+ chunks.push(items.slice(i, i + chunkSize));
67
+ }
30
68
  }
31
69
  const workerProgress = new Array(chunks.length).fill(0);
32
70
  const promises = chunks.map((chunk, i) => {
@@ -37,6 +75,7 @@ export const createWorkerPool = (workerUrl, poolSize) => {
37
75
  return new Promise((resolve, reject) => {
38
76
  let settled = false;
39
77
  let subBatchTimer = null;
78
+ let currentSubBatchSize = 0;
40
79
  const cleanup = () => {
41
80
  if (subBatchTimer)
42
81
  clearTimeout(subBatchTimer);
@@ -47,13 +86,14 @@ export const createWorkerPool = (workerUrl, poolSize) => {
47
86
  const resetSubBatchTimer = () => {
48
87
  if (subBatchTimer)
49
88
  clearTimeout(subBatchTimer);
89
+ const timeout = computeTimeout(currentSubBatchSize);
50
90
  subBatchTimer = setTimeout(() => {
51
91
  if (!settled) {
52
92
  settled = true;
53
93
  cleanup();
54
- reject(new Error(`Worker ${i} sub-batch timed out after ${SUB_BATCH_TIMEOUT_MS / 1000}s (chunk: ${chunk.length} items).`));
94
+ reject(new Error(`Worker ${i} sub-batch timed out after ${timeout / 1000}s (chunk: ${chunk.length} items, sub-batch: ${currentSubBatchSize} items).`));
55
95
  }
56
- }, SUB_BATCH_TIMEOUT_MS);
96
+ }, timeout);
57
97
  };
58
98
  let subBatchIdx = 0;
59
99
  const sendNextSubBatch = () => {
@@ -63,6 +103,7 @@ export const createWorkerPool = (workerUrl, poolSize) => {
63
103
  return;
64
104
  }
65
105
  const subBatch = chunk.slice(start, start + SUB_BATCH_SIZE);
106
+ currentSubBatchSize = subBatch.length;
66
107
  subBatchIdx++;
67
108
  resetSubBatchTimer();
68
109
  worker.postMessage({ type: 'sub-batch', files: subBatch });
@@ -71,6 +112,8 @@ export const createWorkerPool = (workerUrl, poolSize) => {
71
112
  if (settled)
72
113
  return;
73
114
  if (msg && msg.type === 'progress') {
115
+ // BUG FIX: Reset timer on progress — worker is alive and making progress
116
+ resetSubBatchTimer();
74
117
  workerProgress[i] = msg.filesProcessed;
75
118
  if (onProgress) {
76
119
  const total = workerProgress.reduce((a, b) => a + b, 0);
@@ -116,7 +159,20 @@ export const createWorkerPool = (workerUrl, poolSize) => {
116
159
  sendNextSubBatch();
117
160
  });
118
161
  });
119
- return Promise.all(promises);
162
+ // Use allSettled so one worker failure doesn't discard other workers' results
163
+ return Promise.allSettled(promises).then(outcomes => {
164
+ const results = [];
165
+ const failures = [];
166
+ for (const outcome of outcomes) {
167
+ if (outcome.status === 'fulfilled') {
168
+ results.push(outcome.value);
169
+ }
170
+ else {
171
+ failures.push(outcome.reason instanceof Error ? outcome.reason : new Error(String(outcome.reason)));
172
+ }
173
+ }
174
+ return { results, failures };
175
+ });
120
176
  };
121
177
  const terminate = async () => {
122
178
  await Promise.all(workers.map(w => w.terminate()));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@zuvia-software-solutions/code-mapper",
3
- "version": "2.3.2",
3
+ "version": "2.3.4",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",