@zuvia-software-solutions/code-mapper 2.3.9 → 2.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -298,22 +298,34 @@ export const analyzeCommand = async (inputPath, options) => {
298
298
  closeDb(dbPath);
299
299
  // Run Python embedder in batch mode — reads from SQLite, embeds, writes back.
300
300
  // Zero IPC overhead: ~3x faster than Node↔Python JSON streaming.
301
- const { execFile } = await import('child_process');
301
+ const { spawn: spawnChild } = await import('child_process');
302
302
  const { fileURLToPath } = await import('url');
303
303
  const mlxScript = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..', 'models', 'mlx-embedder.py');
304
304
  await new Promise((resolve, reject) => {
305
- const proc = execFile('python3', [mlxScript, 'batch', dbPath, '--dims', '256', '--max-tokens', '2048'], {
306
- maxBuffer: 10 * 1024 * 1024,
307
- timeout: 600_000, // 10 min max for huge codebases
308
- }, (err, _stdout, stderr) => {
309
- if (err) {
310
- console.error(stderr || '');
311
- reject(new Error(`Embedding failed: ${err.message}`));
305
+ // Use spawn (not execFile) — no internal buffer limit, streams only.
306
+ // execFile buffers all stdout in memory which causes OOM/kill on large codebases.
307
+ const proc = spawnChild('python3', [mlxScript, 'batch', dbPath, '--dims', '256', '--max-tokens', '2048'], {
308
+ stdio: ['ignore', 'pipe', 'pipe'],
309
+ });
310
+ let stderrBuf = '';
311
+ proc.stderr?.on('data', (chunk) => {
312
+ stderrBuf += chunk.toString();
313
+ // Keep only last 10KB of stderr for error reporting
314
+ if (stderrBuf.length > 10240)
315
+ stderrBuf = stderrBuf.slice(-10240);
316
+ });
317
+ proc.on('close', (code) => {
318
+ if (code !== 0) {
319
+ console.error(stderrBuf);
320
+ reject(new Error(`Embedding failed: python3 exited with code ${code}`));
312
321
  }
313
322
  else {
314
323
  resolve();
315
324
  }
316
325
  });
326
+ proc.on('error', (err) => {
327
+ reject(new Error(`Embedding failed: ${err.message}`));
328
+ });
317
329
  // Stream progress from Python's JSON lines on stdout
318
330
  let lineBuf = '';
319
331
  proc.stdout?.on('data', (chunk) => {
@@ -753,20 +753,7 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
753
753
  }
754
754
  if (eligible.length === 0)
755
755
  return results;
756
- // Group calls by file — process one file at a time so tsgo only needs
757
- // one file hot in memory. LSP is sequential over stdio, so concurrent
758
- // requests just create a queue that causes timeouts.
759
- const byFile = new Map();
760
- for (const call of eligible) {
761
- let list = byFile.get(call.filePath);
762
- if (!list) {
763
- list = [];
764
- byFile.set(call.filePath, list);
765
- }
766
- list.push(call);
767
- }
768
756
  // Built-in receiver names that resolve to external types, not project code.
769
- // tsgo always fails on these — skip them to avoid wasted LSP round-trips.
770
757
  const BUILTIN_RECEIVERS = new Set([
771
758
  'console', 'Math', 'JSON', 'Object', 'Array', 'String', 'Number', 'Boolean',
772
759
  'Date', 'RegExp', 'Error', 'Promise', 'Map', 'Set', 'WeakMap', 'WeakSet',
@@ -779,13 +766,10 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
779
766
  // Pre-filter calls where tsgo won't add value:
780
767
  // A. Free-form calls with unambiguous name — heuristic resolves perfectly
781
768
  // B. Member calls on built-in receivers — tsgo always fails on these
782
- // Note: member calls with known receiver types are NOT skipped — tsgo provides
783
- // compiler-verified 0.99 confidence that the heuristic can't match.
784
769
  const tsgoEligible = [];
785
770
  let skippedUnambiguous = 0;
786
771
  let skippedBuiltin = 0;
787
772
  for (const call of eligible) {
788
- // A. Free-form, unique name match
789
773
  if (call.callForm === 'free' || call.callForm === undefined) {
790
774
  const resolved = ctx.resolve(call.calledName, call.filePath);
791
775
  if (resolved && resolved.candidates.length === 1) {
@@ -793,7 +777,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
793
777
  continue;
794
778
  }
795
779
  }
796
- // B. Built-in receiver — tsgo resolves to node_modules/lib.d.ts, never project code
797
780
  if (call.callForm === 'member' && call.receiverName && BUILTIN_RECEIVERS.has(call.receiverName)) {
798
781
  skippedBuiltin++;
799
782
  continue;
@@ -812,10 +795,7 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
812
795
  }
813
796
  const t0 = Date.now();
814
797
  const skippedTotal = skippedUnambiguous + skippedBuiltin;
815
- // Adaptive parallelism based on three constraints:
816
- // 1. CPU: 75% of cores — parsing workers are done, leave 25% for Node.js event loop + OS
817
- // 2. Memory: each tsgo loads the full project (~500MB estimate) — cap by free system memory
818
- // 3. Workload: at least 50 files per process to amortize ~0.5s startup cost
798
+ // Adaptive parallelism
819
799
  const osModule = await import('os');
820
800
  const cpuCount = osModule.cpus().length;
821
801
  const freeMemGB = osModule.freemem() / (1024 * 1024 * 1024);
@@ -826,12 +806,9 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
826
806
  if (process.env['CODE_MAPPER_VERBOSE']) {
827
807
  console.error(`Code Mapper: tsgo resolving ${tsgoEligible.length} calls across ${tsgoByFile.size} files with ${actualWorkers} process${actualWorkers > 1 ? 'es' : ''} (skipped ${skippedTotal}: ${skippedUnambiguous} unambiguous, ${skippedBuiltin} builtin)...`);
828
808
  }
829
- // Dynamic dispatch: shared queue, each process grabs the next file when done.
830
- // Naturally self-balancing — fast processes get more work, zero idle time.
831
- // Sort heaviest files first so they're assigned early (avoids tail latency).
809
+ // Dynamic dispatch: shared queue sorted by call count descending
832
810
  const fileEntries = [...tsgoByFile.entries()];
833
811
  fileEntries.sort((a, b) => b[1].length - a[1].length);
834
- // Shared progress counter and file queue (single-threaded, no mutex needed)
835
812
  let totalFilesProcessed = 0;
836
813
  let nextFileIdx = 0;
837
814
  const tsgoTotalFiles = tsgoByFile.size;
@@ -840,7 +817,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
840
817
  return null;
841
818
  return fileEntries[nextFileIdx++];
842
819
  };
843
- /** Resolve files from the shared queue using a single tsgo service */
844
820
  const resolveWorker = async (service) => {
845
821
  const sliceResults = new Map();
846
822
  let sliceResolved = 0;
@@ -865,7 +841,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
865
841
  sliceFailed++;
866
842
  continue;
867
843
  }
868
- // Match by exact startLine, then by range containment
869
844
  let bestMatch;
870
845
  for (const sym of targetSymbols) {
871
846
  const node = graph.getNode(toNodeId(sym.nodeId));
@@ -908,8 +883,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
908
883
  sliceFailed++;
909
884
  }
910
885
  }
911
- // Close file after all its calls are resolved — frees tsgo memory,
912
- // prevents progressive slowdown as the type graph grows
913
886
  service.notifyFileDeleted(absFilePath);
914
887
  }
915
888
  return { resolved: sliceResolved, failed: sliceFailed, results: sliceResults };
@@ -917,7 +890,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
917
890
  let resolved = 0;
918
891
  let failed = 0;
919
892
  if (actualWorkers === 1) {
920
- // Single process — use the existing service (already started)
921
893
  const outcome = await resolveWorker(tsgoService);
922
894
  resolved = outcome.resolved;
923
895
  failed = outcome.failed;
@@ -925,10 +897,8 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
925
897
  results.set(k, v);
926
898
  }
927
899
  else {
928
- // Parallel — spawn extra services, all pull from shared queue
929
900
  const extraServices = [];
930
901
  try {
931
- // Start extra tsgo processes in parallel
932
902
  const startPromises = [];
933
903
  for (let i = 1; i < actualWorkers; i++) {
934
904
  startPromises.push((async () => {
@@ -943,11 +913,9 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
943
913
  if (svc)
944
914
  extraServices.push(svc);
945
915
  }
946
- // Build final service list: original + extras that started successfully
947
916
  const services = [tsgoService, ...extraServices];
948
917
  if (process.env['CODE_MAPPER_VERBOSE'])
949
918
  console.error(`Code Mapper: ${services.length} tsgo processes ready, resolving with dynamic dispatch...`);
950
- // All workers pull from the shared queue — naturally self-balancing
951
919
  const outcomes = await Promise.all(services.map(svc => resolveWorker(svc)));
952
920
  for (const outcome of outcomes) {
953
921
  resolved += outcome.resolved;
@@ -957,7 +925,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
957
925
  }
958
926
  }
959
927
  finally {
960
- // Stop extra services (the original is stopped by the caller)
961
928
  for (const svc of extraServices)
962
929
  svc.stop();
963
930
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@zuvia-software-solutions/code-mapper",
3
- "version": "2.3.9",
3
+ "version": "2.3.10",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",