@zuvia-software-solutions/code-mapper 2.3.9 → 2.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/analyze.js
CHANGED
|
@@ -298,22 +298,34 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
298
298
|
closeDb(dbPath);
|
|
299
299
|
// Run Python embedder in batch mode — reads from SQLite, embeds, writes back.
|
|
300
300
|
// Zero IPC overhead: ~3x faster than Node↔Python JSON streaming.
|
|
301
|
-
const {
|
|
301
|
+
const { spawn: spawnChild } = await import('child_process');
|
|
302
302
|
const { fileURLToPath } = await import('url');
|
|
303
303
|
const mlxScript = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '..', '..', 'models', 'mlx-embedder.py');
|
|
304
304
|
await new Promise((resolve, reject) => {
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
305
|
+
// Use spawn (not execFile) — no internal buffer limit, streams only.
|
|
306
|
+
// execFile buffers all stdout in memory which causes OOM/kill on large codebases.
|
|
307
|
+
const proc = spawnChild('python3', [mlxScript, 'batch', dbPath, '--dims', '256', '--max-tokens', '2048'], {
|
|
308
|
+
stdio: ['ignore', 'pipe', 'pipe'],
|
|
309
|
+
});
|
|
310
|
+
let stderrBuf = '';
|
|
311
|
+
proc.stderr?.on('data', (chunk) => {
|
|
312
|
+
stderrBuf += chunk.toString();
|
|
313
|
+
// Keep only last 10KB of stderr for error reporting
|
|
314
|
+
if (stderrBuf.length > 10240)
|
|
315
|
+
stderrBuf = stderrBuf.slice(-10240);
|
|
316
|
+
});
|
|
317
|
+
proc.on('close', (code) => {
|
|
318
|
+
if (code !== 0) {
|
|
319
|
+
console.error(stderrBuf);
|
|
320
|
+
reject(new Error(`Embedding failed: python3 exited with code ${code}`));
|
|
312
321
|
}
|
|
313
322
|
else {
|
|
314
323
|
resolve();
|
|
315
324
|
}
|
|
316
325
|
});
|
|
326
|
+
proc.on('error', (err) => {
|
|
327
|
+
reject(new Error(`Embedding failed: ${err.message}`));
|
|
328
|
+
});
|
|
317
329
|
// Stream progress from Python's JSON lines on stdout
|
|
318
330
|
let lineBuf = '';
|
|
319
331
|
proc.stdout?.on('data', (chunk) => {
|
|
@@ -753,20 +753,7 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
|
|
|
753
753
|
}
|
|
754
754
|
if (eligible.length === 0)
|
|
755
755
|
return results;
|
|
756
|
-
// Group calls by file — process one file at a time so tsgo only needs
|
|
757
|
-
// one file hot in memory. LSP is sequential over stdio, so concurrent
|
|
758
|
-
// requests just create a queue that causes timeouts.
|
|
759
|
-
const byFile = new Map();
|
|
760
|
-
for (const call of eligible) {
|
|
761
|
-
let list = byFile.get(call.filePath);
|
|
762
|
-
if (!list) {
|
|
763
|
-
list = [];
|
|
764
|
-
byFile.set(call.filePath, list);
|
|
765
|
-
}
|
|
766
|
-
list.push(call);
|
|
767
|
-
}
|
|
768
756
|
// Built-in receiver names that resolve to external types, not project code.
|
|
769
|
-
// tsgo always fails on these — skip them to avoid wasted LSP round-trips.
|
|
770
757
|
const BUILTIN_RECEIVERS = new Set([
|
|
771
758
|
'console', 'Math', 'JSON', 'Object', 'Array', 'String', 'Number', 'Boolean',
|
|
772
759
|
'Date', 'RegExp', 'Error', 'Promise', 'Map', 'Set', 'WeakMap', 'WeakSet',
|
|
@@ -779,13 +766,10 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
|
|
|
779
766
|
// Pre-filter calls where tsgo won't add value:
|
|
780
767
|
// A. Free-form calls with unambiguous name — heuristic resolves perfectly
|
|
781
768
|
// B. Member calls on built-in receivers — tsgo always fails on these
|
|
782
|
-
// Note: member calls with known receiver types are NOT skipped — tsgo provides
|
|
783
|
-
// compiler-verified 0.99 confidence that the heuristic can't match.
|
|
784
769
|
const tsgoEligible = [];
|
|
785
770
|
let skippedUnambiguous = 0;
|
|
786
771
|
let skippedBuiltin = 0;
|
|
787
772
|
for (const call of eligible) {
|
|
788
|
-
// A. Free-form, unique name match
|
|
789
773
|
if (call.callForm === 'free' || call.callForm === undefined) {
|
|
790
774
|
const resolved = ctx.resolve(call.calledName, call.filePath);
|
|
791
775
|
if (resolved && resolved.candidates.length === 1) {
|
|
@@ -793,7 +777,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
|
|
|
793
777
|
continue;
|
|
794
778
|
}
|
|
795
779
|
}
|
|
796
|
-
// B. Built-in receiver — tsgo resolves to node_modules/lib.d.ts, never project code
|
|
797
780
|
if (call.callForm === 'member' && call.receiverName && BUILTIN_RECEIVERS.has(call.receiverName)) {
|
|
798
781
|
skippedBuiltin++;
|
|
799
782
|
continue;
|
|
@@ -812,10 +795,7 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
|
|
|
812
795
|
}
|
|
813
796
|
const t0 = Date.now();
|
|
814
797
|
const skippedTotal = skippedUnambiguous + skippedBuiltin;
|
|
815
|
-
// Adaptive parallelism
|
|
816
|
-
// 1. CPU: 75% of cores — parsing workers are done, leave 25% for Node.js event loop + OS
|
|
817
|
-
// 2. Memory: each tsgo loads the full project (~500MB estimate) — cap by free system memory
|
|
818
|
-
// 3. Workload: at least 50 files per process to amortize ~0.5s startup cost
|
|
798
|
+
// Adaptive parallelism
|
|
819
799
|
const osModule = await import('os');
|
|
820
800
|
const cpuCount = osModule.cpus().length;
|
|
821
801
|
const freeMemGB = osModule.freemem() / (1024 * 1024 * 1024);
|
|
@@ -826,12 +806,9 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
|
|
|
826
806
|
if (process.env['CODE_MAPPER_VERBOSE']) {
|
|
827
807
|
console.error(`Code Mapper: tsgo resolving ${tsgoEligible.length} calls across ${tsgoByFile.size} files with ${actualWorkers} process${actualWorkers > 1 ? 'es' : ''} (skipped ${skippedTotal}: ${skippedUnambiguous} unambiguous, ${skippedBuiltin} builtin)...`);
|
|
828
808
|
}
|
|
829
|
-
// Dynamic dispatch: shared queue
|
|
830
|
-
// Naturally self-balancing — fast processes get more work, zero idle time.
|
|
831
|
-
// Sort heaviest files first so they're assigned early (avoids tail latency).
|
|
809
|
+
// Dynamic dispatch: shared queue sorted by call count descending
|
|
832
810
|
const fileEntries = [...tsgoByFile.entries()];
|
|
833
811
|
fileEntries.sort((a, b) => b[1].length - a[1].length);
|
|
834
|
-
// Shared progress counter and file queue (single-threaded, no mutex needed)
|
|
835
812
|
let totalFilesProcessed = 0;
|
|
836
813
|
let nextFileIdx = 0;
|
|
837
814
|
const tsgoTotalFiles = tsgoByFile.size;
|
|
@@ -840,7 +817,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
|
|
|
840
817
|
return null;
|
|
841
818
|
return fileEntries[nextFileIdx++];
|
|
842
819
|
};
|
|
843
|
-
/** Resolve files from the shared queue using a single tsgo service */
|
|
844
820
|
const resolveWorker = async (service) => {
|
|
845
821
|
const sliceResults = new Map();
|
|
846
822
|
let sliceResolved = 0;
|
|
@@ -865,7 +841,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
|
|
|
865
841
|
sliceFailed++;
|
|
866
842
|
continue;
|
|
867
843
|
}
|
|
868
|
-
// Match by exact startLine, then by range containment
|
|
869
844
|
let bestMatch;
|
|
870
845
|
for (const sym of targetSymbols) {
|
|
871
846
|
const node = graph.getNode(toNodeId(sym.nodeId));
|
|
@@ -908,8 +883,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
|
|
|
908
883
|
sliceFailed++;
|
|
909
884
|
}
|
|
910
885
|
}
|
|
911
|
-
// Close file after all its calls are resolved — frees tsgo memory,
|
|
912
|
-
// prevents progressive slowdown as the type graph grows
|
|
913
886
|
service.notifyFileDeleted(absFilePath);
|
|
914
887
|
}
|
|
915
888
|
return { resolved: sliceResolved, failed: sliceFailed, results: sliceResults };
|
|
@@ -917,7 +890,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
|
|
|
917
890
|
let resolved = 0;
|
|
918
891
|
let failed = 0;
|
|
919
892
|
if (actualWorkers === 1) {
|
|
920
|
-
// Single process — use the existing service (already started)
|
|
921
893
|
const outcome = await resolveWorker(tsgoService);
|
|
922
894
|
resolved = outcome.resolved;
|
|
923
895
|
failed = outcome.failed;
|
|
@@ -925,10 +897,8 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
|
|
|
925
897
|
results.set(k, v);
|
|
926
898
|
}
|
|
927
899
|
else {
|
|
928
|
-
// Parallel — spawn extra services, all pull from shared queue
|
|
929
900
|
const extraServices = [];
|
|
930
901
|
try {
|
|
931
|
-
// Start extra tsgo processes in parallel
|
|
932
902
|
const startPromises = [];
|
|
933
903
|
for (let i = 1; i < actualWorkers; i++) {
|
|
934
904
|
startPromises.push((async () => {
|
|
@@ -943,11 +913,9 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
|
|
|
943
913
|
if (svc)
|
|
944
914
|
extraServices.push(svc);
|
|
945
915
|
}
|
|
946
|
-
// Build final service list: original + extras that started successfully
|
|
947
916
|
const services = [tsgoService, ...extraServices];
|
|
948
917
|
if (process.env['CODE_MAPPER_VERBOSE'])
|
|
949
918
|
console.error(`Code Mapper: ${services.length} tsgo processes ready, resolving with dynamic dispatch...`);
|
|
950
|
-
// All workers pull from the shared queue — naturally self-balancing
|
|
951
919
|
const outcomes = await Promise.all(services.map(svc => resolveWorker(svc)));
|
|
952
920
|
for (const outcome of outcomes) {
|
|
953
921
|
resolved += outcome.resolved;
|
|
@@ -957,7 +925,6 @@ async function batchResolveTsgo(tsgoService, extractedCalls, ctx, graph, repoPat
|
|
|
957
925
|
}
|
|
958
926
|
}
|
|
959
927
|
finally {
|
|
960
|
-
// Stop extra services (the original is stopped by the caller)
|
|
961
928
|
for (const svc of extraServices)
|
|
962
929
|
svc.stop();
|
|
963
930
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@zuvia-software-solutions/code-mapper",
|
|
3
|
-
"version": "2.3.9",
|
|
3
|
+
"version": "2.3.10",
|
|
4
4
|
"description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
|
|
5
5
|
"author": "Abhigyan Patwari",
|
|
6
6
|
"license": "PolyForm-Noncommercial-1.0.0",
|