gitnexus 1.2.9 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,16 @@ import { getLanguageFromFilename, yieldToEventLoop } from './utils.js';
8
8
  import { SupportedLanguages } from '../../config/supported-languages.js';
9
9
  const isDev = process.env.NODE_ENV === 'development';
10
10
  export const createImportMap = () => new Map();
11
+ /** Max entries in the resolve cache. When the cap is reached, the oldest 20% of entries are evicted to bound memory.
12
+ * 100K entries ≈ 15MB — covers the most common import patterns. */
13
+ const RESOLVE_CACHE_CAP = 100_000;
14
/**
 * Build the lookup structures shared by import resolution so they can be
 * created once and reused by several resolution passes.
 *
 * @param {string[]} allPaths - File paths to resolve imports against.
 * @returns {{
 *   allFilePaths: Set<string>,
 *   allFileList: string[],
 *   normalizedFileList: string[],
 *   suffixIndex: *,
 *   resolveCache: Map
 * }} The original list, a Set of the same paths, a copy with backslashes
 *   turned into forward slashes, the index returned by buildSuffixIndex for
 *   those two lists, and an empty resolve cache.
 */
export function buildImportResolutionContext(allPaths) {
    // Same order as allPaths, with every backslash replaced by a forward slash.
    const forwardSlashPaths = allPaths.map((filePath) => filePath.replace(/\\/g, '/'));
    return {
        allFilePaths: new Set(allPaths),
        allFileList: allPaths,
        normalizedFileList: forwardSlashPaths,
        suffixIndex: buildSuffixIndex(forwardSlashPaths, allPaths),
        resolveCache: new Map(),
    };
}
11
21
  /**
12
22
  * Parse tsconfig.json to extract path aliases.
13
23
  * Tries tsconfig.json, tsconfig.app.json, tsconfig.base.json in order.
@@ -196,6 +206,16 @@ const resolveImportPath = (currentFile, importPath, allFiles, allFileList, norma
196
206
  if (resolveCache.has(cacheKey))
197
207
  return resolveCache.get(cacheKey) ?? null;
198
208
  const cache = (result) => {
209
+ // Evict oldest 20% when cap is reached instead of clearing all
210
+ if (resolveCache.size >= RESOLVE_CACHE_CAP) {
211
+ const evictCount = Math.floor(RESOLVE_CACHE_CAP * 0.2);
212
+ const iter = resolveCache.keys();
213
+ for (let i = 0; i < evictCount; i++) {
214
+ const key = iter.next().value;
215
+ if (key !== undefined)
216
+ resolveCache.delete(key);
217
+ }
218
+ }
199
219
  resolveCache.set(cacheKey, result);
200
220
  return result;
201
221
  };
@@ -429,12 +449,12 @@ function resolveGoPackage(importPath, goModule, normalizedFileList, allFileList)
429
449
  // ============================================================================
430
450
  // MAIN IMPORT PROCESSOR
431
451
  // ============================================================================
432
- export const processImports = async (graph, files, astCache, importMap, onProgress, repoRoot) => {
433
- // Create a Set of all file paths for fast lookup during resolution
434
- const allFilePaths = new Set(files.map(f => f.path));
452
+ export const processImports = async (graph, files, astCache, importMap, onProgress, repoRoot, allPaths) => {
453
+ // Use allPaths (full repo) when available for cross-chunk resolution, else fall back to chunk files
454
+ const allFileList = allPaths ?? files.map(f => f.path);
455
+ const allFilePaths = new Set(allFileList);
435
456
  const parser = await loadParser();
436
457
  const resolveCache = new Map();
437
- const allFileList = files.map(f => f.path);
438
458
  // Pre-compute normalized file list once (forward slashes)
439
459
  const normalizedFileList = allFileList.map(p => p.replace(/\\/g, '/'));
440
460
  // Build suffix index for O(1) lookups
@@ -573,13 +593,9 @@ export const processImports = async (graph, files, astCache, importMap, onProgre
573
593
  // ============================================================================
574
594
  // FAST PATH: Resolve pre-extracted imports (no parsing needed)
575
595
  // ============================================================================
576
- export const processImportsFromExtracted = async (graph, files, extractedImports, importMap, onProgress, repoRoot) => {
577
- const allFilePaths = new Set(files.map(f => f.path));
578
- const resolveCache = new Map();
579
- const allFileList = files.map(f => f.path);
580
- const normalizedFileList = allFileList.map(p => p.replace(/\\/g, '/'));
581
- // Build suffix index for O(1) lookups
582
- const index = buildSuffixIndex(normalizedFileList, allFileList);
596
+ export const processImportsFromExtracted = async (graph, files, extractedImports, importMap, onProgress, repoRoot, prebuiltCtx) => {
597
+ const ctx = prebuiltCtx ?? buildImportResolutionContext(files.map(f => f.path));
598
+ const { allFilePaths, allFileList, normalizedFileList, suffixIndex: index, resolveCache } = ctx;
583
599
  let totalImportsFound = 0;
584
600
  let totalImportsResolved = 0;
585
601
  const effectiveRoot = repoRoot || '';
@@ -106,15 +106,13 @@ const processParsingWithWorkers = async (graph, files, symbolTable, astCache, wo
106
106
  const parseableFiles = [];
107
107
  for (const file of files) {
108
108
  const lang = getLanguageFromFilename(file.path);
109
- if (lang) {
109
+ if (lang)
110
110
  parseableFiles.push({ path: file.path, content: file.content });
111
- }
112
111
  }
113
112
  if (parseableFiles.length === 0)
114
113
  return { imports: [], calls: [], heritage: [] };
115
114
  const total = files.length;
116
- // Dispatch to worker pool — pool handles splitting into chunks
117
- // Workers send progress messages during parsing so the bar updates smoothly
115
+ // Dispatch to worker pool — pool handles splitting into chunks and sub-batching
118
116
  const chunkResults = await workerPool.dispatch(parseableFiles, (filesProcessed) => {
119
117
  onFileProgress?.(Math.min(filesProcessed, total), total, 'Parsing...');
120
118
  });
@@ -1,34 +1,41 @@
1
1
  import { createKnowledgeGraph } from '../graph/graph.js';
2
2
  import { processStructure } from './structure-processor.js';
3
3
  import { processParsing } from './parsing-processor.js';
4
- import { processImports, processImportsFromExtracted, createImportMap } from './import-processor.js';
4
+ import { processImports, processImportsFromExtracted, createImportMap, buildImportResolutionContext } from './import-processor.js';
5
5
  import { processCalls, processCallsFromExtracted } from './call-processor.js';
6
6
  import { processHeritage, processHeritageFromExtracted } from './heritage-processor.js';
7
7
  import { processCommunities } from './community-processor.js';
8
8
  import { processProcesses } from './process-processor.js';
9
9
  import { createSymbolTable } from './symbol-table.js';
10
10
  import { createASTCache } from './ast-cache.js';
11
- import { walkRepository } from './filesystem-walker.js';
11
+ import { walkRepositoryPaths, readFileContents } from './filesystem-walker.js';
12
+ import { getLanguageFromFilename } from './utils.js';
12
13
  import { createWorkerPool } from './workers/worker-pool.js';
13
14
  const isDev = process.env.NODE_ENV === 'development';
15
+ /** Max bytes of source content to load per parse chunk. Each chunk's source +
16
+ * parsed ASTs + extracted records + worker serialization overhead all live in
17
+ * memory simultaneously, so this must be conservative. 20MB source ≈ 200-400MB
18
+ * peak working memory per chunk after parse expansion. */
19
+ const CHUNK_BYTE_BUDGET = 20 * 1024 * 1024; // 20MB
20
+ /** Max AST trees to keep in LRU cache */
21
+ const AST_CACHE_CAP = 50;
14
22
  export const runPipelineFromRepo = async (repoPath, onProgress) => {
15
23
  const graph = createKnowledgeGraph();
16
- const fileContents = new Map();
17
24
  const symbolTable = createSymbolTable();
18
- // AST cache sized after file scan — start with a placeholder, resize after we know file count
19
- let astCache = createASTCache(50);
25
+ let astCache = createASTCache(AST_CACHE_CAP);
20
26
  const importMap = createImportMap();
21
27
  const cleanup = () => {
22
28
  astCache.clear();
23
29
  symbolTable.clear();
24
30
  };
25
31
  try {
32
+ // ── Phase 1: Scan paths only (no content read) ─────────────────────
26
33
  onProgress({
27
34
  phase: 'extracting',
28
35
  percent: 0,
29
36
  message: 'Scanning repository...',
30
37
  });
31
- const files = await walkRepository(repoPath, (current, total, filePath) => {
38
+ const scannedFiles = await walkRepositoryPaths(repoPath, (current, total, filePath) => {
32
39
  const scanProgress = Math.round((current / total) * 15);
33
40
  onProgress({
34
41
  phase: 'extracting',
@@ -38,167 +45,165 @@ export const runPipelineFromRepo = async (repoPath, onProgress) => {
38
45
  stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
39
46
  });
40
47
  });
41
- files.forEach(f => fileContents.set(f.path, f.content));
42
- // Resize AST cache to fit all files — avoids re-parsing in import/call/heritage phases
43
- astCache = createASTCache(files.length);
48
+ const totalFiles = scannedFiles.length;
44
49
  onProgress({
45
50
  phase: 'extracting',
46
51
  percent: 15,
47
52
  message: 'Repository scanned successfully',
48
- stats: { filesProcessed: files.length, totalFiles: files.length, nodesCreated: graph.nodeCount },
53
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
49
54
  });
55
+ // ── Phase 2: Structure (paths only — no content needed) ────────────
50
56
  onProgress({
51
57
  phase: 'structure',
52
58
  percent: 15,
53
59
  message: 'Analyzing project structure...',
54
- stats: { filesProcessed: 0, totalFiles: files.length, nodesCreated: graph.nodeCount },
60
+ stats: { filesProcessed: 0, totalFiles, nodesCreated: graph.nodeCount },
55
61
  });
56
- const filePaths = files.map(f => f.path);
57
- processStructure(graph, filePaths);
62
+ const allPaths = scannedFiles.map(f => f.path);
63
+ processStructure(graph, allPaths);
58
64
  onProgress({
59
65
  phase: 'structure',
60
- percent: 30,
66
+ percent: 20,
61
67
  message: 'Project structure analyzed',
62
- stats: { filesProcessed: files.length, totalFiles: files.length, nodesCreated: graph.nodeCount },
68
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
63
69
  });
70
+ // ── Phase 3+4: Chunked read + parse ────────────────────────────────
71
+ // Group parseable files into byte-budget chunks so only ~20MB of source
72
+ // is in memory at a time. Each chunk is: read → parse → extract → free.
73
+ const parseableScanned = scannedFiles.filter(f => getLanguageFromFilename(f.path));
74
+ const totalParseable = parseableScanned.length;
75
+ // Build byte-budget chunks
76
+ const chunks = [];
77
+ let currentChunk = [];
78
+ let currentBytes = 0;
79
+ for (const file of parseableScanned) {
80
+ if (currentChunk.length > 0 && currentBytes + file.size > CHUNK_BYTE_BUDGET) {
81
+ chunks.push(currentChunk);
82
+ currentChunk = [];
83
+ currentBytes = 0;
84
+ }
85
+ currentChunk.push(file.path);
86
+ currentBytes += file.size;
87
+ }
88
+ if (currentChunk.length > 0)
89
+ chunks.push(currentChunk);
90
+ const numChunks = chunks.length;
91
+ if (isDev) {
92
+ const totalMB = parseableScanned.reduce((s, f) => s + f.size, 0) / (1024 * 1024);
93
+ console.log(`📂 Scan: ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${CHUNK_BYTE_BUDGET / (1024 * 1024)}MB budget`);
94
+ }
64
95
  onProgress({
65
96
  phase: 'parsing',
66
- percent: 30,
67
- message: 'Parsing code definitions...',
68
- stats: { filesProcessed: 0, totalFiles: files.length, nodesCreated: graph.nodeCount },
97
+ percent: 20,
98
+ message: `Parsing ${totalParseable} files in ${numChunks} chunk${numChunks !== 1 ? 's' : ''}...`,
99
+ stats: { filesProcessed: 0, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
69
100
  });
70
- // Create worker pool for parallel parsing, with graceful fallback
101
+ // Create worker pool once, reuse across chunks
71
102
  let workerPool;
72
103
  try {
73
104
  const workerUrl = new URL('./workers/parse-worker.js', import.meta.url);
74
105
  workerPool = createWorkerPool(workerUrl);
75
106
  }
76
107
  catch (err) {
77
- // Worker pool creation failed (e.g., single core) — sequential fallback
108
+ // Worker pool creation failed — sequential fallback
78
109
  }
79
- let workerData = null;
110
+ let filesParsedSoFar = 0;
111
+ // AST cache sized for one chunk (sequential fallback uses it for import/call/heritage)
112
+ const maxChunkFiles = chunks.reduce((max, c) => Math.max(max, c.length), 0);
113
+ astCache = createASTCache(maxChunkFiles);
114
+ // Build import resolution context once — suffix index, file lists, resolve cache.
115
+ // Reused across all chunks to avoid rebuilding O(files × path_depth) structures.
116
+ const importCtx = buildImportResolutionContext(allPaths);
117
+ const allPathObjects = allPaths.map(p => ({ path: p }));
118
+ // Single-pass: parse + resolve imports/calls/heritage per chunk.
119
+ // Calls/heritage use the symbol table built so far (symbols from earlier chunks
120
+ // are already registered). This trades ~5% cross-chunk resolution accuracy for
121
+ // 200-400MB less memory — critical for Linux-kernel-scale repos.
122
+ const sequentialChunkPaths = [];
80
123
  try {
81
- workerData = await processParsing(graph, files, symbolTable, astCache, (current, total, filePath) => {
82
- const parsingProgress = 30 + ((current / total) * 40);
83
- onProgress({
84
- phase: 'parsing',
85
- percent: Math.round(parsingProgress),
86
- message: 'Parsing code definitions...',
87
- detail: filePath,
88
- stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
89
- });
90
- }, workerPool);
124
+ for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {
125
+ const chunkPaths = chunks[chunkIdx];
126
+ // Read content for this chunk only
127
+ const chunkContents = await readFileContents(repoPath, chunkPaths);
128
+ const chunkFiles = chunkPaths
129
+ .filter(p => chunkContents.has(p))
130
+ .map(p => ({ path: p, content: chunkContents.get(p) }));
131
+ // Parse this chunk (workers or sequential fallback)
132
+ const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, (current, _total, filePath) => {
133
+ const globalCurrent = filesParsedSoFar + current;
134
+ const parsingProgress = 20 + ((globalCurrent / totalParseable) * 62);
135
+ onProgress({
136
+ phase: 'parsing',
137
+ percent: Math.round(parsingProgress),
138
+ message: `Parsing chunk ${chunkIdx + 1}/${numChunks}...`,
139
+ detail: filePath,
140
+ stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
141
+ });
142
+ }, workerPool);
143
+ if (chunkWorkerData) {
144
+ // Imports
145
+ await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, importMap, undefined, repoPath, importCtx);
146
+ // Calls — resolve immediately, then free the array
147
+ if (chunkWorkerData.calls.length > 0) {
148
+ await processCallsFromExtracted(graph, chunkWorkerData.calls, symbolTable, importMap);
149
+ }
150
+ // Heritage — resolve immediately, then free
151
+ if (chunkWorkerData.heritage.length > 0) {
152
+ await processHeritageFromExtracted(graph, chunkWorkerData.heritage, symbolTable);
153
+ }
154
+ }
155
+ else {
156
+ await processImports(graph, chunkFiles, astCache, importMap, undefined, repoPath, allPaths);
157
+ sequentialChunkPaths.push(chunkPaths);
158
+ }
159
+ filesParsedSoFar += chunkFiles.length;
160
+ // Clear AST cache between chunks to free memory
161
+ astCache.clear();
162
+ // chunkContents + chunkFiles + chunkWorkerData go out of scope → GC reclaims
163
+ }
91
164
  }
92
165
  finally {
93
166
  await workerPool?.terminate();
94
167
  }
95
- onProgress({
96
- phase: 'imports',
97
- percent: 70,
98
- message: 'Resolving imports...',
99
- stats: { filesProcessed: 0, totalFiles: files.length, nodesCreated: graph.nodeCount },
100
- });
101
- if (workerData) {
102
- // Fast path: imports already extracted by workers, just resolve paths
103
- await processImportsFromExtracted(graph, files, workerData.imports, importMap, (current, total) => {
104
- const importProgress = 70 + ((current / total) * 12);
105
- onProgress({
106
- phase: 'imports',
107
- percent: Math.round(importProgress),
108
- message: 'Resolving imports...',
109
- stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
110
- });
111
- }, repoPath);
112
- }
113
- else {
114
- // Fallback: full parse + resolve (sequential path)
115
- await processImports(graph, files, astCache, importMap, (current, total) => {
116
- const importProgress = 70 + ((current / total) * 12);
117
- onProgress({
118
- phase: 'imports',
119
- percent: Math.round(importProgress),
120
- message: 'Resolving imports...',
121
- stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
122
- });
123
- }, repoPath);
168
+ // Sequential fallback chunks: re-read source for call/heritage resolution
169
+ for (const chunkPaths of sequentialChunkPaths) {
170
+ const chunkContents = await readFileContents(repoPath, chunkPaths);
171
+ const chunkFiles = chunkPaths
172
+ .filter(p => chunkContents.has(p))
173
+ .map(p => ({ path: p, content: chunkContents.get(p) }));
174
+ astCache = createASTCache(chunkFiles.length);
175
+ await processCalls(graph, chunkFiles, astCache, symbolTable, importMap);
176
+ await processHeritage(graph, chunkFiles, astCache, symbolTable);
177
+ astCache.clear();
124
178
  }
179
+ // Free import resolution context — suffix index + resolve cache no longer needed
180
+ // (allPathObjects and importCtx hold ~94MB+ for large repos)
181
+ allPathObjects.length = 0;
182
+ importCtx.resolveCache.clear();
183
+ importCtx.suffixIndex = null;
184
+ importCtx.normalizedFileList = null;
125
185
  if (isDev) {
126
- const importsCount = graph.relationships.filter(r => r.type === 'IMPORTS').length;
127
- console.log(`📊 Pipeline: After import phase, graph has ${importsCount} IMPORTS relationships (total: ${graph.relationshipCount})`);
128
- }
129
- onProgress({
130
- phase: 'calls',
131
- percent: 82,
132
- message: 'Tracing function calls...',
133
- stats: { filesProcessed: 0, totalFiles: files.length, nodesCreated: graph.nodeCount },
134
- });
135
- if (workerData) {
136
- // Fast path: calls already extracted by workers, just resolve targets
137
- await processCallsFromExtracted(graph, workerData.calls, symbolTable, importMap, (current, total) => {
138
- const callProgress = 82 + ((current / total) * 10);
139
- onProgress({
140
- phase: 'calls',
141
- percent: Math.round(callProgress),
142
- message: 'Tracing function calls...',
143
- stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
144
- });
145
- });
146
- }
147
- else {
148
- // Fallback: full parse + resolve (sequential path)
149
- await processCalls(graph, files, astCache, symbolTable, importMap, (current, total) => {
150
- const callProgress = 82 + ((current / total) * 10);
151
- onProgress({
152
- phase: 'calls',
153
- percent: Math.round(callProgress),
154
- message: 'Tracing function calls...',
155
- stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
156
- });
157
- });
158
- }
159
- onProgress({
160
- phase: 'heritage',
161
- percent: 92,
162
- message: 'Extracting class inheritance...',
163
- stats: { filesProcessed: 0, totalFiles: files.length, nodesCreated: graph.nodeCount },
164
- });
165
- if (workerData) {
166
- // Fast path: heritage already extracted by workers, just resolve symbols
167
- await processHeritageFromExtracted(graph, workerData.heritage, symbolTable, (current, total) => {
168
- const heritageProgress = 88 + ((current / total) * 4);
169
- onProgress({
170
- phase: 'heritage',
171
- percent: Math.round(heritageProgress),
172
- message: 'Extracting class inheritance...',
173
- stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
174
- });
175
- });
176
- }
177
- else {
178
- // Fallback: full parse + resolve (sequential path)
179
- await processHeritage(graph, files, astCache, symbolTable, (current, total) => {
180
- const heritageProgress = 88 + ((current / total) * 4);
181
- onProgress({
182
- phase: 'heritage',
183
- percent: Math.round(heritageProgress),
184
- message: 'Extracting class inheritance...',
185
- stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
186
- });
187
- });
186
+ let importsCount = 0;
187
+ for (const r of graph.iterRelationships()) {
188
+ if (r.type === 'IMPORTS')
189
+ importsCount++;
190
+ }
191
+ console.log(`📊 Pipeline: graph has ${importsCount} IMPORTS, ${graph.relationshipCount} total relationships`);
188
192
  }
193
+ // ── Phase 5: Communities ───────────────────────────────────────────
189
194
  onProgress({
190
195
  phase: 'communities',
191
- percent: 92,
196
+ percent: 82,
192
197
  message: 'Detecting code communities...',
193
- stats: { filesProcessed: files.length, totalFiles: files.length, nodesCreated: graph.nodeCount },
198
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
194
199
  });
195
200
  const communityResult = await processCommunities(graph, (message, progress) => {
196
- const communityProgress = 92 + (progress * 0.06);
201
+ const communityProgress = 82 + (progress * 0.10);
197
202
  onProgress({
198
203
  phase: 'communities',
199
204
  percent: Math.round(communityProgress),
200
205
  message,
201
- stats: { filesProcessed: files.length, totalFiles: files.length, nodesCreated: graph.nodeCount },
206
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
202
207
  });
203
208
  });
204
209
  if (isDev) {
@@ -227,22 +232,24 @@ export const runPipelineFromRepo = async (repoPath, onProgress) => {
227
232
  reason: 'leiden-algorithm',
228
233
  });
229
234
  });
235
+ // ── Phase 6: Processes ─────────────────────────────────────────────
230
236
  onProgress({
231
237
  phase: 'processes',
232
- percent: 98,
238
+ percent: 94,
233
239
  message: 'Detecting execution flows...',
234
- stats: { filesProcessed: files.length, totalFiles: files.length, nodesCreated: graph.nodeCount },
240
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
235
241
  });
236
- // Dynamic process cap based on codebase size
237
- const symbolCount = graph.nodes.filter(n => n.label !== 'File').length;
242
+ let symbolCount = 0;
243
+ graph.forEachNode(n => { if (n.label !== 'File')
244
+ symbolCount++; });
238
245
  const dynamicMaxProcesses = Math.max(20, Math.min(300, Math.round(symbolCount / 10)));
239
246
  const processResult = await processProcesses(graph, communityResult.memberships, (message, progress) => {
240
- const processProgress = 98 + (progress * 0.01);
247
+ const processProgress = 94 + (progress * 0.05);
241
248
  onProgress({
242
249
  phase: 'processes',
243
250
  percent: Math.round(processProgress),
244
251
  message,
245
- stats: { filesProcessed: files.length, totalFiles: files.length, nodesCreated: graph.nodeCount },
252
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
246
253
  });
247
254
  }, { maxProcesses: dynamicMaxProcesses, minSteps: 3 });
248
255
  if (isDev) {
@@ -280,13 +287,13 @@ export const runPipelineFromRepo = async (repoPath, onProgress) => {
280
287
  percent: 100,
281
288
  message: `Graph complete! ${communityResult.stats.totalCommunities} communities, ${processResult.stats.totalProcesses} processes detected.`,
282
289
  stats: {
283
- filesProcessed: files.length,
284
- totalFiles: files.length,
290
+ filesProcessed: totalFiles,
291
+ totalFiles,
285
292
  nodesCreated: graph.nodeCount
286
293
  },
287
294
  });
288
295
  astCache.clear();
289
- return { graph, fileContents, communityResult, processResult };
296
+ return { graph, repoPath, totalFileCount: totalFiles, communityResult, processResult };
290
297
  }
291
298
  catch (error) {
292
299
  cleanup();
@@ -34,7 +34,8 @@ export const processProcesses = async (knowledgeGraph, memberships, onProgress,
34
34
  const callsEdges = buildCallsGraph(knowledgeGraph);
35
35
  const reverseCallsEdges = buildReverseCallsGraph(knowledgeGraph);
36
36
  const nodeMap = new Map();
37
- knowledgeGraph.nodes.forEach(n => nodeMap.set(n.id, n));
37
+ for (const n of knowledgeGraph.iterNodes())
38
+ nodeMap.set(n.id, n);
38
39
  // Step 1: Find entry points (functions that call others but have few callers)
39
40
  const entryPoints = findEntryPoints(knowledgeGraph, reverseCallsEdges, callsEdges);
40
41
  onProgress?.(`Found ${entryPoints.length} entry points, tracing flows...`, 20);
@@ -129,26 +130,26 @@ export const processProcesses = async (knowledgeGraph, memberships, onProgress,
129
130
  const MIN_TRACE_CONFIDENCE = 0.5;
130
131
  const buildCallsGraph = (graph) => {
131
132
  const adj = new Map();
132
- graph.relationships.forEach(rel => {
133
+ for (const rel of graph.iterRelationships()) {
133
134
  if (rel.type === 'CALLS' && rel.confidence >= MIN_TRACE_CONFIDENCE) {
134
135
  if (!adj.has(rel.sourceId)) {
135
136
  adj.set(rel.sourceId, []);
136
137
  }
137
138
  adj.get(rel.sourceId).push(rel.targetId);
138
139
  }
139
- });
140
+ }
140
141
  return adj;
141
142
  };
142
143
  const buildReverseCallsGraph = (graph) => {
143
144
  const adj = new Map();
144
- graph.relationships.forEach(rel => {
145
+ for (const rel of graph.iterRelationships()) {
145
146
  if (rel.type === 'CALLS' && rel.confidence >= MIN_TRACE_CONFIDENCE) {
146
147
  if (!adj.has(rel.targetId)) {
147
148
  adj.set(rel.targetId, []);
148
149
  }
149
150
  adj.get(rel.targetId).push(rel.sourceId);
150
151
  }
151
- });
152
+ }
152
153
  return adj;
153
154
  };
154
155
  /**
@@ -164,32 +165,32 @@ const buildReverseCallsGraph = (graph) => {
164
165
  const findEntryPoints = (graph, reverseCallsEdges, callsEdges) => {
165
166
  const symbolTypes = new Set(['Function', 'Method']);
166
167
  const entryPointCandidates = [];
167
- graph.nodes.forEach(node => {
168
+ for (const node of graph.iterNodes()) {
168
169
  if (!symbolTypes.has(node.label))
169
- return;
170
+ continue;
170
171
  const filePath = node.properties.filePath || '';
171
172
  // Skip test files entirely
172
173
  if (isTestFile(filePath))
173
- return;
174
+ continue;
174
175
  const callers = reverseCallsEdges.get(node.id) || [];
175
176
  const callees = callsEdges.get(node.id) || [];
176
177
  // Must have at least 1 outgoing call to trace forward
177
178
  if (callees.length === 0)
178
- return;
179
+ continue;
179
180
  // Calculate entry point score using new scoring system
180
181
  const { score, reasons } = calculateEntryPointScore(node.properties.name, node.properties.language || 'javascript', node.properties.isExported ?? false, callers.length, callees.length, filePath // Pass filePath for framework detection
181
182
  );
182
183
  if (score > 0) {
183
184
  entryPointCandidates.push({ id: node.id, score, reasons });
184
185
  }
185
- });
186
+ }
186
187
  // Sort by score descending and return top candidates
187
188
  const sorted = entryPointCandidates.sort((a, b) => b.score - a.score);
188
189
  // DEBUG: Log top candidates with new scoring details
189
190
  if (sorted.length > 0 && isDev) {
190
191
  console.log(`[Process] Top 10 entry point candidates (new scoring):`);
191
192
  sorted.slice(0, 10).forEach((c, i) => {
192
- const node = graph.nodes.find(n => n.id === c.id);
193
+ const node = graph.getNode(c.id);
193
194
  const exported = node?.properties.isExported ? '✓' : '✗';
194
195
  const shortPath = node?.properties.filePath?.split('/').slice(-2).join('/') || '';
195
196
  console.log(` ${i + 1}. ${node?.properties.name} [exported:${exported}] (${shortPath})`);
@@ -171,6 +171,7 @@ const findEnclosingFunctionId = (node, filePath) => {
171
171
  return null;
172
172
  };
173
173
  const BUILT_INS = new Set([
174
+ // JavaScript/TypeScript
174
175
  'console', 'log', 'warn', 'error', 'info', 'debug',
175
176
  'setTimeout', 'setInterval', 'clearTimeout', 'clearInterval',
176
177
  'parseInt', 'parseFloat', 'isNaN', 'isFinite',
@@ -189,10 +190,32 @@ const BUILT_INS = new Set([
189
190
  'push', 'pop', 'shift', 'unshift', 'sort', 'reverse',
190
191
  'keys', 'values', 'entries', 'assign', 'freeze', 'seal',
191
192
  'hasOwnProperty', 'toString', 'valueOf',
193
+ // Python
192
194
  'print', 'len', 'range', 'str', 'int', 'float', 'list', 'dict', 'set', 'tuple',
193
195
  'open', 'read', 'write', 'close', 'append', 'extend', 'update',
194
196
  'super', 'type', 'isinstance', 'issubclass', 'getattr', 'setattr', 'hasattr',
195
197
  'enumerate', 'zip', 'sorted', 'reversed', 'min', 'max', 'sum', 'abs',
198
+ // C/C++ standard library
199
+ 'printf', 'fprintf', 'sprintf', 'snprintf', 'vprintf', 'vfprintf', 'vsprintf', 'vsnprintf',
200
+ 'scanf', 'fscanf', 'sscanf',
201
+ 'malloc', 'calloc', 'realloc', 'free', 'memcpy', 'memmove', 'memset', 'memcmp',
202
+ 'strlen', 'strcpy', 'strncpy', 'strcat', 'strncat', 'strcmp', 'strncmp', 'strstr', 'strchr', 'strrchr',
203
+ 'atoi', 'atol', 'atof', 'strtol', 'strtoul', 'strtoll', 'strtoull', 'strtod',
204
+ 'sizeof', 'offsetof', 'typeof',
205
+ 'assert', 'abort', 'exit', '_exit',
206
+ 'fopen', 'fclose', 'fread', 'fwrite', 'fseek', 'ftell', 'rewind', 'fflush', 'fgets', 'fputs',
207
+ // Linux kernel common macros/helpers (not real call targets)
208
+ 'likely', 'unlikely', 'BUG', 'BUG_ON', 'WARN', 'WARN_ON', 'WARN_ONCE',
209
+ 'IS_ERR', 'PTR_ERR', 'ERR_PTR', 'IS_ERR_OR_NULL',
210
+ 'ARRAY_SIZE', 'container_of', 'list_for_each_entry', 'list_for_each_entry_safe',
211
+ 'min', 'max', 'clamp', 'abs', 'swap',
212
+ 'pr_info', 'pr_warn', 'pr_err', 'pr_debug', 'pr_notice', 'pr_crit', 'pr_emerg',
213
+ 'printk', 'dev_info', 'dev_warn', 'dev_err', 'dev_dbg',
214
+ 'GFP_KERNEL', 'GFP_ATOMIC',
215
+ 'spin_lock', 'spin_unlock', 'spin_lock_irqsave', 'spin_unlock_irqrestore',
216
+ 'mutex_lock', 'mutex_unlock', 'mutex_init',
217
+ 'kfree', 'kmalloc', 'kzalloc', 'kcalloc', 'krealloc', 'kvmalloc', 'kvfree',
218
+ 'get', 'put',
196
219
  ]);
197
220
  // ============================================================================
198
221
  // Label detection from capture map
@@ -444,14 +467,52 @@ const processFileGroup = (files, language, queryString, result, onFileProcessed)
444
467
  }
445
468
  };
446
469
  // ============================================================================
447
- // Worker message handler
470
+ // Worker message handler — supports sub-batch streaming
448
471
  // ============================================================================
449
- parentPort.on('message', (files) => {
472
+ /** Accumulated result across sub-batches */
473
+ let accumulated = {
474
+ nodes: [], relationships: [], symbols: [],
475
+ imports: [], calls: [], heritage: [], fileCount: 0,
476
+ };
477
+ let cumulativeProcessed = 0;
478
/**
 * Append one sub-batch result onto the running accumulator, in place.
 *
 * Each list field of `src` is appended to the matching list on `target`,
 * and `fileCount` is added. `src` is not modified.
 *
 * Elements are appended one at a time rather than with `push(...list)`:
 * spreading passes every element as a separate call argument, and a very
 * large list can exceed the engine's argument/stack limit and throw a
 * RangeError.
 *
 * @param {object} target - Accumulator with array fields `nodes`,
 *   `relationships`, `symbols`, `imports`, `calls`, `heritage` and a numeric
 *   `fileCount`; mutated.
 * @param {object} src - Result with the same fields to merge in.
 * @returns {void}
 */
const mergeResult = (target, src) => {
    const listFields = ['nodes', 'relationships', 'symbols', 'imports', 'calls', 'heritage'];
    for (const field of listFields) {
        const dest = target[field];
        // Accept any iterable (as spread did), but index arrays directly.
        const items = Array.isArray(src[field]) ? src[field] : Array.from(src[field]);
        // Snapshot the length so merging a list into itself still terminates.
        const count = items.length;
        for (let i = 0; i < count; i++) {
            dest.push(items[i]);
        }
    }
    target.fileCount += src.fileCount;
};
487
+ parentPort.on('message', (msg) => {
450
488
  try {
451
- const result = processBatch(files, (filesProcessed) => {
452
- parentPort.postMessage({ type: 'progress', filesProcessed });
453
- });
454
- parentPort.postMessage({ type: 'result', data: result });
489
+ // Sub-batch mode: { type: 'sub-batch', files: [...] }
490
+ if (msg && msg.type === 'sub-batch') {
491
+ const result = processBatch(msg.files, (filesProcessed) => {
492
+ parentPort.postMessage({ type: 'progress', filesProcessed: cumulativeProcessed + filesProcessed });
493
+ });
494
+ cumulativeProcessed += result.fileCount;
495
+ mergeResult(accumulated, result);
496
+ // Signal ready for next sub-batch
497
+ parentPort.postMessage({ type: 'sub-batch-done' });
498
+ return;
499
+ }
500
+ // Flush: send accumulated results
501
+ if (msg && msg.type === 'flush') {
502
+ parentPort.postMessage({ type: 'result', data: accumulated });
503
+ // Reset for potential reuse
504
+ accumulated = { nodes: [], relationships: [], symbols: [], imports: [], calls: [], heritage: [], fileCount: 0 };
505
+ cumulativeProcessed = 0;
506
+ return;
507
+ }
508
+ // Legacy single-message mode (backward compat): array of files
509
+ if (Array.isArray(msg)) {
510
+ const result = processBatch(msg, (filesProcessed) => {
511
+ parentPort.postMessage({ type: 'progress', filesProcessed });
512
+ });
513
+ parentPort.postMessage({ type: 'result', data: result });
514
+ return;
515
+ }
455
516
  }
456
517
  catch (err) {
457
518
  const message = err instanceof Error ? err.message : String(err);