gitnexus 1.2.9 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/analyze.js +69 -28
- package/dist/cli/index.js +20 -0
- package/dist/core/graph/graph.js +5 -0
- package/dist/core/graph/types.d.ts +12 -1
- package/dist/core/ingestion/call-processor.js +52 -32
- package/dist/core/ingestion/community-processor.js +75 -40
- package/dist/core/ingestion/filesystem-walker.d.ts +23 -0
- package/dist/core/ingestion/filesystem-walker.js +38 -3
- package/dist/core/ingestion/import-processor.d.ts +11 -3
- package/dist/core/ingestion/import-processor.js +27 -11
- package/dist/core/ingestion/parsing-processor.js +2 -4
- package/dist/core/ingestion/pipeline.js +142 -135
- package/dist/core/ingestion/process-processor.js +12 -11
- package/dist/core/ingestion/workers/parse-worker.js +67 -6
- package/dist/core/ingestion/workers/worker-pool.d.ts +3 -9
- package/dist/core/ingestion/workers/worker-pool.js +39 -18
- package/dist/core/kuzu/csv-generator.d.ts +15 -8
- package/dist/core/kuzu/csv-generator.js +258 -196
- package/dist/core/kuzu/kuzu-adapter.d.ts +1 -4
- package/dist/core/kuzu/kuzu-adapter.js +75 -63
- package/dist/core/kuzu/schema.d.ts +1 -1
- package/dist/core/kuzu/schema.js +10 -0
- package/dist/types/pipeline.d.ts +6 -2
- package/dist/types/pipeline.js +6 -4
- package/package.json +1 -1
|
@@ -8,6 +8,16 @@ import { getLanguageFromFilename, yieldToEventLoop } from './utils.js';
|
|
|
8
8
|
import { SupportedLanguages } from '../../config/supported-languages.js';
|
|
9
9
|
const isDev = process.env.NODE_ENV === 'development';
|
|
10
10
|
export const createImportMap = () => new Map();
|
|
11
|
+
/** Max entries in the resolve cache. Once this cap is reached, the oldest-inserted 20% of entries are evicted to bound memory.
|
|
12
|
+
* 100K entries ≈ 15MB — covers the most common import patterns. */
|
|
13
|
+
const RESOLVE_CACHE_CAP = 100_000;
|
|
14
|
+
export function buildImportResolutionContext(allPaths) {
|
|
15
|
+
const allFileList = allPaths;
|
|
16
|
+
const normalizedFileList = allFileList.map(p => p.replace(/\\/g, '/'));
|
|
17
|
+
const allFilePaths = new Set(allFileList);
|
|
18
|
+
const suffixIndex = buildSuffixIndex(normalizedFileList, allFileList);
|
|
19
|
+
return { allFilePaths, allFileList, normalizedFileList, suffixIndex, resolveCache: new Map() };
|
|
20
|
+
}
|
|
11
21
|
/**
|
|
12
22
|
* Parse tsconfig.json to extract path aliases.
|
|
13
23
|
* Tries tsconfig.json, tsconfig.app.json, tsconfig.base.json in order.
|
|
@@ -196,6 +206,16 @@ const resolveImportPath = (currentFile, importPath, allFiles, allFileList, norma
|
|
|
196
206
|
if (resolveCache.has(cacheKey))
|
|
197
207
|
return resolveCache.get(cacheKey) ?? null;
|
|
198
208
|
const cache = (result) => {
|
|
209
|
+
// Evict oldest 20% when cap is reached instead of clearing all
|
|
210
|
+
if (resolveCache.size >= RESOLVE_CACHE_CAP) {
|
|
211
|
+
const evictCount = Math.floor(RESOLVE_CACHE_CAP * 0.2);
|
|
212
|
+
const iter = resolveCache.keys();
|
|
213
|
+
for (let i = 0; i < evictCount; i++) {
|
|
214
|
+
const key = iter.next().value;
|
|
215
|
+
if (key !== undefined)
|
|
216
|
+
resolveCache.delete(key);
|
|
217
|
+
}
|
|
218
|
+
}
|
|
199
219
|
resolveCache.set(cacheKey, result);
|
|
200
220
|
return result;
|
|
201
221
|
};
|
|
@@ -429,12 +449,12 @@ function resolveGoPackage(importPath, goModule, normalizedFileList, allFileList)
|
|
|
429
449
|
// ============================================================================
|
|
430
450
|
// MAIN IMPORT PROCESSOR
|
|
431
451
|
// ============================================================================
|
|
432
|
-
export const processImports = async (graph, files, astCache, importMap, onProgress, repoRoot) => {
|
|
433
|
-
//
|
|
434
|
-
const
|
|
452
|
+
export const processImports = async (graph, files, astCache, importMap, onProgress, repoRoot, allPaths) => {
|
|
453
|
+
// Use allPaths (full repo) when available for cross-chunk resolution, else fall back to chunk files
|
|
454
|
+
const allFileList = allPaths ?? files.map(f => f.path);
|
|
455
|
+
const allFilePaths = new Set(allFileList);
|
|
435
456
|
const parser = await loadParser();
|
|
436
457
|
const resolveCache = new Map();
|
|
437
|
-
const allFileList = files.map(f => f.path);
|
|
438
458
|
// Pre-compute normalized file list once (forward slashes)
|
|
439
459
|
const normalizedFileList = allFileList.map(p => p.replace(/\\/g, '/'));
|
|
440
460
|
// Build suffix index for O(1) lookups
|
|
@@ -573,13 +593,9 @@ export const processImports = async (graph, files, astCache, importMap, onProgre
|
|
|
573
593
|
// ============================================================================
|
|
574
594
|
// FAST PATH: Resolve pre-extracted imports (no parsing needed)
|
|
575
595
|
// ============================================================================
|
|
576
|
-
export const processImportsFromExtracted = async (graph, files, extractedImports, importMap, onProgress, repoRoot) => {
|
|
577
|
-
const
|
|
578
|
-
const resolveCache =
|
|
579
|
-
const allFileList = files.map(f => f.path);
|
|
580
|
-
const normalizedFileList = allFileList.map(p => p.replace(/\\/g, '/'));
|
|
581
|
-
// Build suffix index for O(1) lookups
|
|
582
|
-
const index = buildSuffixIndex(normalizedFileList, allFileList);
|
|
596
|
+
export const processImportsFromExtracted = async (graph, files, extractedImports, importMap, onProgress, repoRoot, prebuiltCtx) => {
|
|
597
|
+
const ctx = prebuiltCtx ?? buildImportResolutionContext(files.map(f => f.path));
|
|
598
|
+
const { allFilePaths, allFileList, normalizedFileList, suffixIndex: index, resolveCache } = ctx;
|
|
583
599
|
let totalImportsFound = 0;
|
|
584
600
|
let totalImportsResolved = 0;
|
|
585
601
|
const effectiveRoot = repoRoot || '';
|
|
@@ -106,15 +106,13 @@ const processParsingWithWorkers = async (graph, files, symbolTable, astCache, wo
|
|
|
106
106
|
const parseableFiles = [];
|
|
107
107
|
for (const file of files) {
|
|
108
108
|
const lang = getLanguageFromFilename(file.path);
|
|
109
|
-
if (lang)
|
|
109
|
+
if (lang)
|
|
110
110
|
parseableFiles.push({ path: file.path, content: file.content });
|
|
111
|
-
}
|
|
112
111
|
}
|
|
113
112
|
if (parseableFiles.length === 0)
|
|
114
113
|
return { imports: [], calls: [], heritage: [] };
|
|
115
114
|
const total = files.length;
|
|
116
|
-
// Dispatch to worker pool — pool handles splitting into chunks
|
|
117
|
-
// Workers send progress messages during parsing so the bar updates smoothly
|
|
115
|
+
// Dispatch to worker pool — pool handles splitting into chunks and sub-batching
|
|
118
116
|
const chunkResults = await workerPool.dispatch(parseableFiles, (filesProcessed) => {
|
|
119
117
|
onFileProgress?.(Math.min(filesProcessed, total), total, 'Parsing...');
|
|
120
118
|
});
|
|
@@ -1,34 +1,41 @@
|
|
|
1
1
|
import { createKnowledgeGraph } from '../graph/graph.js';
|
|
2
2
|
import { processStructure } from './structure-processor.js';
|
|
3
3
|
import { processParsing } from './parsing-processor.js';
|
|
4
|
-
import { processImports, processImportsFromExtracted, createImportMap } from './import-processor.js';
|
|
4
|
+
import { processImports, processImportsFromExtracted, createImportMap, buildImportResolutionContext } from './import-processor.js';
|
|
5
5
|
import { processCalls, processCallsFromExtracted } from './call-processor.js';
|
|
6
6
|
import { processHeritage, processHeritageFromExtracted } from './heritage-processor.js';
|
|
7
7
|
import { processCommunities } from './community-processor.js';
|
|
8
8
|
import { processProcesses } from './process-processor.js';
|
|
9
9
|
import { createSymbolTable } from './symbol-table.js';
|
|
10
10
|
import { createASTCache } from './ast-cache.js';
|
|
11
|
-
import {
|
|
11
|
+
import { walkRepositoryPaths, readFileContents } from './filesystem-walker.js';
|
|
12
|
+
import { getLanguageFromFilename } from './utils.js';
|
|
12
13
|
import { createWorkerPool } from './workers/worker-pool.js';
|
|
13
14
|
const isDev = process.env.NODE_ENV === 'development';
|
|
15
|
+
/** Max bytes of source content to load per parse chunk. Each chunk's source +
|
|
16
|
+
* parsed ASTs + extracted records + worker serialization overhead all live in
|
|
17
|
+
* memory simultaneously, so this must be conservative. 20MB source ≈ 200-400MB
|
|
18
|
+
* peak working memory per chunk after parse expansion. */
|
|
19
|
+
const CHUNK_BYTE_BUDGET = 20 * 1024 * 1024; // 20MB
|
|
20
|
+
/** Max AST trees to keep in LRU cache */
|
|
21
|
+
const AST_CACHE_CAP = 50;
|
|
14
22
|
export const runPipelineFromRepo = async (repoPath, onProgress) => {
|
|
15
23
|
const graph = createKnowledgeGraph();
|
|
16
|
-
const fileContents = new Map();
|
|
17
24
|
const symbolTable = createSymbolTable();
|
|
18
|
-
|
|
19
|
-
let astCache = createASTCache(50);
|
|
25
|
+
let astCache = createASTCache(AST_CACHE_CAP);
|
|
20
26
|
const importMap = createImportMap();
|
|
21
27
|
const cleanup = () => {
|
|
22
28
|
astCache.clear();
|
|
23
29
|
symbolTable.clear();
|
|
24
30
|
};
|
|
25
31
|
try {
|
|
32
|
+
// ── Phase 1: Scan paths only (no content read) ─────────────────────
|
|
26
33
|
onProgress({
|
|
27
34
|
phase: 'extracting',
|
|
28
35
|
percent: 0,
|
|
29
36
|
message: 'Scanning repository...',
|
|
30
37
|
});
|
|
31
|
-
const
|
|
38
|
+
const scannedFiles = await walkRepositoryPaths(repoPath, (current, total, filePath) => {
|
|
32
39
|
const scanProgress = Math.round((current / total) * 15);
|
|
33
40
|
onProgress({
|
|
34
41
|
phase: 'extracting',
|
|
@@ -38,167 +45,165 @@ export const runPipelineFromRepo = async (repoPath, onProgress) => {
|
|
|
38
45
|
stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
|
|
39
46
|
});
|
|
40
47
|
});
|
|
41
|
-
|
|
42
|
-
// Resize AST cache to fit all files — avoids re-parsing in import/call/heritage phases
|
|
43
|
-
astCache = createASTCache(files.length);
|
|
48
|
+
const totalFiles = scannedFiles.length;
|
|
44
49
|
onProgress({
|
|
45
50
|
phase: 'extracting',
|
|
46
51
|
percent: 15,
|
|
47
52
|
message: 'Repository scanned successfully',
|
|
48
|
-
stats: { filesProcessed:
|
|
53
|
+
stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
|
|
49
54
|
});
|
|
55
|
+
// ── Phase 2: Structure (paths only — no content needed) ────────────
|
|
50
56
|
onProgress({
|
|
51
57
|
phase: 'structure',
|
|
52
58
|
percent: 15,
|
|
53
59
|
message: 'Analyzing project structure...',
|
|
54
|
-
stats: { filesProcessed: 0, totalFiles
|
|
60
|
+
stats: { filesProcessed: 0, totalFiles, nodesCreated: graph.nodeCount },
|
|
55
61
|
});
|
|
56
|
-
const
|
|
57
|
-
processStructure(graph,
|
|
62
|
+
const allPaths = scannedFiles.map(f => f.path);
|
|
63
|
+
processStructure(graph, allPaths);
|
|
58
64
|
onProgress({
|
|
59
65
|
phase: 'structure',
|
|
60
|
-
percent:
|
|
66
|
+
percent: 20,
|
|
61
67
|
message: 'Project structure analyzed',
|
|
62
|
-
stats: { filesProcessed:
|
|
68
|
+
stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
|
|
63
69
|
});
|
|
70
|
+
// ── Phase 3+4: Chunked read + parse ────────────────────────────────
|
|
71
|
+
// Group parseable files into byte-budget chunks so only ~20MB of source
|
|
72
|
+
// is in memory at a time. Each chunk is: read → parse → extract → free.
|
|
73
|
+
const parseableScanned = scannedFiles.filter(f => getLanguageFromFilename(f.path));
|
|
74
|
+
const totalParseable = parseableScanned.length;
|
|
75
|
+
// Build byte-budget chunks
|
|
76
|
+
const chunks = [];
|
|
77
|
+
let currentChunk = [];
|
|
78
|
+
let currentBytes = 0;
|
|
79
|
+
for (const file of parseableScanned) {
|
|
80
|
+
if (currentChunk.length > 0 && currentBytes + file.size > CHUNK_BYTE_BUDGET) {
|
|
81
|
+
chunks.push(currentChunk);
|
|
82
|
+
currentChunk = [];
|
|
83
|
+
currentBytes = 0;
|
|
84
|
+
}
|
|
85
|
+
currentChunk.push(file.path);
|
|
86
|
+
currentBytes += file.size;
|
|
87
|
+
}
|
|
88
|
+
if (currentChunk.length > 0)
|
|
89
|
+
chunks.push(currentChunk);
|
|
90
|
+
const numChunks = chunks.length;
|
|
91
|
+
if (isDev) {
|
|
92
|
+
const totalMB = parseableScanned.reduce((s, f) => s + f.size, 0) / (1024 * 1024);
|
|
93
|
+
console.log(`📂 Scan: ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${CHUNK_BYTE_BUDGET / (1024 * 1024)}MB budget`);
|
|
94
|
+
}
|
|
64
95
|
onProgress({
|
|
65
96
|
phase: 'parsing',
|
|
66
|
-
percent:
|
|
67
|
-
message:
|
|
68
|
-
stats: { filesProcessed: 0, totalFiles:
|
|
97
|
+
percent: 20,
|
|
98
|
+
message: `Parsing ${totalParseable} files in ${numChunks} chunk${numChunks !== 1 ? 's' : ''}...`,
|
|
99
|
+
stats: { filesProcessed: 0, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
|
|
69
100
|
});
|
|
70
|
-
// Create worker pool
|
|
101
|
+
// Create worker pool once, reuse across chunks
|
|
71
102
|
let workerPool;
|
|
72
103
|
try {
|
|
73
104
|
const workerUrl = new URL('./workers/parse-worker.js', import.meta.url);
|
|
74
105
|
workerPool = createWorkerPool(workerUrl);
|
|
75
106
|
}
|
|
76
107
|
catch (err) {
|
|
77
|
-
// Worker pool creation failed
|
|
108
|
+
// Worker pool creation failed — sequential fallback
|
|
78
109
|
}
|
|
79
|
-
let
|
|
110
|
+
let filesParsedSoFar = 0;
|
|
111
|
+
// AST cache sized for one chunk (sequential fallback uses it for import/call/heritage)
|
|
112
|
+
const maxChunkFiles = chunks.reduce((max, c) => Math.max(max, c.length), 0);
|
|
113
|
+
astCache = createASTCache(maxChunkFiles);
|
|
114
|
+
// Build import resolution context once — suffix index, file lists, resolve cache.
|
|
115
|
+
// Reused across all chunks to avoid rebuilding O(files × path_depth) structures.
|
|
116
|
+
const importCtx = buildImportResolutionContext(allPaths);
|
|
117
|
+
const allPathObjects = allPaths.map(p => ({ path: p }));
|
|
118
|
+
// Single-pass: parse + resolve imports/calls/heritage per chunk.
|
|
119
|
+
// Calls/heritage use the symbol table built so far (symbols from earlier chunks
|
|
120
|
+
// are already registered). This trades ~5% cross-chunk resolution accuracy for
|
|
121
|
+
// 200-400MB less memory — critical for Linux-kernel-scale repos.
|
|
122
|
+
const sequentialChunkPaths = [];
|
|
80
123
|
try {
|
|
81
|
-
|
|
82
|
-
const
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
124
|
+
for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {
|
|
125
|
+
const chunkPaths = chunks[chunkIdx];
|
|
126
|
+
// Read content for this chunk only
|
|
127
|
+
const chunkContents = await readFileContents(repoPath, chunkPaths);
|
|
128
|
+
const chunkFiles = chunkPaths
|
|
129
|
+
.filter(p => chunkContents.has(p))
|
|
130
|
+
.map(p => ({ path: p, content: chunkContents.get(p) }));
|
|
131
|
+
// Parse this chunk (workers or sequential fallback)
|
|
132
|
+
const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, (current, _total, filePath) => {
|
|
133
|
+
const globalCurrent = filesParsedSoFar + current;
|
|
134
|
+
const parsingProgress = 20 + ((globalCurrent / totalParseable) * 62);
|
|
135
|
+
onProgress({
|
|
136
|
+
phase: 'parsing',
|
|
137
|
+
percent: Math.round(parsingProgress),
|
|
138
|
+
message: `Parsing chunk ${chunkIdx + 1}/${numChunks}...`,
|
|
139
|
+
detail: filePath,
|
|
140
|
+
stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
|
|
141
|
+
});
|
|
142
|
+
}, workerPool);
|
|
143
|
+
if (chunkWorkerData) {
|
|
144
|
+
// Imports
|
|
145
|
+
await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, importMap, undefined, repoPath, importCtx);
|
|
146
|
+
// Calls — resolve immediately, then free the array
|
|
147
|
+
if (chunkWorkerData.calls.length > 0) {
|
|
148
|
+
await processCallsFromExtracted(graph, chunkWorkerData.calls, symbolTable, importMap);
|
|
149
|
+
}
|
|
150
|
+
// Heritage — resolve immediately, then free
|
|
151
|
+
if (chunkWorkerData.heritage.length > 0) {
|
|
152
|
+
await processHeritageFromExtracted(graph, chunkWorkerData.heritage, symbolTable);
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
else {
|
|
156
|
+
await processImports(graph, chunkFiles, astCache, importMap, undefined, repoPath, allPaths);
|
|
157
|
+
sequentialChunkPaths.push(chunkPaths);
|
|
158
|
+
}
|
|
159
|
+
filesParsedSoFar += chunkFiles.length;
|
|
160
|
+
// Clear AST cache between chunks to free memory
|
|
161
|
+
astCache.clear();
|
|
162
|
+
// chunkContents + chunkFiles + chunkWorkerData go out of scope → GC reclaims
|
|
163
|
+
}
|
|
91
164
|
}
|
|
92
165
|
finally {
|
|
93
166
|
await workerPool?.terminate();
|
|
94
167
|
}
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
await
|
|
104
|
-
|
|
105
|
-
onProgress({
|
|
106
|
-
phase: 'imports',
|
|
107
|
-
percent: Math.round(importProgress),
|
|
108
|
-
message: 'Resolving imports...',
|
|
109
|
-
stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
|
|
110
|
-
});
|
|
111
|
-
}, repoPath);
|
|
112
|
-
}
|
|
113
|
-
else {
|
|
114
|
-
// Fallback: full parse + resolve (sequential path)
|
|
115
|
-
await processImports(graph, files, astCache, importMap, (current, total) => {
|
|
116
|
-
const importProgress = 70 + ((current / total) * 12);
|
|
117
|
-
onProgress({
|
|
118
|
-
phase: 'imports',
|
|
119
|
-
percent: Math.round(importProgress),
|
|
120
|
-
message: 'Resolving imports...',
|
|
121
|
-
stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
|
|
122
|
-
});
|
|
123
|
-
}, repoPath);
|
|
168
|
+
// Sequential fallback chunks: re-read source for call/heritage resolution
|
|
169
|
+
for (const chunkPaths of sequentialChunkPaths) {
|
|
170
|
+
const chunkContents = await readFileContents(repoPath, chunkPaths);
|
|
171
|
+
const chunkFiles = chunkPaths
|
|
172
|
+
.filter(p => chunkContents.has(p))
|
|
173
|
+
.map(p => ({ path: p, content: chunkContents.get(p) }));
|
|
174
|
+
astCache = createASTCache(chunkFiles.length);
|
|
175
|
+
await processCalls(graph, chunkFiles, astCache, symbolTable, importMap);
|
|
176
|
+
await processHeritage(graph, chunkFiles, astCache, symbolTable);
|
|
177
|
+
astCache.clear();
|
|
124
178
|
}
|
|
179
|
+
// Free import resolution context — suffix index + resolve cache no longer needed
|
|
180
|
+
// (allPathObjects and importCtx hold ~94MB+ for large repos)
|
|
181
|
+
allPathObjects.length = 0;
|
|
182
|
+
importCtx.resolveCache.clear();
|
|
183
|
+
importCtx.suffixIndex = null;
|
|
184
|
+
importCtx.normalizedFileList = null;
|
|
125
185
|
if (isDev) {
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
message: 'Tracing function calls...',
|
|
133
|
-
stats: { filesProcessed: 0, totalFiles: files.length, nodesCreated: graph.nodeCount },
|
|
134
|
-
});
|
|
135
|
-
if (workerData) {
|
|
136
|
-
// Fast path: calls already extracted by workers, just resolve targets
|
|
137
|
-
await processCallsFromExtracted(graph, workerData.calls, symbolTable, importMap, (current, total) => {
|
|
138
|
-
const callProgress = 82 + ((current / total) * 10);
|
|
139
|
-
onProgress({
|
|
140
|
-
phase: 'calls',
|
|
141
|
-
percent: Math.round(callProgress),
|
|
142
|
-
message: 'Tracing function calls...',
|
|
143
|
-
stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
|
|
144
|
-
});
|
|
145
|
-
});
|
|
146
|
-
}
|
|
147
|
-
else {
|
|
148
|
-
// Fallback: full parse + resolve (sequential path)
|
|
149
|
-
await processCalls(graph, files, astCache, symbolTable, importMap, (current, total) => {
|
|
150
|
-
const callProgress = 82 + ((current / total) * 10);
|
|
151
|
-
onProgress({
|
|
152
|
-
phase: 'calls',
|
|
153
|
-
percent: Math.round(callProgress),
|
|
154
|
-
message: 'Tracing function calls...',
|
|
155
|
-
stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
|
|
156
|
-
});
|
|
157
|
-
});
|
|
158
|
-
}
|
|
159
|
-
onProgress({
|
|
160
|
-
phase: 'heritage',
|
|
161
|
-
percent: 92,
|
|
162
|
-
message: 'Extracting class inheritance...',
|
|
163
|
-
stats: { filesProcessed: 0, totalFiles: files.length, nodesCreated: graph.nodeCount },
|
|
164
|
-
});
|
|
165
|
-
if (workerData) {
|
|
166
|
-
// Fast path: heritage already extracted by workers, just resolve symbols
|
|
167
|
-
await processHeritageFromExtracted(graph, workerData.heritage, symbolTable, (current, total) => {
|
|
168
|
-
const heritageProgress = 88 + ((current / total) * 4);
|
|
169
|
-
onProgress({
|
|
170
|
-
phase: 'heritage',
|
|
171
|
-
percent: Math.round(heritageProgress),
|
|
172
|
-
message: 'Extracting class inheritance...',
|
|
173
|
-
stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
|
|
174
|
-
});
|
|
175
|
-
});
|
|
176
|
-
}
|
|
177
|
-
else {
|
|
178
|
-
// Fallback: full parse + resolve (sequential path)
|
|
179
|
-
await processHeritage(graph, files, astCache, symbolTable, (current, total) => {
|
|
180
|
-
const heritageProgress = 88 + ((current / total) * 4);
|
|
181
|
-
onProgress({
|
|
182
|
-
phase: 'heritage',
|
|
183
|
-
percent: Math.round(heritageProgress),
|
|
184
|
-
message: 'Extracting class inheritance...',
|
|
185
|
-
stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
|
|
186
|
-
});
|
|
187
|
-
});
|
|
186
|
+
let importsCount = 0;
|
|
187
|
+
for (const r of graph.iterRelationships()) {
|
|
188
|
+
if (r.type === 'IMPORTS')
|
|
189
|
+
importsCount++;
|
|
190
|
+
}
|
|
191
|
+
console.log(`📊 Pipeline: graph has ${importsCount} IMPORTS, ${graph.relationshipCount} total relationships`);
|
|
188
192
|
}
|
|
193
|
+
// ── Phase 5: Communities ───────────────────────────────────────────
|
|
189
194
|
onProgress({
|
|
190
195
|
phase: 'communities',
|
|
191
|
-
percent:
|
|
196
|
+
percent: 82,
|
|
192
197
|
message: 'Detecting code communities...',
|
|
193
|
-
stats: { filesProcessed:
|
|
198
|
+
stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
|
|
194
199
|
});
|
|
195
200
|
const communityResult = await processCommunities(graph, (message, progress) => {
|
|
196
|
-
const communityProgress =
|
|
201
|
+
const communityProgress = 82 + (progress * 0.10);
|
|
197
202
|
onProgress({
|
|
198
203
|
phase: 'communities',
|
|
199
204
|
percent: Math.round(communityProgress),
|
|
200
205
|
message,
|
|
201
|
-
stats: { filesProcessed:
|
|
206
|
+
stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
|
|
202
207
|
});
|
|
203
208
|
});
|
|
204
209
|
if (isDev) {
|
|
@@ -227,22 +232,24 @@ export const runPipelineFromRepo = async (repoPath, onProgress) => {
|
|
|
227
232
|
reason: 'leiden-algorithm',
|
|
228
233
|
});
|
|
229
234
|
});
|
|
235
|
+
// ── Phase 6: Processes ─────────────────────────────────────────────
|
|
230
236
|
onProgress({
|
|
231
237
|
phase: 'processes',
|
|
232
|
-
percent:
|
|
238
|
+
percent: 94,
|
|
233
239
|
message: 'Detecting execution flows...',
|
|
234
|
-
stats: { filesProcessed:
|
|
240
|
+
stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
|
|
235
241
|
});
|
|
236
|
-
|
|
237
|
-
|
|
242
|
+
let symbolCount = 0;
|
|
243
|
+
graph.forEachNode(n => { if (n.label !== 'File')
|
|
244
|
+
symbolCount++; });
|
|
238
245
|
const dynamicMaxProcesses = Math.max(20, Math.min(300, Math.round(symbolCount / 10)));
|
|
239
246
|
const processResult = await processProcesses(graph, communityResult.memberships, (message, progress) => {
|
|
240
|
-
const processProgress =
|
|
247
|
+
const processProgress = 94 + (progress * 0.05);
|
|
241
248
|
onProgress({
|
|
242
249
|
phase: 'processes',
|
|
243
250
|
percent: Math.round(processProgress),
|
|
244
251
|
message,
|
|
245
|
-
stats: { filesProcessed:
|
|
252
|
+
stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
|
|
246
253
|
});
|
|
247
254
|
}, { maxProcesses: dynamicMaxProcesses, minSteps: 3 });
|
|
248
255
|
if (isDev) {
|
|
@@ -280,13 +287,13 @@ export const runPipelineFromRepo = async (repoPath, onProgress) => {
|
|
|
280
287
|
percent: 100,
|
|
281
288
|
message: `Graph complete! ${communityResult.stats.totalCommunities} communities, ${processResult.stats.totalProcesses} processes detected.`,
|
|
282
289
|
stats: {
|
|
283
|
-
filesProcessed:
|
|
284
|
-
totalFiles
|
|
290
|
+
filesProcessed: totalFiles,
|
|
291
|
+
totalFiles,
|
|
285
292
|
nodesCreated: graph.nodeCount
|
|
286
293
|
},
|
|
287
294
|
});
|
|
288
295
|
astCache.clear();
|
|
289
|
-
return { graph,
|
|
296
|
+
return { graph, repoPath, totalFileCount: totalFiles, communityResult, processResult };
|
|
290
297
|
}
|
|
291
298
|
catch (error) {
|
|
292
299
|
cleanup();
|
|
@@ -34,7 +34,8 @@ export const processProcesses = async (knowledgeGraph, memberships, onProgress,
|
|
|
34
34
|
const callsEdges = buildCallsGraph(knowledgeGraph);
|
|
35
35
|
const reverseCallsEdges = buildReverseCallsGraph(knowledgeGraph);
|
|
36
36
|
const nodeMap = new Map();
|
|
37
|
-
|
|
37
|
+
for (const n of knowledgeGraph.iterNodes())
|
|
38
|
+
nodeMap.set(n.id, n);
|
|
38
39
|
// Step 1: Find entry points (functions that call others but have few callers)
|
|
39
40
|
const entryPoints = findEntryPoints(knowledgeGraph, reverseCallsEdges, callsEdges);
|
|
40
41
|
onProgress?.(`Found ${entryPoints.length} entry points, tracing flows...`, 20);
|
|
@@ -129,26 +130,26 @@ export const processProcesses = async (knowledgeGraph, memberships, onProgress,
|
|
|
129
130
|
const MIN_TRACE_CONFIDENCE = 0.5;
|
|
130
131
|
const buildCallsGraph = (graph) => {
|
|
131
132
|
const adj = new Map();
|
|
132
|
-
graph.
|
|
133
|
+
for (const rel of graph.iterRelationships()) {
|
|
133
134
|
if (rel.type === 'CALLS' && rel.confidence >= MIN_TRACE_CONFIDENCE) {
|
|
134
135
|
if (!adj.has(rel.sourceId)) {
|
|
135
136
|
adj.set(rel.sourceId, []);
|
|
136
137
|
}
|
|
137
138
|
adj.get(rel.sourceId).push(rel.targetId);
|
|
138
139
|
}
|
|
139
|
-
}
|
|
140
|
+
}
|
|
140
141
|
return adj;
|
|
141
142
|
};
|
|
142
143
|
const buildReverseCallsGraph = (graph) => {
|
|
143
144
|
const adj = new Map();
|
|
144
|
-
graph.
|
|
145
|
+
for (const rel of graph.iterRelationships()) {
|
|
145
146
|
if (rel.type === 'CALLS' && rel.confidence >= MIN_TRACE_CONFIDENCE) {
|
|
146
147
|
if (!adj.has(rel.targetId)) {
|
|
147
148
|
adj.set(rel.targetId, []);
|
|
148
149
|
}
|
|
149
150
|
adj.get(rel.targetId).push(rel.sourceId);
|
|
150
151
|
}
|
|
151
|
-
}
|
|
152
|
+
}
|
|
152
153
|
return adj;
|
|
153
154
|
};
|
|
154
155
|
/**
|
|
@@ -164,32 +165,32 @@ const buildReverseCallsGraph = (graph) => {
|
|
|
164
165
|
const findEntryPoints = (graph, reverseCallsEdges, callsEdges) => {
|
|
165
166
|
const symbolTypes = new Set(['Function', 'Method']);
|
|
166
167
|
const entryPointCandidates = [];
|
|
167
|
-
graph.
|
|
168
|
+
for (const node of graph.iterNodes()) {
|
|
168
169
|
if (!symbolTypes.has(node.label))
|
|
169
|
-
|
|
170
|
+
continue;
|
|
170
171
|
const filePath = node.properties.filePath || '';
|
|
171
172
|
// Skip test files entirely
|
|
172
173
|
if (isTestFile(filePath))
|
|
173
|
-
|
|
174
|
+
continue;
|
|
174
175
|
const callers = reverseCallsEdges.get(node.id) || [];
|
|
175
176
|
const callees = callsEdges.get(node.id) || [];
|
|
176
177
|
// Must have at least 1 outgoing call to trace forward
|
|
177
178
|
if (callees.length === 0)
|
|
178
|
-
|
|
179
|
+
continue;
|
|
179
180
|
// Calculate entry point score using new scoring system
|
|
180
181
|
const { score, reasons } = calculateEntryPointScore(node.properties.name, node.properties.language || 'javascript', node.properties.isExported ?? false, callers.length, callees.length, filePath // Pass filePath for framework detection
|
|
181
182
|
);
|
|
182
183
|
if (score > 0) {
|
|
183
184
|
entryPointCandidates.push({ id: node.id, score, reasons });
|
|
184
185
|
}
|
|
185
|
-
}
|
|
186
|
+
}
|
|
186
187
|
// Sort by score descending and return top candidates
|
|
187
188
|
const sorted = entryPointCandidates.sort((a, b) => b.score - a.score);
|
|
188
189
|
// DEBUG: Log top candidates with new scoring details
|
|
189
190
|
if (sorted.length > 0 && isDev) {
|
|
190
191
|
console.log(`[Process] Top 10 entry point candidates (new scoring):`);
|
|
191
192
|
sorted.slice(0, 10).forEach((c, i) => {
|
|
192
|
-
const node = graph.
|
|
193
|
+
const node = graph.getNode(c.id);
|
|
193
194
|
const exported = node?.properties.isExported ? '✓' : '✗';
|
|
194
195
|
const shortPath = node?.properties.filePath?.split('/').slice(-2).join('/') || '';
|
|
195
196
|
console.log(` ${i + 1}. ${node?.properties.name} [exported:${exported}] (${shortPath})`);
|
|
@@ -171,6 +171,7 @@ const findEnclosingFunctionId = (node, filePath) => {
|
|
|
171
171
|
return null;
|
|
172
172
|
};
|
|
173
173
|
const BUILT_INS = new Set([
|
|
174
|
+
// JavaScript/TypeScript
|
|
174
175
|
'console', 'log', 'warn', 'error', 'info', 'debug',
|
|
175
176
|
'setTimeout', 'setInterval', 'clearTimeout', 'clearInterval',
|
|
176
177
|
'parseInt', 'parseFloat', 'isNaN', 'isFinite',
|
|
@@ -189,10 +190,32 @@ const BUILT_INS = new Set([
|
|
|
189
190
|
'push', 'pop', 'shift', 'unshift', 'sort', 'reverse',
|
|
190
191
|
'keys', 'values', 'entries', 'assign', 'freeze', 'seal',
|
|
191
192
|
'hasOwnProperty', 'toString', 'valueOf',
|
|
193
|
+
// Python
|
|
192
194
|
'print', 'len', 'range', 'str', 'int', 'float', 'list', 'dict', 'set', 'tuple',
|
|
193
195
|
'open', 'read', 'write', 'close', 'append', 'extend', 'update',
|
|
194
196
|
'super', 'type', 'isinstance', 'issubclass', 'getattr', 'setattr', 'hasattr',
|
|
195
197
|
'enumerate', 'zip', 'sorted', 'reversed', 'min', 'max', 'sum', 'abs',
|
|
198
|
+
// C/C++ standard library
|
|
199
|
+
'printf', 'fprintf', 'sprintf', 'snprintf', 'vprintf', 'vfprintf', 'vsprintf', 'vsnprintf',
|
|
200
|
+
'scanf', 'fscanf', 'sscanf',
|
|
201
|
+
'malloc', 'calloc', 'realloc', 'free', 'memcpy', 'memmove', 'memset', 'memcmp',
|
|
202
|
+
'strlen', 'strcpy', 'strncpy', 'strcat', 'strncat', 'strcmp', 'strncmp', 'strstr', 'strchr', 'strrchr',
|
|
203
|
+
'atoi', 'atol', 'atof', 'strtol', 'strtoul', 'strtoll', 'strtoull', 'strtod',
|
|
204
|
+
'sizeof', 'offsetof', 'typeof',
|
|
205
|
+
'assert', 'abort', 'exit', '_exit',
|
|
206
|
+
'fopen', 'fclose', 'fread', 'fwrite', 'fseek', 'ftell', 'rewind', 'fflush', 'fgets', 'fputs',
|
|
207
|
+
// Linux kernel common macros/helpers (not real call targets)
|
|
208
|
+
'likely', 'unlikely', 'BUG', 'BUG_ON', 'WARN', 'WARN_ON', 'WARN_ONCE',
|
|
209
|
+
'IS_ERR', 'PTR_ERR', 'ERR_PTR', 'IS_ERR_OR_NULL',
|
|
210
|
+
'ARRAY_SIZE', 'container_of', 'list_for_each_entry', 'list_for_each_entry_safe',
|
|
211
|
+
'min', 'max', 'clamp', 'abs', 'swap',
|
|
212
|
+
'pr_info', 'pr_warn', 'pr_err', 'pr_debug', 'pr_notice', 'pr_crit', 'pr_emerg',
|
|
213
|
+
'printk', 'dev_info', 'dev_warn', 'dev_err', 'dev_dbg',
|
|
214
|
+
'GFP_KERNEL', 'GFP_ATOMIC',
|
|
215
|
+
'spin_lock', 'spin_unlock', 'spin_lock_irqsave', 'spin_unlock_irqrestore',
|
|
216
|
+
'mutex_lock', 'mutex_unlock', 'mutex_init',
|
|
217
|
+
'kfree', 'kmalloc', 'kzalloc', 'kcalloc', 'krealloc', 'kvmalloc', 'kvfree',
|
|
218
|
+
'get', 'put',
|
|
196
219
|
]);
|
|
197
220
|
// ============================================================================
|
|
198
221
|
// Label detection from capture map
|
|
@@ -444,14 +467,52 @@ const processFileGroup = (files, language, queryString, result, onFileProcessed)
|
|
|
444
467
|
}
|
|
445
468
|
};
|
|
446
469
|
// ============================================================================
|
|
447
|
-
// Worker message handler
|
|
470
|
+
// Worker message handler — supports sub-batch streaming
|
|
448
471
|
// ============================================================================
|
|
449
|
-
|
|
472
|
+
/** Accumulated result across sub-batches */
|
|
473
|
+
let accumulated = {
|
|
474
|
+
nodes: [], relationships: [], symbols: [],
|
|
475
|
+
imports: [], calls: [], heritage: [], fileCount: 0,
|
|
476
|
+
};
|
|
477
|
+
let cumulativeProcessed = 0;
|
|
478
|
+
const mergeResult = (target, src) => {
|
|
479
|
+
target.nodes.push(...src.nodes);
|
|
480
|
+
target.relationships.push(...src.relationships);
|
|
481
|
+
target.symbols.push(...src.symbols);
|
|
482
|
+
target.imports.push(...src.imports);
|
|
483
|
+
target.calls.push(...src.calls);
|
|
484
|
+
target.heritage.push(...src.heritage);
|
|
485
|
+
target.fileCount += src.fileCount;
|
|
486
|
+
};
|
|
487
|
+
parentPort.on('message', (msg) => {
|
|
450
488
|
try {
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
489
|
+
// Sub-batch mode: { type: 'sub-batch', files: [...] }
|
|
490
|
+
if (msg && msg.type === 'sub-batch') {
|
|
491
|
+
const result = processBatch(msg.files, (filesProcessed) => {
|
|
492
|
+
parentPort.postMessage({ type: 'progress', filesProcessed: cumulativeProcessed + filesProcessed });
|
|
493
|
+
});
|
|
494
|
+
cumulativeProcessed += result.fileCount;
|
|
495
|
+
mergeResult(accumulated, result);
|
|
496
|
+
// Signal ready for next sub-batch
|
|
497
|
+
parentPort.postMessage({ type: 'sub-batch-done' });
|
|
498
|
+
return;
|
|
499
|
+
}
|
|
500
|
+
// Flush: send accumulated results
|
|
501
|
+
if (msg && msg.type === 'flush') {
|
|
502
|
+
parentPort.postMessage({ type: 'result', data: accumulated });
|
|
503
|
+
// Reset for potential reuse
|
|
504
|
+
accumulated = { nodes: [], relationships: [], symbols: [], imports: [], calls: [], heritage: [], fileCount: 0 };
|
|
505
|
+
cumulativeProcessed = 0;
|
|
506
|
+
return;
|
|
507
|
+
}
|
|
508
|
+
// Legacy single-message mode (backward compat): array of files
|
|
509
|
+
if (Array.isArray(msg)) {
|
|
510
|
+
const result = processBatch(msg, (filesProcessed) => {
|
|
511
|
+
parentPort.postMessage({ type: 'progress', filesProcessed });
|
|
512
|
+
});
|
|
513
|
+
parentPort.postMessage({ type: 'result', data: result });
|
|
514
|
+
return;
|
|
515
|
+
}
|
|
455
516
|
}
|
|
456
517
|
catch (err) {
|
|
457
518
|
const message = err instanceof Error ? err.message : String(err);
|