gitnexus 1.2.8 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +194 -186
- package/dist/cli/ai-context.js +71 -71
- package/dist/cli/analyze.js +69 -28
- package/dist/cli/index.js +20 -0
- package/dist/cli/setup.js +8 -1
- package/dist/cli/view.d.ts +13 -0
- package/dist/cli/view.js +59 -0
- package/dist/core/augmentation/engine.js +20 -20
- package/dist/core/embeddings/embedding-pipeline.js +26 -26
- package/dist/core/graph/graph.js +5 -0
- package/dist/core/graph/html-graph-viewer.d.ts +15 -0
- package/dist/core/graph/html-graph-viewer.js +542 -0
- package/dist/core/graph/html-graph-viewer.test.d.ts +1 -0
- package/dist/core/graph/html-graph-viewer.test.js +67 -0
- package/dist/core/graph/types.d.ts +12 -1
- package/dist/core/ingestion/call-processor.js +52 -32
- package/dist/core/ingestion/cluster-enricher.js +16 -16
- package/dist/core/ingestion/community-processor.js +75 -40
- package/dist/core/ingestion/filesystem-walker.d.ts +23 -0
- package/dist/core/ingestion/filesystem-walker.js +38 -3
- package/dist/core/ingestion/import-processor.d.ts +11 -3
- package/dist/core/ingestion/import-processor.js +27 -11
- package/dist/core/ingestion/parsing-processor.js +2 -4
- package/dist/core/ingestion/pipeline.js +142 -135
- package/dist/core/ingestion/process-processor.js +12 -11
- package/dist/core/ingestion/workers/parse-worker.js +67 -6
- package/dist/core/ingestion/workers/worker-pool.d.ts +3 -9
- package/dist/core/ingestion/workers/worker-pool.js +39 -18
- package/dist/core/kuzu/csv-generator.d.ts +15 -8
- package/dist/core/kuzu/csv-generator.js +258 -196
- package/dist/core/kuzu/kuzu-adapter.d.ts +1 -4
- package/dist/core/kuzu/kuzu-adapter.js +84 -72
- package/dist/core/kuzu/schema.d.ts +1 -1
- package/dist/core/kuzu/schema.js +266 -256
- package/dist/core/search/bm25-index.js +5 -5
- package/dist/core/search/hybrid-search.js +3 -3
- package/dist/core/wiki/graph-queries.js +52 -52
- package/dist/core/wiki/html-viewer.js +192 -192
- package/dist/core/wiki/prompts.js +82 -82
- package/dist/mcp/core/embedder.js +8 -4
- package/dist/mcp/local/local-backend.d.ts +6 -0
- package/dist/mcp/local/local-backend.js +224 -117
- package/dist/mcp/resources.js +42 -42
- package/dist/mcp/server.js +16 -16
- package/dist/mcp/tools.js +86 -77
- package/dist/server/api.d.ts +4 -2
- package/dist/server/api.js +253 -83
- package/dist/types/pipeline.d.ts +6 -2
- package/dist/types/pipeline.js +6 -4
- package/hooks/claude/gitnexus-hook.cjs +135 -135
- package/hooks/claude/pre-tool-use.sh +78 -78
- package/hooks/claude/session-start.sh +42 -42
- package/package.json +82 -82
- package/skills/debugging.md +85 -85
- package/skills/exploring.md +75 -75
- package/skills/impact-analysis.md +94 -94
- package/skills/refactoring.md +113 -113
- package/vendor/leiden/index.cjs +355 -355
- package/vendor/leiden/utils.cjs +392 -392
|
@@ -1,34 +1,41 @@
|
|
|
1
1
|
import { createKnowledgeGraph } from '../graph/graph.js';
|
|
2
2
|
import { processStructure } from './structure-processor.js';
|
|
3
3
|
import { processParsing } from './parsing-processor.js';
|
|
4
|
-
import { processImports, processImportsFromExtracted, createImportMap } from './import-processor.js';
|
|
4
|
+
import { processImports, processImportsFromExtracted, createImportMap, buildImportResolutionContext } from './import-processor.js';
|
|
5
5
|
import { processCalls, processCallsFromExtracted } from './call-processor.js';
|
|
6
6
|
import { processHeritage, processHeritageFromExtracted } from './heritage-processor.js';
|
|
7
7
|
import { processCommunities } from './community-processor.js';
|
|
8
8
|
import { processProcesses } from './process-processor.js';
|
|
9
9
|
import { createSymbolTable } from './symbol-table.js';
|
|
10
10
|
import { createASTCache } from './ast-cache.js';
|
|
11
|
-
import {
|
|
11
|
+
import { walkRepositoryPaths, readFileContents } from './filesystem-walker.js';
|
|
12
|
+
import { getLanguageFromFilename } from './utils.js';
|
|
12
13
|
import { createWorkerPool } from './workers/worker-pool.js';
|
|
13
14
|
const isDev = process.env.NODE_ENV === 'development';
|
|
15
|
+
/** Max bytes of source content to load per parse chunk. Each chunk's source +
|
|
16
|
+
* parsed ASTs + extracted records + worker serialization overhead all live in
|
|
17
|
+
* memory simultaneously, so this must be conservative. 20MB source ≈ 200-400MB
|
|
18
|
+
* peak working memory per chunk after parse expansion. */
|
|
19
|
+
const CHUNK_BYTE_BUDGET = 20 * 1024 * 1024; // 20MB
|
|
20
|
+
/** Max AST trees to keep in LRU cache */
|
|
21
|
+
const AST_CACHE_CAP = 50;
|
|
14
22
|
export const runPipelineFromRepo = async (repoPath, onProgress) => {
|
|
15
23
|
const graph = createKnowledgeGraph();
|
|
16
|
-
const fileContents = new Map();
|
|
17
24
|
const symbolTable = createSymbolTable();
|
|
18
|
-
|
|
19
|
-
let astCache = createASTCache(50);
|
|
25
|
+
let astCache = createASTCache(AST_CACHE_CAP);
|
|
20
26
|
const importMap = createImportMap();
|
|
21
27
|
const cleanup = () => {
|
|
22
28
|
astCache.clear();
|
|
23
29
|
symbolTable.clear();
|
|
24
30
|
};
|
|
25
31
|
try {
|
|
32
|
+
// ── Phase 1: Scan paths only (no content read) ─────────────────────
|
|
26
33
|
onProgress({
|
|
27
34
|
phase: 'extracting',
|
|
28
35
|
percent: 0,
|
|
29
36
|
message: 'Scanning repository...',
|
|
30
37
|
});
|
|
31
|
-
const
|
|
38
|
+
const scannedFiles = await walkRepositoryPaths(repoPath, (current, total, filePath) => {
|
|
32
39
|
const scanProgress = Math.round((current / total) * 15);
|
|
33
40
|
onProgress({
|
|
34
41
|
phase: 'extracting',
|
|
@@ -38,167 +45,165 @@ export const runPipelineFromRepo = async (repoPath, onProgress) => {
|
|
|
38
45
|
stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
|
|
39
46
|
});
|
|
40
47
|
});
|
|
41
|
-
|
|
42
|
-
// Resize AST cache to fit all files — avoids re-parsing in import/call/heritage phases
|
|
43
|
-
astCache = createASTCache(files.length);
|
|
48
|
+
const totalFiles = scannedFiles.length;
|
|
44
49
|
onProgress({
|
|
45
50
|
phase: 'extracting',
|
|
46
51
|
percent: 15,
|
|
47
52
|
message: 'Repository scanned successfully',
|
|
48
|
-
stats: { filesProcessed:
|
|
53
|
+
stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
|
|
49
54
|
});
|
|
55
|
+
// ── Phase 2: Structure (paths only — no content needed) ────────────
|
|
50
56
|
onProgress({
|
|
51
57
|
phase: 'structure',
|
|
52
58
|
percent: 15,
|
|
53
59
|
message: 'Analyzing project structure...',
|
|
54
|
-
stats: { filesProcessed: 0, totalFiles
|
|
60
|
+
stats: { filesProcessed: 0, totalFiles, nodesCreated: graph.nodeCount },
|
|
55
61
|
});
|
|
56
|
-
const
|
|
57
|
-
processStructure(graph,
|
|
62
|
+
const allPaths = scannedFiles.map(f => f.path);
|
|
63
|
+
processStructure(graph, allPaths);
|
|
58
64
|
onProgress({
|
|
59
65
|
phase: 'structure',
|
|
60
|
-
percent:
|
|
66
|
+
percent: 20,
|
|
61
67
|
message: 'Project structure analyzed',
|
|
62
|
-
stats: { filesProcessed:
|
|
68
|
+
stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
|
|
63
69
|
});
|
|
70
|
+
// ── Phase 3+4: Chunked read + parse ────────────────────────────────
|
|
71
|
+
// Group parseable files into byte-budget chunks so only ~20MB of source
|
|
72
|
+
// is in memory at a time. Each chunk is: read → parse → extract → free.
|
|
73
|
+
const parseableScanned = scannedFiles.filter(f => getLanguageFromFilename(f.path));
|
|
74
|
+
const totalParseable = parseableScanned.length;
|
|
75
|
+
// Build byte-budget chunks
|
|
76
|
+
const chunks = [];
|
|
77
|
+
let currentChunk = [];
|
|
78
|
+
let currentBytes = 0;
|
|
79
|
+
for (const file of parseableScanned) {
|
|
80
|
+
if (currentChunk.length > 0 && currentBytes + file.size > CHUNK_BYTE_BUDGET) {
|
|
81
|
+
chunks.push(currentChunk);
|
|
82
|
+
currentChunk = [];
|
|
83
|
+
currentBytes = 0;
|
|
84
|
+
}
|
|
85
|
+
currentChunk.push(file.path);
|
|
86
|
+
currentBytes += file.size;
|
|
87
|
+
}
|
|
88
|
+
if (currentChunk.length > 0)
|
|
89
|
+
chunks.push(currentChunk);
|
|
90
|
+
const numChunks = chunks.length;
|
|
91
|
+
if (isDev) {
|
|
92
|
+
const totalMB = parseableScanned.reduce((s, f) => s + f.size, 0) / (1024 * 1024);
|
|
93
|
+
console.log(`📂 Scan: ${totalFiles} paths, ${totalParseable} parseable (${totalMB.toFixed(0)}MB), ${numChunks} chunks @ ${CHUNK_BYTE_BUDGET / (1024 * 1024)}MB budget`);
|
|
94
|
+
}
|
|
64
95
|
onProgress({
|
|
65
96
|
phase: 'parsing',
|
|
66
|
-
percent:
|
|
67
|
-
message:
|
|
68
|
-
stats: { filesProcessed: 0, totalFiles:
|
|
97
|
+
percent: 20,
|
|
98
|
+
message: `Parsing ${totalParseable} files in ${numChunks} chunk${numChunks !== 1 ? 's' : ''}...`,
|
|
99
|
+
stats: { filesProcessed: 0, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
|
|
69
100
|
});
|
|
70
|
-
// Create worker pool
|
|
101
|
+
// Create worker pool once, reuse across chunks
|
|
71
102
|
let workerPool;
|
|
72
103
|
try {
|
|
73
104
|
const workerUrl = new URL('./workers/parse-worker.js', import.meta.url);
|
|
74
105
|
workerPool = createWorkerPool(workerUrl);
|
|
75
106
|
}
|
|
76
107
|
catch (err) {
|
|
77
|
-
// Worker pool creation failed
|
|
108
|
+
// Worker pool creation failed — sequential fallback
|
|
78
109
|
}
|
|
79
|
-
let
|
|
110
|
+
let filesParsedSoFar = 0;
|
|
111
|
+
// AST cache sized for one chunk (sequential fallback uses it for import/call/heritage)
|
|
112
|
+
const maxChunkFiles = chunks.reduce((max, c) => Math.max(max, c.length), 0);
|
|
113
|
+
astCache = createASTCache(maxChunkFiles);
|
|
114
|
+
// Build import resolution context once — suffix index, file lists, resolve cache.
|
|
115
|
+
// Reused across all chunks to avoid rebuilding O(files × path_depth) structures.
|
|
116
|
+
const importCtx = buildImportResolutionContext(allPaths);
|
|
117
|
+
const allPathObjects = allPaths.map(p => ({ path: p }));
|
|
118
|
+
// Single-pass: parse + resolve imports/calls/heritage per chunk.
|
|
119
|
+
// Calls/heritage use the symbol table built so far (symbols from earlier chunks
|
|
120
|
+
// are already registered). This trades ~5% cross-chunk resolution accuracy for
|
|
121
|
+
// 200-400MB less memory — critical for Linux-kernel-scale repos.
|
|
122
|
+
const sequentialChunkPaths = [];
|
|
80
123
|
try {
|
|
81
|
-
|
|
82
|
-
const
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
124
|
+
for (let chunkIdx = 0; chunkIdx < numChunks; chunkIdx++) {
|
|
125
|
+
const chunkPaths = chunks[chunkIdx];
|
|
126
|
+
// Read content for this chunk only
|
|
127
|
+
const chunkContents = await readFileContents(repoPath, chunkPaths);
|
|
128
|
+
const chunkFiles = chunkPaths
|
|
129
|
+
.filter(p => chunkContents.has(p))
|
|
130
|
+
.map(p => ({ path: p, content: chunkContents.get(p) }));
|
|
131
|
+
// Parse this chunk (workers or sequential fallback)
|
|
132
|
+
const chunkWorkerData = await processParsing(graph, chunkFiles, symbolTable, astCache, (current, _total, filePath) => {
|
|
133
|
+
const globalCurrent = filesParsedSoFar + current;
|
|
134
|
+
const parsingProgress = 20 + ((globalCurrent / totalParseable) * 62);
|
|
135
|
+
onProgress({
|
|
136
|
+
phase: 'parsing',
|
|
137
|
+
percent: Math.round(parsingProgress),
|
|
138
|
+
message: `Parsing chunk ${chunkIdx + 1}/${numChunks}...`,
|
|
139
|
+
detail: filePath,
|
|
140
|
+
stats: { filesProcessed: globalCurrent, totalFiles: totalParseable, nodesCreated: graph.nodeCount },
|
|
141
|
+
});
|
|
142
|
+
}, workerPool);
|
|
143
|
+
if (chunkWorkerData) {
|
|
144
|
+
// Imports
|
|
145
|
+
await processImportsFromExtracted(graph, allPathObjects, chunkWorkerData.imports, importMap, undefined, repoPath, importCtx);
|
|
146
|
+
// Calls — resolve immediately, then free the array
|
|
147
|
+
if (chunkWorkerData.calls.length > 0) {
|
|
148
|
+
await processCallsFromExtracted(graph, chunkWorkerData.calls, symbolTable, importMap);
|
|
149
|
+
}
|
|
150
|
+
// Heritage — resolve immediately, then free
|
|
151
|
+
if (chunkWorkerData.heritage.length > 0) {
|
|
152
|
+
await processHeritageFromExtracted(graph, chunkWorkerData.heritage, symbolTable);
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
else {
|
|
156
|
+
await processImports(graph, chunkFiles, astCache, importMap, undefined, repoPath, allPaths);
|
|
157
|
+
sequentialChunkPaths.push(chunkPaths);
|
|
158
|
+
}
|
|
159
|
+
filesParsedSoFar += chunkFiles.length;
|
|
160
|
+
// Clear AST cache between chunks to free memory
|
|
161
|
+
astCache.clear();
|
|
162
|
+
// chunkContents + chunkFiles + chunkWorkerData go out of scope → GC reclaims
|
|
163
|
+
}
|
|
91
164
|
}
|
|
92
165
|
finally {
|
|
93
166
|
await workerPool?.terminate();
|
|
94
167
|
}
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
await
|
|
104
|
-
|
|
105
|
-
onProgress({
|
|
106
|
-
phase: 'imports',
|
|
107
|
-
percent: Math.round(importProgress),
|
|
108
|
-
message: 'Resolving imports...',
|
|
109
|
-
stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
|
|
110
|
-
});
|
|
111
|
-
}, repoPath);
|
|
112
|
-
}
|
|
113
|
-
else {
|
|
114
|
-
// Fallback: full parse + resolve (sequential path)
|
|
115
|
-
await processImports(graph, files, astCache, importMap, (current, total) => {
|
|
116
|
-
const importProgress = 70 + ((current / total) * 12);
|
|
117
|
-
onProgress({
|
|
118
|
-
phase: 'imports',
|
|
119
|
-
percent: Math.round(importProgress),
|
|
120
|
-
message: 'Resolving imports...',
|
|
121
|
-
stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
|
|
122
|
-
});
|
|
123
|
-
}, repoPath);
|
|
168
|
+
// Sequential fallback chunks: re-read source for call/heritage resolution
|
|
169
|
+
for (const chunkPaths of sequentialChunkPaths) {
|
|
170
|
+
const chunkContents = await readFileContents(repoPath, chunkPaths);
|
|
171
|
+
const chunkFiles = chunkPaths
|
|
172
|
+
.filter(p => chunkContents.has(p))
|
|
173
|
+
.map(p => ({ path: p, content: chunkContents.get(p) }));
|
|
174
|
+
astCache = createASTCache(chunkFiles.length);
|
|
175
|
+
await processCalls(graph, chunkFiles, astCache, symbolTable, importMap);
|
|
176
|
+
await processHeritage(graph, chunkFiles, astCache, symbolTable);
|
|
177
|
+
astCache.clear();
|
|
124
178
|
}
|
|
179
|
+
// Free import resolution context — suffix index + resolve cache no longer needed
|
|
180
|
+
// (allPathObjects and importCtx hold ~94MB+ for large repos)
|
|
181
|
+
allPathObjects.length = 0;
|
|
182
|
+
importCtx.resolveCache.clear();
|
|
183
|
+
importCtx.suffixIndex = null;
|
|
184
|
+
importCtx.normalizedFileList = null;
|
|
125
185
|
if (isDev) {
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
message: 'Tracing function calls...',
|
|
133
|
-
stats: { filesProcessed: 0, totalFiles: files.length, nodesCreated: graph.nodeCount },
|
|
134
|
-
});
|
|
135
|
-
if (workerData) {
|
|
136
|
-
// Fast path: calls already extracted by workers, just resolve targets
|
|
137
|
-
await processCallsFromExtracted(graph, workerData.calls, symbolTable, importMap, (current, total) => {
|
|
138
|
-
const callProgress = 82 + ((current / total) * 10);
|
|
139
|
-
onProgress({
|
|
140
|
-
phase: 'calls',
|
|
141
|
-
percent: Math.round(callProgress),
|
|
142
|
-
message: 'Tracing function calls...',
|
|
143
|
-
stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
|
|
144
|
-
});
|
|
145
|
-
});
|
|
146
|
-
}
|
|
147
|
-
else {
|
|
148
|
-
// Fallback: full parse + resolve (sequential path)
|
|
149
|
-
await processCalls(graph, files, astCache, symbolTable, importMap, (current, total) => {
|
|
150
|
-
const callProgress = 82 + ((current / total) * 10);
|
|
151
|
-
onProgress({
|
|
152
|
-
phase: 'calls',
|
|
153
|
-
percent: Math.round(callProgress),
|
|
154
|
-
message: 'Tracing function calls...',
|
|
155
|
-
stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
|
|
156
|
-
});
|
|
157
|
-
});
|
|
158
|
-
}
|
|
159
|
-
onProgress({
|
|
160
|
-
phase: 'heritage',
|
|
161
|
-
percent: 92,
|
|
162
|
-
message: 'Extracting class inheritance...',
|
|
163
|
-
stats: { filesProcessed: 0, totalFiles: files.length, nodesCreated: graph.nodeCount },
|
|
164
|
-
});
|
|
165
|
-
if (workerData) {
|
|
166
|
-
// Fast path: heritage already extracted by workers, just resolve symbols
|
|
167
|
-
await processHeritageFromExtracted(graph, workerData.heritage, symbolTable, (current, total) => {
|
|
168
|
-
const heritageProgress = 88 + ((current / total) * 4);
|
|
169
|
-
onProgress({
|
|
170
|
-
phase: 'heritage',
|
|
171
|
-
percent: Math.round(heritageProgress),
|
|
172
|
-
message: 'Extracting class inheritance...',
|
|
173
|
-
stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
|
|
174
|
-
});
|
|
175
|
-
});
|
|
176
|
-
}
|
|
177
|
-
else {
|
|
178
|
-
// Fallback: full parse + resolve (sequential path)
|
|
179
|
-
await processHeritage(graph, files, astCache, symbolTable, (current, total) => {
|
|
180
|
-
const heritageProgress = 88 + ((current / total) * 4);
|
|
181
|
-
onProgress({
|
|
182
|
-
phase: 'heritage',
|
|
183
|
-
percent: Math.round(heritageProgress),
|
|
184
|
-
message: 'Extracting class inheritance...',
|
|
185
|
-
stats: { filesProcessed: current, totalFiles: total, nodesCreated: graph.nodeCount },
|
|
186
|
-
});
|
|
187
|
-
});
|
|
186
|
+
let importsCount = 0;
|
|
187
|
+
for (const r of graph.iterRelationships()) {
|
|
188
|
+
if (r.type === 'IMPORTS')
|
|
189
|
+
importsCount++;
|
|
190
|
+
}
|
|
191
|
+
console.log(`📊 Pipeline: graph has ${importsCount} IMPORTS, ${graph.relationshipCount} total relationships`);
|
|
188
192
|
}
|
|
193
|
+
// ── Phase 5: Communities ───────────────────────────────────────────
|
|
189
194
|
onProgress({
|
|
190
195
|
phase: 'communities',
|
|
191
|
-
percent:
|
|
196
|
+
percent: 82,
|
|
192
197
|
message: 'Detecting code communities...',
|
|
193
|
-
stats: { filesProcessed:
|
|
198
|
+
stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
|
|
194
199
|
});
|
|
195
200
|
const communityResult = await processCommunities(graph, (message, progress) => {
|
|
196
|
-
const communityProgress =
|
|
201
|
+
const communityProgress = 82 + (progress * 0.10);
|
|
197
202
|
onProgress({
|
|
198
203
|
phase: 'communities',
|
|
199
204
|
percent: Math.round(communityProgress),
|
|
200
205
|
message,
|
|
201
|
-
stats: { filesProcessed:
|
|
206
|
+
stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
|
|
202
207
|
});
|
|
203
208
|
});
|
|
204
209
|
if (isDev) {
|
|
@@ -227,22 +232,24 @@ export const runPipelineFromRepo = async (repoPath, onProgress) => {
|
|
|
227
232
|
reason: 'leiden-algorithm',
|
|
228
233
|
});
|
|
229
234
|
});
|
|
235
|
+
// ── Phase 6: Processes ─────────────────────────────────────────────
|
|
230
236
|
onProgress({
|
|
231
237
|
phase: 'processes',
|
|
232
|
-
percent:
|
|
238
|
+
percent: 94,
|
|
233
239
|
message: 'Detecting execution flows...',
|
|
234
|
-
stats: { filesProcessed:
|
|
240
|
+
stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
|
|
235
241
|
});
|
|
236
|
-
|
|
237
|
-
|
|
242
|
+
let symbolCount = 0;
|
|
243
|
+
graph.forEachNode(n => { if (n.label !== 'File')
|
|
244
|
+
symbolCount++; });
|
|
238
245
|
const dynamicMaxProcesses = Math.max(20, Math.min(300, Math.round(symbolCount / 10)));
|
|
239
246
|
const processResult = await processProcesses(graph, communityResult.memberships, (message, progress) => {
|
|
240
|
-
const processProgress =
|
|
247
|
+
const processProgress = 94 + (progress * 0.05);
|
|
241
248
|
onProgress({
|
|
242
249
|
phase: 'processes',
|
|
243
250
|
percent: Math.round(processProgress),
|
|
244
251
|
message,
|
|
245
|
-
stats: { filesProcessed:
|
|
252
|
+
stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: graph.nodeCount },
|
|
246
253
|
});
|
|
247
254
|
}, { maxProcesses: dynamicMaxProcesses, minSteps: 3 });
|
|
248
255
|
if (isDev) {
|
|
@@ -280,13 +287,13 @@ export const runPipelineFromRepo = async (repoPath, onProgress) => {
|
|
|
280
287
|
percent: 100,
|
|
281
288
|
message: `Graph complete! ${communityResult.stats.totalCommunities} communities, ${processResult.stats.totalProcesses} processes detected.`,
|
|
282
289
|
stats: {
|
|
283
|
-
filesProcessed:
|
|
284
|
-
totalFiles
|
|
290
|
+
filesProcessed: totalFiles,
|
|
291
|
+
totalFiles,
|
|
285
292
|
nodesCreated: graph.nodeCount
|
|
286
293
|
},
|
|
287
294
|
});
|
|
288
295
|
astCache.clear();
|
|
289
|
-
return { graph,
|
|
296
|
+
return { graph, repoPath, totalFileCount: totalFiles, communityResult, processResult };
|
|
290
297
|
}
|
|
291
298
|
catch (error) {
|
|
292
299
|
cleanup();
|
|
@@ -34,7 +34,8 @@ export const processProcesses = async (knowledgeGraph, memberships, onProgress,
|
|
|
34
34
|
const callsEdges = buildCallsGraph(knowledgeGraph);
|
|
35
35
|
const reverseCallsEdges = buildReverseCallsGraph(knowledgeGraph);
|
|
36
36
|
const nodeMap = new Map();
|
|
37
|
-
|
|
37
|
+
for (const n of knowledgeGraph.iterNodes())
|
|
38
|
+
nodeMap.set(n.id, n);
|
|
38
39
|
// Step 1: Find entry points (functions that call others but have few callers)
|
|
39
40
|
const entryPoints = findEntryPoints(knowledgeGraph, reverseCallsEdges, callsEdges);
|
|
40
41
|
onProgress?.(`Found ${entryPoints.length} entry points, tracing flows...`, 20);
|
|
@@ -129,26 +130,26 @@ export const processProcesses = async (knowledgeGraph, memberships, onProgress,
|
|
|
129
130
|
const MIN_TRACE_CONFIDENCE = 0.5;
|
|
130
131
|
const buildCallsGraph = (graph) => {
|
|
131
132
|
const adj = new Map();
|
|
132
|
-
graph.
|
|
133
|
+
for (const rel of graph.iterRelationships()) {
|
|
133
134
|
if (rel.type === 'CALLS' && rel.confidence >= MIN_TRACE_CONFIDENCE) {
|
|
134
135
|
if (!adj.has(rel.sourceId)) {
|
|
135
136
|
adj.set(rel.sourceId, []);
|
|
136
137
|
}
|
|
137
138
|
adj.get(rel.sourceId).push(rel.targetId);
|
|
138
139
|
}
|
|
139
|
-
}
|
|
140
|
+
}
|
|
140
141
|
return adj;
|
|
141
142
|
};
|
|
142
143
|
const buildReverseCallsGraph = (graph) => {
|
|
143
144
|
const adj = new Map();
|
|
144
|
-
graph.
|
|
145
|
+
for (const rel of graph.iterRelationships()) {
|
|
145
146
|
if (rel.type === 'CALLS' && rel.confidence >= MIN_TRACE_CONFIDENCE) {
|
|
146
147
|
if (!adj.has(rel.targetId)) {
|
|
147
148
|
adj.set(rel.targetId, []);
|
|
148
149
|
}
|
|
149
150
|
adj.get(rel.targetId).push(rel.sourceId);
|
|
150
151
|
}
|
|
151
|
-
}
|
|
152
|
+
}
|
|
152
153
|
return adj;
|
|
153
154
|
};
|
|
154
155
|
/**
|
|
@@ -164,32 +165,32 @@ const buildReverseCallsGraph = (graph) => {
|
|
|
164
165
|
const findEntryPoints = (graph, reverseCallsEdges, callsEdges) => {
|
|
165
166
|
const symbolTypes = new Set(['Function', 'Method']);
|
|
166
167
|
const entryPointCandidates = [];
|
|
167
|
-
graph.
|
|
168
|
+
for (const node of graph.iterNodes()) {
|
|
168
169
|
if (!symbolTypes.has(node.label))
|
|
169
|
-
|
|
170
|
+
continue;
|
|
170
171
|
const filePath = node.properties.filePath || '';
|
|
171
172
|
// Skip test files entirely
|
|
172
173
|
if (isTestFile(filePath))
|
|
173
|
-
|
|
174
|
+
continue;
|
|
174
175
|
const callers = reverseCallsEdges.get(node.id) || [];
|
|
175
176
|
const callees = callsEdges.get(node.id) || [];
|
|
176
177
|
// Must have at least 1 outgoing call to trace forward
|
|
177
178
|
if (callees.length === 0)
|
|
178
|
-
|
|
179
|
+
continue;
|
|
179
180
|
// Calculate entry point score using new scoring system
|
|
180
181
|
const { score, reasons } = calculateEntryPointScore(node.properties.name, node.properties.language || 'javascript', node.properties.isExported ?? false, callers.length, callees.length, filePath // Pass filePath for framework detection
|
|
181
182
|
);
|
|
182
183
|
if (score > 0) {
|
|
183
184
|
entryPointCandidates.push({ id: node.id, score, reasons });
|
|
184
185
|
}
|
|
185
|
-
}
|
|
186
|
+
}
|
|
186
187
|
// Sort by score descending and return top candidates
|
|
187
188
|
const sorted = entryPointCandidates.sort((a, b) => b.score - a.score);
|
|
188
189
|
// DEBUG: Log top candidates with new scoring details
|
|
189
190
|
if (sorted.length > 0 && isDev) {
|
|
190
191
|
console.log(`[Process] Top 10 entry point candidates (new scoring):`);
|
|
191
192
|
sorted.slice(0, 10).forEach((c, i) => {
|
|
192
|
-
const node = graph.
|
|
193
|
+
const node = graph.getNode(c.id);
|
|
193
194
|
const exported = node?.properties.isExported ? '✓' : '✗';
|
|
194
195
|
const shortPath = node?.properties.filePath?.split('/').slice(-2).join('/') || '';
|
|
195
196
|
console.log(` ${i + 1}. ${node?.properties.name} [exported:${exported}] (${shortPath})`);
|
|
@@ -171,6 +171,7 @@ const findEnclosingFunctionId = (node, filePath) => {
|
|
|
171
171
|
return null;
|
|
172
172
|
};
|
|
173
173
|
const BUILT_INS = new Set([
|
|
174
|
+
// JavaScript/TypeScript
|
|
174
175
|
'console', 'log', 'warn', 'error', 'info', 'debug',
|
|
175
176
|
'setTimeout', 'setInterval', 'clearTimeout', 'clearInterval',
|
|
176
177
|
'parseInt', 'parseFloat', 'isNaN', 'isFinite',
|
|
@@ -189,10 +190,32 @@ const BUILT_INS = new Set([
|
|
|
189
190
|
'push', 'pop', 'shift', 'unshift', 'sort', 'reverse',
|
|
190
191
|
'keys', 'values', 'entries', 'assign', 'freeze', 'seal',
|
|
191
192
|
'hasOwnProperty', 'toString', 'valueOf',
|
|
193
|
+
// Python
|
|
192
194
|
'print', 'len', 'range', 'str', 'int', 'float', 'list', 'dict', 'set', 'tuple',
|
|
193
195
|
'open', 'read', 'write', 'close', 'append', 'extend', 'update',
|
|
194
196
|
'super', 'type', 'isinstance', 'issubclass', 'getattr', 'setattr', 'hasattr',
|
|
195
197
|
'enumerate', 'zip', 'sorted', 'reversed', 'min', 'max', 'sum', 'abs',
|
|
198
|
+
// C/C++ standard library
|
|
199
|
+
'printf', 'fprintf', 'sprintf', 'snprintf', 'vprintf', 'vfprintf', 'vsprintf', 'vsnprintf',
|
|
200
|
+
'scanf', 'fscanf', 'sscanf',
|
|
201
|
+
'malloc', 'calloc', 'realloc', 'free', 'memcpy', 'memmove', 'memset', 'memcmp',
|
|
202
|
+
'strlen', 'strcpy', 'strncpy', 'strcat', 'strncat', 'strcmp', 'strncmp', 'strstr', 'strchr', 'strrchr',
|
|
203
|
+
'atoi', 'atol', 'atof', 'strtol', 'strtoul', 'strtoll', 'strtoull', 'strtod',
|
|
204
|
+
'sizeof', 'offsetof', 'typeof',
|
|
205
|
+
'assert', 'abort', 'exit', '_exit',
|
|
206
|
+
'fopen', 'fclose', 'fread', 'fwrite', 'fseek', 'ftell', 'rewind', 'fflush', 'fgets', 'fputs',
|
|
207
|
+
// Linux kernel common macros/helpers (not real call targets)
|
|
208
|
+
'likely', 'unlikely', 'BUG', 'BUG_ON', 'WARN', 'WARN_ON', 'WARN_ONCE',
|
|
209
|
+
'IS_ERR', 'PTR_ERR', 'ERR_PTR', 'IS_ERR_OR_NULL',
|
|
210
|
+
'ARRAY_SIZE', 'container_of', 'list_for_each_entry', 'list_for_each_entry_safe',
|
|
211
|
+
'min', 'max', 'clamp', 'abs', 'swap',
|
|
212
|
+
'pr_info', 'pr_warn', 'pr_err', 'pr_debug', 'pr_notice', 'pr_crit', 'pr_emerg',
|
|
213
|
+
'printk', 'dev_info', 'dev_warn', 'dev_err', 'dev_dbg',
|
|
214
|
+
'GFP_KERNEL', 'GFP_ATOMIC',
|
|
215
|
+
'spin_lock', 'spin_unlock', 'spin_lock_irqsave', 'spin_unlock_irqrestore',
|
|
216
|
+
'mutex_lock', 'mutex_unlock', 'mutex_init',
|
|
217
|
+
'kfree', 'kmalloc', 'kzalloc', 'kcalloc', 'krealloc', 'kvmalloc', 'kvfree',
|
|
218
|
+
'get', 'put',
|
|
196
219
|
]);
|
|
197
220
|
// ============================================================================
|
|
198
221
|
// Label detection from capture map
|
|
@@ -444,14 +467,52 @@ const processFileGroup = (files, language, queryString, result, onFileProcessed)
|
|
|
444
467
|
}
|
|
445
468
|
};
|
|
446
469
|
// ============================================================================
|
|
447
|
-
// Worker message handler
|
|
470
|
+
// Worker message handler — supports sub-batch streaming
|
|
448
471
|
// ============================================================================
|
|
449
|
-
|
|
472
|
+
/** Accumulated result across sub-batches */
|
|
473
|
+
let accumulated = {
|
|
474
|
+
nodes: [], relationships: [], symbols: [],
|
|
475
|
+
imports: [], calls: [], heritage: [], fileCount: 0,
|
|
476
|
+
};
|
|
477
|
+
let cumulativeProcessed = 0;
|
|
478
|
+
const mergeResult = (target, src) => {
|
|
479
|
+
target.nodes.push(...src.nodes);
|
|
480
|
+
target.relationships.push(...src.relationships);
|
|
481
|
+
target.symbols.push(...src.symbols);
|
|
482
|
+
target.imports.push(...src.imports);
|
|
483
|
+
target.calls.push(...src.calls);
|
|
484
|
+
target.heritage.push(...src.heritage);
|
|
485
|
+
target.fileCount += src.fileCount;
|
|
486
|
+
};
|
|
487
|
+
parentPort.on('message', (msg) => {
|
|
450
488
|
try {
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
489
|
+
// Sub-batch mode: { type: 'sub-batch', files: [...] }
|
|
490
|
+
if (msg && msg.type === 'sub-batch') {
|
|
491
|
+
const result = processBatch(msg.files, (filesProcessed) => {
|
|
492
|
+
parentPort.postMessage({ type: 'progress', filesProcessed: cumulativeProcessed + filesProcessed });
|
|
493
|
+
});
|
|
494
|
+
cumulativeProcessed += result.fileCount;
|
|
495
|
+
mergeResult(accumulated, result);
|
|
496
|
+
// Signal ready for next sub-batch
|
|
497
|
+
parentPort.postMessage({ type: 'sub-batch-done' });
|
|
498
|
+
return;
|
|
499
|
+
}
|
|
500
|
+
// Flush: send accumulated results
|
|
501
|
+
if (msg && msg.type === 'flush') {
|
|
502
|
+
parentPort.postMessage({ type: 'result', data: accumulated });
|
|
503
|
+
// Reset for potential reuse
|
|
504
|
+
accumulated = { nodes: [], relationships: [], symbols: [], imports: [], calls: [], heritage: [], fileCount: 0 };
|
|
505
|
+
cumulativeProcessed = 0;
|
|
506
|
+
return;
|
|
507
|
+
}
|
|
508
|
+
// Legacy single-message mode (backward compat): array of files
|
|
509
|
+
if (Array.isArray(msg)) {
|
|
510
|
+
const result = processBatch(msg, (filesProcessed) => {
|
|
511
|
+
parentPort.postMessage({ type: 'progress', filesProcessed });
|
|
512
|
+
});
|
|
513
|
+
parentPort.postMessage({ type: 'result', data: result });
|
|
514
|
+
return;
|
|
515
|
+
}
|
|
455
516
|
}
|
|
456
517
|
catch (err) {
|
|
457
518
|
const message = err instanceof Error ? err.message : String(err);
|
|
@@ -1,22 +1,16 @@
|
|
|
1
1
|
export interface WorkerPool {
|
|
2
2
|
/**
|
|
3
3
|
* Dispatch items across workers. Items are split into chunks (one per worker),
|
|
4
|
-
* each worker processes its chunk
|
|
5
|
-
*
|
|
6
|
-
* @param onProgress - Called with cumulative files processed across all workers
|
|
4
|
+
* each worker processes its chunk via sub-batches to limit peak memory,
|
|
5
|
+
* and results are concatenated back in order.
|
|
7
6
|
*/
|
|
8
7
|
dispatch<TInput, TResult>(items: TInput[], onProgress?: (filesProcessed: number) => void): Promise<TResult[]>;
|
|
9
|
-
/**
|
|
10
|
-
* Terminate all workers. Must be called when done.
|
|
11
|
-
*/
|
|
8
|
+
/** Terminate all workers. Must be called when done. */
|
|
12
9
|
terminate(): Promise<void>;
|
|
13
10
|
/** Number of workers in the pool */
|
|
14
11
|
readonly size: number;
|
|
15
12
|
}
|
|
16
13
|
/**
|
|
17
14
|
* Create a pool of worker threads.
|
|
18
|
-
*
|
|
19
|
-
* @param workerUrl - URL to the worker script (use `new URL('./parse-worker.js', import.meta.url)`)
|
|
20
|
-
* @param poolSize - Number of workers (defaults to cpus - 1, minimum 1)
|
|
21
15
|
*/
|
|
22
16
|
export declare const createWorkerPool: (workerUrl: URL, poolSize?: number) => WorkerPool;
|