gitnexus 1.2.8 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +194 -186
- package/dist/cli/ai-context.js +71 -71
- package/dist/cli/analyze.js +69 -28
- package/dist/cli/index.js +20 -0
- package/dist/cli/setup.js +8 -1
- package/dist/cli/view.d.ts +13 -0
- package/dist/cli/view.js +59 -0
- package/dist/core/augmentation/engine.js +20 -20
- package/dist/core/embeddings/embedding-pipeline.js +26 -26
- package/dist/core/graph/graph.js +5 -0
- package/dist/core/graph/html-graph-viewer.d.ts +15 -0
- package/dist/core/graph/html-graph-viewer.js +542 -0
- package/dist/core/graph/html-graph-viewer.test.d.ts +1 -0
- package/dist/core/graph/html-graph-viewer.test.js +67 -0
- package/dist/core/graph/types.d.ts +12 -1
- package/dist/core/ingestion/call-processor.js +52 -32
- package/dist/core/ingestion/cluster-enricher.js +16 -16
- package/dist/core/ingestion/community-processor.js +75 -40
- package/dist/core/ingestion/filesystem-walker.d.ts +23 -0
- package/dist/core/ingestion/filesystem-walker.js +38 -3
- package/dist/core/ingestion/import-processor.d.ts +11 -3
- package/dist/core/ingestion/import-processor.js +27 -11
- package/dist/core/ingestion/parsing-processor.js +2 -4
- package/dist/core/ingestion/pipeline.js +142 -135
- package/dist/core/ingestion/process-processor.js +12 -11
- package/dist/core/ingestion/workers/parse-worker.js +67 -6
- package/dist/core/ingestion/workers/worker-pool.d.ts +3 -9
- package/dist/core/ingestion/workers/worker-pool.js +39 -18
- package/dist/core/kuzu/csv-generator.d.ts +15 -8
- package/dist/core/kuzu/csv-generator.js +258 -196
- package/dist/core/kuzu/kuzu-adapter.d.ts +1 -4
- package/dist/core/kuzu/kuzu-adapter.js +84 -72
- package/dist/core/kuzu/schema.d.ts +1 -1
- package/dist/core/kuzu/schema.js +266 -256
- package/dist/core/search/bm25-index.js +5 -5
- package/dist/core/search/hybrid-search.js +3 -3
- package/dist/core/wiki/graph-queries.js +52 -52
- package/dist/core/wiki/html-viewer.js +192 -192
- package/dist/core/wiki/prompts.js +82 -82
- package/dist/mcp/core/embedder.js +8 -4
- package/dist/mcp/local/local-backend.d.ts +6 -0
- package/dist/mcp/local/local-backend.js +224 -117
- package/dist/mcp/resources.js +42 -42
- package/dist/mcp/server.js +16 -16
- package/dist/mcp/tools.js +86 -77
- package/dist/server/api.d.ts +4 -2
- package/dist/server/api.js +253 -83
- package/dist/types/pipeline.d.ts +6 -2
- package/dist/types/pipeline.js +6 -4
- package/hooks/claude/gitnexus-hook.cjs +135 -135
- package/hooks/claude/pre-tool-use.sh +78 -78
- package/hooks/claude/session-start.sh +42 -42
- package/package.json +82 -82
- package/skills/debugging.md +85 -85
- package/skills/exploring.md +75 -75
- package/skills/impact-analysis.md +94 -94
- package/skills/refactoring.md +113 -113
- package/vendor/leiden/index.cjs +355 -355
- package/vendor/leiden/utils.cjs +392 -392
|
@@ -232,38 +232,58 @@ const resolveCallTarget = (calledName, currentFile, symbolTable, importMap) => {
|
|
|
232
232
|
* Filter out common built-in functions and noise
|
|
233
233
|
* that shouldn't be tracked as calls
|
|
234
234
|
*/
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
235
|
+
/**
 * Names that are treated as built-ins or noise rather than real call targets.
 *
 * Built once at module load (module-level singleton) so the lookup below does
 * not re-create the set on every call. Membership is an exact, case-sensitive
 * string match, so 'Set' (JS) and 'set' (Python) are distinct entries.
 */
const BUILT_IN_NAMES = new Set([
    // JavaScript/TypeScript built-ins
    'console', 'log', 'warn', 'error', 'info', 'debug',
    'setTimeout', 'setInterval', 'clearTimeout', 'clearInterval',
    'parseInt', 'parseFloat', 'isNaN', 'isFinite',
    'encodeURI', 'decodeURI', 'encodeURIComponent', 'decodeURIComponent',
    'JSON', 'parse', 'stringify',
    'Object', 'Array', 'String', 'Number', 'Boolean', 'Symbol', 'BigInt',
    'Map', 'Set', 'WeakMap', 'WeakSet',
    'Promise', 'resolve', 'reject', 'then', 'catch', 'finally',
    'Math', 'Date', 'RegExp', 'Error',
    'require', 'import', 'export',
    'fetch', 'Response', 'Request',
    // React hooks and common functions
    'useState', 'useEffect', 'useCallback', 'useMemo', 'useRef', 'useContext',
    'useReducer', 'useLayoutEffect', 'useImperativeHandle', 'useDebugValue',
    'createElement', 'createContext', 'createRef', 'forwardRef', 'memo', 'lazy',
    // Common array/object methods
    'map', 'filter', 'reduce', 'forEach', 'find', 'findIndex', 'some', 'every',
    'includes', 'indexOf', 'slice', 'splice', 'concat', 'join', 'split',
    'push', 'pop', 'shift', 'unshift', 'sort', 'reverse',
    'keys', 'values', 'entries', 'assign', 'freeze', 'seal',
    'hasOwnProperty', 'toString', 'valueOf',
    // Python built-ins
    'print', 'len', 'range', 'str', 'int', 'float', 'list', 'dict', 'set', 'tuple',
    'open', 'read', 'write', 'close', 'append', 'extend', 'update',
    'super', 'type', 'isinstance', 'issubclass', 'getattr', 'setattr', 'hasattr',
    'enumerate', 'zip', 'sorted', 'reversed', 'min', 'max', 'sum', 'abs',
    // C/C++ standard library and common kernel helpers
    'printf', 'fprintf', 'sprintf', 'snprintf', 'vprintf', 'vfprintf', 'vsprintf', 'vsnprintf',
    'scanf', 'fscanf', 'sscanf',
    'malloc', 'calloc', 'realloc', 'free', 'memcpy', 'memmove', 'memset', 'memcmp',
    'strlen', 'strcpy', 'strncpy', 'strcat', 'strncat', 'strcmp', 'strncmp', 'strstr', 'strchr', 'strrchr',
    'atoi', 'atol', 'atof', 'strtol', 'strtoul', 'strtoll', 'strtoull', 'strtod',
    'sizeof', 'offsetof', 'typeof',
    'assert', 'abort', 'exit', '_exit',
    'fopen', 'fclose', 'fread', 'fwrite', 'fseek', 'ftell', 'rewind', 'fflush', 'fgets', 'fputs',
    // Linux kernel common macros/helpers (not real call targets)
    'likely', 'unlikely', 'BUG', 'BUG_ON', 'WARN', 'WARN_ON', 'WARN_ONCE',
    'IS_ERR', 'PTR_ERR', 'ERR_PTR', 'IS_ERR_OR_NULL',
    'ARRAY_SIZE', 'container_of', 'list_for_each_entry', 'list_for_each_entry_safe',
    // 'min', 'max' and 'abs' are also kernel helpers; they are already listed
    // in the Python group above, so they are not repeated here.
    'clamp', 'swap',
    'pr_info', 'pr_warn', 'pr_err', 'pr_debug', 'pr_notice', 'pr_crit', 'pr_emerg',
    'printk', 'dev_info', 'dev_warn', 'dev_err', 'dev_dbg',
    'GFP_KERNEL', 'GFP_ATOMIC',
    'spin_lock', 'spin_unlock', 'spin_lock_irqsave', 'spin_unlock_irqrestore',
    'mutex_lock', 'mutex_unlock', 'mutex_init',
    'kfree', 'kmalloc', 'kzalloc', 'kcalloc', 'krealloc', 'kvmalloc', 'kvfree',
    'get', 'put',
]);
/**
 * Tell whether a called name is a common built-in or noise entry that should
 * not be tracked as a call.
 *
 * @param {string} name - The called identifier, compared exactly (case-sensitive).
 * @returns {boolean} `true` when `name` is in BUILT_IN_NAMES, otherwise `false`.
 */
const isBuiltInOrNoise = (name) => BUILT_IN_NAMES.has(name);
|
|
267
287
|
/**
|
|
268
288
|
* Fast path: resolve pre-extracted call sites from workers.
|
|
269
289
|
* No AST parsing — workers already extracted calledName + sourceId.
|
|
@@ -13,12 +13,12 @@ const buildEnrichmentPrompt = (members, heuristicLabel) => {
|
|
|
13
13
|
const memberList = limitedMembers
|
|
14
14
|
.map(m => `${m.name} (${m.type})`)
|
|
15
15
|
.join(', ');
|
|
16
|
-
return `Analyze this code cluster and provide a semantic name and short description.
|
|
17
|
-
|
|
18
|
-
Heuristic: "${heuristicLabel}"
|
|
19
|
-
Members: ${memberList}${members.length > 20 ? ` (+${members.length - 20} more)` : ''}
|
|
20
|
-
|
|
21
|
-
Reply with JSON only:
|
|
16
|
+
return `Analyze this code cluster and provide a semantic name and short description.
|
|
17
|
+
|
|
18
|
+
Heuristic: "${heuristicLabel}"
|
|
19
|
+
Members: ${memberList}${members.length > 20 ? ` (+${members.length - 20} more)` : ''}
|
|
20
|
+
|
|
21
|
+
Reply with JSON only:
|
|
22
22
|
{"name": "2-4 word semantic name", "description": "One sentence describing purpose"}`;
|
|
23
23
|
};
|
|
24
24
|
// ============================================================================
|
|
@@ -115,18 +115,18 @@ export const enrichClustersBatch = async (communities, memberMap, llmClient, bat
|
|
|
115
115
|
const memberList = limitedMembers
|
|
116
116
|
.map(m => `${m.name} (${m.type})`)
|
|
117
117
|
.join(', ');
|
|
118
|
-
return `Cluster ${idx + 1} (id: ${community.id}):
|
|
119
|
-
Heuristic: "${community.heuristicLabel}"
|
|
118
|
+
return `Cluster ${idx + 1} (id: ${community.id}):
|
|
119
|
+
Heuristic: "${community.heuristicLabel}"
|
|
120
120
|
Members: ${memberList}`;
|
|
121
121
|
}).join('\n\n');
|
|
122
|
-
const prompt = `Analyze these code clusters and generate semantic names, keywords, and descriptions.
|
|
123
|
-
|
|
124
|
-
${batchPrompt}
|
|
125
|
-
|
|
126
|
-
Output JSON array:
|
|
127
|
-
[
|
|
128
|
-
{"id": "comm_X", "name": "...", "keywords": [...], "description": "..."},
|
|
129
|
-
...
|
|
122
|
+
const prompt = `Analyze these code clusters and generate semantic names, keywords, and descriptions.
|
|
123
|
+
|
|
124
|
+
${batchPrompt}
|
|
125
|
+
|
|
126
|
+
Output JSON array:
|
|
127
|
+
[
|
|
128
|
+
{"id": "comm_X", "name": "...", "keywords": [...], "description": "..."},
|
|
129
|
+
...
|
|
130
130
|
]`;
|
|
131
131
|
try {
|
|
132
132
|
const response = await llmClient.generate(prompt);
|
|
@@ -51,23 +51,51 @@ export const getCommunityColor = (communityIndex) => {
|
|
|
51
51
|
*/
|
|
52
52
|
export const processCommunities = async (knowledgeGraph, onProgress) => {
|
|
53
53
|
onProgress?.('Building graph for community detection...', 0);
|
|
54
|
-
//
|
|
55
|
-
|
|
56
|
-
|
|
54
|
+
// Pre-check total symbol count to determine large-graph mode before building
|
|
55
|
+
let symbolCount = 0;
|
|
56
|
+
knowledgeGraph.forEachNode(node => {
|
|
57
|
+
if (node.label === 'Function' || node.label === 'Class' || node.label === 'Method' || node.label === 'Interface') {
|
|
58
|
+
symbolCount++;
|
|
59
|
+
}
|
|
60
|
+
});
|
|
61
|
+
const isLarge = symbolCount > 10_000;
|
|
62
|
+
const graph = buildGraphologyGraph(knowledgeGraph, isLarge);
|
|
57
63
|
if (graph.order === 0) {
|
|
58
|
-
// No nodes to cluster
|
|
59
64
|
return {
|
|
60
65
|
communities: [],
|
|
61
66
|
memberships: [],
|
|
62
67
|
stats: { totalCommunities: 0, modularity: 0, nodesProcessed: 0 }
|
|
63
68
|
};
|
|
64
69
|
}
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
70
|
+
const nodeCount = graph.order;
|
|
71
|
+
const edgeCount = graph.size;
|
|
72
|
+
onProgress?.(`Running Leiden on ${nodeCount} nodes, ${edgeCount} edges${isLarge ? ` (filtered from ${symbolCount} symbols)` : ''}...`, 30);
|
|
73
|
+
// Large graphs: higher resolution + capped iterations (maxIterations is 3 below; Python leidenalg's default n_iterations is 2).
|
|
74
|
+
// The first 2 iterations capture ~95%+ of modularity; additional iterations have diminishing returns.
|
|
75
|
+
// Timeout: abort after 60s for pathological graph structures.
|
|
76
|
+
const LEIDEN_TIMEOUT_MS = 60_000;
|
|
77
|
+
let details;
|
|
78
|
+
try {
|
|
79
|
+
details = await Promise.race([
|
|
80
|
+
Promise.resolve(leiden.detailed(graph, {
|
|
81
|
+
resolution: isLarge ? 2.0 : 1.0,
|
|
82
|
+
maxIterations: isLarge ? 3 : 0,
|
|
83
|
+
})),
|
|
84
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error('Leiden timeout')), LEIDEN_TIMEOUT_MS)),
|
|
85
|
+
]);
|
|
86
|
+
}
|
|
87
|
+
catch (e) {
|
|
88
|
+
if (e.message === 'Leiden timeout') {
|
|
89
|
+
onProgress?.('Community detection timed out, using fallback...', 60);
|
|
90
|
+
// Fallback: assign all nodes to community 0
|
|
91
|
+
const communities = {};
|
|
92
|
+
graph.forEachNode((node) => { communities[node] = 0; });
|
|
93
|
+
details = { communities, count: 1, modularity: 0 };
|
|
94
|
+
}
|
|
95
|
+
else {
|
|
96
|
+
throw e;
|
|
97
|
+
}
|
|
98
|
+
}
|
|
71
99
|
onProgress?.(`Found ${details.count} communities...`, 60);
|
|
72
100
|
// Step 3: Create community nodes with heuristic labels
|
|
73
101
|
const communityNodes = createCommunityNodes(details.communities, details.count, graph, knowledgeGraph);
|
|
@@ -95,41 +123,48 @@ export const processCommunities = async (knowledgeGraph, onProgress) => {
|
|
|
95
123
|
// HELPER: Build graphology graph from knowledge graph
|
|
96
124
|
// ============================================================================
|
|
97
125
|
/**
|
|
98
|
-
* Build a graphology graph containing only symbol nodes and
|
|
99
|
-
*
|
|
126
|
+
* Build a graphology graph containing only symbol nodes and clustering edges.
|
|
127
|
+
* For large graphs (>10K symbols), filter out low-confidence fuzzy-global edges
|
|
128
|
+
* and degree-1 nodes that add noise and massively increase Leiden runtime.
|
|
100
129
|
*/
|
|
101
|
-
const
|
|
102
|
-
|
|
130
|
+
const MIN_CONFIDENCE_LARGE = 0.5;
|
|
131
|
+
const buildGraphologyGraph = (knowledgeGraph, isLarge) => {
|
|
103
132
|
const graph = new Graph({ type: 'undirected', allowSelfLoops: false });
|
|
104
|
-
// Symbol types that should be clustered
|
|
105
133
|
const symbolTypes = new Set(['Function', 'Class', 'Method', 'Interface']);
|
|
106
|
-
// First pass: collect which nodes participate in clustering edges
|
|
107
134
|
const clusteringRelTypes = new Set(['CALLS', 'EXTENDS', 'IMPLEMENTS']);
|
|
108
135
|
const connectedNodes = new Set();
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
136
|
+
const nodeDegree = new Map();
|
|
137
|
+
knowledgeGraph.forEachRelationship(rel => {
|
|
138
|
+
if (!clusteringRelTypes.has(rel.type) || rel.sourceId === rel.targetId)
|
|
139
|
+
return;
|
|
140
|
+
if (isLarge && rel.confidence < MIN_CONFIDENCE_LARGE)
|
|
141
|
+
return;
|
|
142
|
+
connectedNodes.add(rel.sourceId);
|
|
143
|
+
connectedNodes.add(rel.targetId);
|
|
144
|
+
nodeDegree.set(rel.sourceId, (nodeDegree.get(rel.sourceId) || 0) + 1);
|
|
145
|
+
nodeDegree.set(rel.targetId, (nodeDegree.get(rel.targetId) || 0) + 1);
|
|
114
146
|
});
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
147
|
+
knowledgeGraph.forEachNode(node => {
|
|
148
|
+
if (!symbolTypes.has(node.label) || !connectedNodes.has(node.id))
|
|
149
|
+
return;
|
|
150
|
+
// For large graphs, skip degree-1 nodes — they just become singletons or
|
|
151
|
+
// get absorbed into their single neighbor's community, but cost iteration time.
|
|
152
|
+
if (isLarge && (nodeDegree.get(node.id) || 0) < 2)
|
|
153
|
+
return;
|
|
154
|
+
graph.addNode(node.id, {
|
|
155
|
+
name: node.properties.name,
|
|
156
|
+
filePath: node.properties.filePath,
|
|
157
|
+
type: node.label,
|
|
158
|
+
});
|
|
125
159
|
});
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
160
|
+
knowledgeGraph.forEachRelationship(rel => {
|
|
161
|
+
if (!clusteringRelTypes.has(rel.type))
|
|
162
|
+
return;
|
|
163
|
+
if (isLarge && rel.confidence < MIN_CONFIDENCE_LARGE)
|
|
164
|
+
return;
|
|
165
|
+
if (graph.hasNode(rel.sourceId) && graph.hasNode(rel.targetId) && rel.sourceId !== rel.targetId) {
|
|
166
|
+
if (!graph.hasEdge(rel.sourceId, rel.targetId)) {
|
|
167
|
+
graph.addEdge(rel.sourceId, rel.targetId);
|
|
133
168
|
}
|
|
134
169
|
}
|
|
135
170
|
});
|
|
@@ -152,11 +187,11 @@ const createCommunityNodes = (communities, communityCount, graph, knowledgeGraph
|
|
|
152
187
|
});
|
|
153
188
|
// Build node lookup for file paths
|
|
154
189
|
const nodePathMap = new Map();
|
|
155
|
-
knowledgeGraph.
|
|
190
|
+
for (const node of knowledgeGraph.iterNodes()) {
|
|
156
191
|
if (node.properties.filePath) {
|
|
157
192
|
nodePathMap.set(node.id, node.properties.filePath);
|
|
158
193
|
}
|
|
159
|
-
}
|
|
194
|
+
}
|
|
160
195
|
// Create community nodes - SKIP SINGLETONS (isolated nodes)
|
|
161
196
|
const communityNodes = [];
|
|
162
197
|
communityMembers.forEach((memberIds, commNum) => {
|
|
@@ -2,4 +2,27 @@ export interface FileEntry {
|
|
|
2
2
|
path: string;
|
|
3
3
|
content: string;
|
|
4
4
|
}
|
|
5
|
+
/** Lightweight entry — path + size from stat, no content in memory */
|
|
6
|
+
export interface ScannedFile {
|
|
7
|
+
path: string;
|
|
8
|
+
size: number;
|
|
9
|
+
}
|
|
10
|
+
/** Path-only reference (for type signatures) */
|
|
11
|
+
export interface FilePath {
|
|
12
|
+
path: string;
|
|
13
|
+
}
|
|
14
|
+
/**
|
|
15
|
+
* Phase 1: Scan repository — stat files to get paths + sizes, no content loaded.
|
|
16
|
+
* Memory: ~10MB for 100K files vs ~1GB+ with content.
|
|
17
|
+
*/
|
|
18
|
+
export declare const walkRepositoryPaths: (repoPath: string, onProgress?: (current: number, total: number, filePath: string) => void) => Promise<ScannedFile[]>;
|
|
19
|
+
/**
|
|
20
|
+
* Phase 2: Read file contents for a specific set of relative paths.
|
|
21
|
+
* Returns a Map for O(1) lookup. Silently skips files that fail to read.
|
|
22
|
+
*/
|
|
23
|
+
export declare const readFileContents: (repoPath: string, relativePaths: string[]) => Promise<Map<string, string>>;
|
|
24
|
+
/**
|
|
25
|
+
* Legacy API — scans and reads everything into memory.
|
|
26
|
+
* Used by sequential fallback path only.
|
|
27
|
+
*/
|
|
5
28
|
export declare const walkRepository: (repoPath: string, onProgress?: (current: number, total: number, filePath: string) => void) => Promise<FileEntry[]>;
|
|
@@ -5,7 +5,11 @@ import { shouldIgnorePath } from '../../config/ignore-service.js';
|
|
|
5
5
|
const READ_CONCURRENCY = 32;
|
|
6
6
|
/** Skip files larger than 512KB — they're usually generated/vendored and crash tree-sitter */
|
|
7
7
|
const MAX_FILE_SIZE = 512 * 1024;
|
|
8
|
-
|
|
8
|
+
/**
|
|
9
|
+
* Phase 1: Scan repository — stat files to get paths + sizes, no content loaded.
|
|
10
|
+
* Memory: ~10MB for 100K files vs ~1GB+ with content.
|
|
11
|
+
*/
|
|
12
|
+
export const walkRepositoryPaths = async (repoPath, onProgress) => {
|
|
9
13
|
const files = await glob('**/*', {
|
|
10
14
|
cwd: repoPath,
|
|
11
15
|
nodir: true,
|
|
@@ -24,8 +28,7 @@ export const walkRepository = async (repoPath, onProgress) => {
|
|
|
24
28
|
skippedLarge++;
|
|
25
29
|
return null;
|
|
26
30
|
}
|
|
27
|
-
|
|
28
|
-
return { path: relativePath.replace(/\\/g, '/'), content };
|
|
31
|
+
return { path: relativePath.replace(/\\/g, '/'), size: stat.size };
|
|
29
32
|
}));
|
|
30
33
|
for (const result of results) {
|
|
31
34
|
processed++;
|
|
@@ -43,3 +46,35 @@ export const walkRepository = async (repoPath, onProgress) => {
|
|
|
43
46
|
}
|
|
44
47
|
return entries;
|
|
45
48
|
};
|
|
49
|
+
/**
 * Phase 2: load the UTF-8 text of a chosen set of repo-relative files.
 *
 * Files are read in batches of READ_CONCURRENCY. A file whose read rejects is
 * left out of the result without raising, so callers must check membership.
 *
 * @param {string} repoPath - Directory the relative paths are joined onto.
 * @param {string[]} relativePaths - Paths to read, relative to `repoPath`.
 * @returns {Promise<Map<string, string>>} Map keyed by the relative path exactly
 *   as passed in, holding the file content for every read that succeeded.
 */
export const readFileContents = async (repoPath, relativePaths) => {
    const contentByPath = new Map();
    let offset = 0;
    while (offset < relativePaths.length) {
        const chunk = relativePaths.slice(offset, offset + READ_CONCURRENCY);
        offset += READ_CONCURRENCY;
        // The async wrapper turns any synchronous throw into a rejection that
        // allSettled records, so a single bad entry cannot abort the batch.
        const pendingReads = chunk.map(async (relativePath) => fs.readFile(path.join(repoPath, relativePath), 'utf-8'));
        const outcomes = await Promise.allSettled(pendingReads);
        outcomes.forEach((outcome, position) => {
            if (outcome.status !== 'fulfilled') {
                return;
            }
            contentByPath.set(chunk[position], outcome.value);
        });
    }
    return contentByPath;
};
|
|
70
|
+
/**
 * Legacy API: scan the repository and read every scanned file into memory.
 * Used by the sequential fallback path only.
 *
 * @param {string} repoPath - Repository directory to scan.
 * @param {(current: number, total: number, filePath: string) => void} [onProgress]
 *   Progress callback, forwarded unchanged to the path scan.
 * @returns {Promise<{path: string, content: string}[]>} One entry per scanned file
 *   whose content could be read, in scan order.
 */
export const walkRepository = async (repoPath, onProgress) => {
    const scannedFiles = await walkRepositoryPaths(repoPath, onProgress);
    const relativePaths = scannedFiles.map((file) => file.path);
    const contentByPath = await readFileContents(repoPath, relativePaths);
    const entries = [];
    for (const { path: filePath } of scannedFiles) {
        // Files that failed to read are absent from the map and are dropped here.
        if (contentByPath.has(filePath)) {
            entries.push({ path: filePath, content: contentByPath.get(filePath) });
        }
    }
    return entries;
};
|
|
@@ -3,6 +3,15 @@ import { ASTCache } from './ast-cache.js';
|
|
|
3
3
|
import type { ExtractedImport } from './workers/parse-worker.js';
|
|
4
4
|
export type ImportMap = Map<string, Set<string>>;
|
|
5
5
|
export declare const createImportMap: () => ImportMap;
|
|
6
|
+
/** Pre-built lookup structures for import resolution. Build once, reuse across chunks. */
|
|
7
|
+
export interface ImportResolutionContext {
|
|
8
|
+
allFilePaths: Set<string>;
|
|
9
|
+
allFileList: string[];
|
|
10
|
+
normalizedFileList: string[];
|
|
11
|
+
suffixIndex: SuffixIndex;
|
|
12
|
+
resolveCache: Map<string, string | null>;
|
|
13
|
+
}
|
|
14
|
+
export declare function buildImportResolutionContext(allPaths: string[]): ImportResolutionContext;
|
|
6
15
|
/**
|
|
7
16
|
* Build a suffix index for O(1) endsWith lookups.
|
|
8
17
|
* Maps every possible path suffix to its original file path.
|
|
@@ -23,8 +32,7 @@ export interface SuffixIndex {
|
|
|
23
32
|
export declare const processImports: (graph: KnowledgeGraph, files: {
|
|
24
33
|
path: string;
|
|
25
34
|
content: string;
|
|
26
|
-
}[], astCache: ASTCache, importMap: ImportMap, onProgress?: (current: number, total: number) => void, repoRoot?: string) => Promise<void>;
|
|
35
|
+
}[], astCache: ASTCache, importMap: ImportMap, onProgress?: (current: number, total: number) => void, repoRoot?: string, allPaths?: string[]) => Promise<void>;
|
|
27
36
|
export declare const processImportsFromExtracted: (graph: KnowledgeGraph, files: {
|
|
28
37
|
path: string;
|
|
29
|
-
|
|
30
|
-
}[], extractedImports: ExtractedImport[], importMap: ImportMap, onProgress?: (current: number, total: number) => void, repoRoot?: string) => Promise<void>;
|
|
38
|
+
}[], extractedImports: ExtractedImport[], importMap: ImportMap, onProgress?: (current: number, total: number) => void, repoRoot?: string, prebuiltCtx?: ImportResolutionContext) => Promise<void>;
|
|
@@ -8,6 +8,16 @@ import { getLanguageFromFilename, yieldToEventLoop } from './utils.js';
|
|
|
8
8
|
import { SupportedLanguages } from '../../config/supported-languages.js';
|
|
9
9
|
const isDev = process.env.NODE_ENV === 'development';
|
|
10
10
|
export const createImportMap = () => new Map();
|
|
11
|
+
/** Max entries in the resolve cache. Once reached, the oldest 20% of entries are evicted to bound memory.
|
|
12
|
+
* 100K entries ≈ 15MB — covers the most common import patterns. */
|
|
13
|
+
const RESOLVE_CACHE_CAP = 100_000;
|
|
14
|
+
/**
 * Assemble the lookup structures used for import resolution from a list of
 * file paths, together with a fresh, empty resolve cache.
 *
 * @param {string[]} allPaths - File paths to index; stored as-is in `allFileList`.
 * @returns {{allFilePaths: Set<string>, allFileList: string[], normalizedFileList: string[], suffixIndex: object, resolveCache: Map<string, string | null>}}
 *   The context: a Set of the paths, the original list, the list with
 *   backslashes replaced by forward slashes, the suffix index built from both
 *   lists, and an empty cache.
 */
export function buildImportResolutionContext(allPaths) {
    // Forward-slash form of every path, index-aligned with `allPaths`.
    const normalizedFileList = allPaths.map((filePath) => filePath.replace(/\\/g, '/'));
    return {
        allFilePaths: new Set(allPaths),
        allFileList: allPaths,
        normalizedFileList,
        suffixIndex: buildSuffixIndex(normalizedFileList, allPaths),
        resolveCache: new Map(),
    };
}
|
|
11
21
|
/**
|
|
12
22
|
* Parse tsconfig.json to extract path aliases.
|
|
13
23
|
* Tries tsconfig.json, tsconfig.app.json, tsconfig.base.json in order.
|
|
@@ -196,6 +206,16 @@ const resolveImportPath = (currentFile, importPath, allFiles, allFileList, norma
|
|
|
196
206
|
if (resolveCache.has(cacheKey))
|
|
197
207
|
return resolveCache.get(cacheKey) ?? null;
|
|
198
208
|
const cache = (result) => {
|
|
209
|
+
// Evict oldest 20% when cap is reached instead of clearing all
|
|
210
|
+
if (resolveCache.size >= RESOLVE_CACHE_CAP) {
|
|
211
|
+
const evictCount = Math.floor(RESOLVE_CACHE_CAP * 0.2);
|
|
212
|
+
const iter = resolveCache.keys();
|
|
213
|
+
for (let i = 0; i < evictCount; i++) {
|
|
214
|
+
const key = iter.next().value;
|
|
215
|
+
if (key !== undefined)
|
|
216
|
+
resolveCache.delete(key);
|
|
217
|
+
}
|
|
218
|
+
}
|
|
199
219
|
resolveCache.set(cacheKey, result);
|
|
200
220
|
return result;
|
|
201
221
|
};
|
|
@@ -429,12 +449,12 @@ function resolveGoPackage(importPath, goModule, normalizedFileList, allFileList)
|
|
|
429
449
|
// ============================================================================
|
|
430
450
|
// MAIN IMPORT PROCESSOR
|
|
431
451
|
// ============================================================================
|
|
432
|
-
export const processImports = async (graph, files, astCache, importMap, onProgress, repoRoot) => {
|
|
433
|
-
//
|
|
434
|
-
const
|
|
452
|
+
export const processImports = async (graph, files, astCache, importMap, onProgress, repoRoot, allPaths) => {
|
|
453
|
+
// Use allPaths (full repo) when available for cross-chunk resolution, else fall back to chunk files
|
|
454
|
+
const allFileList = allPaths ?? files.map(f => f.path);
|
|
455
|
+
const allFilePaths = new Set(allFileList);
|
|
435
456
|
const parser = await loadParser();
|
|
436
457
|
const resolveCache = new Map();
|
|
437
|
-
const allFileList = files.map(f => f.path);
|
|
438
458
|
// Pre-compute normalized file list once (forward slashes)
|
|
439
459
|
const normalizedFileList = allFileList.map(p => p.replace(/\\/g, '/'));
|
|
440
460
|
// Build suffix index for O(1) lookups
|
|
@@ -573,13 +593,9 @@ export const processImports = async (graph, files, astCache, importMap, onProgre
|
|
|
573
593
|
// ============================================================================
|
|
574
594
|
// FAST PATH: Resolve pre-extracted imports (no parsing needed)
|
|
575
595
|
// ============================================================================
|
|
576
|
-
export const processImportsFromExtracted = async (graph, files, extractedImports, importMap, onProgress, repoRoot) => {
|
|
577
|
-
const
|
|
578
|
-
const resolveCache =
|
|
579
|
-
const allFileList = files.map(f => f.path);
|
|
580
|
-
const normalizedFileList = allFileList.map(p => p.replace(/\\/g, '/'));
|
|
581
|
-
// Build suffix index for O(1) lookups
|
|
582
|
-
const index = buildSuffixIndex(normalizedFileList, allFileList);
|
|
596
|
+
export const processImportsFromExtracted = async (graph, files, extractedImports, importMap, onProgress, repoRoot, prebuiltCtx) => {
|
|
597
|
+
const ctx = prebuiltCtx ?? buildImportResolutionContext(files.map(f => f.path));
|
|
598
|
+
const { allFilePaths, allFileList, normalizedFileList, suffixIndex: index, resolveCache } = ctx;
|
|
583
599
|
let totalImportsFound = 0;
|
|
584
600
|
let totalImportsResolved = 0;
|
|
585
601
|
const effectiveRoot = repoRoot || '';
|
|
@@ -106,15 +106,13 @@ const processParsingWithWorkers = async (graph, files, symbolTable, astCache, wo
|
|
|
106
106
|
const parseableFiles = [];
|
|
107
107
|
for (const file of files) {
|
|
108
108
|
const lang = getLanguageFromFilename(file.path);
|
|
109
|
-
if (lang)
|
|
109
|
+
if (lang)
|
|
110
110
|
parseableFiles.push({ path: file.path, content: file.content });
|
|
111
|
-
}
|
|
112
111
|
}
|
|
113
112
|
if (parseableFiles.length === 0)
|
|
114
113
|
return { imports: [], calls: [], heritage: [] };
|
|
115
114
|
const total = files.length;
|
|
116
|
-
// Dispatch to worker pool — pool handles splitting into chunks
|
|
117
|
-
// Workers send progress messages during parsing so the bar updates smoothly
|
|
115
|
+
// Dispatch to worker pool — pool handles splitting into chunks and sub-batching
|
|
118
116
|
const chunkResults = await workerPool.dispatch(parseableFiles, (filesProcessed) => {
|
|
119
117
|
onFileProgress?.(Math.min(filesProcessed, total), total, 'Parsing...');
|
|
120
118
|
});
|