gitnexus 1.2.8 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. package/README.md +194 -186
  2. package/dist/cli/ai-context.js +71 -71
  3. package/dist/cli/analyze.js +69 -28
  4. package/dist/cli/index.js +20 -0
  5. package/dist/cli/setup.js +8 -1
  6. package/dist/cli/view.d.ts +13 -0
  7. package/dist/cli/view.js +59 -0
  8. package/dist/core/augmentation/engine.js +20 -20
  9. package/dist/core/embeddings/embedding-pipeline.js +26 -26
  10. package/dist/core/graph/graph.js +5 -0
  11. package/dist/core/graph/html-graph-viewer.d.ts +15 -0
  12. package/dist/core/graph/html-graph-viewer.js +542 -0
  13. package/dist/core/graph/html-graph-viewer.test.d.ts +1 -0
  14. package/dist/core/graph/html-graph-viewer.test.js +67 -0
  15. package/dist/core/graph/types.d.ts +12 -1
  16. package/dist/core/ingestion/call-processor.js +52 -32
  17. package/dist/core/ingestion/cluster-enricher.js +16 -16
  18. package/dist/core/ingestion/community-processor.js +75 -40
  19. package/dist/core/ingestion/filesystem-walker.d.ts +23 -0
  20. package/dist/core/ingestion/filesystem-walker.js +38 -3
  21. package/dist/core/ingestion/import-processor.d.ts +11 -3
  22. package/dist/core/ingestion/import-processor.js +27 -11
  23. package/dist/core/ingestion/parsing-processor.js +2 -4
  24. package/dist/core/ingestion/pipeline.js +142 -135
  25. package/dist/core/ingestion/process-processor.js +12 -11
  26. package/dist/core/ingestion/workers/parse-worker.js +67 -6
  27. package/dist/core/ingestion/workers/worker-pool.d.ts +3 -9
  28. package/dist/core/ingestion/workers/worker-pool.js +39 -18
  29. package/dist/core/kuzu/csv-generator.d.ts +15 -8
  30. package/dist/core/kuzu/csv-generator.js +258 -196
  31. package/dist/core/kuzu/kuzu-adapter.d.ts +1 -4
  32. package/dist/core/kuzu/kuzu-adapter.js +84 -72
  33. package/dist/core/kuzu/schema.d.ts +1 -1
  34. package/dist/core/kuzu/schema.js +266 -256
  35. package/dist/core/search/bm25-index.js +5 -5
  36. package/dist/core/search/hybrid-search.js +3 -3
  37. package/dist/core/wiki/graph-queries.js +52 -52
  38. package/dist/core/wiki/html-viewer.js +192 -192
  39. package/dist/core/wiki/prompts.js +82 -82
  40. package/dist/mcp/core/embedder.js +8 -4
  41. package/dist/mcp/local/local-backend.d.ts +6 -0
  42. package/dist/mcp/local/local-backend.js +224 -117
  43. package/dist/mcp/resources.js +42 -42
  44. package/dist/mcp/server.js +16 -16
  45. package/dist/mcp/tools.js +86 -77
  46. package/dist/server/api.d.ts +4 -2
  47. package/dist/server/api.js +253 -83
  48. package/dist/types/pipeline.d.ts +6 -2
  49. package/dist/types/pipeline.js +6 -4
  50. package/hooks/claude/gitnexus-hook.cjs +135 -135
  51. package/hooks/claude/pre-tool-use.sh +78 -78
  52. package/hooks/claude/session-start.sh +42 -42
  53. package/package.json +82 -82
  54. package/skills/debugging.md +85 -85
  55. package/skills/exploring.md +75 -75
  56. package/skills/impact-analysis.md +94 -94
  57. package/skills/refactoring.md +113 -113
  58. package/vendor/leiden/index.cjs +355 -355
  59. package/vendor/leiden/utils.cjs +392 -392
@@ -232,38 +232,58 @@ const resolveCallTarget = (calledName, currentFile, symbolTable, importMap) => {
232
232
  * Filter out common built-in functions and noise
233
233
  * that shouldn't be tracked as calls
234
234
  */
235
- const isBuiltInOrNoise = (name) => {
236
- const builtIns = new Set([
237
- // JavaScript/TypeScript built-ins
238
- 'console', 'log', 'warn', 'error', 'info', 'debug',
239
- 'setTimeout', 'setInterval', 'clearTimeout', 'clearInterval',
240
- 'parseInt', 'parseFloat', 'isNaN', 'isFinite',
241
- 'encodeURI', 'decodeURI', 'encodeURIComponent', 'decodeURIComponent',
242
- 'JSON', 'parse', 'stringify',
243
- 'Object', 'Array', 'String', 'Number', 'Boolean', 'Symbol', 'BigInt',
244
- 'Map', 'Set', 'WeakMap', 'WeakSet',
245
- 'Promise', 'resolve', 'reject', 'then', 'catch', 'finally',
246
- 'Math', 'Date', 'RegExp', 'Error',
247
- 'require', 'import', 'export',
248
- 'fetch', 'Response', 'Request',
249
- // React hooks and common functions
250
- 'useState', 'useEffect', 'useCallback', 'useMemo', 'useRef', 'useContext',
251
- 'useReducer', 'useLayoutEffect', 'useImperativeHandle', 'useDebugValue',
252
- 'createElement', 'createContext', 'createRef', 'forwardRef', 'memo', 'lazy',
253
- // Common array/object methods
254
- 'map', 'filter', 'reduce', 'forEach', 'find', 'findIndex', 'some', 'every',
255
- 'includes', 'indexOf', 'slice', 'splice', 'concat', 'join', 'split',
256
- 'push', 'pop', 'shift', 'unshift', 'sort', 'reverse',
257
- 'keys', 'values', 'entries', 'assign', 'freeze', 'seal',
258
- 'hasOwnProperty', 'toString', 'valueOf',
259
- // Python built-ins
260
- 'print', 'len', 'range', 'str', 'int', 'float', 'list', 'dict', 'set', 'tuple',
261
- 'open', 'read', 'write', 'close', 'append', 'extend', 'update',
262
- 'super', 'type', 'isinstance', 'issubclass', 'getattr', 'setattr', 'hasattr',
263
- 'enumerate', 'zip', 'sorted', 'reversed', 'min', 'max', 'sum', 'abs',
264
- ]);
265
- return builtIns.has(name);
266
- };
235
+ /** Pre-built set (module-level singleton) to avoid re-creating per call */
236
+ const BUILT_IN_NAMES = new Set([
237
+ // JavaScript/TypeScript built-ins
238
+ 'console', 'log', 'warn', 'error', 'info', 'debug',
239
+ 'setTimeout', 'setInterval', 'clearTimeout', 'clearInterval',
240
+ 'parseInt', 'parseFloat', 'isNaN', 'isFinite',
241
+ 'encodeURI', 'decodeURI', 'encodeURIComponent', 'decodeURIComponent',
242
+ 'JSON', 'parse', 'stringify',
243
+ 'Object', 'Array', 'String', 'Number', 'Boolean', 'Symbol', 'BigInt',
244
+ 'Map', 'Set', 'WeakMap', 'WeakSet',
245
+ 'Promise', 'resolve', 'reject', 'then', 'catch', 'finally',
246
+ 'Math', 'Date', 'RegExp', 'Error',
247
+ 'require', 'import', 'export',
248
+ 'fetch', 'Response', 'Request',
249
+ // React hooks and common functions
250
+ 'useState', 'useEffect', 'useCallback', 'useMemo', 'useRef', 'useContext',
251
+ 'useReducer', 'useLayoutEffect', 'useImperativeHandle', 'useDebugValue',
252
+ 'createElement', 'createContext', 'createRef', 'forwardRef', 'memo', 'lazy',
253
+ // Common array/object methods
254
+ 'map', 'filter', 'reduce', 'forEach', 'find', 'findIndex', 'some', 'every',
255
+ 'includes', 'indexOf', 'slice', 'splice', 'concat', 'join', 'split',
256
+ 'push', 'pop', 'shift', 'unshift', 'sort', 'reverse',
257
+ 'keys', 'values', 'entries', 'assign', 'freeze', 'seal',
258
+ 'hasOwnProperty', 'toString', 'valueOf',
259
+ // Python built-ins
260
+ 'print', 'len', 'range', 'str', 'int', 'float', 'list', 'dict', 'set', 'tuple',
261
+ 'open', 'read', 'write', 'close', 'append', 'extend', 'update',
262
+ 'super', 'type', 'isinstance', 'issubclass', 'getattr', 'setattr', 'hasattr',
263
+ 'enumerate', 'zip', 'sorted', 'reversed', 'min', 'max', 'sum', 'abs',
264
+ // C/C++ standard library and common kernel helpers
265
+ 'printf', 'fprintf', 'sprintf', 'snprintf', 'vprintf', 'vfprintf', 'vsprintf', 'vsnprintf',
266
+ 'scanf', 'fscanf', 'sscanf',
267
+ 'malloc', 'calloc', 'realloc', 'free', 'memcpy', 'memmove', 'memset', 'memcmp',
268
+ 'strlen', 'strcpy', 'strncpy', 'strcat', 'strncat', 'strcmp', 'strncmp', 'strstr', 'strchr', 'strrchr',
269
+ 'atoi', 'atol', 'atof', 'strtol', 'strtoul', 'strtoll', 'strtoull', 'strtod',
270
+ 'sizeof', 'offsetof', 'typeof',
271
+ 'assert', 'abort', 'exit', '_exit',
272
+ 'fopen', 'fclose', 'fread', 'fwrite', 'fseek', 'ftell', 'rewind', 'fflush', 'fgets', 'fputs',
273
+ // Linux kernel common macros/helpers (not real call targets)
274
+ 'likely', 'unlikely', 'BUG', 'BUG_ON', 'WARN', 'WARN_ON', 'WARN_ONCE',
275
+ 'IS_ERR', 'PTR_ERR', 'ERR_PTR', 'IS_ERR_OR_NULL',
276
+ 'ARRAY_SIZE', 'container_of', 'list_for_each_entry', 'list_for_each_entry_safe',
277
+ 'min', 'max', 'clamp', 'abs', 'swap',
278
+ 'pr_info', 'pr_warn', 'pr_err', 'pr_debug', 'pr_notice', 'pr_crit', 'pr_emerg',
279
+ 'printk', 'dev_info', 'dev_warn', 'dev_err', 'dev_dbg',
280
+ 'GFP_KERNEL', 'GFP_ATOMIC',
281
+ 'spin_lock', 'spin_unlock', 'spin_lock_irqsave', 'spin_unlock_irqrestore',
282
+ 'mutex_lock', 'mutex_unlock', 'mutex_init',
283
+ 'kfree', 'kmalloc', 'kzalloc', 'kcalloc', 'krealloc', 'kvmalloc', 'kvfree',
284
+ 'get', 'put',
285
+ ]);
286
+ const isBuiltInOrNoise = (name) => BUILT_IN_NAMES.has(name);
267
287
  /**
268
288
  * Fast path: resolve pre-extracted call sites from workers.
269
289
  * No AST parsing — workers already extracted calledName + sourceId.
@@ -13,12 +13,12 @@ const buildEnrichmentPrompt = (members, heuristicLabel) => {
13
13
  const memberList = limitedMembers
14
14
  .map(m => `${m.name} (${m.type})`)
15
15
  .join(', ');
16
- return `Analyze this code cluster and provide a semantic name and short description.
17
-
18
- Heuristic: "${heuristicLabel}"
19
- Members: ${memberList}${members.length > 20 ? ` (+${members.length - 20} more)` : ''}
20
-
21
- Reply with JSON only:
16
+ return `Analyze this code cluster and provide a semantic name and short description.
17
+
18
+ Heuristic: "${heuristicLabel}"
19
+ Members: ${memberList}${members.length > 20 ? ` (+${members.length - 20} more)` : ''}
20
+
21
+ Reply with JSON only:
22
22
  {"name": "2-4 word semantic name", "description": "One sentence describing purpose"}`;
23
23
  };
24
24
  // ============================================================================
@@ -115,18 +115,18 @@ export const enrichClustersBatch = async (communities, memberMap, llmClient, bat
115
115
  const memberList = limitedMembers
116
116
  .map(m => `${m.name} (${m.type})`)
117
117
  .join(', ');
118
- return `Cluster ${idx + 1} (id: ${community.id}):
119
- Heuristic: "${community.heuristicLabel}"
118
+ return `Cluster ${idx + 1} (id: ${community.id}):
119
+ Heuristic: "${community.heuristicLabel}"
120
120
  Members: ${memberList}`;
121
121
  }).join('\n\n');
122
- const prompt = `Analyze these code clusters and generate semantic names, keywords, and descriptions.
123
-
124
- ${batchPrompt}
125
-
126
- Output JSON array:
127
- [
128
- {"id": "comm_X", "name": "...", "keywords": [...], "description": "..."},
129
- ...
122
+ const prompt = `Analyze these code clusters and generate semantic names, keywords, and descriptions.
123
+
124
+ ${batchPrompt}
125
+
126
+ Output JSON array:
127
+ [
128
+ {"id": "comm_X", "name": "...", "keywords": [...], "description": "..."},
129
+ ...
130
130
  ]`;
131
131
  try {
132
132
  const response = await llmClient.generate(prompt);
@@ -51,23 +51,51 @@ export const getCommunityColor = (communityIndex) => {
51
51
  */
52
52
  export const processCommunities = async (knowledgeGraph, onProgress) => {
53
53
  onProgress?.('Building graph for community detection...', 0);
54
- // Step 1: Build a graphology graph from the knowledge graph
55
- // We only include symbol nodes (Function, Class, Method) and CALLS edges
56
- const graph = buildGraphologyGraph(knowledgeGraph);
54
+ // Pre-check total symbol count to determine large-graph mode before building
55
+ let symbolCount = 0;
56
+ knowledgeGraph.forEachNode(node => {
57
+ if (node.label === 'Function' || node.label === 'Class' || node.label === 'Method' || node.label === 'Interface') {
58
+ symbolCount++;
59
+ }
60
+ });
61
+ const isLarge = symbolCount > 10_000;
62
+ const graph = buildGraphologyGraph(knowledgeGraph, isLarge);
57
63
  if (graph.order === 0) {
58
- // No nodes to cluster
59
64
  return {
60
65
  communities: [],
61
66
  memberships: [],
62
67
  stats: { totalCommunities: 0, modularity: 0, nodesProcessed: 0 }
63
68
  };
64
69
  }
65
- onProgress?.(`Running Leiden algorithm on ${graph.order} nodes...`, 30);
66
- // Step 2: Run Leiden algorithm for community detection
67
- const details = leiden.detailed(graph, {
68
- resolution: 1.0, // Default resolution, can be tuned
69
- randomWalk: true,
70
- });
70
+ const nodeCount = graph.order;
71
+ const edgeCount = graph.size;
72
+ onProgress?.(`Running Leiden on ${nodeCount} nodes, ${edgeCount} edges${isLarge ? ` (filtered from ${symbolCount} symbols)` : ''}...`, 30);
73
+ // Large graphs: higher resolution + capped iterations (matching Python leidenalg default of 2).
74
+ // The first 2 iterations capture ~95%+ of modularity; additional iterations have diminishing returns.
75
+ // Timeout: abort after 60s for pathological graph structures.
76
+ const LEIDEN_TIMEOUT_MS = 60_000;
77
+ let details;
78
+ try {
79
+ details = await Promise.race([
80
+ Promise.resolve(leiden.detailed(graph, {
81
+ resolution: isLarge ? 2.0 : 1.0,
82
+ maxIterations: isLarge ? 3 : 0,
83
+ })),
84
+ new Promise((_, reject) => setTimeout(() => reject(new Error('Leiden timeout')), LEIDEN_TIMEOUT_MS)),
85
+ ]);
86
+ }
87
+ catch (e) {
88
+ if (e.message === 'Leiden timeout') {
89
+ onProgress?.('Community detection timed out, using fallback...', 60);
90
+ // Fallback: assign all nodes to community 0
91
+ const communities = {};
92
+ graph.forEachNode((node) => { communities[node] = 0; });
93
+ details = { communities, count: 1, modularity: 0 };
94
+ }
95
+ else {
96
+ throw e;
97
+ }
98
+ }
71
99
  onProgress?.(`Found ${details.count} communities...`, 60);
72
100
  // Step 3: Create community nodes with heuristic labels
73
101
  const communityNodes = createCommunityNodes(details.communities, details.count, graph, knowledgeGraph);
@@ -95,41 +123,48 @@ export const processCommunities = async (knowledgeGraph, onProgress) => {
95
123
  // HELPER: Build graphology graph from knowledge graph
96
124
  // ============================================================================
97
125
  /**
98
- * Build a graphology graph containing only symbol nodes and CALLS edges
99
- * This is what the Leiden algorithm will cluster
126
+ * Build a graphology graph containing only symbol nodes and clustering edges.
127
+ * For large graphs (>10K symbols), filter out low-confidence fuzzy-global edges
128
+ * and degree-1 nodes that add noise and massively increase Leiden runtime.
100
129
  */
101
- const buildGraphologyGraph = (knowledgeGraph) => {
102
- // Use undirected graph for Leiden - it looks at edge density, not direction
130
+ const MIN_CONFIDENCE_LARGE = 0.5;
131
+ const buildGraphologyGraph = (knowledgeGraph, isLarge) => {
103
132
  const graph = new Graph({ type: 'undirected', allowSelfLoops: false });
104
- // Symbol types that should be clustered
105
133
  const symbolTypes = new Set(['Function', 'Class', 'Method', 'Interface']);
106
- // First pass: collect which nodes participate in clustering edges
107
134
  const clusteringRelTypes = new Set(['CALLS', 'EXTENDS', 'IMPLEMENTS']);
108
135
  const connectedNodes = new Set();
109
- knowledgeGraph.relationships.forEach(rel => {
110
- if (clusteringRelTypes.has(rel.type) && rel.sourceId !== rel.targetId) {
111
- connectedNodes.add(rel.sourceId);
112
- connectedNodes.add(rel.targetId);
113
- }
136
+ const nodeDegree = new Map();
137
+ knowledgeGraph.forEachRelationship(rel => {
138
+ if (!clusteringRelTypes.has(rel.type) || rel.sourceId === rel.targetId)
139
+ return;
140
+ if (isLarge && rel.confidence < MIN_CONFIDENCE_LARGE)
141
+ return;
142
+ connectedNodes.add(rel.sourceId);
143
+ connectedNodes.add(rel.targetId);
144
+ nodeDegree.set(rel.sourceId, (nodeDegree.get(rel.sourceId) || 0) + 1);
145
+ nodeDegree.set(rel.targetId, (nodeDegree.get(rel.targetId) || 0) + 1);
114
146
  });
115
- // Only add nodes that have at least one clustering edge
116
- // Isolated nodes would just become singletons (skipped anyway)
117
- knowledgeGraph.nodes.forEach(node => {
118
- if (symbolTypes.has(node.label) && connectedNodes.has(node.id)) {
119
- graph.addNode(node.id, {
120
- name: node.properties.name,
121
- filePath: node.properties.filePath,
122
- type: node.label,
123
- });
124
- }
147
+ knowledgeGraph.forEachNode(node => {
148
+ if (!symbolTypes.has(node.label) || !connectedNodes.has(node.id))
149
+ return;
150
+ // For large graphs, skip degree-1 nodes — they just become singletons or
151
+ // get absorbed into their single neighbor's community, but cost iteration time.
152
+ if (isLarge && (nodeDegree.get(node.id) || 0) < 2)
153
+ return;
154
+ graph.addNode(node.id, {
155
+ name: node.properties.name,
156
+ filePath: node.properties.filePath,
157
+ type: node.label,
158
+ });
125
159
  });
126
- // Add edges
127
- knowledgeGraph.relationships.forEach(rel => {
128
- if (clusteringRelTypes.has(rel.type)) {
129
- if (graph.hasNode(rel.sourceId) && graph.hasNode(rel.targetId) && rel.sourceId !== rel.targetId) {
130
- if (!graph.hasEdge(rel.sourceId, rel.targetId)) {
131
- graph.addEdge(rel.sourceId, rel.targetId);
132
- }
160
+ knowledgeGraph.forEachRelationship(rel => {
161
+ if (!clusteringRelTypes.has(rel.type))
162
+ return;
163
+ if (isLarge && rel.confidence < MIN_CONFIDENCE_LARGE)
164
+ return;
165
+ if (graph.hasNode(rel.sourceId) && graph.hasNode(rel.targetId) && rel.sourceId !== rel.targetId) {
166
+ if (!graph.hasEdge(rel.sourceId, rel.targetId)) {
167
+ graph.addEdge(rel.sourceId, rel.targetId);
133
168
  }
134
169
  }
135
170
  });
@@ -152,11 +187,11 @@ const createCommunityNodes = (communities, communityCount, graph, knowledgeGraph
152
187
  });
153
188
  // Build node lookup for file paths
154
189
  const nodePathMap = new Map();
155
- knowledgeGraph.nodes.forEach(node => {
190
+ for (const node of knowledgeGraph.iterNodes()) {
156
191
  if (node.properties.filePath) {
157
192
  nodePathMap.set(node.id, node.properties.filePath);
158
193
  }
159
- });
194
+ }
160
195
  // Create community nodes - SKIP SINGLETONS (isolated nodes)
161
196
  const communityNodes = [];
162
197
  communityMembers.forEach((memberIds, commNum) => {
@@ -2,4 +2,27 @@ export interface FileEntry {
2
2
  path: string;
3
3
  content: string;
4
4
  }
5
+ /** Lightweight entry — path + size from stat, no content in memory */
6
+ export interface ScannedFile {
7
+ path: string;
8
+ size: number;
9
+ }
10
+ /** Path-only reference (for type signatures) */
11
+ export interface FilePath {
12
+ path: string;
13
+ }
14
+ /**
15
+ * Phase 1: Scan repository — stat files to get paths + sizes, no content loaded.
16
+ * Memory: ~10MB for 100K files vs ~1GB+ with content.
17
+ */
18
+ export declare const walkRepositoryPaths: (repoPath: string, onProgress?: (current: number, total: number, filePath: string) => void) => Promise<ScannedFile[]>;
19
+ /**
20
+ * Phase 2: Read file contents for a specific set of relative paths.
21
+ * Returns a Map for O(1) lookup. Silently skips files that fail to read.
22
+ */
23
+ export declare const readFileContents: (repoPath: string, relativePaths: string[]) => Promise<Map<string, string>>;
24
+ /**
25
+ * Legacy API — scans and reads everything into memory.
26
+ * Used by sequential fallback path only.
27
+ */
5
28
  export declare const walkRepository: (repoPath: string, onProgress?: (current: number, total: number, filePath: string) => void) => Promise<FileEntry[]>;
@@ -5,7 +5,11 @@ import { shouldIgnorePath } from '../../config/ignore-service.js';
5
5
  const READ_CONCURRENCY = 32;
6
6
  /** Skip files larger than 512KB — they're usually generated/vendored and crash tree-sitter */
7
7
  const MAX_FILE_SIZE = 512 * 1024;
8
- export const walkRepository = async (repoPath, onProgress) => {
8
+ /**
9
+ * Phase 1: Scan repository — stat files to get paths + sizes, no content loaded.
10
+ * Memory: ~10MB for 100K files vs ~1GB+ with content.
11
+ */
12
+ export const walkRepositoryPaths = async (repoPath, onProgress) => {
9
13
  const files = await glob('**/*', {
10
14
  cwd: repoPath,
11
15
  nodir: true,
@@ -24,8 +28,7 @@ export const walkRepository = async (repoPath, onProgress) => {
24
28
  skippedLarge++;
25
29
  return null;
26
30
  }
27
- const content = await fs.readFile(fullPath, 'utf-8');
28
- return { path: relativePath.replace(/\\/g, '/'), content };
31
+ return { path: relativePath.replace(/\\/g, '/'), size: stat.size };
29
32
  }));
30
33
  for (const result of results) {
31
34
  processed++;
@@ -43,3 +46,35 @@ export const walkRepository = async (repoPath, onProgress) => {
43
46
  }
44
47
  return entries;
45
48
  };
49
+ /**
50
+ * Phase 2: Read file contents for a specific set of relative paths.
51
+ * Returns a Map for O(1) lookup. Silently skips files that fail to read.
52
+ */
53
+ export const readFileContents = async (repoPath, relativePaths) => {
54
+ const contents = new Map();
55
+ for (let start = 0; start < relativePaths.length; start += READ_CONCURRENCY) {
56
+ const batch = relativePaths.slice(start, start + READ_CONCURRENCY);
57
+ const results = await Promise.allSettled(batch.map(async (relativePath) => {
58
+ const fullPath = path.join(repoPath, relativePath);
59
+ const content = await fs.readFile(fullPath, 'utf-8');
60
+ return { path: relativePath, content };
61
+ }));
62
+ for (const result of results) {
63
+ if (result.status === 'fulfilled') {
64
+ contents.set(result.value.path, result.value.content);
65
+ }
66
+ }
67
+ }
68
+ return contents;
69
+ };
70
+ /**
71
+ * Legacy API — scans and reads everything into memory.
72
+ * Used by sequential fallback path only.
73
+ */
74
+ export const walkRepository = async (repoPath, onProgress) => {
75
+ const scanned = await walkRepositoryPaths(repoPath, onProgress);
76
+ const contents = await readFileContents(repoPath, scanned.map(f => f.path));
77
+ return scanned
78
+ .filter(f => contents.has(f.path))
79
+ .map(f => ({ path: f.path, content: contents.get(f.path) }));
80
+ };
@@ -3,6 +3,15 @@ import { ASTCache } from './ast-cache.js';
3
3
  import type { ExtractedImport } from './workers/parse-worker.js';
4
4
  export type ImportMap = Map<string, Set<string>>;
5
5
  export declare const createImportMap: () => ImportMap;
6
+ /** Pre-built lookup structures for import resolution. Build once, reuse across chunks. */
7
+ export interface ImportResolutionContext {
8
+ allFilePaths: Set<string>;
9
+ allFileList: string[];
10
+ normalizedFileList: string[];
11
+ suffixIndex: SuffixIndex;
12
+ resolveCache: Map<string, string | null>;
13
+ }
14
+ export declare function buildImportResolutionContext(allPaths: string[]): ImportResolutionContext;
6
15
  /**
7
16
  * Build a suffix index for O(1) endsWith lookups.
8
17
  * Maps every possible path suffix to its original file path.
@@ -23,8 +32,7 @@ export interface SuffixIndex {
23
32
  export declare const processImports: (graph: KnowledgeGraph, files: {
24
33
  path: string;
25
34
  content: string;
26
- }[], astCache: ASTCache, importMap: ImportMap, onProgress?: (current: number, total: number) => void, repoRoot?: string) => Promise<void>;
35
+ }[], astCache: ASTCache, importMap: ImportMap, onProgress?: (current: number, total: number) => void, repoRoot?: string, allPaths?: string[]) => Promise<void>;
27
36
  export declare const processImportsFromExtracted: (graph: KnowledgeGraph, files: {
28
37
  path: string;
29
- content: string;
30
- }[], extractedImports: ExtractedImport[], importMap: ImportMap, onProgress?: (current: number, total: number) => void, repoRoot?: string) => Promise<void>;
38
+ }[], extractedImports: ExtractedImport[], importMap: ImportMap, onProgress?: (current: number, total: number) => void, repoRoot?: string, prebuiltCtx?: ImportResolutionContext) => Promise<void>;
@@ -8,6 +8,16 @@ import { getLanguageFromFilename, yieldToEventLoop } from './utils.js';
8
8
  import { SupportedLanguages } from '../../config/supported-languages.js';
9
9
  const isDev = process.env.NODE_ENV === 'development';
10
10
  export const createImportMap = () => new Map();
11
+ /** Max entries in the resolve cache. Beyond this, the cache is cleared to bound memory.
12
+ * 100K entries ≈ 15MB — covers the most common import patterns. */
13
+ const RESOLVE_CACHE_CAP = 100_000;
14
+ export function buildImportResolutionContext(allPaths) {
15
+ const allFileList = allPaths;
16
+ const normalizedFileList = allFileList.map(p => p.replace(/\\/g, '/'));
17
+ const allFilePaths = new Set(allFileList);
18
+ const suffixIndex = buildSuffixIndex(normalizedFileList, allFileList);
19
+ return { allFilePaths, allFileList, normalizedFileList, suffixIndex, resolveCache: new Map() };
20
+ }
11
21
  /**
12
22
  * Parse tsconfig.json to extract path aliases.
13
23
  * Tries tsconfig.json, tsconfig.app.json, tsconfig.base.json in order.
@@ -196,6 +206,16 @@ const resolveImportPath = (currentFile, importPath, allFiles, allFileList, norma
196
206
  if (resolveCache.has(cacheKey))
197
207
  return resolveCache.get(cacheKey) ?? null;
198
208
  const cache = (result) => {
209
+ // Evict oldest 20% when cap is reached instead of clearing all
210
+ if (resolveCache.size >= RESOLVE_CACHE_CAP) {
211
+ const evictCount = Math.floor(RESOLVE_CACHE_CAP * 0.2);
212
+ const iter = resolveCache.keys();
213
+ for (let i = 0; i < evictCount; i++) {
214
+ const key = iter.next().value;
215
+ if (key !== undefined)
216
+ resolveCache.delete(key);
217
+ }
218
+ }
199
219
  resolveCache.set(cacheKey, result);
200
220
  return result;
201
221
  };
@@ -429,12 +449,12 @@ function resolveGoPackage(importPath, goModule, normalizedFileList, allFileList)
429
449
  // ============================================================================
430
450
  // MAIN IMPORT PROCESSOR
431
451
  // ============================================================================
432
- export const processImports = async (graph, files, astCache, importMap, onProgress, repoRoot) => {
433
- // Create a Set of all file paths for fast lookup during resolution
434
- const allFilePaths = new Set(files.map(f => f.path));
452
+ export const processImports = async (graph, files, astCache, importMap, onProgress, repoRoot, allPaths) => {
453
+ // Use allPaths (full repo) when available for cross-chunk resolution, else fall back to chunk files
454
+ const allFileList = allPaths ?? files.map(f => f.path);
455
+ const allFilePaths = new Set(allFileList);
435
456
  const parser = await loadParser();
436
457
  const resolveCache = new Map();
437
- const allFileList = files.map(f => f.path);
438
458
  // Pre-compute normalized file list once (forward slashes)
439
459
  const normalizedFileList = allFileList.map(p => p.replace(/\\/g, '/'));
440
460
  // Build suffix index for O(1) lookups
@@ -573,13 +593,9 @@ export const processImports = async (graph, files, astCache, importMap, onProgre
573
593
  // ============================================================================
574
594
  // FAST PATH: Resolve pre-extracted imports (no parsing needed)
575
595
  // ============================================================================
576
- export const processImportsFromExtracted = async (graph, files, extractedImports, importMap, onProgress, repoRoot) => {
577
- const allFilePaths = new Set(files.map(f => f.path));
578
- const resolveCache = new Map();
579
- const allFileList = files.map(f => f.path);
580
- const normalizedFileList = allFileList.map(p => p.replace(/\\/g, '/'));
581
- // Build suffix index for O(1) lookups
582
- const index = buildSuffixIndex(normalizedFileList, allFileList);
596
+ export const processImportsFromExtracted = async (graph, files, extractedImports, importMap, onProgress, repoRoot, prebuiltCtx) => {
597
+ const ctx = prebuiltCtx ?? buildImportResolutionContext(files.map(f => f.path));
598
+ const { allFilePaths, allFileList, normalizedFileList, suffixIndex: index, resolveCache } = ctx;
583
599
  let totalImportsFound = 0;
584
600
  let totalImportsResolved = 0;
585
601
  const effectiveRoot = repoRoot || '';
@@ -106,15 +106,13 @@ const processParsingWithWorkers = async (graph, files, symbolTable, astCache, wo
106
106
  const parseableFiles = [];
107
107
  for (const file of files) {
108
108
  const lang = getLanguageFromFilename(file.path);
109
- if (lang) {
109
+ if (lang)
110
110
  parseableFiles.push({ path: file.path, content: file.content });
111
- }
112
111
  }
113
112
  if (parseableFiles.length === 0)
114
113
  return { imports: [], calls: [], heritage: [] };
115
114
  const total = files.length;
116
- // Dispatch to worker pool — pool handles splitting into chunks
117
- // Workers send progress messages during parsing so the bar updates smoothly
115
+ // Dispatch to worker pool — pool handles splitting into chunks and sub-batching
118
116
  const chunkResults = await workerPool.dispatch(parseableFiles, (filesProcessed) => {
119
117
  onFileProgress?.(Math.min(filesProcessed, total), total, 'Parsing...');
120
118
  });