gitnexus 1.1.9 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. package/README.md +50 -59
  2. package/dist/cli/analyze.js +114 -32
  3. package/dist/cli/eval-server.d.ts +30 -0
  4. package/dist/cli/eval-server.js +372 -0
  5. package/dist/cli/index.js +51 -1
  6. package/dist/cli/mcp.js +9 -0
  7. package/dist/cli/setup.js +44 -7
  8. package/dist/cli/tool.d.ts +37 -0
  9. package/dist/cli/tool.js +91 -0
  10. package/dist/cli/wiki.d.ts +13 -0
  11. package/dist/cli/wiki.js +199 -0
  12. package/dist/core/embeddings/embedder.d.ts +2 -2
  13. package/dist/core/embeddings/embedder.js +10 -10
  14. package/dist/core/embeddings/embedding-pipeline.d.ts +2 -1
  15. package/dist/core/embeddings/embedding-pipeline.js +12 -4
  16. package/dist/core/embeddings/types.d.ts +2 -2
  17. package/dist/core/ingestion/call-processor.d.ts +7 -0
  18. package/dist/core/ingestion/call-processor.js +61 -23
  19. package/dist/core/ingestion/community-processor.js +34 -26
  20. package/dist/core/ingestion/filesystem-walker.js +15 -10
  21. package/dist/core/ingestion/heritage-processor.d.ts +6 -0
  22. package/dist/core/ingestion/heritage-processor.js +68 -5
  23. package/dist/core/ingestion/import-processor.d.ts +22 -0
  24. package/dist/core/ingestion/import-processor.js +214 -19
  25. package/dist/core/ingestion/parsing-processor.d.ts +8 -1
  26. package/dist/core/ingestion/parsing-processor.js +66 -25
  27. package/dist/core/ingestion/pipeline.js +103 -39
  28. package/dist/core/ingestion/workers/parse-worker.d.ts +58 -0
  29. package/dist/core/ingestion/workers/parse-worker.js +451 -0
  30. package/dist/core/ingestion/workers/worker-pool.d.ts +22 -0
  31. package/dist/core/ingestion/workers/worker-pool.js +65 -0
  32. package/dist/core/kuzu/kuzu-adapter.d.ts +15 -1
  33. package/dist/core/kuzu/kuzu-adapter.js +177 -67
  34. package/dist/core/kuzu/schema.d.ts +1 -1
  35. package/dist/core/kuzu/schema.js +3 -0
  36. package/dist/core/wiki/generator.d.ts +96 -0
  37. package/dist/core/wiki/generator.js +674 -0
  38. package/dist/core/wiki/graph-queries.d.ts +80 -0
  39. package/dist/core/wiki/graph-queries.js +238 -0
  40. package/dist/core/wiki/html-viewer.d.ts +10 -0
  41. package/dist/core/wiki/html-viewer.js +297 -0
  42. package/dist/core/wiki/llm-client.d.ts +36 -0
  43. package/dist/core/wiki/llm-client.js +111 -0
  44. package/dist/core/wiki/prompts.d.ts +53 -0
  45. package/dist/core/wiki/prompts.js +174 -0
  46. package/dist/mcp/core/embedder.js +4 -2
  47. package/dist/mcp/core/kuzu-adapter.d.ts +2 -1
  48. package/dist/mcp/core/kuzu-adapter.js +35 -15
  49. package/dist/mcp/local/local-backend.js +9 -2
  50. package/dist/mcp/server.js +1 -1
  51. package/dist/storage/git.d.ts +0 -1
  52. package/dist/storage/git.js +1 -8
  53. package/dist/storage/repo-manager.d.ts +17 -0
  54. package/dist/storage/repo-manager.js +26 -0
  55. package/package.json +1 -1
@@ -0,0 +1,199 @@
1
+ /**
2
+ * Wiki Command
3
+ *
4
+ * Generates repository documentation from the knowledge graph.
5
+ * Usage: gitnexus wiki [path] [options]
6
+ */
7
+ import path from 'path';
8
+ import readline from 'readline';
9
+ import cliProgress from 'cli-progress';
10
+ import { getGitRoot, isGitRepo } from '../storage/git.js';
11
+ import { getStoragePaths, loadMeta, loadCLIConfig, saveCLIConfig } from '../storage/repo-manager.js';
12
+ import { WikiGenerator } from '../core/wiki/generator.js';
13
+ import { resolveLLMConfig } from '../core/wiki/llm-client.js';
14
+ /**
15
+ * Prompt the user for input via stdin.
16
+ */
17
+ function prompt(question, hide = false) {
18
+ return new Promise((resolve) => {
19
+ const rl = readline.createInterface({
20
+ input: process.stdin,
21
+ output: process.stdout,
22
+ });
23
+ if (hide && process.stdin.isTTY) {
24
+ // Mask input for API keys
25
+ process.stdout.write(question);
26
+ let input = '';
27
+ process.stdin.setRawMode(true);
28
+ process.stdin.resume();
29
+ process.stdin.setEncoding('utf-8');
30
+ const onData = (char) => {
31
+ if (char === '\n' || char === '\r' || char === '\u0004') {
32
+ process.stdin.setRawMode(false);
33
+ process.stdin.removeListener('data', onData);
34
+ process.stdout.write('\n');
35
+ rl.close();
36
+ resolve(input);
37
+ }
38
+ else if (char === '\u0003') {
39
+ // Ctrl+C
40
+ process.stdin.setRawMode(false);
41
+ rl.close();
42
+ process.exit(1);
43
+ }
44
+ else if (char === '\u007F' || char === '\b') {
45
+ // Backspace
46
+ if (input.length > 0) {
47
+ input = input.slice(0, -1);
48
+ process.stdout.write('\b \b');
49
+ }
50
+ }
51
+ else {
52
+ input += char;
53
+ process.stdout.write('*');
54
+ }
55
+ };
56
+ process.stdin.on('data', onData);
57
+ }
58
+ else {
59
+ rl.question(question, (answer) => {
60
+ rl.close();
61
+ resolve(answer.trim());
62
+ });
63
+ }
64
+ });
65
+ }
66
+ export const wikiCommand = async (inputPath, options) => {
67
+ console.log('\n GitNexus Wiki Generator\n');
68
+ // ── Resolve repo path ───────────────────────────────────────────────
69
+ let repoPath;
70
+ if (inputPath) {
71
+ repoPath = path.resolve(inputPath);
72
+ }
73
+ else {
74
+ const gitRoot = getGitRoot(process.cwd());
75
+ if (!gitRoot) {
76
+ console.log(' Error: Not inside a git repository\n');
77
+ process.exitCode = 1;
78
+ return;
79
+ }
80
+ repoPath = gitRoot;
81
+ }
82
+ if (!isGitRepo(repoPath)) {
83
+ console.log(' Error: Not a git repository\n');
84
+ process.exitCode = 1;
85
+ return;
86
+ }
87
+ // ── Check for existing index ────────────────────────────────────────
88
+ const { storagePath, kuzuPath } = getStoragePaths(repoPath);
89
+ const meta = await loadMeta(storagePath);
90
+ if (!meta) {
91
+ console.log(' Error: No GitNexus index found.');
92
+ console.log(' Run `gitnexus analyze` first to index this repository.\n');
93
+ process.exitCode = 1;
94
+ return;
95
+ }
96
+ // ── Resolve LLM config (with interactive fallback) ─────────────────
97
+ // If --api-key was passed via CLI, save it immediately
98
+ if (options?.apiKey) {
99
+ const existing = await loadCLIConfig();
100
+ await saveCLIConfig({ ...existing, apiKey: options.apiKey });
101
+ console.log(' API key saved to ~/.gitnexus/config.json\n');
102
+ }
103
+ let llmConfig = await resolveLLMConfig({
104
+ model: options?.model,
105
+ baseUrl: options?.baseUrl,
106
+ apiKey: options?.apiKey,
107
+ });
108
+ if (!llmConfig.apiKey) {
109
+ if (!process.stdin.isTTY) {
110
+ console.log(' Error: No LLM API key found.');
111
+ console.log(' Set OPENAI_API_KEY or GITNEXUS_API_KEY environment variable,');
112
+ console.log(' or pass --api-key <key>.\n');
113
+ process.exitCode = 1;
114
+ return;
115
+ }
116
+ console.log(' No API key configured.\n');
117
+ console.log(' The wiki command requires an LLM API key (OpenAI-compatible).');
118
+ console.log(' You can also set OPENAI_API_KEY or GITNEXUS_API_KEY env var.\n');
119
+ const key = await prompt(' Enter your API key: ', true);
120
+ if (!key) {
121
+ console.log('\n No key provided. Aborting.\n');
122
+ process.exitCode = 1;
123
+ return;
124
+ }
125
+ const save = await prompt(' Save key to ~/.gitnexus/config.json for future use? (Y/n): ');
126
+ if (!save || save.toLowerCase() === 'y' || save.toLowerCase() === 'yes') {
127
+ const existing = await loadCLIConfig();
128
+ await saveCLIConfig({ ...existing, apiKey: key });
129
+ console.log(' Key saved.\n');
130
+ }
131
+ else {
132
+ console.log(' Key will be used for this session only.\n');
133
+ }
134
+ llmConfig = { ...llmConfig, apiKey: key };
135
+ }
136
+ // ── Setup progress bar ──────────────────────────────────────────────
137
+ const bar = new cliProgress.SingleBar({
138
+ format: ' {bar} {percentage}% | {phase}',
139
+ barCompleteChar: '\u2588',
140
+ barIncompleteChar: '\u2591',
141
+ hideCursor: true,
142
+ barGlue: '',
143
+ autopadding: true,
144
+ clearOnComplete: false,
145
+ stopOnComplete: false,
146
+ }, cliProgress.Presets.shades_grey);
147
+ bar.start(100, 0, { phase: 'Initializing...' });
148
+ const t0 = Date.now();
149
+ // ── Run generator ───────────────────────────────────────────────────
150
+ const wikiOptions = {
151
+ force: options?.force,
152
+ model: options?.model,
153
+ baseUrl: options?.baseUrl,
154
+ };
155
+ const generator = new WikiGenerator(repoPath, storagePath, kuzuPath, llmConfig, wikiOptions, (phase, percent, detail) => {
156
+ bar.update(percent, { phase: detail || phase });
157
+ });
158
+ try {
159
+ const result = await generator.run();
160
+ bar.update(100, { phase: 'Done' });
161
+ bar.stop();
162
+ const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
163
+ if (result.mode === 'up-to-date' && !options?.force) {
164
+ console.log('\n Wiki is already up to date.');
165
+ console.log(` ${path.join(storagePath, 'wiki')}\n`);
166
+ return;
167
+ }
168
+ const wikiDir = path.join(storagePath, 'wiki');
169
+ console.log(`\n Wiki generated successfully (${elapsed}s)\n`);
170
+ console.log(` Mode: ${result.mode}`);
171
+ console.log(` Pages: ${result.pagesGenerated}`);
172
+ console.log(` Output: ${wikiDir}`);
173
+ console.log(` Viewer: ${path.join(wikiDir, 'index.html')}`);
174
+ if (result.failedModules && result.failedModules.length > 0) {
175
+ console.log(`\n Failed modules (${result.failedModules.length}):`);
176
+ for (const mod of result.failedModules) {
177
+ console.log(` - ${mod}`);
178
+ }
179
+ console.log(' Re-run to retry failed modules (pages will be regenerated).');
180
+ }
181
+ console.log('');
182
+ }
183
+ catch (err) {
184
+ bar.stop();
185
+ if (err.message?.includes('No source files')) {
186
+ console.log(`\n ${err.message}\n`);
187
+ }
188
+ else if (err.message?.includes('API key') || err.message?.includes('API error')) {
189
+ console.log(`\n LLM Error: ${err.message}\n`);
190
+ }
191
+ else {
192
+ console.log(`\n Error: ${err.message}\n`);
193
+ if (process.env.DEBUG) {
194
+ console.error(err);
195
+ }
196
+ }
197
+ process.exitCode = 1;
198
+ }
199
+ };
@@ -15,7 +15,7 @@ export type ModelProgressCallback = (progress: ModelProgress) => void;
15
15
  /**
16
16
  * Get the current device being used for inference
17
17
  */
18
- export declare const getCurrentDevice: () => "webgpu" | "cuda" | "cpu" | "wasm" | null;
18
+ export declare const getCurrentDevice: () => "dml" | "cuda" | "cpu" | "wasm" | null;
19
19
  /**
20
20
  * Initialize the embedding model
21
21
  * Uses singleton pattern - only loads once, subsequent calls return cached instance
@@ -25,7 +25,7 @@ export declare const getCurrentDevice: () => "webgpu" | "cuda" | "cpu" | "wasm"
25
25
  * @param forceDevice - Force a specific device
26
26
  * @returns Promise resolving to the embedder pipeline
27
27
  */
28
- export declare const initEmbedder: (onProgress?: ModelProgressCallback, config?: Partial<EmbeddingConfig>, forceDevice?: "webgpu" | "cuda" | "cpu" | "wasm") => Promise<FeatureExtractionPipeline>;
28
+ export declare const initEmbedder: (onProgress?: ModelProgressCallback, config?: Partial<EmbeddingConfig>, forceDevice?: "dml" | "cuda" | "cpu" | "wasm") => Promise<FeatureExtractionPipeline>;
29
29
  /**
30
30
  * Check if the embedder is initialized and ready
31
31
  */
@@ -37,10 +37,10 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
37
37
  }
38
38
  isInitializing = true;
39
39
  const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
40
- // On Windows, use webgpu for GPU acceleration (via DirectX12/DirectML)
41
- // CUDA is only available on Linux with onnxruntime-node
40
+ // On Windows, use DirectML for GPU acceleration (via DirectX12)
41
+ // CUDA is only available on Linux x64 with onnxruntime-node
42
42
  const isWindows = process.platform === 'win32';
43
- const gpuDevice = isWindows ? 'webgpu' : 'cuda';
43
+ const gpuDevice = isWindows ? 'dml' : 'cuda';
44
44
  let requestedDevice = forceDevice || (finalConfig.device === 'auto' ? gpuDevice : finalConfig.device);
45
45
  initPromise = (async () => {
46
46
  try {
@@ -61,14 +61,14 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
61
61
  onProgress(progress);
62
62
  } : undefined;
63
63
  // Try GPU first if auto, fall back to CPU
64
- // Windows: webgpu (DirectX12/DirectML), Linux: cuda
65
- const devicesToTry = (requestedDevice === 'webgpu' || requestedDevice === 'cuda')
64
+ // Windows: dml (DirectML/DirectX12), Linux: cuda
65
+ const devicesToTry = (requestedDevice === 'dml' || requestedDevice === 'cuda')
66
66
  ? [requestedDevice, 'cpu']
67
67
  : [requestedDevice];
68
68
  for (const device of devicesToTry) {
69
69
  try {
70
- if (isDev && device === 'webgpu') {
71
- console.log('🔧 Trying WebGPU (DirectX12) backend...');
70
+ if (isDev && device === 'dml') {
71
+ console.log('🔧 Trying DirectML (DirectX12) GPU backend...');
72
72
  }
73
73
  else if (isDev && device === 'cuda') {
74
74
  console.log('🔧 Trying CUDA GPU backend...');
@@ -86,7 +86,7 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
86
86
  });
87
87
  currentDevice = device;
88
88
  if (isDev) {
89
- const label = device === 'webgpu' ? 'GPU (WebGPU/DirectX12)'
89
+ const label = device === 'dml' ? 'GPU (DirectML/DirectX12)'
90
90
  : device === 'cuda' ? 'GPU (CUDA)'
91
91
  : device.toUpperCase();
92
92
  console.log(`✅ Using ${label} backend`);
@@ -95,8 +95,8 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
95
95
  return embedderInstance;
96
96
  }
97
97
  catch (deviceError) {
98
- if (isDev && (device === 'cuda' || device === 'webgpu')) {
99
- const gpuType = device === 'webgpu' ? 'WebGPU' : 'CUDA';
98
+ if (isDev && (device === 'cuda' || device === 'dml')) {
99
+ const gpuType = device === 'dml' ? 'DirectML' : 'CUDA';
100
100
  console.log(`⚠️ ${gpuType} not available, falling back to CPU...`);
101
101
  }
102
102
  // Continue to next device in list
@@ -20,8 +20,9 @@ export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
20
20
  * @param executeWithReusedStatement - Function to execute with reused prepared statement
21
21
  * @param onProgress - Callback for progress updates
22
22
  * @param config - Optional configuration override
23
+ * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
23
24
  */
24
- export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>) => Promise<void>;
25
+ export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>) => Promise<void>;
25
26
  /**
26
27
  * Perform semantic search using the vector index
27
28
  *
@@ -97,8 +97,9 @@ const createVectorIndex = async (executeQuery) => {
97
97
  * @param executeWithReusedStatement - Function to execute with reused prepared statement
98
98
  * @param onProgress - Callback for progress updates
99
99
  * @param config - Optional configuration override
100
+ * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
100
101
  */
101
- export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}) => {
102
+ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, skipNodeIds) => {
102
103
  const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
103
104
  try {
104
105
  // Phase 1: Load embedding model
@@ -108,11 +109,10 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
108
109
  modelDownloadPercent: 0,
109
110
  });
110
111
  await initEmbedder((modelProgress) => {
111
- // Report model download progress
112
112
  const downloadPercent = modelProgress.progress ?? 0;
113
113
  onProgress({
114
114
  phase: 'loading-model',
115
- percent: Math.round(downloadPercent * 0.2), // 0-20% for model loading
115
+ percent: Math.round(downloadPercent * 0.2),
116
116
  modelDownloadPercent: downloadPercent,
117
117
  });
118
118
  }, finalConfig);
@@ -125,7 +125,15 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
125
125
  console.log('🔍 Querying embeddable nodes...');
126
126
  }
127
127
  // Phase 2: Query embeddable nodes
128
- const nodes = await queryEmbeddableNodes(executeQuery);
128
+ let nodes = await queryEmbeddableNodes(executeQuery);
129
+ // Incremental mode: filter out nodes that already have embeddings
130
+ if (skipNodeIds && skipNodeIds.size > 0) {
131
+ const beforeCount = nodes.length;
132
+ nodes = nodes.filter(n => !skipNodeIds.has(n.id));
133
+ if (isDev) {
134
+ console.log(`📦 Incremental embeddings: ${beforeCount} total, ${skipNodeIds.size} cached, ${nodes.length} to embed`);
135
+ }
136
+ }
129
137
  const totalNodes = nodes.length;
130
138
  if (isDev) {
131
139
  console.log(`📊 Found ${totalNodes} embeddable nodes`);
@@ -40,8 +40,8 @@ export interface EmbeddingConfig {
40
40
  batchSize: number;
41
41
  /** Embedding vector dimensions */
42
42
  dimensions: number;
43
- /** Device to use for inference: 'auto' tries GPU first, falls back to CPU */
44
- device: 'auto' | 'webgpu' | 'cuda' | 'cpu' | 'wasm';
43
+ /** Device to use for inference: 'auto' tries GPU first (DirectML on Windows, CUDA on Linux), falls back to CPU */
44
+ device: 'auto' | 'dml' | 'cuda' | 'cpu' | 'wasm';
45
45
  /** Maximum characters of code snippet to include */
46
46
  maxSnippetLength: number;
47
47
  }
@@ -2,7 +2,14 @@ import { KnowledgeGraph } from '../graph/types.js';
2
2
  import { ASTCache } from './ast-cache.js';
3
3
  import { SymbolTable } from './symbol-table.js';
4
4
  import { ImportMap } from './import-processor.js';
5
+ import type { ExtractedCall } from './workers/parse-worker.js';
5
6
  export declare const processCalls: (graph: KnowledgeGraph, files: {
6
7
  path: string;
7
8
  content: string;
8
9
  }[], astCache: ASTCache, symbolTable: SymbolTable, importMap: ImportMap, onProgress?: (current: number, total: number) => void) => Promise<void>;
10
+ /**
11
+ * Fast path: resolve pre-extracted call sites from workers.
12
+ * No AST parsing — workers already extracted calledName + sourceId.
13
+ * This function only does symbol table lookups + graph mutations.
14
+ */
15
+ export declare const processCallsFromExtracted: (graph: KnowledgeGraph, extractedCalls: ExtractedCall[], symbolTable: SymbolTable, importMap: ImportMap, onProgress?: (current: number, total: number) => void) => Promise<void>;
@@ -145,6 +145,8 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
145
145
  continue;
146
146
  }
147
147
  wasReparsed = true;
148
+ // Cache re-parsed tree so heritage phase gets hits
149
+ astCache.set(file.path, tree);
148
150
  }
149
151
  let query;
150
152
  let matches;
@@ -155,8 +157,6 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
155
157
  }
156
158
  catch (queryError) {
157
159
  console.warn(`Query error for ${file.path}:`, queryError);
158
- if (wasReparsed)
159
- tree.delete?.();
160
160
  continue;
161
161
  }
162
162
  // 3. Process each call match
@@ -192,10 +192,7 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
192
192
  reason: resolved.reason,
193
193
  });
194
194
  });
195
- // Cleanup if re-parsed
196
- if (wasReparsed) {
197
- tree.delete?.();
198
- }
195
+ // Tree is now owned by the LRU cache — no manual delete needed
199
196
  }
200
197
  };
201
198
  /**
@@ -207,27 +204,27 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
207
204
  * Returns confidence score so agents know what to trust.
208
205
  */
209
206
  const resolveCallTarget = (calledName, currentFile, symbolTable, importMap) => {
210
- // Strategy A: Check imported files (HIGH confidence - we know the import chain)
211
- const importedFiles = importMap.get(currentFile);
212
- if (importedFiles) {
213
- for (const importedFile of importedFiles) {
214
- const nodeId = symbolTable.lookupExact(importedFile, calledName);
215
- if (nodeId) {
216
- return { nodeId, confidence: 0.9, reason: 'import-resolved' };
217
- }
218
- }
219
- }
220
- // Strategy B: Check local file (HIGH confidence - same file definition)
207
+ // Strategy B first (cheapest single map lookup): Check local file
221
208
  const localNodeId = symbolTable.lookupExact(currentFile, calledName);
222
209
  if (localNodeId) {
223
210
  return { nodeId: localNodeId, confidence: 0.85, reason: 'same-file' };
224
211
  }
225
- // Strategy C: Fuzzy global search (LOW confidence - just matching by name)
226
- const fuzzyMatches = symbolTable.lookupFuzzy(calledName);
227
- if (fuzzyMatches.length > 0) {
228
- // Lower confidence if multiple matches exist (more ambiguous)
229
- const confidence = fuzzyMatches.length === 1 ? 0.5 : 0.3;
230
- return { nodeId: fuzzyMatches[0].nodeId, confidence, reason: 'fuzzy-global' };
212
+ // Strategy A: Check if any definition of calledName is in an imported file
213
+ // Reversed: instead of iterating all imports and checking each, get all definitions
214
+ // and check if any is imported. O(definitions) instead of O(imports).
215
+ const allDefs = symbolTable.lookupFuzzy(calledName);
216
+ if (allDefs.length > 0) {
217
+ const importedFiles = importMap.get(currentFile);
218
+ if (importedFiles) {
219
+ for (const def of allDefs) {
220
+ if (importedFiles.has(def.filePath)) {
221
+ return { nodeId: def.nodeId, confidence: 0.9, reason: 'import-resolved' };
222
+ }
223
+ }
224
+ }
225
+ // Strategy C: Fuzzy global (no import match found)
226
+ const confidence = allDefs.length === 1 ? 0.5 : 0.3;
227
+ return { nodeId: allDefs[0].nodeId, confidence, reason: 'fuzzy-global' };
231
228
  }
232
229
  return null;
233
230
  };
@@ -267,3 +264,44 @@ const isBuiltInOrNoise = (name) => {
267
264
  ]);
268
265
  return builtIns.has(name);
269
266
  };
267
+ /**
268
+ * Fast path: resolve pre-extracted call sites from workers.
269
+ * No AST parsing — workers already extracted calledName + sourceId.
270
+ * This function only does symbol table lookups + graph mutations.
271
+ */
272
+ export const processCallsFromExtracted = async (graph, extractedCalls, symbolTable, importMap, onProgress) => {
273
+ // Group by file for progress reporting
274
+ const byFile = new Map();
275
+ for (const call of extractedCalls) {
276
+ let list = byFile.get(call.filePath);
277
+ if (!list) {
278
+ list = [];
279
+ byFile.set(call.filePath, list);
280
+ }
281
+ list.push(call);
282
+ }
283
+ const totalFiles = byFile.size;
284
+ let filesProcessed = 0;
285
+ for (const [_filePath, calls] of byFile) {
286
+ filesProcessed++;
287
+ if (filesProcessed % 100 === 0) {
288
+ onProgress?.(filesProcessed, totalFiles);
289
+ await yieldToEventLoop();
290
+ }
291
+ for (const call of calls) {
292
+ const resolved = resolveCallTarget(call.calledName, call.filePath, symbolTable, importMap);
293
+ if (!resolved)
294
+ continue;
295
+ const relId = generateId('CALLS', `${call.sourceId}:${call.calledName}->${resolved.nodeId}`);
296
+ graph.addRelationship({
297
+ id: relId,
298
+ sourceId: call.sourceId,
299
+ targetId: resolved.nodeId,
300
+ type: 'CALLS',
301
+ confidence: resolved.confidence,
302
+ reason: resolved.reason,
303
+ });
304
+ }
305
+ }
306
+ onProgress?.(totalFiles, totalFiles);
307
+ };
@@ -103,9 +103,19 @@ const buildGraphologyGraph = (knowledgeGraph) => {
103
103
  const graph = new Graph({ type: 'undirected', allowSelfLoops: false });
104
104
  // Symbol types that should be clustered
105
105
  const symbolTypes = new Set(['Function', 'Class', 'Method', 'Interface']);
106
- // Add symbol nodes
106
+ // First pass: collect which nodes participate in clustering edges
107
+ const clusteringRelTypes = new Set(['CALLS', 'EXTENDS', 'IMPLEMENTS']);
108
+ const connectedNodes = new Set();
109
+ knowledgeGraph.relationships.forEach(rel => {
110
+ if (clusteringRelTypes.has(rel.type) && rel.sourceId !== rel.targetId) {
111
+ connectedNodes.add(rel.sourceId);
112
+ connectedNodes.add(rel.targetId);
113
+ }
114
+ });
115
+ // Only add nodes that have at least one clustering edge
116
+ // Isolated nodes would just become singletons (skipped anyway)
107
117
  knowledgeGraph.nodes.forEach(node => {
108
- if (symbolTypes.has(node.label)) {
118
+ if (symbolTypes.has(node.label) && connectedNodes.has(node.id)) {
109
119
  graph.addNode(node.id, {
110
120
  name: node.properties.name,
111
121
  filePath: node.properties.filePath,
@@ -113,15 +123,10 @@ const buildGraphologyGraph = (knowledgeGraph) => {
113
123
  });
114
124
  }
115
125
  });
116
- // Add CALLS edges (primary clustering signal)
117
- // We can also include EXTENDS/IMPLEMENTS for OOP clustering
118
- const clusteringRelTypes = new Set(['CALLS', 'EXTENDS', 'IMPLEMENTS']);
126
+ // Add edges
119
127
  knowledgeGraph.relationships.forEach(rel => {
120
128
  if (clusteringRelTypes.has(rel.type)) {
121
- // Only add edge if both nodes exist in our symbol graph
122
- // Also skip self-loops (recursive calls) - not allowed in undirected graph
123
129
  if (graph.hasNode(rel.sourceId) && graph.hasNode(rel.targetId) && rel.sourceId !== rel.targetId) {
124
- // Avoid duplicate edges
125
130
  if (!graph.hasEdge(rel.sourceId, rel.targetId)) {
126
131
  graph.addEdge(rel.sourceId, rel.targetId);
127
132
  }
@@ -241,29 +246,32 @@ const findCommonPrefix = (strings) => {
241
246
  // HELPER: Calculate community cohesion
242
247
  // ============================================================================
243
248
  /**
244
- * Calculate cohesion score (0-1) based on internal edge density
245
- * Higher cohesion = more internal connections relative to size
249
+ * Estimate cohesion score (0-1) based on internal edge density.
250
+ * Uses sampling for large communities to avoid O(N^2) cost.
246
251
  */
247
252
  const calculateCohesion = (memberIds, graph) => {
248
253
  if (memberIds.length <= 1)
249
254
  return 1.0;
250
255
  const memberSet = new Set(memberIds);
256
+ // Sample up to 50 members for large communities
257
+ const SAMPLE_SIZE = 50;
258
+ const sample = memberIds.length <= SAMPLE_SIZE
259
+ ? memberIds
260
+ : memberIds.slice(0, SAMPLE_SIZE);
251
261
  let internalEdges = 0;
252
- // Count edges within the community
253
- memberIds.forEach(nodeId => {
254
- if (graph.hasNode(nodeId)) {
255
- graph.forEachNeighbor(nodeId, neighbor => {
256
- if (memberSet.has(neighbor)) {
257
- internalEdges++;
258
- }
259
- });
260
- }
261
- });
262
- // Each edge is counted twice (once from each end), so divide by 2
263
- internalEdges = internalEdges / 2;
264
- // Maximum possible internal edges for n nodes: n*(n-1)/2
265
- const maxPossibleEdges = (memberIds.length * (memberIds.length - 1)) / 2;
266
- if (maxPossibleEdges === 0)
262
+ let totalEdges = 0;
263
+ for (const nodeId of sample) {
264
+ if (!graph.hasNode(nodeId))
265
+ continue;
266
+ graph.forEachNeighbor(nodeId, (neighbor) => {
267
+ totalEdges++;
268
+ if (memberSet.has(neighbor)) {
269
+ internalEdges++;
270
+ }
271
+ });
272
+ }
273
+ // Cohesion = fraction of edges that stay internal
274
+ if (totalEdges === 0)
267
275
  return 1.0;
268
- return Math.min(1.0, internalEdges / maxPossibleEdges);
276
+ return Math.min(1.0, internalEdges / totalEdges);
269
277
  };
@@ -2,6 +2,7 @@ import fs from 'fs/promises';
2
2
  import path from 'path';
3
3
  import { glob } from 'glob';
4
4
  import { shouldIgnorePath } from '../../config/ignore-service.js';
5
+ const READ_CONCURRENCY = 32;
5
6
  export const walkRepository = async (repoPath, onProgress) => {
6
7
  const files = await glob('**/*', {
7
8
  cwd: repoPath,
@@ -10,16 +11,20 @@ export const walkRepository = async (repoPath, onProgress) => {
10
11
  });
11
12
  const filtered = files.filter(file => !shouldIgnorePath(file));
12
13
  const entries = [];
13
- for (let i = 0; i < filtered.length; i++) {
14
- const relativePath = filtered[i];
15
- const fullPath = path.join(repoPath, relativePath);
16
- try {
17
- const content = await fs.readFile(fullPath, 'utf-8');
18
- entries.push({ path: relativePath.replace(/\\/g, '/'), content });
19
- onProgress?.(i + 1, filtered.length, relativePath);
20
- }
21
- catch {
22
- onProgress?.(i + 1, filtered.length, relativePath);
14
+ let processed = 0;
15
+ for (let start = 0; start < filtered.length; start += READ_CONCURRENCY) {
16
+ const batch = filtered.slice(start, start + READ_CONCURRENCY);
17
+ const results = await Promise.allSettled(batch.map(relativePath => fs.readFile(path.join(repoPath, relativePath), 'utf-8')
18
+ .then(content => ({ path: relativePath.replace(/\\/g, '/'), content }))));
19
+ for (const result of results) {
20
+ processed++;
21
+ if (result.status === 'fulfilled') {
22
+ entries.push(result.value);
23
+ onProgress?.(processed, filtered.length, result.value.path);
24
+ }
25
+ else {
26
+ onProgress?.(processed, filtered.length, batch[results.indexOf(result)]);
27
+ }
23
28
  }
24
29
  }
25
30
  return entries;
@@ -8,7 +8,13 @@
8
8
  import { KnowledgeGraph } from '../graph/types.js';
9
9
  import { ASTCache } from './ast-cache.js';
10
10
  import { SymbolTable } from './symbol-table.js';
11
+ import type { ExtractedHeritage } from './workers/parse-worker.js';
11
12
  export declare const processHeritage: (graph: KnowledgeGraph, files: {
12
13
  path: string;
13
14
  content: string;
14
15
  }[], astCache: ASTCache, symbolTable: SymbolTable, onProgress?: (current: number, total: number) => void) => Promise<void>;
16
+ /**
17
+ * Fast path: resolve pre-extracted heritage from workers.
18
+ * No AST parsing — workers already extracted className + parentName + kind.
19
+ */
20
+ export declare const processHeritageFromExtracted: (graph: KnowledgeGraph, extractedHeritage: ExtractedHeritage[], symbolTable: SymbolTable, onProgress?: (current: number, total: number) => void) => Promise<void>;