gitnexus 1.1.8 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/README.md +50 -59
  2. package/dist/cli/ai-context.js +9 -9
  3. package/dist/cli/analyze.js +139 -47
  4. package/dist/cli/augment.d.ts +13 -0
  5. package/dist/cli/augment.js +33 -0
  6. package/dist/cli/claude-hooks.d.ts +22 -0
  7. package/dist/cli/claude-hooks.js +97 -0
  8. package/dist/cli/eval-server.d.ts +30 -0
  9. package/dist/cli/eval-server.js +372 -0
  10. package/dist/cli/index.js +56 -1
  11. package/dist/cli/mcp.js +9 -0
  12. package/dist/cli/setup.js +184 -5
  13. package/dist/cli/tool.d.ts +37 -0
  14. package/dist/cli/tool.js +91 -0
  15. package/dist/cli/wiki.d.ts +13 -0
  16. package/dist/cli/wiki.js +199 -0
  17. package/dist/core/augmentation/engine.d.ts +26 -0
  18. package/dist/core/augmentation/engine.js +213 -0
  19. package/dist/core/embeddings/embedder.d.ts +2 -2
  20. package/dist/core/embeddings/embedder.js +11 -11
  21. package/dist/core/embeddings/embedding-pipeline.d.ts +2 -1
  22. package/dist/core/embeddings/embedding-pipeline.js +13 -5
  23. package/dist/core/embeddings/types.d.ts +2 -2
  24. package/dist/core/ingestion/call-processor.d.ts +7 -0
  25. package/dist/core/ingestion/call-processor.js +61 -23
  26. package/dist/core/ingestion/community-processor.js +34 -26
  27. package/dist/core/ingestion/filesystem-walker.js +15 -10
  28. package/dist/core/ingestion/heritage-processor.d.ts +6 -0
  29. package/dist/core/ingestion/heritage-processor.js +68 -5
  30. package/dist/core/ingestion/import-processor.d.ts +22 -0
  31. package/dist/core/ingestion/import-processor.js +215 -20
  32. package/dist/core/ingestion/parsing-processor.d.ts +8 -1
  33. package/dist/core/ingestion/parsing-processor.js +66 -25
  34. package/dist/core/ingestion/pipeline.js +104 -40
  35. package/dist/core/ingestion/process-processor.js +1 -1
  36. package/dist/core/ingestion/workers/parse-worker.d.ts +58 -0
  37. package/dist/core/ingestion/workers/parse-worker.js +451 -0
  38. package/dist/core/ingestion/workers/worker-pool.d.ts +22 -0
  39. package/dist/core/ingestion/workers/worker-pool.js +65 -0
  40. package/dist/core/kuzu/kuzu-adapter.d.ts +15 -1
  41. package/dist/core/kuzu/kuzu-adapter.js +177 -63
  42. package/dist/core/kuzu/schema.d.ts +1 -1
  43. package/dist/core/kuzu/schema.js +3 -0
  44. package/dist/core/search/bm25-index.js +13 -15
  45. package/dist/core/wiki/generator.d.ts +96 -0
  46. package/dist/core/wiki/generator.js +674 -0
  47. package/dist/core/wiki/graph-queries.d.ts +80 -0
  48. package/dist/core/wiki/graph-queries.js +238 -0
  49. package/dist/core/wiki/html-viewer.d.ts +10 -0
  50. package/dist/core/wiki/html-viewer.js +297 -0
  51. package/dist/core/wiki/llm-client.d.ts +36 -0
  52. package/dist/core/wiki/llm-client.js +111 -0
  53. package/dist/core/wiki/prompts.d.ts +53 -0
  54. package/dist/core/wiki/prompts.js +174 -0
  55. package/dist/mcp/core/embedder.js +4 -2
  56. package/dist/mcp/core/kuzu-adapter.d.ts +2 -1
  57. package/dist/mcp/core/kuzu-adapter.js +35 -15
  58. package/dist/mcp/local/local-backend.d.ts +54 -1
  59. package/dist/mcp/local/local-backend.js +716 -171
  60. package/dist/mcp/resources.d.ts +1 -1
  61. package/dist/mcp/resources.js +111 -73
  62. package/dist/mcp/server.d.ts +1 -1
  63. package/dist/mcp/server.js +91 -22
  64. package/dist/mcp/tools.js +80 -61
  65. package/dist/storage/git.d.ts +0 -1
  66. package/dist/storage/git.js +1 -8
  67. package/dist/storage/repo-manager.d.ts +17 -0
  68. package/dist/storage/repo-manager.js +26 -0
  69. package/hooks/claude/gitnexus-hook.cjs +135 -0
  70. package/hooks/claude/pre-tool-use.sh +78 -0
  71. package/hooks/claude/session-start.sh +42 -0
  72. package/package.json +4 -2
  73. package/skills/debugging.md +24 -22
  74. package/skills/exploring.md +26 -24
  75. package/skills/impact-analysis.md +19 -13
  76. package/skills/refactoring.md +37 -26
@@ -0,0 +1,213 @@
1
+ /**
2
+ * Augmentation Engine
3
+ *
4
+ * Lightweight, fast-path enrichment of search patterns with knowledge graph context.
5
+ * Designed to be called from platform hooks (Claude Code PreToolUse, Cursor beforeShellExecution)
6
+ * when an agent runs grep/glob/search.
7
+ *
8
+ * Performance target: <500ms cold start, <200ms warm.
9
+ *
10
+ * Design decisions:
11
+ * - Uses only BM25 search (no semantic/embedding) for speed
12
+ * - Clusters used internally for ranking, NEVER in output
13
+ * - Output is pure relationships: callers, callees, process participation
14
+ * - Graceful failure: any error → return empty string
15
+ */
16
+ import path from 'path';
17
+ import { listRegisteredRepos } from '../../storage/repo-manager.js';
18
+ /**
19
+ * Find the best matching repo for a given working directory.
20
+ * Matches by checking if cwd is within the repo's path.
21
+ */
22
+ async function findRepoForCwd(cwd) {
23
+ try {
24
+ const entries = await listRegisteredRepos({ validate: true });
25
+ const resolved = path.resolve(cwd);
26
+ // Normalize to lowercase on Windows (drive letters can differ: D: vs d:)
27
+ const isWindows = process.platform === 'win32';
28
+ const normalizedCwd = isWindows ? resolved.toLowerCase() : resolved;
29
+ const sep = path.sep;
30
+ // Find the LONGEST matching repo path (most specific match wins)
31
+ let bestMatch = null;
32
+ let bestLen = 0;
33
+ for (const entry of entries) {
34
+ const repoResolved = path.resolve(entry.path);
35
+ const normalizedRepo = isWindows ? repoResolved.toLowerCase() : repoResolved;
36
+ // Check if cwd is inside repo OR repo is inside cwd
37
+ // Must match at a path separator boundary to avoid false positives
38
+ // (e.g. /projects/gitnexusv2 should NOT match /projects/gitnexus)
39
+ let matched = false;
40
+ if (normalizedCwd === normalizedRepo) {
41
+ matched = true;
42
+ }
43
+ else if (normalizedCwd.startsWith(normalizedRepo + sep)) {
44
+ matched = true;
45
+ }
46
+ else if (normalizedRepo.startsWith(normalizedCwd + sep)) {
47
+ matched = true;
48
+ }
49
+ if (matched && normalizedRepo.length > bestLen) {
50
+ bestMatch = entry;
51
+ bestLen = normalizedRepo.length;
52
+ }
53
+ }
54
+ if (!bestMatch)
55
+ return null;
56
+ return {
57
+ name: bestMatch.name,
58
+ storagePath: bestMatch.storagePath,
59
+ kuzuPath: path.join(bestMatch.storagePath, 'kuzu'),
60
+ };
61
+ }
62
+ catch {
63
+ return null;
64
+ }
65
+ }
66
+ /**
67
+ * Augment a search pattern with knowledge graph context.
68
+ *
69
+ * 1. BM25 search for the pattern
70
+ * 2. For top matches, fetch callers/callees/processes
71
+ * 3. Rank by internal cluster cohesion (not exposed)
72
+ * 4. Format as structured text block
73
+ *
74
+ * Returns empty string on any error (graceful failure).
75
+ */
76
+ export async function augment(pattern, cwd) {
77
+ if (!pattern || pattern.length < 3)
78
+ return '';
79
+ const workDir = cwd || process.cwd();
80
+ try {
81
+ const repo = await findRepoForCwd(workDir);
82
+ if (!repo)
83
+ return '';
84
+ // Lazy-load kuzu adapter (skip unnecessary init)
85
+ const { initKuzu, executeQuery, isKuzuReady } = await import('../../mcp/core/kuzu-adapter.js');
86
+ const { searchFTSFromKuzu } = await import('../search/bm25-index.js');
87
+ const repoId = repo.name.toLowerCase();
88
+ // Init KuzuDB if not already
89
+ if (!isKuzuReady(repoId)) {
90
+ await initKuzu(repoId, repo.kuzuPath);
91
+ }
92
+ // Step 1: BM25 search (fast, no embeddings)
93
+ const bm25Results = await searchFTSFromKuzu(pattern, 10, repoId);
94
+ if (bm25Results.length === 0)
95
+ return '';
96
+ // Step 2: Map BM25 file results to symbols
97
+ const symbolMatches = [];
98
+ for (const result of bm25Results.slice(0, 5)) {
99
+ const escaped = result.filePath.replace(/'/g, "''");
100
+ try {
101
+ const symbols = await executeQuery(repoId, `
102
+ MATCH (n) WHERE n.filePath = '${escaped}'
103
+ AND n.name CONTAINS '${pattern.replace(/'/g, "''").split(/\s+/)[0]}'
104
+ RETURN n.id AS id, n.name AS name, labels(n)[0] AS type, n.filePath AS filePath
105
+ LIMIT 3
106
+ `);
107
+ for (const sym of symbols) {
108
+ symbolMatches.push({
109
+ nodeId: sym.id || sym[0],
110
+ name: sym.name || sym[1],
111
+ type: sym.type || sym[2],
112
+ filePath: sym.filePath || sym[3],
113
+ score: result.score,
114
+ });
115
+ }
116
+ }
117
+ catch { /* skip */ }
118
+ }
119
+ if (symbolMatches.length === 0)
120
+ return '';
121
+ // Step 3: For top matches, fetch callers/callees/processes
122
+ // Also get cluster cohesion internally for ranking
123
+ const enriched = [];
124
+ const seen = new Set();
125
+ for (const sym of symbolMatches.slice(0, 5)) {
126
+ if (seen.has(sym.nodeId))
127
+ continue;
128
+ seen.add(sym.nodeId);
129
+ const escaped = sym.nodeId.replace(/'/g, "''");
130
+ // Callers
131
+ let callers = [];
132
+ try {
133
+ const rows = await executeQuery(repoId, `
134
+ MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(n {id: '${escaped}'})
135
+ RETURN caller.name AS name
136
+ LIMIT 3
137
+ `);
138
+ callers = rows.map((r) => r.name || r[0]).filter(Boolean);
139
+ }
140
+ catch { /* skip */ }
141
+ // Callees
142
+ let callees = [];
143
+ try {
144
+ const rows = await executeQuery(repoId, `
145
+ MATCH (n {id: '${escaped}'})-[:CodeRelation {type: 'CALLS'}]->(callee)
146
+ RETURN callee.name AS name
147
+ LIMIT 3
148
+ `);
149
+ callees = rows.map((r) => r.name || r[0]).filter(Boolean);
150
+ }
151
+ catch { /* skip */ }
152
+ // Processes
153
+ let processes = [];
154
+ try {
155
+ const rows = await executeQuery(repoId, `
156
+ MATCH (n {id: '${escaped}'})-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process)
157
+ RETURN p.heuristicLabel AS label, r.step AS step, p.stepCount AS stepCount
158
+ `);
159
+ processes = rows.map((r) => {
160
+ const label = r.label || r[0];
161
+ const step = r.step || r[1];
162
+ const stepCount = r.stepCount || r[2];
163
+ return `${label} (step ${step}/${stepCount})`;
164
+ }).filter(Boolean);
165
+ }
166
+ catch { /* skip */ }
167
+ // Cluster cohesion (internal ranking signal)
168
+ let cohesion = 0;
169
+ try {
170
+ const rows = await executeQuery(repoId, `
171
+ MATCH (n {id: '${escaped}'})-[:CodeRelation {type: 'MEMBER_OF'}]->(c:Community)
172
+ RETURN c.cohesion AS cohesion
173
+ LIMIT 1
174
+ `);
175
+ if (rows.length > 0) {
176
+ cohesion = (rows[0].cohesion ?? rows[0][0]) || 0;
177
+ }
178
+ }
179
+ catch { /* skip */ }
180
+ enriched.push({
181
+ name: sym.name,
182
+ filePath: sym.filePath,
183
+ callers,
184
+ callees,
185
+ processes,
186
+ cohesion,
187
+ });
188
+ }
189
+ if (enriched.length === 0)
190
+ return '';
191
+ // Step 4: Rank by cohesion (internal signal) and format
192
+ enriched.sort((a, b) => b.cohesion - a.cohesion);
193
+ const lines = [`[GitNexus] ${enriched.length} related symbols found:`, ''];
194
+ for (const item of enriched) {
195
+ lines.push(`${item.name} (${item.filePath})`);
196
+ if (item.callers.length > 0) {
197
+ lines.push(` Called by: ${item.callers.join(', ')}`);
198
+ }
199
+ if (item.callees.length > 0) {
200
+ lines.push(` Calls: ${item.callees.join(', ')}`);
201
+ }
202
+ if (item.processes.length > 0) {
203
+ lines.push(` Flows: ${item.processes.join(', ')}`);
204
+ }
205
+ lines.push('');
206
+ }
207
+ return lines.join('\n').trim();
208
+ }
209
+ catch {
210
+ // Graceful failure — never break the original tool
211
+ return '';
212
+ }
213
+ }
@@ -15,7 +15,7 @@ export type ModelProgressCallback = (progress: ModelProgress) => void;
15
15
  /**
16
16
  * Get the current device being used for inference
17
17
  */
18
- export declare const getCurrentDevice: () => "webgpu" | "cuda" | "cpu" | "wasm" | null;
18
+ export declare const getCurrentDevice: () => "dml" | "cuda" | "cpu" | "wasm" | null;
19
19
  /**
20
20
  * Initialize the embedding model
21
21
  * Uses singleton pattern - only loads once, subsequent calls return cached instance
@@ -25,7 +25,7 @@ export declare const getCurrentDevice: () => "webgpu" | "cuda" | "cpu" | "wasm"
25
25
  * @param forceDevice - Force a specific device
26
26
  * @returns Promise resolving to the embedder pipeline
27
27
  */
28
- export declare const initEmbedder: (onProgress?: ModelProgressCallback, config?: Partial<EmbeddingConfig>, forceDevice?: "webgpu" | "cuda" | "cpu" | "wasm") => Promise<FeatureExtractionPipeline>;
28
+ export declare const initEmbedder: (onProgress?: ModelProgressCallback, config?: Partial<EmbeddingConfig>, forceDevice?: "dml" | "cuda" | "cpu" | "wasm") => Promise<FeatureExtractionPipeline>;
29
29
  /**
30
30
  * Check if the embedder is initialized and ready
31
31
  */
@@ -37,16 +37,16 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
37
37
  }
38
38
  isInitializing = true;
39
39
  const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
40
- // On Windows, use webgpu for GPU acceleration (via DirectX12/DirectML)
41
- // CUDA is only available on Linux with onnxruntime-node
40
+ // On Windows, use DirectML for GPU acceleration (via DirectX12)
41
+ // CUDA is only available on Linux x64 with onnxruntime-node
42
42
  const isWindows = process.platform === 'win32';
43
- const gpuDevice = isWindows ? 'webgpu' : 'cuda';
43
+ const gpuDevice = isWindows ? 'dml' : 'cuda';
44
44
  let requestedDevice = forceDevice || (finalConfig.device === 'auto' ? gpuDevice : finalConfig.device);
45
45
  initPromise = (async () => {
46
46
  try {
47
47
  // Configure transformers.js environment
48
48
  env.allowLocalModels = false;
49
- const isDev = process.env.NODE_ENV !== 'production';
49
+ const isDev = process.env.NODE_ENV === 'development';
50
50
  if (isDev) {
51
51
  console.log(`🧠 Loading embedding model: ${finalConfig.modelId}`);
52
52
  }
@@ -61,14 +61,14 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
61
61
  onProgress(progress);
62
62
  } : undefined;
63
63
  // Try GPU first if auto, fall back to CPU
64
- // Windows: webgpu (DirectX12/DirectML), Linux: cuda
65
- const devicesToTry = (requestedDevice === 'webgpu' || requestedDevice === 'cuda')
64
+ // Windows: dml (DirectML/DirectX12), Linux: cuda
65
+ const devicesToTry = (requestedDevice === 'dml' || requestedDevice === 'cuda')
66
66
  ? [requestedDevice, 'cpu']
67
67
  : [requestedDevice];
68
68
  for (const device of devicesToTry) {
69
69
  try {
70
- if (isDev && device === 'webgpu') {
71
- console.log('🔧 Trying WebGPU (DirectX12) backend...');
70
+ if (isDev && device === 'dml') {
71
+ console.log('🔧 Trying DirectML (DirectX12) GPU backend...');
72
72
  }
73
73
  else if (isDev && device === 'cuda') {
74
74
  console.log('🔧 Trying CUDA GPU backend...');
@@ -86,7 +86,7 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
86
86
  });
87
87
  currentDevice = device;
88
88
  if (isDev) {
89
- const label = device === 'webgpu' ? 'GPU (WebGPU/DirectX12)'
89
+ const label = device === 'dml' ? 'GPU (DirectML/DirectX12)'
90
90
  : device === 'cuda' ? 'GPU (CUDA)'
91
91
  : device.toUpperCase();
92
92
  console.log(`✅ Using ${label} backend`);
@@ -95,8 +95,8 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
95
95
  return embedderInstance;
96
96
  }
97
97
  catch (deviceError) {
98
- if (isDev && (device === 'cuda' || device === 'webgpu')) {
99
- const gpuType = device === 'webgpu' ? 'WebGPU' : 'CUDA';
98
+ if (isDev && (device === 'cuda' || device === 'dml')) {
99
+ const gpuType = device === 'dml' ? 'DirectML' : 'CUDA';
100
100
  console.log(`⚠️ ${gpuType} not available, falling back to CPU...`);
101
101
  }
102
102
  // Continue to next device in list
@@ -20,8 +20,9 @@ export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
20
20
  * @param executeWithReusedStatement - Function to execute with reused prepared statement
21
21
  * @param onProgress - Callback for progress updates
22
22
  * @param config - Optional configuration override
23
+ * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
23
24
  */
24
- export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>) => Promise<void>;
25
+ export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>) => Promise<void>;
25
26
  /**
26
27
  * Perform semantic search using the vector index
27
28
  *
@@ -11,7 +11,7 @@
11
11
  import { initEmbedder, embedBatch, embedText, embeddingToArray, isEmbedderReady } from './embedder.js';
12
12
  import { generateBatchEmbeddingTexts } from './text-generator.js';
13
13
  import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, } from './types.js';
14
- const isDev = process.env.NODE_ENV !== 'production';
14
+ const isDev = process.env.NODE_ENV === 'development';
15
15
  /**
16
16
  * Query all embeddable nodes from KuzuDB
17
17
  * Uses table-specific queries (File has different schema than code elements)
@@ -97,8 +97,9 @@ const createVectorIndex = async (executeQuery) => {
97
97
  * @param executeWithReusedStatement - Function to execute with reused prepared statement
98
98
  * @param onProgress - Callback for progress updates
99
99
  * @param config - Optional configuration override
100
+ * @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
100
101
  */
101
- export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}) => {
102
+ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, skipNodeIds) => {
102
103
  const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
103
104
  try {
104
105
  // Phase 1: Load embedding model
@@ -108,11 +109,10 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
108
109
  modelDownloadPercent: 0,
109
110
  });
110
111
  await initEmbedder((modelProgress) => {
111
- // Report model download progress
112
112
  const downloadPercent = modelProgress.progress ?? 0;
113
113
  onProgress({
114
114
  phase: 'loading-model',
115
- percent: Math.round(downloadPercent * 0.2), // 0-20% for model loading
115
+ percent: Math.round(downloadPercent * 0.2),
116
116
  modelDownloadPercent: downloadPercent,
117
117
  });
118
118
  }, finalConfig);
@@ -125,7 +125,15 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
125
125
  console.log('🔍 Querying embeddable nodes...');
126
126
  }
127
127
  // Phase 2: Query embeddable nodes
128
- const nodes = await queryEmbeddableNodes(executeQuery);
128
+ let nodes = await queryEmbeddableNodes(executeQuery);
129
+ // Incremental mode: filter out nodes that already have embeddings
130
+ if (skipNodeIds && skipNodeIds.size > 0) {
131
+ const beforeCount = nodes.length;
132
+ nodes = nodes.filter(n => !skipNodeIds.has(n.id));
133
+ if (isDev) {
134
+ console.log(`📦 Incremental embeddings: ${beforeCount} total, ${skipNodeIds.size} cached, ${nodes.length} to embed`);
135
+ }
136
+ }
129
137
  const totalNodes = nodes.length;
130
138
  if (isDev) {
131
139
  console.log(`📊 Found ${totalNodes} embeddable nodes`);
@@ -40,8 +40,8 @@ export interface EmbeddingConfig {
40
40
  batchSize: number;
41
41
  /** Embedding vector dimensions */
42
42
  dimensions: number;
43
- /** Device to use for inference: 'auto' tries GPU first, falls back to CPU */
44
- device: 'auto' | 'webgpu' | 'cuda' | 'cpu' | 'wasm';
43
+ /** Device to use for inference: 'auto' tries GPU first (DirectML on Windows, CUDA on Linux), falls back to CPU */
44
+ device: 'auto' | 'dml' | 'cuda' | 'cpu' | 'wasm';
45
45
  /** Maximum characters of code snippet to include */
46
46
  maxSnippetLength: number;
47
47
  }
@@ -2,7 +2,14 @@ import { KnowledgeGraph } from '../graph/types.js';
2
2
  import { ASTCache } from './ast-cache.js';
3
3
  import { SymbolTable } from './symbol-table.js';
4
4
  import { ImportMap } from './import-processor.js';
5
+ import type { ExtractedCall } from './workers/parse-worker.js';
5
6
  export declare const processCalls: (graph: KnowledgeGraph, files: {
6
7
  path: string;
7
8
  content: string;
8
9
  }[], astCache: ASTCache, symbolTable: SymbolTable, importMap: ImportMap, onProgress?: (current: number, total: number) => void) => Promise<void>;
10
+ /**
11
+ * Fast path: resolve pre-extracted call sites from workers.
12
+ * No AST parsing — workers already extracted calledName + sourceId.
13
+ * This function only does symbol table lookups + graph mutations.
14
+ */
15
+ export declare const processCallsFromExtracted: (graph: KnowledgeGraph, extractedCalls: ExtractedCall[], symbolTable: SymbolTable, importMap: ImportMap, onProgress?: (current: number, total: number) => void) => Promise<void>;
@@ -145,6 +145,8 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
145
145
  continue;
146
146
  }
147
147
  wasReparsed = true;
148
+ // Cache re-parsed tree so heritage phase gets hits
149
+ astCache.set(file.path, tree);
148
150
  }
149
151
  let query;
150
152
  let matches;
@@ -155,8 +157,6 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
155
157
  }
156
158
  catch (queryError) {
157
159
  console.warn(`Query error for ${file.path}:`, queryError);
158
- if (wasReparsed)
159
- tree.delete?.();
160
160
  continue;
161
161
  }
162
162
  // 3. Process each call match
@@ -192,10 +192,7 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
192
192
  reason: resolved.reason,
193
193
  });
194
194
  });
195
- // Cleanup if re-parsed
196
- if (wasReparsed) {
197
- tree.delete?.();
198
- }
195
+ // Tree is now owned by the LRU cache — no manual delete needed
199
196
  }
200
197
  };
201
198
  /**
@@ -207,27 +204,27 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
207
204
  * Returns confidence score so agents know what to trust.
208
205
  */
209
206
  const resolveCallTarget = (calledName, currentFile, symbolTable, importMap) => {
210
- // Strategy A: Check imported files (HIGH confidence - we know the import chain)
211
- const importedFiles = importMap.get(currentFile);
212
- if (importedFiles) {
213
- for (const importedFile of importedFiles) {
214
- const nodeId = symbolTable.lookupExact(importedFile, calledName);
215
- if (nodeId) {
216
- return { nodeId, confidence: 0.9, reason: 'import-resolved' };
217
- }
218
- }
219
- }
220
- // Strategy B: Check local file (HIGH confidence - same file definition)
207
+ // Strategy B first (cheapest single map lookup): Check local file
221
208
  const localNodeId = symbolTable.lookupExact(currentFile, calledName);
222
209
  if (localNodeId) {
223
210
  return { nodeId: localNodeId, confidence: 0.85, reason: 'same-file' };
224
211
  }
225
- // Strategy C: Fuzzy global search (LOW confidence - just matching by name)
226
- const fuzzyMatches = symbolTable.lookupFuzzy(calledName);
227
- if (fuzzyMatches.length > 0) {
228
- // Lower confidence if multiple matches exist (more ambiguous)
229
- const confidence = fuzzyMatches.length === 1 ? 0.5 : 0.3;
230
- return { nodeId: fuzzyMatches[0].nodeId, confidence, reason: 'fuzzy-global' };
212
+ // Strategy A: Check if any definition of calledName is in an imported file
213
+ // Reversed: instead of iterating all imports and checking each, get all definitions
214
+ // and check if any is imported. O(definitions) instead of O(imports).
215
+ const allDefs = symbolTable.lookupFuzzy(calledName);
216
+ if (allDefs.length > 0) {
217
+ const importedFiles = importMap.get(currentFile);
218
+ if (importedFiles) {
219
+ for (const def of allDefs) {
220
+ if (importedFiles.has(def.filePath)) {
221
+ return { nodeId: def.nodeId, confidence: 0.9, reason: 'import-resolved' };
222
+ }
223
+ }
224
+ }
225
+ // Strategy C: Fuzzy global (no import match found)
226
+ const confidence = allDefs.length === 1 ? 0.5 : 0.3;
227
+ return { nodeId: allDefs[0].nodeId, confidence, reason: 'fuzzy-global' };
231
228
  }
232
229
  return null;
233
230
  };
@@ -267,3 +264,44 @@ const isBuiltInOrNoise = (name) => {
267
264
  ]);
268
265
  return builtIns.has(name);
269
266
  };
267
+ /**
268
+ * Fast path: resolve pre-extracted call sites from workers.
269
+ * No AST parsing — workers already extracted calledName + sourceId.
270
+ * This function only does symbol table lookups + graph mutations.
271
+ */
272
+ export const processCallsFromExtracted = async (graph, extractedCalls, symbolTable, importMap, onProgress) => {
273
+ // Group by file for progress reporting
274
+ const byFile = new Map();
275
+ for (const call of extractedCalls) {
276
+ let list = byFile.get(call.filePath);
277
+ if (!list) {
278
+ list = [];
279
+ byFile.set(call.filePath, list);
280
+ }
281
+ list.push(call);
282
+ }
283
+ const totalFiles = byFile.size;
284
+ let filesProcessed = 0;
285
+ for (const [_filePath, calls] of byFile) {
286
+ filesProcessed++;
287
+ if (filesProcessed % 100 === 0) {
288
+ onProgress?.(filesProcessed, totalFiles);
289
+ await yieldToEventLoop();
290
+ }
291
+ for (const call of calls) {
292
+ const resolved = resolveCallTarget(call.calledName, call.filePath, symbolTable, importMap);
293
+ if (!resolved)
294
+ continue;
295
+ const relId = generateId('CALLS', `${call.sourceId}:${call.calledName}->${resolved.nodeId}`);
296
+ graph.addRelationship({
297
+ id: relId,
298
+ sourceId: call.sourceId,
299
+ targetId: resolved.nodeId,
300
+ type: 'CALLS',
301
+ confidence: resolved.confidence,
302
+ reason: resolved.reason,
303
+ });
304
+ }
305
+ }
306
+ onProgress?.(totalFiles, totalFiles);
307
+ };
@@ -103,9 +103,19 @@ const buildGraphologyGraph = (knowledgeGraph) => {
103
103
  const graph = new Graph({ type: 'undirected', allowSelfLoops: false });
104
104
  // Symbol types that should be clustered
105
105
  const symbolTypes = new Set(['Function', 'Class', 'Method', 'Interface']);
106
- // Add symbol nodes
106
+ // First pass: collect which nodes participate in clustering edges
107
+ const clusteringRelTypes = new Set(['CALLS', 'EXTENDS', 'IMPLEMENTS']);
108
+ const connectedNodes = new Set();
109
+ knowledgeGraph.relationships.forEach(rel => {
110
+ if (clusteringRelTypes.has(rel.type) && rel.sourceId !== rel.targetId) {
111
+ connectedNodes.add(rel.sourceId);
112
+ connectedNodes.add(rel.targetId);
113
+ }
114
+ });
115
+ // Only add nodes that have at least one clustering edge
116
+ // Isolated nodes would just become singletons (skipped anyway)
107
117
  knowledgeGraph.nodes.forEach(node => {
108
- if (symbolTypes.has(node.label)) {
118
+ if (symbolTypes.has(node.label) && connectedNodes.has(node.id)) {
109
119
  graph.addNode(node.id, {
110
120
  name: node.properties.name,
111
121
  filePath: node.properties.filePath,
@@ -113,15 +123,10 @@ const buildGraphologyGraph = (knowledgeGraph) => {
113
123
  });
114
124
  }
115
125
  });
116
- // Add CALLS edges (primary clustering signal)
117
- // We can also include EXTENDS/IMPLEMENTS for OOP clustering
118
- const clusteringRelTypes = new Set(['CALLS', 'EXTENDS', 'IMPLEMENTS']);
126
+ // Add edges
119
127
  knowledgeGraph.relationships.forEach(rel => {
120
128
  if (clusteringRelTypes.has(rel.type)) {
121
- // Only add edge if both nodes exist in our symbol graph
122
- // Also skip self-loops (recursive calls) - not allowed in undirected graph
123
129
  if (graph.hasNode(rel.sourceId) && graph.hasNode(rel.targetId) && rel.sourceId !== rel.targetId) {
124
- // Avoid duplicate edges
125
130
  if (!graph.hasEdge(rel.sourceId, rel.targetId)) {
126
131
  graph.addEdge(rel.sourceId, rel.targetId);
127
132
  }
@@ -241,29 +246,32 @@ const findCommonPrefix = (strings) => {
241
246
  // HELPER: Calculate community cohesion
242
247
  // ============================================================================
243
248
  /**
244
- * Calculate cohesion score (0-1) based on internal edge density
245
- * Higher cohesion = more internal connections relative to size
249
+ * Estimate cohesion score (0-1) based on internal edge density.
250
+ * Uses sampling for large communities to avoid O(N^2) cost.
246
251
  */
247
252
  const calculateCohesion = (memberIds, graph) => {
248
253
  if (memberIds.length <= 1)
249
254
  return 1.0;
250
255
  const memberSet = new Set(memberIds);
256
+ // Sample up to 50 members for large communities
257
+ const SAMPLE_SIZE = 50;
258
+ const sample = memberIds.length <= SAMPLE_SIZE
259
+ ? memberIds
260
+ : memberIds.slice(0, SAMPLE_SIZE);
251
261
  let internalEdges = 0;
252
- // Count edges within the community
253
- memberIds.forEach(nodeId => {
254
- if (graph.hasNode(nodeId)) {
255
- graph.forEachNeighbor(nodeId, neighbor => {
256
- if (memberSet.has(neighbor)) {
257
- internalEdges++;
258
- }
259
- });
260
- }
261
- });
262
- // Each edge is counted twice (once from each end), so divide by 2
263
- internalEdges = internalEdges / 2;
264
- // Maximum possible internal edges for n nodes: n*(n-1)/2
265
- const maxPossibleEdges = (memberIds.length * (memberIds.length - 1)) / 2;
266
- if (maxPossibleEdges === 0)
262
+ let totalEdges = 0;
263
+ for (const nodeId of sample) {
264
+ if (!graph.hasNode(nodeId))
265
+ continue;
266
+ graph.forEachNeighbor(nodeId, (neighbor) => {
267
+ totalEdges++;
268
+ if (memberSet.has(neighbor)) {
269
+ internalEdges++;
270
+ }
271
+ });
272
+ }
273
+ // Cohesion = fraction of edges that stay internal
274
+ if (totalEdges === 0)
267
275
  return 1.0;
268
- return Math.min(1.0, internalEdges / maxPossibleEdges);
276
+ return Math.min(1.0, internalEdges / totalEdges);
269
277
  };
@@ -2,6 +2,7 @@ import fs from 'fs/promises';
2
2
  import path from 'path';
3
3
  import { glob } from 'glob';
4
4
  import { shouldIgnorePath } from '../../config/ignore-service.js';
5
+ const READ_CONCURRENCY = 32;
5
6
  export const walkRepository = async (repoPath, onProgress) => {
6
7
  const files = await glob('**/*', {
7
8
  cwd: repoPath,
@@ -10,16 +11,20 @@ export const walkRepository = async (repoPath, onProgress) => {
10
11
  });
11
12
  const filtered = files.filter(file => !shouldIgnorePath(file));
12
13
  const entries = [];
13
- for (let i = 0; i < filtered.length; i++) {
14
- const relativePath = filtered[i];
15
- const fullPath = path.join(repoPath, relativePath);
16
- try {
17
- const content = await fs.readFile(fullPath, 'utf-8');
18
- entries.push({ path: relativePath.replace(/\\/g, '/'), content });
19
- onProgress?.(i + 1, filtered.length, relativePath);
20
- }
21
- catch {
22
- onProgress?.(i + 1, filtered.length, relativePath);
14
+ let processed = 0;
15
+ for (let start = 0; start < filtered.length; start += READ_CONCURRENCY) {
16
+ const batch = filtered.slice(start, start + READ_CONCURRENCY);
17
+ const results = await Promise.allSettled(batch.map(relativePath => fs.readFile(path.join(repoPath, relativePath), 'utf-8')
18
+ .then(content => ({ path: relativePath.replace(/\\/g, '/'), content }))));
19
+ for (const result of results) {
20
+ processed++;
21
+ if (result.status === 'fulfilled') {
22
+ entries.push(result.value);
23
+ onProgress?.(processed, filtered.length, result.value.path);
24
+ }
25
+ else {
26
+ onProgress?.(processed, filtered.length, batch[results.indexOf(result)]);
27
+ }
23
28
  }
24
29
  }
25
30
  return entries;
@@ -8,7 +8,13 @@
8
8
  import { KnowledgeGraph } from '../graph/types.js';
9
9
  import { ASTCache } from './ast-cache.js';
10
10
  import { SymbolTable } from './symbol-table.js';
11
+ import type { ExtractedHeritage } from './workers/parse-worker.js';
11
12
  export declare const processHeritage: (graph: KnowledgeGraph, files: {
12
13
  path: string;
13
14
  content: string;
14
15
  }[], astCache: ASTCache, symbolTable: SymbolTable, onProgress?: (current: number, total: number) => void) => Promise<void>;
16
+ /**
17
+ * Fast path: resolve pre-extracted heritage from workers.
18
+ * No AST parsing — workers already extracted className + parentName + kind.
19
+ */
20
+ export declare const processHeritageFromExtracted: (graph: KnowledgeGraph, extractedHeritage: ExtractedHeritage[], symbolTable: SymbolTable, onProgress?: (current: number, total: number) => void) => Promise<void>;