gitnexus 1.1.8 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +50 -59
- package/dist/cli/ai-context.js +9 -9
- package/dist/cli/analyze.js +139 -47
- package/dist/cli/augment.d.ts +13 -0
- package/dist/cli/augment.js +33 -0
- package/dist/cli/claude-hooks.d.ts +22 -0
- package/dist/cli/claude-hooks.js +97 -0
- package/dist/cli/eval-server.d.ts +30 -0
- package/dist/cli/eval-server.js +372 -0
- package/dist/cli/index.js +56 -1
- package/dist/cli/mcp.js +9 -0
- package/dist/cli/setup.js +184 -5
- package/dist/cli/tool.d.ts +37 -0
- package/dist/cli/tool.js +91 -0
- package/dist/cli/wiki.d.ts +13 -0
- package/dist/cli/wiki.js +199 -0
- package/dist/core/augmentation/engine.d.ts +26 -0
- package/dist/core/augmentation/engine.js +213 -0
- package/dist/core/embeddings/embedder.d.ts +2 -2
- package/dist/core/embeddings/embedder.js +11 -11
- package/dist/core/embeddings/embedding-pipeline.d.ts +2 -1
- package/dist/core/embeddings/embedding-pipeline.js +13 -5
- package/dist/core/embeddings/types.d.ts +2 -2
- package/dist/core/ingestion/call-processor.d.ts +7 -0
- package/dist/core/ingestion/call-processor.js +61 -23
- package/dist/core/ingestion/community-processor.js +34 -26
- package/dist/core/ingestion/filesystem-walker.js +15 -10
- package/dist/core/ingestion/heritage-processor.d.ts +6 -0
- package/dist/core/ingestion/heritage-processor.js +68 -5
- package/dist/core/ingestion/import-processor.d.ts +22 -0
- package/dist/core/ingestion/import-processor.js +215 -20
- package/dist/core/ingestion/parsing-processor.d.ts +8 -1
- package/dist/core/ingestion/parsing-processor.js +66 -25
- package/dist/core/ingestion/pipeline.js +104 -40
- package/dist/core/ingestion/process-processor.js +1 -1
- package/dist/core/ingestion/workers/parse-worker.d.ts +58 -0
- package/dist/core/ingestion/workers/parse-worker.js +451 -0
- package/dist/core/ingestion/workers/worker-pool.d.ts +22 -0
- package/dist/core/ingestion/workers/worker-pool.js +65 -0
- package/dist/core/kuzu/kuzu-adapter.d.ts +15 -1
- package/dist/core/kuzu/kuzu-adapter.js +177 -63
- package/dist/core/kuzu/schema.d.ts +1 -1
- package/dist/core/kuzu/schema.js +3 -0
- package/dist/core/search/bm25-index.js +13 -15
- package/dist/core/wiki/generator.d.ts +96 -0
- package/dist/core/wiki/generator.js +674 -0
- package/dist/core/wiki/graph-queries.d.ts +80 -0
- package/dist/core/wiki/graph-queries.js +238 -0
- package/dist/core/wiki/html-viewer.d.ts +10 -0
- package/dist/core/wiki/html-viewer.js +297 -0
- package/dist/core/wiki/llm-client.d.ts +36 -0
- package/dist/core/wiki/llm-client.js +111 -0
- package/dist/core/wiki/prompts.d.ts +53 -0
- package/dist/core/wiki/prompts.js +174 -0
- package/dist/mcp/core/embedder.js +4 -2
- package/dist/mcp/core/kuzu-adapter.d.ts +2 -1
- package/dist/mcp/core/kuzu-adapter.js +35 -15
- package/dist/mcp/local/local-backend.d.ts +54 -1
- package/dist/mcp/local/local-backend.js +716 -171
- package/dist/mcp/resources.d.ts +1 -1
- package/dist/mcp/resources.js +111 -73
- package/dist/mcp/server.d.ts +1 -1
- package/dist/mcp/server.js +91 -22
- package/dist/mcp/tools.js +80 -61
- package/dist/storage/git.d.ts +0 -1
- package/dist/storage/git.js +1 -8
- package/dist/storage/repo-manager.d.ts +17 -0
- package/dist/storage/repo-manager.js +26 -0
- package/hooks/claude/gitnexus-hook.cjs +135 -0
- package/hooks/claude/pre-tool-use.sh +78 -0
- package/hooks/claude/session-start.sh +42 -0
- package/package.json +4 -2
- package/skills/debugging.md +24 -22
- package/skills/exploring.md +26 -24
- package/skills/impact-analysis.md +19 -13
- package/skills/refactoring.md +37 -26
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Augmentation Engine
|
|
3
|
+
*
|
|
4
|
+
* Lightweight, fast-path enrichment of search patterns with knowledge graph context.
|
|
5
|
+
* Designed to be called from platform hooks (Claude Code PreToolUse, Cursor beforeShellExecution)
|
|
6
|
+
* when an agent runs grep/glob/search.
|
|
7
|
+
*
|
|
8
|
+
* Performance target: <500ms cold start, <200ms warm.
|
|
9
|
+
*
|
|
10
|
+
* Design decisions:
|
|
11
|
+
* - Uses only BM25 search (no semantic/embedding) for speed
|
|
12
|
+
* - Clusters used internally for ranking, NEVER in output
|
|
13
|
+
* - Output is pure relationships: callers, callees, process participation
|
|
14
|
+
* - Graceful failure: any error → return empty string
|
|
15
|
+
*/
|
|
16
|
+
import path from 'path';
|
|
17
|
+
import { listRegisteredRepos } from '../../storage/repo-manager.js';
|
|
18
|
+
/**
 * Pick the registered repository that corresponds to a working directory.
 *
 * A repository is a candidate when the directory equals its root, lies beneath it,
 * or contains it. Prefix comparisons are anchored on a path separator, so sibling
 * directories sharing a name prefix (e.g. `/projects/gitnexusv2` vs `/projects/gitnexus`)
 * never match. Among candidates, the one with the longest root path wins.
 *
 * @param cwd - Directory to resolve; relative paths are resolved with `path.resolve`.
 * @returns `{ name, storagePath, kuzuPath }` for the chosen repo, or `null` when nothing
 *          matches or any step throws.
 */
async function findRepoForCwd(cwd) {
    try {
        const registered = await listRegisteredRepos({ validate: true });
        // On Windows the same path can differ in case (drive letters: D: vs d:),
        // so compare lowercase there.
        const foldCase = process.platform === 'win32';
        const canonical = (location) => {
            const absolute = path.resolve(location);
            return foldCase ? absolute.toLowerCase() : absolute;
        };
        const here = canonical(cwd);
        // True when the two paths are identical or one is an ancestor of the other.
        const overlaps = (a, b) => a === b
            || a.startsWith(b + path.sep)
            || b.startsWith(a + path.sep);
        let winner = null;
        let winnerLength = 0;
        for (const candidate of registered) {
            const root = canonical(candidate.path);
            // Only a strictly longer root can displace the current winner.
            if (root.length <= winnerLength || !overlaps(here, root)) {
                continue;
            }
            winner = candidate;
            winnerLength = root.length;
        }
        if (!winner) {
            return null;
        }
        const { name, storagePath } = winner;
        return { name, storagePath, kuzuPath: path.join(storagePath, 'kuzu') };
    }
    catch {
        return null;
    }
}
|
|
66
|
+
/**
 * Escape a value for interpolation inside a single-quoted Cypher string literal.
 *
 * NOTE(review): this doubles single quotes, which is the escaping this module has
 * always applied. Backslashes pass through untouched — confirm against KuzuDB's
 * string-literal rules, or move to parameterized queries if the adapter supports them.
 */
function escapeCypherLiteral(value) {
    return String(value).replace(/'/g, "''");
}
/**
 * Read one column from a query row that may be keyed by alias or by position.
 * Uses `??` so legitimate falsy values (0, '') are not replaced by the positional fallback.
 */
function readColumn(row, key, index) {
    if (row == null)
        return undefined;
    return row[key] ?? row[index];
}
/**
 * Run a query on a best-effort basis: any failure (or a non-array result) yields `[]`
 * so a single broken lookup never aborts the whole augmentation.
 */
async function queryRowsOrEmpty(executeQuery, repoId, cypher) {
    try {
        const rows = await executeQuery(repoId, cypher);
        return Array.isArray(rows) ? rows : [];
    }
    catch {
        return [];
    }
}
/**
 * Augment a search pattern with knowledge graph context.
 *
 * 1. BM25 search for the pattern
 * 2. For the top file hits, find symbols whose name contains the pattern's first word
 * 3. For those symbols, fetch callers / callees / process participation, plus the
 *    cohesion of their community (used only for ordering, never printed)
 * 4. Format as a structured text block
 *
 * @param pattern - Search text; surrounding whitespace is ignored. Patterns shorter
 *                  than 3 characters after trimming (or non-strings) produce ''.
 * @param cwd - Directory used to select the repository; defaults to `process.cwd()`.
 * @returns The formatted context block, or '' when nothing is found or on any error
 *          (graceful failure — this must never break the tool being augmented).
 */
export async function augment(pattern, cwd) {
    if (typeof pattern !== 'string')
        return '';
    // Trim first: a leading space would otherwise make the first token empty and
    // `CONTAINS ''` would match every symbol in the file.
    const query = pattern.trim();
    if (query.length < 3)
        return '';
    const MAX_FILES = 5;
    const MAX_SYMBOLS = 5;
    const workDir = cwd || process.cwd();
    try {
        const repo = await findRepoForCwd(workDir);
        if (!repo)
            return '';
        // Lazy-load kuzu adapter (skip unnecessary init)
        const { initKuzu, executeQuery, isKuzuReady } = await import('../../mcp/core/kuzu-adapter.js');
        const { searchFTSFromKuzu } = await import('../search/bm25-index.js');
        const repoId = repo.name.toLowerCase();
        // Init KuzuDB if not already
        if (!isKuzuReady(repoId)) {
            await initKuzu(repoId, repo.kuzuPath);
        }
        // Step 1: BM25 search (fast, no embeddings)
        const bm25Results = await searchFTSFromKuzu(query, 10, repoId);
        if (bm25Results.length === 0)
            return '';
        // Step 2: Map BM25 file results to symbols
        // The name filter is loop-invariant, so build it once.
        const nameToken = escapeCypherLiteral(query.split(/\s+/)[0]);
        const symbolMatches = [];
        const seen = new Set();
        for (const result of bm25Results.slice(0, MAX_FILES)) {
            // Only the first MAX_SYMBOLS symbols are enriched; stop querying once we have them.
            if (symbolMatches.length >= MAX_SYMBOLS)
                break;
            if (typeof result?.filePath !== 'string')
                continue;
            const fileLiteral = escapeCypherLiteral(result.filePath);
            const rows = await queryRowsOrEmpty(executeQuery, repoId, `
          MATCH (n) WHERE n.filePath = '${fileLiteral}'
          AND n.name CONTAINS '${nameToken}'
          RETURN n.id AS id, n.name AS name, labels(n)[0] AS type, n.filePath AS filePath
          LIMIT 3
        `);
            for (const row of rows) {
                const nodeId = readColumn(row, 'id', 0);
                // De-duplicate while collecting so duplicates do not consume the symbol budget,
                // and skip rows without a usable id instead of failing the whole call.
                if (typeof nodeId !== 'string' || nodeId === '' || seen.has(nodeId))
                    continue;
                seen.add(nodeId);
                symbolMatches.push({
                    nodeId,
                    name: readColumn(row, 'name', 1),
                    type: readColumn(row, 'type', 2),
                    filePath: readColumn(row, 'filePath', 3),
                });
            }
        }
        if (symbolMatches.length === 0)
            return '';
        // Step 3: For top matches, fetch callers/callees/processes
        // Also get cluster cohesion internally for ranking
        const enriched = [];
        for (const sym of symbolMatches.slice(0, MAX_SYMBOLS)) {
            const idLiteral = escapeCypherLiteral(sym.nodeId);
            // Callers
            const callerRows = await queryRowsOrEmpty(executeQuery, repoId, `
          MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(n {id: '${idLiteral}'})
          RETURN caller.name AS name
          LIMIT 3
        `);
            const callers = callerRows.map((r) => readColumn(r, 'name', 0)).filter(Boolean);
            // Callees
            const calleeRows = await queryRowsOrEmpty(executeQuery, repoId, `
          MATCH (n {id: '${idLiteral}'})-[:CodeRelation {type: 'CALLS'}]->(callee)
          RETURN callee.name AS name
          LIMIT 3
        `);
            const callees = calleeRows.map((r) => readColumn(r, 'name', 0)).filter(Boolean);
            // Processes
            const processRows = await queryRowsOrEmpty(executeQuery, repoId, `
          MATCH (n {id: '${idLiteral}'})-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process)
          RETURN p.heuristicLabel AS label, r.step AS step, p.stepCount AS stepCount
        `);
            const processes = processRows.map((r) => {
                const label = readColumn(r, 'label', 0);
                // Without a label there is nothing meaningful to show for this flow.
                if (label == null || label === '')
                    return null;
                const step = readColumn(r, 'step', 1);
                const stepCount = readColumn(r, 'stepCount', 2);
                // Avoid printing "step undefined/undefined" when position data is absent.
                if (step == null || stepCount == null)
                    return `${label}`;
                return `${label} (step ${step}/${stepCount})`;
            }).filter(Boolean);
            // Cluster cohesion (internal ranking signal)
            const cohesionRows = await queryRowsOrEmpty(executeQuery, repoId, `
          MATCH (n {id: '${idLiteral}'})-[:CodeRelation {type: 'MEMBER_OF'}]->(c:Community)
          RETURN c.cohesion AS cohesion
          LIMIT 1
        `);
            const rawCohesion = cohesionRows.length > 0 ? Number(readColumn(cohesionRows[0], 'cohesion', 0)) : 0;
            // A non-numeric cohesion would poison the sort comparator with NaN.
            const cohesion = Number.isFinite(rawCohesion) ? rawCohesion : 0;
            enriched.push({
                name: sym.name,
                filePath: sym.filePath,
                callers,
                callees,
                processes,
                cohesion,
            });
        }
        if (enriched.length === 0)
            return '';
        // Step 4: Rank by cohesion (internal signal) and format
        enriched.sort((a, b) => b.cohesion - a.cohesion);
        const lines = [`[GitNexus] ${enriched.length} related symbols found:`, ''];
        for (const item of enriched) {
            lines.push(`${item.name} (${item.filePath})`);
            if (item.callers.length > 0) {
                lines.push(`  Called by: ${item.callers.join(', ')}`);
            }
            if (item.callees.length > 0) {
                lines.push(`  Calls: ${item.callees.join(', ')}`);
            }
            if (item.processes.length > 0) {
                lines.push(`  Flows: ${item.processes.join(', ')}`);
            }
            lines.push('');
        }
        return lines.join('\n').trim();
    }
    catch {
        // Graceful failure — never break the original tool
        return '';
    }
}
|
|
@@ -15,7 +15,7 @@ export type ModelProgressCallback = (progress: ModelProgress) => void;
|
|
|
15
15
|
/**
|
|
16
16
|
* Get the current device being used for inference
|
|
17
17
|
*/
|
|
18
|
-
export declare const getCurrentDevice: () => "
|
|
18
|
+
export declare const getCurrentDevice: () => "dml" | "cuda" | "cpu" | "wasm" | null;
|
|
19
19
|
/**
|
|
20
20
|
* Initialize the embedding model
|
|
21
21
|
* Uses singleton pattern - only loads once, subsequent calls return cached instance
|
|
@@ -25,7 +25,7 @@ export declare const getCurrentDevice: () => "webgpu" | "cuda" | "cpu" | "wasm"
|
|
|
25
25
|
* @param forceDevice - Force a specific device
|
|
26
26
|
* @returns Promise resolving to the embedder pipeline
|
|
27
27
|
*/
|
|
28
|
-
export declare const initEmbedder: (onProgress?: ModelProgressCallback, config?: Partial<EmbeddingConfig>, forceDevice?: "
|
|
28
|
+
export declare const initEmbedder: (onProgress?: ModelProgressCallback, config?: Partial<EmbeddingConfig>, forceDevice?: "dml" | "cuda" | "cpu" | "wasm") => Promise<FeatureExtractionPipeline>;
|
|
29
29
|
/**
|
|
30
30
|
* Check if the embedder is initialized and ready
|
|
31
31
|
*/
|
|
@@ -37,16 +37,16 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
|
|
|
37
37
|
}
|
|
38
38
|
isInitializing = true;
|
|
39
39
|
const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
|
|
40
|
-
// On Windows, use
|
|
41
|
-
// CUDA is only available on Linux with onnxruntime-node
|
|
40
|
+
// On Windows, use DirectML for GPU acceleration (via DirectX12)
|
|
41
|
+
// CUDA is only available on Linux x64 with onnxruntime-node
|
|
42
42
|
const isWindows = process.platform === 'win32';
|
|
43
|
-
const gpuDevice = isWindows ? '
|
|
43
|
+
const gpuDevice = isWindows ? 'dml' : 'cuda';
|
|
44
44
|
let requestedDevice = forceDevice || (finalConfig.device === 'auto' ? gpuDevice : finalConfig.device);
|
|
45
45
|
initPromise = (async () => {
|
|
46
46
|
try {
|
|
47
47
|
// Configure transformers.js environment
|
|
48
48
|
env.allowLocalModels = false;
|
|
49
|
-
const isDev = process.env.NODE_ENV
|
|
49
|
+
const isDev = process.env.NODE_ENV === 'development';
|
|
50
50
|
if (isDev) {
|
|
51
51
|
console.log(`🧠 Loading embedding model: ${finalConfig.modelId}`);
|
|
52
52
|
}
|
|
@@ -61,14 +61,14 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
|
|
|
61
61
|
onProgress(progress);
|
|
62
62
|
} : undefined;
|
|
63
63
|
// Try GPU first if auto, fall back to CPU
|
|
64
|
-
// Windows:
|
|
65
|
-
const devicesToTry = (requestedDevice === '
|
|
64
|
+
// Windows: dml (DirectML/DirectX12), Linux: cuda
|
|
65
|
+
const devicesToTry = (requestedDevice === 'dml' || requestedDevice === 'cuda')
|
|
66
66
|
? [requestedDevice, 'cpu']
|
|
67
67
|
: [requestedDevice];
|
|
68
68
|
for (const device of devicesToTry) {
|
|
69
69
|
try {
|
|
70
|
-
if (isDev && device === '
|
|
71
|
-
console.log('🔧 Trying
|
|
70
|
+
if (isDev && device === 'dml') {
|
|
71
|
+
console.log('🔧 Trying DirectML (DirectX12) GPU backend...');
|
|
72
72
|
}
|
|
73
73
|
else if (isDev && device === 'cuda') {
|
|
74
74
|
console.log('🔧 Trying CUDA GPU backend...');
|
|
@@ -86,7 +86,7 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
|
|
|
86
86
|
});
|
|
87
87
|
currentDevice = device;
|
|
88
88
|
if (isDev) {
|
|
89
|
-
const label = device === '
|
|
89
|
+
const label = device === 'dml' ? 'GPU (DirectML/DirectX12)'
|
|
90
90
|
: device === 'cuda' ? 'GPU (CUDA)'
|
|
91
91
|
: device.toUpperCase();
|
|
92
92
|
console.log(`✅ Using ${label} backend`);
|
|
@@ -95,8 +95,8 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
|
|
|
95
95
|
return embedderInstance;
|
|
96
96
|
}
|
|
97
97
|
catch (deviceError) {
|
|
98
|
-
if (isDev && (device === 'cuda' || device === '
|
|
99
|
-
const gpuType = device === '
|
|
98
|
+
if (isDev && (device === 'cuda' || device === 'dml')) {
|
|
99
|
+
const gpuType = device === 'dml' ? 'DirectML' : 'CUDA';
|
|
100
100
|
console.log(`⚠️ ${gpuType} not available, falling back to CPU...`);
|
|
101
101
|
}
|
|
102
102
|
// Continue to next device in list
|
|
@@ -20,8 +20,9 @@ export type EmbeddingProgressCallback = (progress: EmbeddingProgress) => void;
|
|
|
20
20
|
* @param executeWithReusedStatement - Function to execute with reused prepared statement
|
|
21
21
|
* @param onProgress - Callback for progress updates
|
|
22
22
|
* @param config - Optional configuration override
|
|
23
|
+
* @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
|
|
23
24
|
*/
|
|
24
|
-
export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>) => Promise<void>;
|
|
25
|
+
export declare const runEmbeddingPipeline: (executeQuery: (cypher: string) => Promise<any[]>, executeWithReusedStatement: (cypher: string, paramsList: Array<Record<string, any>>) => Promise<void>, onProgress: EmbeddingProgressCallback, config?: Partial<EmbeddingConfig>, skipNodeIds?: Set<string>) => Promise<void>;
|
|
25
26
|
/**
|
|
26
27
|
* Perform semantic search using the vector index
|
|
27
28
|
*
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
import { initEmbedder, embedBatch, embedText, embeddingToArray, isEmbedderReady } from './embedder.js';
|
|
12
12
|
import { generateBatchEmbeddingTexts } from './text-generator.js';
|
|
13
13
|
import { DEFAULT_EMBEDDING_CONFIG, EMBEDDABLE_LABELS, } from './types.js';
|
|
14
|
-
const isDev = process.env.NODE_ENV
|
|
14
|
+
const isDev = process.env.NODE_ENV === 'development';
|
|
15
15
|
/**
|
|
16
16
|
* Query all embeddable nodes from KuzuDB
|
|
17
17
|
* Uses table-specific queries (File has different schema than code elements)
|
|
@@ -97,8 +97,9 @@ const createVectorIndex = async (executeQuery) => {
|
|
|
97
97
|
* @param executeWithReusedStatement - Function to execute with reused prepared statement
|
|
98
98
|
* @param onProgress - Callback for progress updates
|
|
99
99
|
* @param config - Optional configuration override
|
|
100
|
+
* @param skipNodeIds - Optional set of node IDs that already have embeddings (incremental mode)
|
|
100
101
|
*/
|
|
101
|
-
export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}) => {
|
|
102
|
+
export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatement, onProgress, config = {}, skipNodeIds) => {
|
|
102
103
|
const finalConfig = { ...DEFAULT_EMBEDDING_CONFIG, ...config };
|
|
103
104
|
try {
|
|
104
105
|
// Phase 1: Load embedding model
|
|
@@ -108,11 +109,10 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
|
|
|
108
109
|
modelDownloadPercent: 0,
|
|
109
110
|
});
|
|
110
111
|
await initEmbedder((modelProgress) => {
|
|
111
|
-
// Report model download progress
|
|
112
112
|
const downloadPercent = modelProgress.progress ?? 0;
|
|
113
113
|
onProgress({
|
|
114
114
|
phase: 'loading-model',
|
|
115
|
-
percent: Math.round(downloadPercent * 0.2),
|
|
115
|
+
percent: Math.round(downloadPercent * 0.2),
|
|
116
116
|
modelDownloadPercent: downloadPercent,
|
|
117
117
|
});
|
|
118
118
|
}, finalConfig);
|
|
@@ -125,7 +125,15 @@ export const runEmbeddingPipeline = async (executeQuery, executeWithReusedStatem
|
|
|
125
125
|
console.log('🔍 Querying embeddable nodes...');
|
|
126
126
|
}
|
|
127
127
|
// Phase 2: Query embeddable nodes
|
|
128
|
-
|
|
128
|
+
let nodes = await queryEmbeddableNodes(executeQuery);
|
|
129
|
+
// Incremental mode: filter out nodes that already have embeddings
|
|
130
|
+
if (skipNodeIds && skipNodeIds.size > 0) {
|
|
131
|
+
const beforeCount = nodes.length;
|
|
132
|
+
nodes = nodes.filter(n => !skipNodeIds.has(n.id));
|
|
133
|
+
if (isDev) {
|
|
134
|
+
console.log(`📦 Incremental embeddings: ${beforeCount} total, ${skipNodeIds.size} cached, ${nodes.length} to embed`);
|
|
135
|
+
}
|
|
136
|
+
}
|
|
129
137
|
const totalNodes = nodes.length;
|
|
130
138
|
if (isDev) {
|
|
131
139
|
console.log(`📊 Found ${totalNodes} embeddable nodes`);
|
|
@@ -40,8 +40,8 @@ export interface EmbeddingConfig {
|
|
|
40
40
|
batchSize: number;
|
|
41
41
|
/** Embedding vector dimensions */
|
|
42
42
|
dimensions: number;
|
|
43
|
-
/** Device to use for inference: 'auto' tries GPU first, falls back to CPU */
|
|
44
|
-
device: 'auto' | '
|
|
43
|
+
/** Device to use for inference: 'auto' tries GPU first (DirectML on Windows, CUDA on Linux), falls back to CPU */
|
|
44
|
+
device: 'auto' | 'dml' | 'cuda' | 'cpu' | 'wasm';
|
|
45
45
|
/** Maximum characters of code snippet to include */
|
|
46
46
|
maxSnippetLength: number;
|
|
47
47
|
}
|
|
@@ -2,7 +2,14 @@ import { KnowledgeGraph } from '../graph/types.js';
|
|
|
2
2
|
import { ASTCache } from './ast-cache.js';
|
|
3
3
|
import { SymbolTable } from './symbol-table.js';
|
|
4
4
|
import { ImportMap } from './import-processor.js';
|
|
5
|
+
import type { ExtractedCall } from './workers/parse-worker.js';
|
|
5
6
|
export declare const processCalls: (graph: KnowledgeGraph, files: {
|
|
6
7
|
path: string;
|
|
7
8
|
content: string;
|
|
8
9
|
}[], astCache: ASTCache, symbolTable: SymbolTable, importMap: ImportMap, onProgress?: (current: number, total: number) => void) => Promise<void>;
|
|
10
|
+
/**
|
|
11
|
+
* Fast path: resolve pre-extracted call sites from workers.
|
|
12
|
+
* No AST parsing — workers already extracted calledName + sourceId.
|
|
13
|
+
* This function only does symbol table lookups + graph mutations.
|
|
14
|
+
*/
|
|
15
|
+
export declare const processCallsFromExtracted: (graph: KnowledgeGraph, extractedCalls: ExtractedCall[], symbolTable: SymbolTable, importMap: ImportMap, onProgress?: (current: number, total: number) => void) => Promise<void>;
|
|
@@ -145,6 +145,8 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
|
|
|
145
145
|
continue;
|
|
146
146
|
}
|
|
147
147
|
wasReparsed = true;
|
|
148
|
+
// Cache re-parsed tree so heritage phase gets hits
|
|
149
|
+
astCache.set(file.path, tree);
|
|
148
150
|
}
|
|
149
151
|
let query;
|
|
150
152
|
let matches;
|
|
@@ -155,8 +157,6 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
|
|
|
155
157
|
}
|
|
156
158
|
catch (queryError) {
|
|
157
159
|
console.warn(`Query error for ${file.path}:`, queryError);
|
|
158
|
-
if (wasReparsed)
|
|
159
|
-
tree.delete?.();
|
|
160
160
|
continue;
|
|
161
161
|
}
|
|
162
162
|
// 3. Process each call match
|
|
@@ -192,10 +192,7 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
|
|
|
192
192
|
reason: resolved.reason,
|
|
193
193
|
});
|
|
194
194
|
});
|
|
195
|
-
//
|
|
196
|
-
if (wasReparsed) {
|
|
197
|
-
tree.delete?.();
|
|
198
|
-
}
|
|
195
|
+
// Tree is now owned by the LRU cache — no manual delete needed
|
|
199
196
|
}
|
|
200
197
|
};
|
|
201
198
|
/**
|
|
@@ -207,27 +204,27 @@ export const processCalls = async (graph, files, astCache, symbolTable, importMa
|
|
|
207
204
|
* Returns confidence score so agents know what to trust.
|
|
208
205
|
*/
|
|
209
206
|
const resolveCallTarget = (calledName, currentFile, symbolTable, importMap) => {
|
|
210
|
-
// Strategy
|
|
211
|
-
const importedFiles = importMap.get(currentFile);
|
|
212
|
-
if (importedFiles) {
|
|
213
|
-
for (const importedFile of importedFiles) {
|
|
214
|
-
const nodeId = symbolTable.lookupExact(importedFile, calledName);
|
|
215
|
-
if (nodeId) {
|
|
216
|
-
return { nodeId, confidence: 0.9, reason: 'import-resolved' };
|
|
217
|
-
}
|
|
218
|
-
}
|
|
219
|
-
}
|
|
220
|
-
// Strategy B: Check local file (HIGH confidence - same file definition)
|
|
207
|
+
// Strategy B first (cheapest — single map lookup): Check local file
|
|
221
208
|
const localNodeId = symbolTable.lookupExact(currentFile, calledName);
|
|
222
209
|
if (localNodeId) {
|
|
223
210
|
return { nodeId: localNodeId, confidence: 0.85, reason: 'same-file' };
|
|
224
211
|
}
|
|
225
|
-
// Strategy
|
|
226
|
-
|
|
227
|
-
if
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
212
|
+
// Strategy A: Check if any definition of calledName is in an imported file
|
|
213
|
+
// Reversed: instead of iterating all imports and checking each, get all definitions
|
|
214
|
+
// and check if any is imported. O(definitions) instead of O(imports).
|
|
215
|
+
const allDefs = symbolTable.lookupFuzzy(calledName);
|
|
216
|
+
if (allDefs.length > 0) {
|
|
217
|
+
const importedFiles = importMap.get(currentFile);
|
|
218
|
+
if (importedFiles) {
|
|
219
|
+
for (const def of allDefs) {
|
|
220
|
+
if (importedFiles.has(def.filePath)) {
|
|
221
|
+
return { nodeId: def.nodeId, confidence: 0.9, reason: 'import-resolved' };
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
// Strategy C: Fuzzy global (no import match found)
|
|
226
|
+
const confidence = allDefs.length === 1 ? 0.5 : 0.3;
|
|
227
|
+
return { nodeId: allDefs[0].nodeId, confidence, reason: 'fuzzy-global' };
|
|
231
228
|
}
|
|
232
229
|
return null;
|
|
233
230
|
};
|
|
@@ -267,3 +264,44 @@ const isBuiltInOrNoise = (name) => {
|
|
|
267
264
|
]);
|
|
268
265
|
return builtIns.has(name);
|
|
269
266
|
};
|
|
267
|
+
/**
 * Fast path for call resolution using call sites that workers already extracted.
 *
 * Performs no AST parsing: each extracted call carries `calledName`, `sourceId` and
 * `filePath`, so this only resolves the target via `resolveCallTarget` and adds a
 * `CALLS` relationship to the graph for every call that resolves.
 *
 * Calls are bucketed by file solely so progress can be reported in file units:
 * `onProgress` fires after every 100th file (followed by a yield to the event loop)
 * and once more at the end with `(total, total)`.
 */
export const processCallsFromExtracted = async (graph, extractedCalls, symbolTable, importMap, onProgress) => {
    // Bucket call sites per source file, preserving first-seen file order.
    const callsPerFile = new Map();
    for (const site of extractedCalls) {
        const bucket = callsPerFile.get(site.filePath);
        if (bucket) {
            bucket.push(site);
        }
        else {
            callsPerFile.set(site.filePath, [site]);
        }
    }
    const fileCount = callsPerFile.size;
    let visited = 0;
    for (const sites of callsPerFile.values()) {
        visited += 1;
        if (visited % 100 === 0) {
            onProgress?.(visited, fileCount);
            await yieldToEventLoop();
        }
        for (const site of sites) {
            const target = resolveCallTarget(site.calledName, site.filePath, symbolTable, importMap);
            if (target) {
                graph.addRelationship({
                    id: generateId('CALLS', `${site.sourceId}:${site.calledName}->${target.nodeId}`),
                    sourceId: site.sourceId,
                    targetId: target.nodeId,
                    type: 'CALLS',
                    confidence: target.confidence,
                    reason: target.reason,
                });
            }
        }
    }
    onProgress?.(fileCount, fileCount);
};
|
|
@@ -103,9 +103,19 @@ const buildGraphologyGraph = (knowledgeGraph) => {
|
|
|
103
103
|
const graph = new Graph({ type: 'undirected', allowSelfLoops: false });
|
|
104
104
|
// Symbol types that should be clustered
|
|
105
105
|
const symbolTypes = new Set(['Function', 'Class', 'Method', 'Interface']);
|
|
106
|
-
//
|
|
106
|
+
// First pass: collect which nodes participate in clustering edges
|
|
107
|
+
const clusteringRelTypes = new Set(['CALLS', 'EXTENDS', 'IMPLEMENTS']);
|
|
108
|
+
const connectedNodes = new Set();
|
|
109
|
+
knowledgeGraph.relationships.forEach(rel => {
|
|
110
|
+
if (clusteringRelTypes.has(rel.type) && rel.sourceId !== rel.targetId) {
|
|
111
|
+
connectedNodes.add(rel.sourceId);
|
|
112
|
+
connectedNodes.add(rel.targetId);
|
|
113
|
+
}
|
|
114
|
+
});
|
|
115
|
+
// Only add nodes that have at least one clustering edge
|
|
116
|
+
// Isolated nodes would just become singletons (skipped anyway)
|
|
107
117
|
knowledgeGraph.nodes.forEach(node => {
|
|
108
|
-
if (symbolTypes.has(node.label)) {
|
|
118
|
+
if (symbolTypes.has(node.label) && connectedNodes.has(node.id)) {
|
|
109
119
|
graph.addNode(node.id, {
|
|
110
120
|
name: node.properties.name,
|
|
111
121
|
filePath: node.properties.filePath,
|
|
@@ -113,15 +123,10 @@ const buildGraphologyGraph = (knowledgeGraph) => {
|
|
|
113
123
|
});
|
|
114
124
|
}
|
|
115
125
|
});
|
|
116
|
-
// Add
|
|
117
|
-
// We can also include EXTENDS/IMPLEMENTS for OOP clustering
|
|
118
|
-
const clusteringRelTypes = new Set(['CALLS', 'EXTENDS', 'IMPLEMENTS']);
|
|
126
|
+
// Add edges
|
|
119
127
|
knowledgeGraph.relationships.forEach(rel => {
|
|
120
128
|
if (clusteringRelTypes.has(rel.type)) {
|
|
121
|
-
// Only add edge if both nodes exist in our symbol graph
|
|
122
|
-
// Also skip self-loops (recursive calls) - not allowed in undirected graph
|
|
123
129
|
if (graph.hasNode(rel.sourceId) && graph.hasNode(rel.targetId) && rel.sourceId !== rel.targetId) {
|
|
124
|
-
// Avoid duplicate edges
|
|
125
130
|
if (!graph.hasEdge(rel.sourceId, rel.targetId)) {
|
|
126
131
|
graph.addEdge(rel.sourceId, rel.targetId);
|
|
127
132
|
}
|
|
@@ -241,29 +246,32 @@ const findCommonPrefix = (strings) => {
|
|
|
241
246
|
// HELPER: Calculate community cohesion
|
|
242
247
|
// ============================================================================
|
|
243
248
|
/**
|
|
244
|
-
*
|
|
245
|
-
*
|
|
249
|
+
* Estimate cohesion score (0-1) based on internal edge density.
|
|
250
|
+
* Uses sampling for large communities to avoid O(N^2) cost.
|
|
246
251
|
*/
|
|
247
252
|
const calculateCohesion = (memberIds, graph) => {
|
|
248
253
|
if (memberIds.length <= 1)
|
|
249
254
|
return 1.0;
|
|
250
255
|
const memberSet = new Set(memberIds);
|
|
256
|
+
// Sample up to 50 members for large communities
|
|
257
|
+
const SAMPLE_SIZE = 50;
|
|
258
|
+
const sample = memberIds.length <= SAMPLE_SIZE
|
|
259
|
+
? memberIds
|
|
260
|
+
: memberIds.slice(0, SAMPLE_SIZE);
|
|
251
261
|
let internalEdges = 0;
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
if (graph.hasNode(nodeId))
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
const maxPossibleEdges = (memberIds.length * (memberIds.length - 1)) / 2;
|
|
266
|
-
if (maxPossibleEdges === 0)
|
|
262
|
+
let totalEdges = 0;
|
|
263
|
+
for (const nodeId of sample) {
|
|
264
|
+
if (!graph.hasNode(nodeId))
|
|
265
|
+
continue;
|
|
266
|
+
graph.forEachNeighbor(nodeId, (neighbor) => {
|
|
267
|
+
totalEdges++;
|
|
268
|
+
if (memberSet.has(neighbor)) {
|
|
269
|
+
internalEdges++;
|
|
270
|
+
}
|
|
271
|
+
});
|
|
272
|
+
}
|
|
273
|
+
// Cohesion = fraction of edges that stay internal
|
|
274
|
+
if (totalEdges === 0)
|
|
267
275
|
return 1.0;
|
|
268
|
-
return Math.min(1.0, internalEdges /
|
|
276
|
+
return Math.min(1.0, internalEdges / totalEdges);
|
|
269
277
|
};
|
|
@@ -2,6 +2,7 @@ import fs from 'fs/promises';
|
|
|
2
2
|
import path from 'path';
|
|
3
3
|
import { glob } from 'glob';
|
|
4
4
|
import { shouldIgnorePath } from '../../config/ignore-service.js';
|
|
5
|
+
const READ_CONCURRENCY = 32;
|
|
5
6
|
export const walkRepository = async (repoPath, onProgress) => {
|
|
6
7
|
const files = await glob('**/*', {
|
|
7
8
|
cwd: repoPath,
|
|
@@ -10,16 +11,20 @@ export const walkRepository = async (repoPath, onProgress) => {
|
|
|
10
11
|
});
|
|
11
12
|
const filtered = files.filter(file => !shouldIgnorePath(file));
|
|
12
13
|
const entries = [];
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
const
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
14
|
+
let processed = 0;
|
|
15
|
+
for (let start = 0; start < filtered.length; start += READ_CONCURRENCY) {
|
|
16
|
+
const batch = filtered.slice(start, start + READ_CONCURRENCY);
|
|
17
|
+
const results = await Promise.allSettled(batch.map(relativePath => fs.readFile(path.join(repoPath, relativePath), 'utf-8')
|
|
18
|
+
.then(content => ({ path: relativePath.replace(/\\/g, '/'), content }))));
|
|
19
|
+
for (const result of results) {
|
|
20
|
+
processed++;
|
|
21
|
+
if (result.status === 'fulfilled') {
|
|
22
|
+
entries.push(result.value);
|
|
23
|
+
onProgress?.(processed, filtered.length, result.value.path);
|
|
24
|
+
}
|
|
25
|
+
else {
|
|
26
|
+
onProgress?.(processed, filtered.length, batch[results.indexOf(result)]);
|
|
27
|
+
}
|
|
23
28
|
}
|
|
24
29
|
}
|
|
25
30
|
return entries;
|
|
@@ -8,7 +8,13 @@
|
|
|
8
8
|
import { KnowledgeGraph } from '../graph/types.js';
|
|
9
9
|
import { ASTCache } from './ast-cache.js';
|
|
10
10
|
import { SymbolTable } from './symbol-table.js';
|
|
11
|
+
import type { ExtractedHeritage } from './workers/parse-worker.js';
|
|
11
12
|
export declare const processHeritage: (graph: KnowledgeGraph, files: {
|
|
12
13
|
path: string;
|
|
13
14
|
content: string;
|
|
14
15
|
}[], astCache: ASTCache, symbolTable: SymbolTable, onProgress?: (current: number, total: number) => void) => Promise<void>;
|
|
16
|
+
/**
|
|
17
|
+
* Fast path: resolve pre-extracted heritage from workers.
|
|
18
|
+
* No AST parsing — workers already extracted className + parentName + kind.
|
|
19
|
+
*/
|
|
20
|
+
export declare const processHeritageFromExtracted: (graph: KnowledgeGraph, extractedHeritage: ExtractedHeritage[], symbolTable: SymbolTable, onProgress?: (current: number, total: number) => void) => Promise<void>;
|