@veewo/gitnexus 1.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +234 -0
- package/dist/benchmark/agent-context/evaluators.d.ts +9 -0
- package/dist/benchmark/agent-context/evaluators.js +196 -0
- package/dist/benchmark/agent-context/evaluators.test.d.ts +1 -0
- package/dist/benchmark/agent-context/evaluators.test.js +39 -0
- package/dist/benchmark/agent-context/io.d.ts +2 -0
- package/dist/benchmark/agent-context/io.js +23 -0
- package/dist/benchmark/agent-context/io.test.d.ts +1 -0
- package/dist/benchmark/agent-context/io.test.js +19 -0
- package/dist/benchmark/agent-context/report.d.ts +2 -0
- package/dist/benchmark/agent-context/report.js +59 -0
- package/dist/benchmark/agent-context/report.test.d.ts +1 -0
- package/dist/benchmark/agent-context/report.test.js +85 -0
- package/dist/benchmark/agent-context/runner.d.ts +46 -0
- package/dist/benchmark/agent-context/runner.js +111 -0
- package/dist/benchmark/agent-context/runner.test.d.ts +1 -0
- package/dist/benchmark/agent-context/runner.test.js +79 -0
- package/dist/benchmark/agent-context/tool-runner.d.ts +7 -0
- package/dist/benchmark/agent-context/tool-runner.js +18 -0
- package/dist/benchmark/agent-context/tool-runner.test.d.ts +1 -0
- package/dist/benchmark/agent-context/tool-runner.test.js +11 -0
- package/dist/benchmark/agent-context/types.d.ts +40 -0
- package/dist/benchmark/agent-context/types.js +1 -0
- package/dist/benchmark/analyze-runner.d.ts +16 -0
- package/dist/benchmark/analyze-runner.js +51 -0
- package/dist/benchmark/analyze-runner.test.d.ts +1 -0
- package/dist/benchmark/analyze-runner.test.js +37 -0
- package/dist/benchmark/evaluators.d.ts +6 -0
- package/dist/benchmark/evaluators.js +10 -0
- package/dist/benchmark/evaluators.test.d.ts +1 -0
- package/dist/benchmark/evaluators.test.js +12 -0
- package/dist/benchmark/io.d.ts +7 -0
- package/dist/benchmark/io.js +25 -0
- package/dist/benchmark/io.test.d.ts +1 -0
- package/dist/benchmark/io.test.js +35 -0
- package/dist/benchmark/neonspark-candidates.d.ts +19 -0
- package/dist/benchmark/neonspark-candidates.js +94 -0
- package/dist/benchmark/neonspark-candidates.test.d.ts +1 -0
- package/dist/benchmark/neonspark-candidates.test.js +43 -0
- package/dist/benchmark/neonspark-materialize.d.ts +19 -0
- package/dist/benchmark/neonspark-materialize.js +111 -0
- package/dist/benchmark/neonspark-materialize.test.d.ts +1 -0
- package/dist/benchmark/neonspark-materialize.test.js +124 -0
- package/dist/benchmark/neonspark-sync.d.ts +3 -0
- package/dist/benchmark/neonspark-sync.js +53 -0
- package/dist/benchmark/neonspark-sync.test.d.ts +1 -0
- package/dist/benchmark/neonspark-sync.test.js +20 -0
- package/dist/benchmark/report.d.ts +1 -0
- package/dist/benchmark/report.js +7 -0
- package/dist/benchmark/runner.d.ts +48 -0
- package/dist/benchmark/runner.js +302 -0
- package/dist/benchmark/runner.test.d.ts +1 -0
- package/dist/benchmark/runner.test.js +50 -0
- package/dist/benchmark/scoring.d.ts +16 -0
- package/dist/benchmark/scoring.js +27 -0
- package/dist/benchmark/scoring.test.d.ts +1 -0
- package/dist/benchmark/scoring.test.js +24 -0
- package/dist/benchmark/tool-runner.d.ts +6 -0
- package/dist/benchmark/tool-runner.js +17 -0
- package/dist/benchmark/types.d.ts +36 -0
- package/dist/benchmark/types.js +1 -0
- package/dist/cli/ai-context.d.ts +22 -0
- package/dist/cli/ai-context.js +184 -0
- package/dist/cli/ai-context.test.d.ts +1 -0
- package/dist/cli/ai-context.test.js +30 -0
- package/dist/cli/analyze-multi-scope-regression.test.d.ts +1 -0
- package/dist/cli/analyze-multi-scope-regression.test.js +22 -0
- package/dist/cli/analyze-options.d.ts +7 -0
- package/dist/cli/analyze-options.js +56 -0
- package/dist/cli/analyze-options.test.d.ts +1 -0
- package/dist/cli/analyze-options.test.js +36 -0
- package/dist/cli/analyze.d.ts +14 -0
- package/dist/cli/analyze.js +384 -0
- package/dist/cli/augment.d.ts +13 -0
- package/dist/cli/augment.js +33 -0
- package/dist/cli/benchmark-agent-context.d.ts +29 -0
- package/dist/cli/benchmark-agent-context.js +61 -0
- package/dist/cli/benchmark-agent-context.test.d.ts +1 -0
- package/dist/cli/benchmark-agent-context.test.js +80 -0
- package/dist/cli/benchmark-unity.d.ts +15 -0
- package/dist/cli/benchmark-unity.js +31 -0
- package/dist/cli/benchmark-unity.test.d.ts +1 -0
- package/dist/cli/benchmark-unity.test.js +18 -0
- package/dist/cli/claude-hooks.d.ts +22 -0
- package/dist/cli/claude-hooks.js +97 -0
- package/dist/cli/clean.d.ts +10 -0
- package/dist/cli/clean.js +60 -0
- package/dist/cli/eval-server.d.ts +30 -0
- package/dist/cli/eval-server.js +372 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +182 -0
- package/dist/cli/list.d.ts +6 -0
- package/dist/cli/list.js +33 -0
- package/dist/cli/mcp.d.ts +8 -0
- package/dist/cli/mcp.js +34 -0
- package/dist/cli/repo-manager-alias.test.d.ts +1 -0
- package/dist/cli/repo-manager-alias.test.js +40 -0
- package/dist/cli/scope-filter.test.d.ts +1 -0
- package/dist/cli/scope-filter.test.js +49 -0
- package/dist/cli/serve.d.ts +4 -0
- package/dist/cli/serve.js +6 -0
- package/dist/cli/setup.d.ts +8 -0
- package/dist/cli/setup.js +311 -0
- package/dist/cli/setup.test.d.ts +1 -0
- package/dist/cli/setup.test.js +31 -0
- package/dist/cli/status.d.ts +6 -0
- package/dist/cli/status.js +27 -0
- package/dist/cli/tool.d.ts +40 -0
- package/dist/cli/tool.js +94 -0
- package/dist/cli/version.test.d.ts +1 -0
- package/dist/cli/version.test.js +19 -0
- package/dist/cli/wiki.d.ts +15 -0
- package/dist/cli/wiki.js +361 -0
- package/dist/config/ignore-service.d.ts +1 -0
- package/dist/config/ignore-service.js +210 -0
- package/dist/config/supported-languages.d.ts +12 -0
- package/dist/config/supported-languages.js +15 -0
- package/dist/core/augmentation/engine.d.ts +26 -0
- package/dist/core/augmentation/engine.js +213 -0
- package/dist/core/embeddings/embedder.d.ts +60 -0
- package/dist/core/embeddings/embedder.js +251 -0
- package/dist/core/embeddings/embedding-pipeline.d.ts +51 -0
- package/dist/core/embeddings/embedding-pipeline.js +329 -0
- package/dist/core/embeddings/index.d.ts +9 -0
- package/dist/core/embeddings/index.js +9 -0
- package/dist/core/embeddings/text-generator.d.ts +24 -0
- package/dist/core/embeddings/text-generator.js +182 -0
- package/dist/core/embeddings/types.d.ts +87 -0
- package/dist/core/embeddings/types.js +32 -0
- package/dist/core/graph/graph.d.ts +2 -0
- package/dist/core/graph/graph.js +66 -0
- package/dist/core/graph/types.d.ts +61 -0
- package/dist/core/graph/types.js +1 -0
- package/dist/core/ingestion/ast-cache.d.ts +11 -0
- package/dist/core/ingestion/ast-cache.js +34 -0
- package/dist/core/ingestion/call-processor.d.ts +15 -0
- package/dist/core/ingestion/call-processor.js +327 -0
- package/dist/core/ingestion/cluster-enricher.d.ts +38 -0
- package/dist/core/ingestion/cluster-enricher.js +170 -0
- package/dist/core/ingestion/community-processor.d.ts +39 -0
- package/dist/core/ingestion/community-processor.js +312 -0
- package/dist/core/ingestion/entry-point-scoring.d.ts +39 -0
- package/dist/core/ingestion/entry-point-scoring.js +260 -0
- package/dist/core/ingestion/filesystem-walker.d.ts +28 -0
- package/dist/core/ingestion/filesystem-walker.js +80 -0
- package/dist/core/ingestion/framework-detection.d.ts +39 -0
- package/dist/core/ingestion/framework-detection.js +235 -0
- package/dist/core/ingestion/heritage-processor.d.ts +20 -0
- package/dist/core/ingestion/heritage-processor.js +197 -0
- package/dist/core/ingestion/import-processor.d.ts +38 -0
- package/dist/core/ingestion/import-processor.js +778 -0
- package/dist/core/ingestion/parsing-processor.d.ts +15 -0
- package/dist/core/ingestion/parsing-processor.js +291 -0
- package/dist/core/ingestion/pipeline.d.ts +5 -0
- package/dist/core/ingestion/pipeline.js +323 -0
- package/dist/core/ingestion/process-processor.d.ts +51 -0
- package/dist/core/ingestion/process-processor.js +309 -0
- package/dist/core/ingestion/scope-filter.d.ts +25 -0
- package/dist/core/ingestion/scope-filter.js +100 -0
- package/dist/core/ingestion/structure-processor.d.ts +2 -0
- package/dist/core/ingestion/structure-processor.js +36 -0
- package/dist/core/ingestion/symbol-table.d.ts +33 -0
- package/dist/core/ingestion/symbol-table.js +38 -0
- package/dist/core/ingestion/tree-sitter-queries.d.ts +12 -0
- package/dist/core/ingestion/tree-sitter-queries.js +398 -0
- package/dist/core/ingestion/utils.d.ts +10 -0
- package/dist/core/ingestion/utils.js +50 -0
- package/dist/core/ingestion/workers/parse-worker.d.ts +59 -0
- package/dist/core/ingestion/workers/parse-worker.js +672 -0
- package/dist/core/ingestion/workers/worker-pool.d.ts +16 -0
- package/dist/core/ingestion/workers/worker-pool.js +120 -0
- package/dist/core/kuzu/csv-generator.d.ts +29 -0
- package/dist/core/kuzu/csv-generator.js +336 -0
- package/dist/core/kuzu/kuzu-adapter.d.ts +101 -0
- package/dist/core/kuzu/kuzu-adapter.js +753 -0
- package/dist/core/kuzu/schema.d.ts +53 -0
- package/dist/core/kuzu/schema.js +407 -0
- package/dist/core/search/bm25-index.d.ts +23 -0
- package/dist/core/search/bm25-index.js +95 -0
- package/dist/core/search/hybrid-search.d.ts +49 -0
- package/dist/core/search/hybrid-search.js +118 -0
- package/dist/core/tree-sitter/parser-loader.d.ts +4 -0
- package/dist/core/tree-sitter/parser-loader.js +44 -0
- package/dist/core/wiki/generator.d.ts +110 -0
- package/dist/core/wiki/generator.js +786 -0
- package/dist/core/wiki/graph-queries.d.ts +80 -0
- package/dist/core/wiki/graph-queries.js +238 -0
- package/dist/core/wiki/html-viewer.d.ts +10 -0
- package/dist/core/wiki/html-viewer.js +297 -0
- package/dist/core/wiki/llm-client.d.ts +40 -0
- package/dist/core/wiki/llm-client.js +162 -0
- package/dist/core/wiki/prompts.d.ts +53 -0
- package/dist/core/wiki/prompts.js +174 -0
- package/dist/lib/utils.d.ts +1 -0
- package/dist/lib/utils.js +3 -0
- package/dist/mcp/core/embedder.d.ts +27 -0
- package/dist/mcp/core/embedder.js +108 -0
- package/dist/mcp/core/kuzu-adapter.d.ts +34 -0
- package/dist/mcp/core/kuzu-adapter.js +231 -0
- package/dist/mcp/local/local-backend.d.ts +160 -0
- package/dist/mcp/local/local-backend.js +1646 -0
- package/dist/mcp/resources.d.ts +31 -0
- package/dist/mcp/resources.js +407 -0
- package/dist/mcp/server.d.ts +23 -0
- package/dist/mcp/server.js +251 -0
- package/dist/mcp/staleness.d.ts +15 -0
- package/dist/mcp/staleness.js +29 -0
- package/dist/mcp/tools.d.ts +24 -0
- package/dist/mcp/tools.js +195 -0
- package/dist/server/api.d.ts +10 -0
- package/dist/server/api.js +344 -0
- package/dist/server/mcp-http.d.ts +13 -0
- package/dist/server/mcp-http.js +100 -0
- package/dist/storage/git.d.ts +6 -0
- package/dist/storage/git.js +32 -0
- package/dist/storage/repo-manager.d.ts +125 -0
- package/dist/storage/repo-manager.js +257 -0
- package/dist/types/pipeline.d.ts +34 -0
- package/dist/types/pipeline.js +18 -0
- package/hooks/claude/gitnexus-hook.cjs +135 -0
- package/hooks/claude/pre-tool-use.sh +78 -0
- package/hooks/claude/session-start.sh +42 -0
- package/package.json +92 -0
- package/skills/gitnexus-cli.md +82 -0
- package/skills/gitnexus-debugging.md +89 -0
- package/skills/gitnexus-exploring.md +78 -0
- package/skills/gitnexus-guide.md +64 -0
- package/skills/gitnexus-impact-analysis.md +97 -0
- package/skills/gitnexus-refactoring.md +121 -0
- package/vendor/leiden/index.cjs +355 -0
- package/vendor/leiden/utils.cjs +392 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cluster Enricher
|
|
3
|
+
*
|
|
4
|
+
* LLM-based enrichment for community clusters.
|
|
5
|
+
* Generates semantic names, keywords, and descriptions using an LLM.
|
|
6
|
+
*/
|
|
7
|
+
// ============================================================================
|
|
8
|
+
// PROMPT TEMPLATE
|
|
9
|
+
// ============================================================================
|
|
10
|
+
/**
 * Build the prompt used to enrich a single cluster.
 *
 * Only the first 20 members are listed to keep the prompt small; when more
 * exist, the number of omitted members is appended as " (+N more)".
 *
 * @param members - Cluster members; each is read for `name` and `type`.
 * @param heuristicLabel - Heuristic label shown to the model as a hint.
 * @returns The prompt text, asking for a JSON object with `name` and `description`.
 */
const buildEnrichmentPrompt = (members, heuristicLabel) => {
    const MAX_LISTED = 20;
    const listed = [];
    for (const member of members.slice(0, MAX_LISTED)) {
        listed.push(`${member.name} (${member.type})`);
    }
    const omitted = members.length - MAX_LISTED;
    const omittedNote = omitted > 0 ? ` (+${omitted} more)` : '';
    return [
        'Analyze this code cluster and provide a semantic name and short description.',
        '',
        `Heuristic: "${heuristicLabel}"`,
        `Members: ${listed.join(', ')}${omittedNote}`,
        '',
        'Reply with JSON only:',
        '{"name": "2-4 word semantic name", "description": "One sentence describing purpose"}',
    ].join('\n');
};
|
|
24
|
+
// ============================================================================
|
|
25
|
+
// PARSE LLM RESPONSE
|
|
26
|
+
// ============================================================================
|
|
27
|
+
/**
 * Parse an LLM reply into a cluster enrichment record.
 *
 * The first `{` through the last `}` of the reply is treated as JSON, which
 * tolerates replies wrapped in markdown code fences or surrounding prose.
 * Every field is type-checked because the model output is untrusted:
 * a missing, non-string or blank `name` falls back to `fallbackLabel`,
 * non-string entries are dropped from `keywords`, and a non-string
 * `description` becomes the empty string.
 *
 * @param response - Raw text returned by the LLM.
 * @param fallbackLabel - Name to use when no usable name can be extracted.
 * @returns `{ name, keywords, description }`; never throws.
 */
const parseEnrichmentResponse = (response, fallbackLabel) => {
    const fallback = { name: fallbackLabel, keywords: [], description: '' };
    try {
        // Greedy match: from the first "{" to the last "}" in the reply.
        const jsonMatch = response.match(/\{[\s\S]*\}/);
        if (!jsonMatch) {
            return fallback;
        }
        const parsed = JSON.parse(jsonMatch[0]);
        const name = typeof parsed.name === 'string' ? parsed.name.trim() : '';
        const keywords = Array.isArray(parsed.keywords)
            ? parsed.keywords.filter((keyword) => typeof keyword === 'string')
            : [];
        const description = typeof parsed.description === 'string' ? parsed.description : '';
        return { name: name || fallbackLabel, keywords, description };
    }
    catch {
        // Malformed JSON or a non-string response: use the heuristic label.
        return fallback;
    }
};
|
|
50
|
+
// ============================================================================
|
|
51
|
+
// MAIN ENRICHMENT FUNCTION
|
|
52
|
+
// ============================================================================
|
|
53
|
+
/**
 * Enrich clusters one at a time with LLM-generated names and descriptions.
 *
 * Communities are processed sequentially, one LLM call each. A community
 * with no members, or whose LLM call or parsing throws, keeps its heuristic
 * label with empty keywords and description.
 *
 * @param communities - Community nodes to enrich (read for `id` and `heuristicLabel`).
 * @param memberMap - Map of communityId -> member info.
 * @param llmClient - Client exposing an async `generate(prompt)` method.
 * @param onProgress - Optional callback invoked as `(current, total)` before each community is processed.
 * @returns `{ enrichments, tokensUsed }` where `enrichments` maps community id
 *          to its enrichment and `tokensUsed` is a rough estimate (characters / 4).
 */
export const enrichClusters = async (communities, memberMap, llmClient, onProgress) => {
    const enrichments = new Map();
    let tokensUsed = 0;
    // Record the heuristic label as the final answer for a community.
    const keepHeuristic = (target) => {
        enrichments.set(target.id, {
            name: target.heuristicLabel,
            keywords: [],
            description: '',
        });
    };
    for (const [index, community] of communities.entries()) {
        const members = memberMap.get(community.id) || [];
        onProgress?.(index + 1, communities.length);
        if (members.length === 0) {
            // Nothing to show the model.
            keepHeuristic(community);
            continue;
        }
        try {
            const prompt = buildEnrichmentPrompt(members, community.heuristicLabel);
            const response = await llmClient.generate(prompt);
            // Rough token estimate: about four characters per token.
            tokensUsed += prompt.length / 4 + response.length / 4;
            enrichments.set(community.id, parseEnrichmentResponse(response, community.heuristicLabel));
        }
        catch (error) {
            console.warn(`Failed to enrich cluster ${community.id}:`, error);
            keepHeuristic(community);
        }
    }
    return { enrichments, tokensUsed };
};
|
|
97
|
+
// ============================================================================
|
|
98
|
+
// BATCH ENRICHMENT (more efficient)
|
|
99
|
+
// ============================================================================
|
|
100
|
+
/**
 * Enrich multiple clusters per LLM call (batch mode).
 *
 * Uses fewer calls than `enrichClusters` but needs a larger context window.
 * Each batch lists up to 15 members per cluster and asks for a JSON array
 * keyed by community id. Model output is treated as untrusted:
 * - items whose `id` is not part of the current batch are ignored,
 * - a missing or non-string `name` falls back to the heuristic label,
 * - `keywords` keeps only string entries, `description` must be a string.
 * If the call or the JSON parsing throws, the whole batch falls back to
 * heuristic labels. Communities the model did not mention are filled in
 * with heuristic labels at the end.
 *
 * @param communities - Community nodes to enrich (read for `id` and `heuristicLabel`).
 * @param memberMap - Map of communityId -> member info.
 * @param llmClient - Client exposing an async `generate(prompt)` method.
 * @param batchSize - Clusters per LLM call (default 5). Values below 1 or NaN
 *                    are treated as 1; fractional values are floored.
 * @param onProgress - Optional callback invoked as `(processed, total)` before each batch is sent.
 * @returns `{ enrichments, tokensUsed }` where `tokensUsed` is a rough estimate (characters / 4).
 */
export const enrichClustersBatch = async (communities, memberMap, llmClient, batchSize = 5, onProgress) => {
    const enrichments = new Map();
    let tokensUsed = 0;
    // A step of 0, a negative step or NaN would never advance the loop below.
    const step = batchSize >= 1 ? Math.floor(batchSize) : 1;
    const heuristicFor = (community) => ({
        name: community.heuristicLabel,
        keywords: [],
        description: '',
    });
    // Process in batches
    for (let i = 0; i < communities.length; i += step) {
        // Report progress
        onProgress?.(Math.min(i + step, communities.length), communities.length);
        const batch = communities.slice(i, i + step);
        // Lookup used to reject ids that do not belong to this batch.
        const batchById = new Map(batch.map((community) => [community.id, community]));
        const batchPrompt = batch.map((community, idx) => {
            const members = memberMap.get(community.id) || [];
            const limitedMembers = members.slice(0, 15);
            const memberList = limitedMembers
                .map(m => `${m.name} (${m.type})`)
                .join(', ');
            return `Cluster ${idx + 1} (id: ${community.id}):
Heuristic: "${community.heuristicLabel}"
Members: ${memberList}`;
        }).join('\n\n');
        const prompt = `Analyze these code clusters and generate semantic names, keywords, and descriptions.

${batchPrompt}

Output JSON array:
[
{"id": "comm_X", "name": "...", "keywords": [...], "description": "..."},
...
]`;
        try {
            const response = await llmClient.generate(prompt);
            // Rough token estimate: about four characters per token.
            tokensUsed += prompt.length / 4 + response.length / 4;
            // Parse batch response: first "[" through last "]" in the reply.
            const jsonMatch = response.match(/\[[\s\S]*\]/);
            if (jsonMatch) {
                const parsed = JSON.parse(jsonMatch[0]);
                for (const item of parsed) {
                    const community = item ? batchById.get(item.id) : undefined;
                    if (!community) {
                        // Null entry, or an id the model invented or copied from elsewhere.
                        continue;
                    }
                    const name = typeof item.name === 'string' ? item.name.trim() : '';
                    enrichments.set(community.id, {
                        name: name || community.heuristicLabel,
                        keywords: Array.isArray(item.keywords)
                            ? item.keywords.filter((keyword) => typeof keyword === 'string')
                            : [],
                        description: typeof item.description === 'string' ? item.description : '',
                    });
                }
            }
        }
        catch (error) {
            console.warn('Batch enrichment failed, falling back to heuristics:', error);
            // Fallback for this batch
            for (const community of batch) {
                enrichments.set(community.id, heuristicFor(community));
            }
        }
    }
    // Fill in any communities the model did not return
    for (const community of communities) {
        if (!enrichments.has(community.id)) {
            enrichments.set(community.id, heuristicFor(community));
        }
    }
    return { enrichments, tokensUsed };
};
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Community Detection Processor
|
|
3
|
+
*
|
|
4
|
+
* Uses the Leiden algorithm (via graphology-communities-leiden) to detect
|
|
5
|
+
* communities/clusters in the code graph based on CALLS relationships.
|
|
6
|
+
*
|
|
7
|
+
* Communities represent groups of code that work together frequently,
|
|
8
|
+
* helping agents navigate the codebase by functional area rather than file structure.
|
|
9
|
+
*/
|
|
10
|
+
import { KnowledgeGraph } from '../graph/types.js';
|
|
11
|
+
/**
 * A detected community (cluster) of code symbols.
 *
 * - `id`: identifier of the form `comm_<n>`, where `<n>` is the community
 *   number reported by the detection step.
 * - `label`: display label; set to the same value as `heuristicLabel` when
 *   the node is created.
 * - `heuristicLabel`: auto-generated label: the most common parent folder of
 *   the members, else a common name prefix, else `Cluster_<n>`.
 * - `cohesion`: cohesion score computed for the member set
 *   (NOTE(review): range and formula are defined by `calculateCohesion`; confirm there).
 * - `symbolCount`: number of member symbols in the community.
 */
export interface CommunityNode {
|
|
12
|
+
id: string;
|
|
13
|
+
label: string;
|
|
14
|
+
heuristicLabel: string;
|
|
15
|
+
cohesion: number;
|
|
16
|
+
symbolCount: number;
|
|
17
|
+
}
|
|
18
|
+
/**
 * Links one graph node to the community it was assigned to.
 *
 * - `nodeId`: id of the member node in the clustering graph.
 * - `communityId`: community identifier of the form `comm_<n>`.
 *
 * Memberships are produced for every clustered node, including nodes in
 * single-member communities for which no `CommunityNode` is created, so a
 * `communityId` is not guaranteed to match an entry in `communities`.
 */
export interface CommunityMembership {
|
|
19
|
+
nodeId: string;
|
|
20
|
+
communityId: string;
|
|
21
|
+
}
|
|
22
|
+
/**
 * Result of a community detection run.
 *
 * - `communities`: community nodes with at least two members, sorted by
 *   `symbolCount` in descending order.
 * - `memberships`: one entry per clustered node.
 * - `stats.totalCommunities`: community count reported by the detection
 *   algorithm (1 when the timeout fallback is used).
 * - `stats.modularity`: modularity reported by the detection algorithm
 *   (0 for an empty graph or when the timeout fallback is used).
 * - `stats.nodesProcessed`: number of nodes in the graph that was clustered.
 */
export interface CommunityDetectionResult {
|
|
23
|
+
communities: CommunityNode[];
|
|
24
|
+
memberships: CommunityMembership[];
|
|
25
|
+
stats: {
|
|
26
|
+
totalCommunities: number;
|
|
27
|
+
modularity: number;
|
|
28
|
+
nodesProcessed: number;
|
|
29
|
+
};
|
|
30
|
+
}
|
|
31
|
+
/** Palette of hex color strings (e.g. `#ef4444`) used to color communities in visualizations. */
export declare const COMMUNITY_COLORS: string[];
|
|
32
|
+
/**
 * Pick a palette color for a community.
 *
 * @param communityIndex - Index of the community; indices beyond the palette length wrap around.
 * @returns A hex color string taken from `COMMUNITY_COLORS`.
 */
export declare const getCommunityColor: (communityIndex: number) => string;
|
|
33
|
+
/**
|
|
34
|
+
* Detect communities in the knowledge graph using Leiden algorithm
|
|
35
|
+
*
|
|
36
|
+
* This runs AFTER all relationships (CALLS, IMPORTS, etc.) have been built.
|
|
37
|
+
* It uses primarily CALLS edges to cluster code that works together.
|
|
38
|
+
*/
|
|
39
|
+
/**
 * Run community detection over a knowledge graph.
 *
 * @param knowledgeGraph - Graph whose symbol nodes and relationships are clustered.
 * @param onProgress - Optional callback receiving a status message and a progress value from 0 to 100.
 * @returns The detected communities, per-node memberships, and summary statistics.
 */
export declare const processCommunities: (knowledgeGraph: KnowledgeGraph, onProgress?: (message: string, progress: number) => void) => Promise<CommunityDetectionResult>;
|
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Community Detection Processor
|
|
3
|
+
*
|
|
4
|
+
* Uses the Leiden algorithm (via graphology-communities-leiden) to detect
|
|
5
|
+
* communities/clusters in the code graph based on CALLS relationships.
|
|
6
|
+
*
|
|
7
|
+
* Communities represent groups of code that work together frequently,
|
|
8
|
+
* helping agents navigate the codebase by functional area rather than file structure.
|
|
9
|
+
*/
|
|
10
|
+
// NOTE: The Leiden algorithm source is vendored from graphology's repo
|
|
11
|
+
// (src/communities-leiden) because it was never published to npm.
|
|
12
|
+
// We use createRequire to load the CommonJS vendored files in ESM context.
|
|
13
|
+
import Graph from 'graphology';
|
|
14
|
+
import { createRequire } from 'node:module';
|
|
15
|
+
import { fileURLToPath } from 'node:url';
|
|
16
|
+
import { dirname, resolve } from 'node:path';
|
|
17
|
+
// CommonJS loader bound to this module; the vendored Leiden code is shipped as .cjs.
const _require = createRequire(import.meta.url);
// ES modules have no built-in __filename/__dirname, so derive them from the module URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Three directories up from this file is the package root, which holds vendor/.
const leidenPath = resolve(__dirname, '..', '..', '..', 'vendor', 'leiden', 'index.cjs');
const leiden = _require(leidenPath);
|
|
23
|
+
// ============================================================================
|
|
24
|
+
// COMMUNITY COLORS (for visualization)
|
|
25
|
+
// ============================================================================
|
|
26
|
+
/** Palette of hex colors assigned to communities for visualization. */
export const COMMUNITY_COLORS = [
    '#ef4444', // red
    '#f97316', // orange
    '#eab308', // yellow
    '#22c55e', // green
    '#06b6d4', // cyan
    '#3b82f6', // blue
    '#8b5cf6', // violet
    '#d946ef', // fuchsia
    '#ec4899', // pink
    '#f43f5e', // rose
    '#14b8a6', // teal
    '#84cc16', // lime
];
/**
 * Pick a palette color for a community, cycling through COMMUNITY_COLORS.
 *
 * Always returns a color: negative indices wrap around from the end of the
 * palette, fractional indices are truncated toward zero, and a non-numeric
 * index (NaN) yields the first color.
 *
 * @param communityIndex - Index of the community.
 * @returns A hex color string from COMMUNITY_COLORS.
 */
export const getCommunityColor = (communityIndex) => {
    const paletteSize = COMMUNITY_COLORS.length;
    // `%` keeps the sign of the dividend, so shift the result into [0, paletteSize).
    const slot = ((Math.trunc(communityIndex) % paletteSize) + paletteSize) % paletteSize;
    return COMMUNITY_COLORS[slot] ?? COMMUNITY_COLORS[0];
};
|
|
43
|
+
// ============================================================================
|
|
44
|
+
// MAIN PROCESSOR
|
|
45
|
+
// ============================================================================
|
|
46
|
+
/**
 * Detect communities in the knowledge graph using the Leiden algorithm.
 *
 * This runs AFTER all relationships (CALLS, IMPORTS, etc.) have been built.
 * Steps:
 * 1. Count Function/Class/Method/Interface nodes; more than 10,000 switches
 *    on large-graph mode (filtered graph, higher resolution, capped iterations).
 * 2. Build the clustering graph and return an empty result if it has no nodes.
 * 3. Run Leiden, raced against a 60 s timeout; on timeout every node is
 *    assigned to community 0.
 * 4. Create community nodes and one membership entry per clustered node.
 *    Memberships include nodes of single-member communities, for which no
 *    community node is created.
 *
 * @param knowledgeGraph - Graph exposing `forEachNode` plus whatever the graph builder reads.
 * @param onProgress - Optional callback receiving `(message, progress)` with progress from 0 to 100.
 * @returns `{ communities, memberships, stats }`.
 * @throws Re-throws any error raised by the Leiden run other than the timeout.
 */
export const processCommunities = async (knowledgeGraph, onProgress) => {
    onProgress?.('Building graph for community detection...', 0);
    // Pre-check total symbol count to determine large-graph mode before building
    let symbolCount = 0;
    knowledgeGraph.forEachNode(node => {
        if (node.label === 'Function' || node.label === 'Class' || node.label === 'Method' || node.label === 'Interface') {
            symbolCount++;
        }
    });
    const isLarge = symbolCount > 10_000;
    const graph = buildGraphologyGraph(knowledgeGraph, isLarge);
    if (graph.order === 0) {
        return {
            communities: [],
            memberships: [],
            stats: { totalCommunities: 0, modularity: 0, nodesProcessed: 0 }
        };
    }
    const nodeCount = graph.order;
    const edgeCount = graph.size;
    onProgress?.(`Running Leiden on ${nodeCount} nodes, ${edgeCount} edges${isLarge ? ` (filtered from ${symbolCount} symbols)` : ''}...`, 30);
    // Large graphs: higher resolution + capped iterations; the first few
    // iterations capture most of the modularity and later ones add little.
    // NOTE(review): the cap passed below is 3, whereas the earlier comment cited
    // the Python leidenalg default of 2 - confirm which value is intended.
    // NOTE(review): presumably maxIterations 0 means "no cap" - confirm against
    // the vendored implementation.
    // Timeout: abort after 60s for pathological graph structures.
    // NOTE(review): if leiden.detailed runs synchronously, the timer cannot fire
    // before it returns, so the timeout would never trigger - confirm.
    const LEIDEN_TIMEOUT_MS = 60_000;
    let details;
    let timeoutHandle;
    try {
        details = await Promise.race([
            Promise.resolve(leiden.detailed(graph, {
                resolution: isLarge ? 2.0 : 1.0,
                maxIterations: isLarge ? 3 : 0,
            })),
            new Promise((_, reject) => {
                timeoutHandle = setTimeout(() => reject(new Error('Leiden timeout')), LEIDEN_TIMEOUT_MS);
            }),
        ]);
    }
    catch (e) {
        // Optional chaining guards against a null/undefined throw value.
        if (e?.message === 'Leiden timeout') {
            onProgress?.('Community detection timed out, using fallback...', 60);
            // Fallback: assign all nodes to community 0
            const communities = {};
            graph.forEachNode((node) => { communities[node] = 0; });
            details = { communities, count: 1, modularity: 0 };
        }
        else {
            throw e;
        }
    }
    finally {
        // Without this the pending timer keeps the event loop (and so the
        // process) alive for up to 60 s after detection has finished.
        clearTimeout(timeoutHandle);
    }
    onProgress?.(`Found ${details.count} communities...`, 60);
    // Step 3: Create community nodes with heuristic labels
    const communityNodes = createCommunityNodes(details.communities, details.count, graph, knowledgeGraph);
    onProgress?.('Creating membership edges...', 80);
    // Step 4: Create membership mappings
    const memberships = Object.entries(details.communities).map(([nodeId, communityNum]) => ({
        nodeId,
        communityId: `comm_${communityNum}`,
    }));
    onProgress?.('Community detection complete!', 100);
    return {
        communities: communityNodes,
        memberships,
        stats: {
            totalCommunities: details.count,
            modularity: details.modularity,
            nodesProcessed: graph.order,
        }
    };
};
|
|
122
|
+
// ============================================================================
|
|
123
|
+
// HELPER: Build graphology graph from knowledge graph
|
|
124
|
+
// ============================================================================
|
|
125
|
+
/**
 * Build the undirected graphology graph that community detection runs on.
 *
 * Only Function/Class/Method/Interface nodes are kept, and only
 * CALLS/EXTENDS/IMPLEMENTS relationships between two distinct nodes count.
 * In large-graph mode, relationships whose confidence is below
 * MIN_CONFIDENCE_LARGE are ignored and nodes touched by fewer than two
 * qualifying relationships are dropped, which reduces noise and Leiden runtime.
 *
 * @param knowledgeGraph - Source graph exposing `forEachNode` and `forEachRelationship`.
 * @param isLarge - Whether to apply the large-graph filters.
 * @returns A graph with `name`, `filePath` and `type` attributes on each node
 *          and at most one edge per node pair.
 */
const MIN_CONFIDENCE_LARGE = 0.5;
const buildGraphologyGraph = (knowledgeGraph, isLarge) => {
    const SYMBOL_LABELS = ['Function', 'Class', 'Method', 'Interface'];
    const CLUSTERING_RELS = ['CALLS', 'EXTENDS', 'IMPLEMENTS'];
    // True for relationships that may contribute an edge.
    const qualifies = (rel) => {
        if (!CLUSTERING_RELS.includes(rel.type)) {
            return false;
        }
        if (rel.sourceId === rel.targetId) {
            return false;
        }
        return !(isLarge && rel.confidence < MIN_CONFIDENCE_LARGE);
    };
    // Pass 1: count qualifying relationships per endpoint. Presence in the
    // map doubles as the "is connected" marker.
    const degreeOf = new Map();
    const bump = (id) => {
        degreeOf.set(id, (degreeOf.get(id) ?? 0) + 1);
    };
    knowledgeGraph.forEachRelationship((rel) => {
        if (qualifies(rel)) {
            bump(rel.sourceId);
            bump(rel.targetId);
        }
    });
    // Pass 2: add connected symbol nodes. Large graphs additionally skip
    // degree-1 nodes, which would only become singletons or be absorbed by
    // their single neighbor while still costing iteration time.
    const graph = new Graph({ type: 'undirected', allowSelfLoops: false });
    const minDegree = isLarge ? 2 : 1;
    knowledgeGraph.forEachNode((node) => {
        const degree = degreeOf.get(node.id) ?? 0;
        if (SYMBOL_LABELS.includes(node.label) && degree >= minDegree) {
            graph.addNode(node.id, {
                name: node.properties.name,
                filePath: node.properties.filePath,
                type: node.label,
            });
        }
    });
    // Pass 3: add one edge per pair of retained nodes.
    knowledgeGraph.forEachRelationship((rel) => {
        if (!qualifies(rel)) {
            return;
        }
        const { sourceId, targetId } = rel;
        const bothKept = graph.hasNode(sourceId) && graph.hasNode(targetId);
        if (bothKept && !graph.hasEdge(sourceId, targetId)) {
            graph.addEdge(sourceId, targetId);
        }
    });
    return graph;
};
|
|
173
|
+
// ============================================================================
|
|
174
|
+
// HELPER: Create community nodes with heuristic labels
|
|
175
|
+
// ============================================================================
|
|
176
|
+
/**
 * Turn a node -> community-number assignment into Community node records.
 *
 * Members are grouped by community number; communities with fewer than two
 * members are skipped as isolated nodes. Each remaining community gets a
 * heuristic label (used for both `label` and `heuristicLabel`), a cohesion
 * score and its member count. The result is sorted by member count, largest first.
 *
 * @param communities - Object mapping node id to community number.
 * @param communityCount - Number of communities reported by detection (not used here).
 * @param graph - Clustering graph, passed through to labelling and cohesion helpers.
 * @param knowledgeGraph - Source graph; `iterNodes()` is read for each node's `filePath`.
 * @returns Array of `{ id, label, heuristicLabel, cohesion, symbolCount }`.
 */
const createCommunityNodes = (communities, communityCount, graph, knowledgeGraph) => {
    // Bucket member ids under their community number.
    const membersByCommunity = new Map();
    for (const [nodeId, commNum] of Object.entries(communities)) {
        const bucket = membersByCommunity.get(commNum);
        if (bucket) {
            bucket.push(nodeId);
        }
        else {
            membersByCommunity.set(commNum, [nodeId]);
        }
    }
    // File path lookup for every node that has one.
    const nodePathMap = new Map();
    for (const node of knowledgeGraph.iterNodes()) {
        const filePath = node.properties.filePath;
        if (filePath) {
            nodePathMap.set(node.id, filePath);
        }
    }
    const communityNodes = [];
    for (const [commNum, memberIds] of membersByCommunity) {
        if (memberIds.length >= 2) {
            const heuristicLabel = generateHeuristicLabel(memberIds, nodePathMap, graph, commNum);
            const cohesion = calculateCohesion(memberIds, graph);
            communityNodes.push({
                id: `comm_${commNum}`,
                label: heuristicLabel,
                heuristicLabel,
                cohesion,
                symbolCount: memberIds.length,
            });
        }
    }
    // Largest communities first.
    return communityNodes.sort((left, right) => right.symbolCount - left.symbolCount);
};
|
|
214
|
+
// ============================================================================
|
|
215
|
+
// HELPER: Generate heuristic label from folder patterns
|
|
216
|
+
// ============================================================================
|
|
217
|
+
/**
 * Derive a human-readable label for a community.
 *
 * Tried in order:
 * 1. The most frequent parent folder among the members' file paths,
 *    ignoring generic folder names (src, lib, core, utils, common, shared,
 *    helpers); ties go to the folder seen first.
 * 2. With more than two named members, their common name prefix if it is
 *    longer than two characters.
 * 3. `Cluster_<commNum>`.
 * Folder and prefix labels are returned with the first letter upper-cased.
 *
 * @param memberIds - Node ids belonging to the community.
 * @param nodePathMap - Map of node id -> file path ('/'-separated).
 * @param graph - Clustering graph; the `name` node attribute is read from it.
 * @param commNum - Community number used in the last-resort label.
 * @returns The label string.
 */
const generateHeuristicLabel = (memberIds, nodePathMap, graph, commNum) => {
    const GENERIC_FOLDERS = new Set(['src', 'lib', 'core', 'utils', 'common', 'shared', 'helpers']);
    const capitalize = (text) => text.charAt(0).toUpperCase() + text.slice(1);
    // Tally the immediate parent folder of each member's file.
    const tally = new Map();
    for (const nodeId of memberIds) {
        const segments = (nodePathMap.get(nodeId) || '').split('/').filter(Boolean);
        if (segments.length < 2) {
            continue;
        }
        const parent = segments[segments.length - 2];
        if (GENERIC_FOLDERS.has(parent.toLowerCase())) {
            continue;
        }
        tally.set(parent, (tally.get(parent) || 0) + 1);
    }
    // Strict ">" keeps the first folder encountered when counts tie.
    let bestFolder = '';
    let bestCount = 0;
    for (const [folder, count] of tally) {
        if (count > bestCount) {
            bestCount = count;
            bestFolder = folder;
        }
    }
    if (bestFolder) {
        return capitalize(bestFolder);
    }
    // No usable folder: look for a shared prefix in member names.
    const names = memberIds
        .map((nodeId) => graph.getNodeAttribute(nodeId, 'name'))
        .filter(Boolean);
    if (names.length > 2) {
        const commonPrefix = findCommonPrefix(names);
        if (commonPrefix.length > 2) {
            return capitalize(commonPrefix);
        }
    }
    // Last resort: generic name, made unique by the community number.
    return `Cluster_${commNum}`;
};
|
|
265
|
+
/**
 * Find the longest prefix shared by every string in `strings`.
 *
 * The prefix common to a whole set equals the prefix common to its smallest
 * and largest elements (compared by UTF-16 code units), so a single pass that
 * tracks those two extremes is enough; no copy or sort of the input is needed.
 *
 * @param strings - Strings to compare; the array is not modified
 * @returns The shared prefix, or '' when the array is empty or nothing is shared
 */
const findCommonPrefix = (strings) => {
    if (strings.length === 0)
        return '';
    let lowest = strings[0];
    let highest = strings[0];
    for (const candidate of strings) {
        if (candidate < lowest) {
            lowest = candidate;
        }
        else if (candidate > highest) {
            highest = candidate;
        }
    }
    // Walk both extremes in lockstep until they diverge.
    let sharedLength = 0;
    while (sharedLength < lowest.length && lowest[sharedLength] === highest[sharedLength]) {
        sharedLength++;
    }
    return lowest.substring(0, sharedLength);
};
|
|
280
|
+
// ============================================================================
|
|
281
|
+
// HELPER: Calculate community cohesion
|
|
282
|
+
// ============================================================================
|
|
283
|
+
/**
|
|
284
|
+
* Estimate cohesion score (0-1) based on internal edge density.
|
|
285
|
+
* Uses sampling for large communities to avoid O(N^2) cost.
|
|
286
|
+
*/
|
|
287
|
+
const calculateCohesion = (memberIds, graph) => {
|
|
288
|
+
if (memberIds.length <= 1)
|
|
289
|
+
return 1.0;
|
|
290
|
+
const memberSet = new Set(memberIds);
|
|
291
|
+
// Sample up to 50 members for large communities
|
|
292
|
+
const SAMPLE_SIZE = 50;
|
|
293
|
+
const sample = memberIds.length <= SAMPLE_SIZE
|
|
294
|
+
? memberIds
|
|
295
|
+
: memberIds.slice(0, SAMPLE_SIZE);
|
|
296
|
+
let internalEdges = 0;
|
|
297
|
+
let totalEdges = 0;
|
|
298
|
+
for (const nodeId of sample) {
|
|
299
|
+
if (!graph.hasNode(nodeId))
|
|
300
|
+
continue;
|
|
301
|
+
graph.forEachNeighbor(nodeId, (neighbor) => {
|
|
302
|
+
totalEdges++;
|
|
303
|
+
if (memberSet.has(neighbor)) {
|
|
304
|
+
internalEdges++;
|
|
305
|
+
}
|
|
306
|
+
});
|
|
307
|
+
}
|
|
308
|
+
// Cohesion = fraction of edges that stay internal
|
|
309
|
+
if (totalEdges === 0)
|
|
310
|
+
return 1.0;
|
|
311
|
+
return Math.min(1.0, internalEdges / totalEdges);
|
|
312
|
+
};
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Entry Point Scoring
|
|
3
|
+
*
|
|
4
|
+
* Calculates entry point scores for process detection based on:
|
|
5
|
+
* 1. Call ratio (existing algorithm - callees / (callers + 1))
|
|
6
|
+
* 2. Export status (exported functions get higher priority)
|
|
7
|
+
* 3. Name patterns (functions matching entry point patterns like handle*, on*, *Controller)
|
|
8
|
+
* 4. Framework detection (path-based detection for Next.js, Express, Django, etc.)
|
|
9
|
+
*
|
|
10
|
+
* This module is language-agnostic - language-specific patterns are defined per language.
|
|
11
|
+
*/
|
|
12
|
+
/**
 * Result of scoring a function/method as an entry point candidate.
 */
export interface EntryPointScoreResult {
    /** Entry point score; higher values indicate better candidates. */
    score: number;
    /** Reasons explaining how the score came about. */
    reasons: string[];
}
|
|
16
|
+
/**
 * Calculate an entry point score for a function/method
 *
 * Higher scores indicate better entry point candidates.
 * Score = baseScore × exportMultiplier × nameMultiplier
 * NOTE(review): the module header also lists path-based framework detection
 * as a scoring input; confirm against the implementation whether the formula
 * above is still complete.
 *
 * @param name - Function/method name
 * @param language - Programming language
 * @param isExported - Whether the function is exported/public
 * @param callerCount - Number of functions that call this function
 * @param calleeCount - Number of functions this function calls
 * @param filePath - Optional path of the file containing the function;
 *                   presumably used for the path-based framework detection
 *                   mentioned in the module header (TODO confirm)
 * @returns Score and array of reasons explaining the score
 */
export declare function calculateEntryPointScore(name: string, language: string, isExported: boolean, callerCount: number, calleeCount: number, filePath?: string): EntryPointScoreResult;
|
|
30
|
+
/**
 * Check if a file path is a test file (should be excluded from entry points)
 * Covers common test file patterns across all supported languages
 *
 * @param filePath - File path to check
 * @returns Whether the path is considered a test file
 */
export declare function isTestFile(filePath: string): boolean;
|
|
35
|
+
/**
 * Check if a file path is likely a utility/helper file
 * These might still have entry points but should be lower priority
 *
 * @param filePath - File path to check
 * @returns Whether the path is considered a utility/helper file
 */
export declare function isUtilityFile(filePath: string): boolean;
|