gitnexus 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +181 -0
- package/dist/cli/ai-context.d.ts +21 -0
- package/dist/cli/ai-context.js +219 -0
- package/dist/cli/analyze.d.ts +10 -0
- package/dist/cli/analyze.js +118 -0
- package/dist/cli/clean.d.ts +8 -0
- package/dist/cli/clean.js +29 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +42 -0
- package/dist/cli/list.d.ts +6 -0
- package/dist/cli/list.js +27 -0
- package/dist/cli/mcp.d.ts +7 -0
- package/dist/cli/mcp.js +85 -0
- package/dist/cli/serve.d.ts +3 -0
- package/dist/cli/serve.js +5 -0
- package/dist/cli/status.d.ts +6 -0
- package/dist/cli/status.js +27 -0
- package/dist/config/ignore-service.d.ts +1 -0
- package/dist/config/ignore-service.js +208 -0
- package/dist/config/supported-languages.d.ts +11 -0
- package/dist/config/supported-languages.js +15 -0
- package/dist/core/embeddings/embedder.d.ts +60 -0
- package/dist/core/embeddings/embedder.js +205 -0
- package/dist/core/embeddings/embedding-pipeline.d.ts +50 -0
- package/dist/core/embeddings/embedding-pipeline.js +321 -0
- package/dist/core/embeddings/index.d.ts +9 -0
- package/dist/core/embeddings/index.js +9 -0
- package/dist/core/embeddings/text-generator.d.ts +24 -0
- package/dist/core/embeddings/text-generator.js +182 -0
- package/dist/core/embeddings/types.d.ts +87 -0
- package/dist/core/embeddings/types.js +32 -0
- package/dist/core/graph/graph.d.ts +2 -0
- package/dist/core/graph/graph.js +61 -0
- package/dist/core/graph/types.d.ts +50 -0
- package/dist/core/graph/types.js +1 -0
- package/dist/core/ingestion/ast-cache.d.ts +11 -0
- package/dist/core/ingestion/ast-cache.js +34 -0
- package/dist/core/ingestion/call-processor.d.ts +8 -0
- package/dist/core/ingestion/call-processor.js +269 -0
- package/dist/core/ingestion/cluster-enricher.d.ts +38 -0
- package/dist/core/ingestion/cluster-enricher.js +170 -0
- package/dist/core/ingestion/community-processor.d.ts +39 -0
- package/dist/core/ingestion/community-processor.js +269 -0
- package/dist/core/ingestion/entry-point-scoring.d.ts +39 -0
- package/dist/core/ingestion/entry-point-scoring.js +235 -0
- package/dist/core/ingestion/filesystem-walker.d.ts +5 -0
- package/dist/core/ingestion/filesystem-walker.js +26 -0
- package/dist/core/ingestion/framework-detection.d.ts +38 -0
- package/dist/core/ingestion/framework-detection.js +183 -0
- package/dist/core/ingestion/heritage-processor.d.ts +14 -0
- package/dist/core/ingestion/heritage-processor.js +134 -0
- package/dist/core/ingestion/import-processor.d.ts +8 -0
- package/dist/core/ingestion/import-processor.js +490 -0
- package/dist/core/ingestion/parsing-processor.d.ts +8 -0
- package/dist/core/ingestion/parsing-processor.js +249 -0
- package/dist/core/ingestion/pipeline.d.ts +2 -0
- package/dist/core/ingestion/pipeline.js +228 -0
- package/dist/core/ingestion/process-processor.d.ts +51 -0
- package/dist/core/ingestion/process-processor.js +278 -0
- package/dist/core/ingestion/structure-processor.d.ts +2 -0
- package/dist/core/ingestion/structure-processor.js +36 -0
- package/dist/core/ingestion/symbol-table.d.ts +33 -0
- package/dist/core/ingestion/symbol-table.js +38 -0
- package/dist/core/ingestion/tree-sitter-queries.d.ts +11 -0
- package/dist/core/ingestion/tree-sitter-queries.js +319 -0
- package/dist/core/ingestion/utils.d.ts +10 -0
- package/dist/core/ingestion/utils.js +44 -0
- package/dist/core/kuzu/csv-generator.d.ts +22 -0
- package/dist/core/kuzu/csv-generator.js +272 -0
- package/dist/core/kuzu/kuzu-adapter.d.ts +81 -0
- package/dist/core/kuzu/kuzu-adapter.js +568 -0
- package/dist/core/kuzu/schema.d.ts +53 -0
- package/dist/core/kuzu/schema.js +380 -0
- package/dist/core/search/bm25-index.d.ts +22 -0
- package/dist/core/search/bm25-index.js +52 -0
- package/dist/core/search/hybrid-search.d.ts +49 -0
- package/dist/core/search/hybrid-search.js +118 -0
- package/dist/core/tree-sitter/parser-loader.d.ts +4 -0
- package/dist/core/tree-sitter/parser-loader.js +42 -0
- package/dist/lib/utils.d.ts +1 -0
- package/dist/lib/utils.js +3 -0
- package/dist/mcp/core/embedder.d.ts +27 -0
- package/dist/mcp/core/embedder.js +93 -0
- package/dist/mcp/core/kuzu-adapter.d.ts +23 -0
- package/dist/mcp/core/kuzu-adapter.js +62 -0
- package/dist/mcp/local/local-backend.d.ts +73 -0
- package/dist/mcp/local/local-backend.js +752 -0
- package/dist/mcp/resources.d.ts +31 -0
- package/dist/mcp/resources.js +279 -0
- package/dist/mcp/server.d.ts +12 -0
- package/dist/mcp/server.js +130 -0
- package/dist/mcp/staleness.d.ts +15 -0
- package/dist/mcp/staleness.js +29 -0
- package/dist/mcp/tools.d.ts +24 -0
- package/dist/mcp/tools.js +160 -0
- package/dist/server/api.d.ts +6 -0
- package/dist/server/api.js +156 -0
- package/dist/storage/git.d.ts +7 -0
- package/dist/storage/git.js +39 -0
- package/dist/storage/repo-manager.d.ts +61 -0
- package/dist/storage/repo-manager.js +106 -0
- package/dist/types/pipeline.d.ts +28 -0
- package/dist/types/pipeline.js +16 -0
- package/package.json +80 -0
- package/skills/debugging.md +104 -0
- package/skills/exploring.md +112 -0
- package/skills/impact-analysis.md +114 -0
- package/skills/refactoring.md +119 -0
- package/vendor/leiden/index.cjs +355 -0
- package/vendor/leiden/utils.cjs +392 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cluster Enricher
|
|
3
|
+
*
|
|
4
|
+
* LLM-based enrichment for community clusters.
|
|
5
|
+
* Generates semantic names, keywords, and descriptions using an LLM.
|
|
6
|
+
*/
|
|
7
|
+
// ============================================================================
|
|
8
|
+
// PROMPT TEMPLATE
|
|
9
|
+
// ============================================================================
|
|
10
|
+
/**
 * Build the single-cluster enrichment prompt sent to the LLM.
 *
 * The reply schema requested here matches what parseEnrichmentResponse reads
 * (`name`, `keywords`, `description`). The previous prompt omitted `keywords`,
 * so that field was always empty for non-batch enrichment.
 *
 * @param members - Cluster members; each entry is read for `name` and `type`
 * @param heuristicLabel - Heuristic label of the cluster, given to the model as a hint
 * @returns Prompt text asking for a JSON-only reply
 */
const buildEnrichmentPrompt = (members, heuristicLabel) => {
    // Cap the number of listed members to control token usage.
    const MAX_LISTED_MEMBERS = 20;
    const memberList = members
        .slice(0, MAX_LISTED_MEMBERS)
        .map(m => `${m.name} (${m.type})`)
        .join(', ');
    // Tell the model how many members were left out so it knows the list is partial.
    const omitted = members.length - MAX_LISTED_MEMBERS;
    const overflowNote = omitted > 0 ? ` (+${omitted} more)` : '';
    return `Analyze this code cluster and provide a semantic name, keywords, and a short description.

Heuristic: "${heuristicLabel}"
Members: ${memberList}${overflowNote}

Reply with JSON only:
{"name": "2-4 word semantic name", "keywords": ["3-5 short keywords"], "description": "One sentence describing purpose"}`;
};
|
|
24
|
+
// ============================================================================
|
|
25
|
+
// PARSE LLM RESPONSE
|
|
26
|
+
// ============================================================================
|
|
27
|
+
/**
 * Parse the LLM reply for a single cluster into an enrichment record.
 *
 * The reply may wrap the JSON in prose or a markdown code fence, so the text
 * from the first `{` to the last `}` is extracted and parsed. Every field is
 * type-checked because the model output is untrusted: a non-string or blank
 * `name` falls back to `fallbackLabel`, non-string keywords are dropped, and
 * a non-string `description` becomes an empty string.
 *
 * @param response - Raw text returned by the LLM
 * @param fallbackLabel - Name to use when the reply has no usable `name`
 * @returns `{ name, keywords, description }`; never throws
 */
const parseEnrichmentResponse = (response, fallbackLabel) => {
    const fallback = { name: fallbackLabel, keywords: [], description: '' };
    if (typeof response !== 'string') {
        return fallback;
    }
    // Greedy match on purpose: grabs the outermost {...} span, skipping code fences.
    const jsonMatch = response.match(/\{[\s\S]*\}/);
    if (!jsonMatch) {
        return fallback;
    }
    let parsed;
    try {
        parsed = JSON.parse(jsonMatch[0]);
    }
    catch {
        // Malformed JSON from the model: use the heuristic label.
        return fallback;
    }
    if (parsed === null || typeof parsed !== 'object') {
        return fallback;
    }
    const name = typeof parsed.name === 'string' ? parsed.name.trim() : '';
    return {
        name: name !== '' ? name : fallbackLabel,
        keywords: Array.isArray(parsed.keywords)
            ? parsed.keywords.filter(keyword => typeof keyword === 'string')
            : [],
        description: typeof parsed.description === 'string' ? parsed.description : '',
    };
};
|
|
50
|
+
// ============================================================================
|
|
51
|
+
// MAIN ENRICHMENT FUNCTION
|
|
52
|
+
// ============================================================================
|
|
53
|
+
/**
 * Enrich clusters with LLM-generated names, keywords, and descriptions,
 * issuing one LLM call per community, sequentially.
 *
 * Communities without members, and communities whose LLM call fails, get a
 * heuristic-only record (`name` = heuristic label, no keywords, empty
 * description), so the returned map has an entry for every community.
 *
 * @param communities - Community nodes to enrich (read for `id` and `heuristicLabel`)
 * @param memberMap - Map of communityId -> member info
 * @param llmClient - LLM client; `generate(prompt)` must resolve to the reply text
 * @param onProgress - Optional callback `(current, total)`, invoked before each community is processed
 * @returns `{ enrichments, tokensUsed }` where `tokensUsed` is a whole-number
 *          estimate (about 4 characters per token) over all prompts and replies
 */
export const enrichClusters = async (communities, memberMap, llmClient, onProgress) => {
    const enrichments = new Map();
    // Characters are accumulated and converted once at the end, so the estimate
    // stays an integer instead of collecting fractional tokens per call.
    let charsExchanged = 0;
    const heuristicOnly = (community) => ({
        name: community.heuristicLabel,
        keywords: [],
        description: '',
    });
    for (let i = 0; i < communities.length; i++) {
        const community = communities[i];
        const members = memberMap.get(community.id) || [];
        onProgress?.(i + 1, communities.length);
        if (members.length === 0) {
            // Nothing to describe to the model, so skip the LLM call.
            enrichments.set(community.id, heuristicOnly(community));
            continue;
        }
        try {
            const prompt = buildEnrichmentPrompt(members, community.heuristicLabel);
            const response = await llmClient.generate(prompt);
            if (typeof response !== 'string') {
                // A non-string reply would otherwise turn the token estimate into NaN.
                throw new TypeError('LLM client returned a non-string response');
            }
            charsExchanged += prompt.length + response.length;
            enrichments.set(community.id, parseEnrichmentResponse(response, community.heuristicLabel));
        }
        catch (error) {
            // On error, fall back to the heuristic label and continue with the next community.
            console.warn(`Failed to enrich cluster ${community.id}:`, error);
            enrichments.set(community.id, heuristicOnly(community));
        }
    }
    // Rough token estimate: about 4 characters per token.
    return { enrichments, tokensUsed: Math.ceil(charsExchanged / 4) };
};
|
|
97
|
+
// ============================================================================
|
|
98
|
+
// BATCH ENRICHMENT (more efficient)
|
|
99
|
+
// ============================================================================
|
|
100
|
+
/**
|
|
101
|
+
* Enrich multiple clusters in a single LLM call (batch mode)
|
|
102
|
+
* More efficient for token usage but requires larger context window
|
|
103
|
+
*/
|
|
104
|
+
export const enrichClustersBatch = async (communities, memberMap, llmClient, batchSize = 5, onProgress) => {
|
|
105
|
+
const enrichments = new Map();
|
|
106
|
+
let tokensUsed = 0;
|
|
107
|
+
// Process in batches
|
|
108
|
+
for (let i = 0; i < communities.length; i += batchSize) {
|
|
109
|
+
// Report progress
|
|
110
|
+
onProgress?.(Math.min(i + batchSize, communities.length), communities.length);
|
|
111
|
+
const batch = communities.slice(i, i + batchSize);
|
|
112
|
+
const batchPrompt = batch.map((community, idx) => {
|
|
113
|
+
const members = memberMap.get(community.id) || [];
|
|
114
|
+
const limitedMembers = members.slice(0, 15);
|
|
115
|
+
const memberList = limitedMembers
|
|
116
|
+
.map(m => `${m.name} (${m.type})`)
|
|
117
|
+
.join(', ');
|
|
118
|
+
return `Cluster ${idx + 1} (id: ${community.id}):
|
|
119
|
+
Heuristic: "${community.heuristicLabel}"
|
|
120
|
+
Members: ${memberList}`;
|
|
121
|
+
}).join('\n\n');
|
|
122
|
+
const prompt = `Analyze these code clusters and generate semantic names, keywords, and descriptions.
|
|
123
|
+
|
|
124
|
+
${batchPrompt}
|
|
125
|
+
|
|
126
|
+
Output JSON array:
|
|
127
|
+
[
|
|
128
|
+
{"id": "comm_X", "name": "...", "keywords": [...], "description": "..."},
|
|
129
|
+
...
|
|
130
|
+
]`;
|
|
131
|
+
try {
|
|
132
|
+
const response = await llmClient.generate(prompt);
|
|
133
|
+
tokensUsed += prompt.length / 4 + response.length / 4;
|
|
134
|
+
// Parse batch response
|
|
135
|
+
const jsonMatch = response.match(/\[[\s\S]*\]/);
|
|
136
|
+
if (jsonMatch) {
|
|
137
|
+
const parsed = JSON.parse(jsonMatch[0]);
|
|
138
|
+
for (const item of parsed) {
|
|
139
|
+
enrichments.set(item.id, {
|
|
140
|
+
name: item.name,
|
|
141
|
+
keywords: item.keywords || [],
|
|
142
|
+
description: item.description || '',
|
|
143
|
+
});
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
catch (error) {
|
|
148
|
+
console.warn('Batch enrichment failed, falling back to heuristics:', error);
|
|
149
|
+
// Fallback for this batch
|
|
150
|
+
for (const community of batch) {
|
|
151
|
+
enrichments.set(community.id, {
|
|
152
|
+
name: community.heuristicLabel,
|
|
153
|
+
keywords: [],
|
|
154
|
+
description: '',
|
|
155
|
+
});
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
}
|
|
159
|
+
// Fill in any missing communities
|
|
160
|
+
for (const community of communities) {
|
|
161
|
+
if (!enrichments.has(community.id)) {
|
|
162
|
+
enrichments.set(community.id, {
|
|
163
|
+
name: community.heuristicLabel,
|
|
164
|
+
keywords: [],
|
|
165
|
+
description: '',
|
|
166
|
+
});
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
return { enrichments, tokensUsed };
|
|
170
|
+
};
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Community Detection Processor
|
|
3
|
+
*
|
|
4
|
+
* Uses the Leiden algorithm (via graphology-communities-leiden) to detect
|
|
5
|
+
* communities/clusters in the code graph based on CALLS relationships.
|
|
6
|
+
*
|
|
7
|
+
* Communities represent groups of code that work together frequently,
|
|
8
|
+
* helping agents navigate the codebase by functional area rather than file structure.
|
|
9
|
+
*/
|
|
10
|
+
import { KnowledgeGraph } from '../graph/types.js';
|
|
11
|
+
/**
 * A detected community (cluster) of code symbols.
 *
 * - `id`: `comm_<n>`, where `<n>` is the community number assigned by the Leiden run.
 * - `label`: display label; set to the same value as `heuristicLabel` when the node is created.
 * - `heuristicLabel`: label derived from the members' most common parent folder,
 *   else a common name prefix, else `Cluster_<n>`.
 * - `cohesion`: internal edge density of the community, in the range 0-1.
 * - `symbolCount`: number of member symbols.
 */
export interface CommunityNode {
|
|
12
|
+
id: string;
|
|
13
|
+
label: string;
|
|
14
|
+
heuristicLabel: string;
|
|
15
|
+
cohesion: number;
|
|
16
|
+
symbolCount: number;
|
|
17
|
+
}
|
|
18
|
+
/**
 * Assignment of one graph node to one community.
 *
 * - `nodeId`: id of the symbol node in the knowledge graph.
 * - `communityId`: id of the community, in the same `comm_<n>` form as `CommunityNode.id`.
 */
export interface CommunityMembership {
|
|
19
|
+
nodeId: string;
|
|
20
|
+
communityId: string;
|
|
21
|
+
}
|
|
22
|
+
/**
 * Result of a community detection run.
 *
 * - `communities`: community nodes, sorted by `symbolCount` descending;
 *   single-member communities are not included.
 * - `memberships`: node -> community assignments.
 * - `stats.totalCommunities`: community count reported by the Leiden algorithm.
 * - `stats.modularity`: modularity reported by the Leiden algorithm.
 * - `stats.nodesProcessed`: number of symbol nodes in the graph that was clustered.
 */
export interface CommunityDetectionResult {
|
|
23
|
+
communities: CommunityNode[];
|
|
24
|
+
memberships: CommunityMembership[];
|
|
25
|
+
stats: {
|
|
26
|
+
totalCommunities: number;
|
|
27
|
+
modularity: number;
|
|
28
|
+
nodesProcessed: number;
|
|
29
|
+
};
|
|
30
|
+
}
|
|
31
|
+
/** Palette of hex color strings used to color communities in visualizations. */
export declare const COMMUNITY_COLORS: string[];
|
|
32
|
+
/**
 * Pick a color from COMMUNITY_COLORS for a community, cycling through the
 * palette when the index is larger than the palette.
 *
 * @param communityIndex - Zero-based index of the community
 * @returns Hex color string
 */
export declare const getCommunityColor: (communityIndex: number) => string;
|
|
33
|
+
/**
|
|
34
|
+
* Detect communities in the knowledge graph using Leiden algorithm
|
|
35
|
+
*
|
|
36
|
+
* This runs AFTER all relationships (CALLS, IMPORTS, etc.) have been built.
|
|
37
|
+
* It uses primarily CALLS edges to cluster code that works together;
* EXTENDS and IMPLEMENTS edges between symbols are included as well.
*
* @param knowledgeGraph - Graph whose symbol nodes and relationships are clustered
* @param onProgress - Optional callback receiving a status message and a progress value (0-100)
* @returns Promise resolving to the detected communities, memberships and run statistics
|
|
38
|
+
*/
|
|
39
|
+
export declare const processCommunities: (knowledgeGraph: KnowledgeGraph, onProgress?: (message: string, progress: number) => void) => Promise<CommunityDetectionResult>;
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Community Detection Processor
|
|
3
|
+
*
|
|
4
|
+
* Uses the Leiden algorithm (via graphology-communities-leiden) to detect
|
|
5
|
+
* communities/clusters in the code graph based on CALLS relationships.
|
|
6
|
+
*
|
|
7
|
+
* Communities represent groups of code that work together frequently,
|
|
8
|
+
* helping agents navigate the codebase by functional area rather than file structure.
|
|
9
|
+
*/
|
|
10
|
+
// NOTE: The Leiden algorithm source is vendored from graphology's repo
|
|
11
|
+
// (src/communities-leiden) because it was never published to npm.
|
|
12
|
+
// We use createRequire to load the CommonJS vendored files in ESM context.
|
|
13
|
+
import Graph from 'graphology';
|
|
14
|
+
import { createRequire } from 'node:module';
|
|
15
|
+
import { fileURLToPath } from 'node:url';
|
|
16
|
+
import { dirname, resolve } from 'node:path';
|
|
17
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
18
|
+
const __dirname = dirname(__filename);
|
|
19
|
+
// Navigate to package root (works from both src/ and dist/)
|
|
20
|
+
const leidenPath = resolve(__dirname, '..', '..', '..', 'vendor', 'leiden', 'index.cjs');
|
|
21
|
+
const _require = createRequire(import.meta.url);
|
|
22
|
+
const leiden = _require(leidenPath);
|
|
23
|
+
// ============================================================================
|
|
24
|
+
// COMMUNITY COLORS (for visualization)
|
|
25
|
+
// ============================================================================
|
|
26
|
+
/** Palette used to color communities in visualizations. */
export const COMMUNITY_COLORS = [
    '#ef4444', // red
    '#f97316', // orange
    '#eab308', // yellow
    '#22c55e', // green
    '#06b6d4', // cyan
    '#3b82f6', // blue
    '#8b5cf6', // violet
    '#d946ef', // fuchsia
    '#ec4899', // pink
    '#f43f5e', // rose
    '#14b8a6', // teal
    '#84cc16', // lime
];
/**
 * Pick a palette color for a community, cycling through COMMUNITY_COLORS.
 *
 * Always returns a palette entry. Negative indices wrap around from the end
 * (JavaScript's `%` keeps the sign of the dividend, so a plain modulo would
 * index out of range), fractional indices are truncated, and non-finite
 * input maps to the first color.
 *
 * @param communityIndex - Index of the community
 * @returns Hex color string from COMMUNITY_COLORS
 */
export const getCommunityColor = (communityIndex) => {
    const paletteSize = COMMUNITY_COLORS.length;
    if (!Number.isFinite(communityIndex)) {
        return COMMUNITY_COLORS[0];
    }
    const slot = ((Math.trunc(communityIndex) % paletteSize) + paletteSize) % paletteSize;
    return COMMUNITY_COLORS[slot];
};
|
|
43
|
+
// ============================================================================
|
|
44
|
+
// MAIN PROCESSOR
|
|
45
|
+
// ============================================================================
|
|
46
|
+
/**
 * Detect communities in the knowledge graph using the Leiden algorithm.
 *
 * This runs AFTER all relationships (CALLS, IMPORTS, etc.) have been built.
 * Clustering is driven primarily by CALLS edges between symbols; EXTENDS and
 * IMPLEMENTS edges are included as well.
 *
 * Single-member communities are not returned as community nodes, and
 * memberships are emitted only for communities that are returned, so every
 * `communityId` in `memberships` refers to an entry in `communities`.
 *
 * @param knowledgeGraph - Graph whose symbol nodes and relationships are clustered
 * @param onProgress - Optional callback receiving a status message and a progress value (0-100)
 * @returns Community nodes, node -> community memberships, and run statistics
 */
export const processCommunities = async (knowledgeGraph, onProgress) => {
    onProgress?.('Building graph for community detection...', 0);
    // Step 1: Build a graphology graph from the knowledge graph.
    // Only symbol nodes (Function, Class, Method, Interface) and
    // CALLS / EXTENDS / IMPLEMENTS edges are included.
    const graph = buildGraphologyGraph(knowledgeGraph);
    if (graph.order === 0) {
        // No nodes to cluster
        return {
            communities: [],
            memberships: [],
            stats: { totalCommunities: 0, modularity: 0, nodesProcessed: 0 }
        };
    }
    onProgress?.(`Running Leiden algorithm on ${graph.order} nodes...`, 30);
    // Step 2: Run Leiden algorithm for community detection
    const details = leiden.detailed(graph, {
        resolution: 1.0, // Default resolution, can be tuned
        randomWalk: true,
    });
    onProgress?.(`Found ${details.count} communities...`, 60);
    // Step 3: Create community nodes with heuristic labels (singletons are skipped there)
    const communityNodes = createCommunityNodes(details.communities, details.count, graph, knowledgeGraph);
    onProgress?.('Creating membership edges...', 80);
    // Step 4: Create membership mappings, restricted to communities that were
    // emitted, so no membership points at a community node that does not exist.
    const emittedCommunityIds = new Set(communityNodes.map(community => community.id));
    const memberships = [];
    for (const [nodeId, communityNum] of Object.entries(details.communities)) {
        const communityId = `comm_${communityNum}`;
        if (emittedCommunityIds.has(communityId)) {
            memberships.push({ nodeId, communityId });
        }
    }
    onProgress?.('Community detection complete!', 100);
    return {
        communities: communityNodes,
        memberships,
        stats: {
            // Raw count reported by Leiden; includes the singleton communities skipped above.
            totalCommunities: details.count,
            modularity: details.modularity,
            nodesProcessed: graph.order,
        }
    };
};
|
|
94
|
+
// ============================================================================
|
|
95
|
+
// HELPER: Build graphology graph from knowledge graph
|
|
96
|
+
// ============================================================================
|
|
97
|
+
/**
 * Project the knowledge graph onto the undirected symbol graph that the
 * Leiden algorithm clusters.
 *
 * Nodes: every knowledge-graph node labelled Function, Class, Method or
 * Interface, carrying `name`, `filePath` and `type` attributes.
 * Edges: CALLS, EXTENDS and IMPLEMENTS relationships whose endpoints are both
 * in the symbol graph; self-references and repeated pairs are skipped.
 */
const buildGraphologyGraph = (knowledgeGraph) => {
    // Node labels that take part in clustering.
    const clusterableLabels = new Set(['Function', 'Class', 'Method', 'Interface']);
    // CALLS is the primary clustering signal; EXTENDS/IMPLEMENTS add OOP structure.
    const clusteringEdgeTypes = new Set(['CALLS', 'EXTENDS', 'IMPLEMENTS']);
    // Undirected: Leiden looks at edge density, not direction.
    const symbolGraph = new Graph({ type: 'undirected', allowSelfLoops: false });
    knowledgeGraph.nodes.forEach((node) => {
        if (!clusterableLabels.has(node.label)) {
            return;
        }
        const { name, filePath } = node.properties;
        symbolGraph.addNode(node.id, { name, filePath, type: node.label });
    });
    knowledgeGraph.relationships.forEach((rel) => {
        const { type, sourceId: from, targetId: to } = rel;
        if (!clusteringEdgeTypes.has(type)) {
            return;
        }
        // Self-loops (recursive calls) are not allowed in this graph.
        if (from === to) {
            return;
        }
        // Both endpoints must be symbols that were added above.
        if (!symbolGraph.hasNode(from) || !symbolGraph.hasNode(to)) {
            return;
        }
        // At most one edge per node pair.
        if (symbolGraph.hasEdge(from, to)) {
            return;
        }
        symbolGraph.addEdge(from, to);
    });
    return symbolGraph;
};
|
|
133
|
+
// ============================================================================
|
|
134
|
+
// HELPER: Create community nodes with heuristic labels
|
|
135
|
+
// ============================================================================
|
|
136
|
+
/**
 * Turn the node -> community-number assignment into Community nodes.
 *
 * Each community with at least two members gets an id of the form `comm_<n>`,
 * a heuristic label (also used as the initial `label`), a cohesion score and
 * its member count. Single-member communities are skipped. The result is
 * sorted by member count, largest first.
 *
 * `communityCount` is accepted but not used by this function.
 */
const createCommunityNodes = (communities, communityCount, graph, knowledgeGraph) => {
    // Bucket node ids by the community number they were assigned to.
    const membersByCommunity = new Map();
    for (const [nodeId, commNum] of Object.entries(communities)) {
        const bucket = membersByCommunity.get(commNum);
        if (bucket) {
            bucket.push(nodeId);
        }
        else {
            membersByCommunity.set(commNum, [nodeId]);
        }
    }
    // File path lookup used by the label heuristic.
    const filePathById = new Map();
    knowledgeGraph.nodes.forEach((node) => {
        const { filePath } = node.properties;
        if (filePath) {
            filePathById.set(node.id, filePath);
        }
    });
    const result = [];
    for (const [commNum, memberIds] of membersByCommunity) {
        // Singletons are just isolated nodes, not communities.
        if (memberIds.length < 2) {
            continue;
        }
        const heuristicLabel = generateHeuristicLabel(memberIds, filePathById, graph, commNum);
        const cohesion = calculateCohesion(memberIds, graph);
        result.push({
            id: `comm_${commNum}`,
            label: heuristicLabel,
            heuristicLabel,
            cohesion,
            symbolCount: memberIds.length,
        });
    }
    // Largest communities first.
    result.sort((left, right) => right.symbolCount - left.symbolCount);
    return result;
};
|
|
174
|
+
// ============================================================================
|
|
175
|
+
// HELPER: Generate heuristic label from folder patterns
|
|
176
|
+
// ============================================================================
|
|
177
|
+
/**
|
|
178
|
+
* Generate a human-readable label from the most common folder name in the community
|
|
179
|
+
*/
|
|
180
|
+
const generateHeuristicLabel = (memberIds, nodePathMap, graph, commNum) => {
|
|
181
|
+
// Collect folder names from file paths
|
|
182
|
+
const folderCounts = new Map();
|
|
183
|
+
memberIds.forEach(nodeId => {
|
|
184
|
+
const filePath = nodePathMap.get(nodeId) || '';
|
|
185
|
+
const parts = filePath.split('/').filter(Boolean);
|
|
186
|
+
// Get the most specific folder (parent directory)
|
|
187
|
+
if (parts.length >= 2) {
|
|
188
|
+
const folder = parts[parts.length - 2];
|
|
189
|
+
// Skip generic folder names
|
|
190
|
+
if (!['src', 'lib', 'core', 'utils', 'common', 'shared', 'helpers'].includes(folder.toLowerCase())) {
|
|
191
|
+
folderCounts.set(folder, (folderCounts.get(folder) || 0) + 1);
|
|
192
|
+
}
|
|
193
|
+
}
|
|
194
|
+
});
|
|
195
|
+
// Find most common folder
|
|
196
|
+
let maxCount = 0;
|
|
197
|
+
let bestFolder = '';
|
|
198
|
+
folderCounts.forEach((count, folder) => {
|
|
199
|
+
if (count > maxCount) {
|
|
200
|
+
maxCount = count;
|
|
201
|
+
bestFolder = folder;
|
|
202
|
+
}
|
|
203
|
+
});
|
|
204
|
+
if (bestFolder) {
|
|
205
|
+
// Capitalize first letter
|
|
206
|
+
return bestFolder.charAt(0).toUpperCase() + bestFolder.slice(1);
|
|
207
|
+
}
|
|
208
|
+
// Fallback: use function names to detect patterns
|
|
209
|
+
const names = [];
|
|
210
|
+
memberIds.forEach(nodeId => {
|
|
211
|
+
const name = graph.getNodeAttribute(nodeId, 'name');
|
|
212
|
+
if (name)
|
|
213
|
+
names.push(name);
|
|
214
|
+
});
|
|
215
|
+
// Look for common prefixes
|
|
216
|
+
if (names.length > 2) {
|
|
217
|
+
const commonPrefix = findCommonPrefix(names);
|
|
218
|
+
if (commonPrefix.length > 2) {
|
|
219
|
+
return commonPrefix.charAt(0).toUpperCase() + commonPrefix.slice(1);
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
// Last resort: generic name with community ID for uniqueness
|
|
223
|
+
return `Cluster_${commNum}`;
|
|
224
|
+
};
|
|
225
|
+
/**
|
|
226
|
+
* Find common prefix among strings
|
|
227
|
+
*/
|
|
228
|
+
const findCommonPrefix = (strings) => {
|
|
229
|
+
if (strings.length === 0)
|
|
230
|
+
return '';
|
|
231
|
+
const sorted = strings.slice().sort();
|
|
232
|
+
const first = sorted[0];
|
|
233
|
+
const last = sorted[sorted.length - 1];
|
|
234
|
+
let i = 0;
|
|
235
|
+
while (i < first.length && first[i] === last[i]) {
|
|
236
|
+
i++;
|
|
237
|
+
}
|
|
238
|
+
return first.substring(0, i);
|
|
239
|
+
};
|
|
240
|
+
// ============================================================================
|
|
241
|
+
// HELPER: Calculate community cohesion
|
|
242
|
+
// ============================================================================
|
|
243
|
+
/**
|
|
244
|
+
* Calculate cohesion score (0-1) based on internal edge density
|
|
245
|
+
* Higher cohesion = more internal connections relative to size
|
|
246
|
+
*/
|
|
247
|
+
const calculateCohesion = (memberIds, graph) => {
|
|
248
|
+
if (memberIds.length <= 1)
|
|
249
|
+
return 1.0;
|
|
250
|
+
const memberSet = new Set(memberIds);
|
|
251
|
+
let internalEdges = 0;
|
|
252
|
+
// Count edges within the community
|
|
253
|
+
memberIds.forEach(nodeId => {
|
|
254
|
+
if (graph.hasNode(nodeId)) {
|
|
255
|
+
graph.forEachNeighbor(nodeId, neighbor => {
|
|
256
|
+
if (memberSet.has(neighbor)) {
|
|
257
|
+
internalEdges++;
|
|
258
|
+
}
|
|
259
|
+
});
|
|
260
|
+
}
|
|
261
|
+
});
|
|
262
|
+
// Each edge is counted twice (once from each end), so divide by 2
|
|
263
|
+
internalEdges = internalEdges / 2;
|
|
264
|
+
// Maximum possible internal edges for n nodes: n*(n-1)/2
|
|
265
|
+
const maxPossibleEdges = (memberIds.length * (memberIds.length - 1)) / 2;
|
|
266
|
+
if (maxPossibleEdges === 0)
|
|
267
|
+
return 1.0;
|
|
268
|
+
return Math.min(1.0, internalEdges / maxPossibleEdges);
|
|
269
|
+
};
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Entry Point Scoring
|
|
3
|
+
*
|
|
4
|
+
* Calculates entry point scores for process detection based on:
|
|
5
|
+
* 1. Call ratio (existing algorithm - callees / (callers + 1))
|
|
6
|
+
* 2. Export status (exported functions get higher priority)
|
|
7
|
+
* 3. Name patterns (functions matching entry point patterns like handle*, on*, *Controller)
|
|
8
|
+
* 4. Framework detection (path-based detection for Next.js, Express, Django, etc.)
|
|
9
|
+
*
|
|
10
|
+
* This module is language-agnostic - language-specific patterns are defined per language.
|
|
11
|
+
*/
|
|
12
|
+
/**
 * Result of scoring a function/method as an entry point candidate.
 *
 * - `score`: higher values indicate better entry point candidates.
 * - `reasons`: reasons explaining the score.
 */
export interface EntryPointScoreResult {
|
|
13
|
+
score: number;
|
|
14
|
+
reasons: string[];
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Calculate an entry point score for a function/method
|
|
18
|
+
*
|
|
19
|
+
* Higher scores indicate better entry point candidates.
|
|
20
|
+
* Score = baseScore × exportMultiplier × nameMultiplier
|
|
21
|
+
*
|
|
22
|
+
* @param name - Function/method name
|
|
23
|
+
* @param language - Programming language
|
|
24
|
+
* @param isExported - Whether the function is exported/public
|
|
25
|
+
* @param callerCount - Number of functions that call this function
|
|
26
|
+
* @param calleeCount - Number of functions this function calls
* @param filePath - Optional path of the file containing the function.
*   NOTE(review): presumably feeds the path-based framework detection listed
*   in the module header; the formula above does not mention it. Confirm
*   against the implementation.
|
|
27
|
+
* @returns Score and array of reasons explaining the score
|
|
28
|
+
*/
|
|
29
|
+
export declare function calculateEntryPointScore(name: string, language: string, isExported: boolean, callerCount: number, calleeCount: number, filePath?: string): EntryPointScoreResult;
|
|
30
|
+
/**
|
|
31
|
+
* Check if a file path is a test file (should be excluded from entry points)
|
|
32
|
+
* Covers common test file patterns across all supported languages
*
* @param filePath - File path to check
* @returns Whether the path is considered a test file
|
|
33
|
+
*/
|
|
34
|
+
export declare function isTestFile(filePath: string): boolean;
|
|
35
|
+
/**
|
|
36
|
+
* Check if a file path is likely a utility/helper file
|
|
37
|
+
* These might still have entry points but should be lower priority
*
* @param filePath - File path to check
* @returns Whether the path is considered a utility/helper file
|
|
38
|
+
*/
|
|
39
|
+
export declare function isUtilityFile(filePath: string): boolean;
|