@getmikk/core 1.2.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +431 -0
- package/package.json +6 -2
- package/src/contract/contract-generator.ts +85 -85
- package/src/contract/contract-reader.ts +28 -28
- package/src/contract/contract-writer.ts +114 -114
- package/src/contract/index.ts +12 -12
- package/src/contract/lock-compiler.ts +221 -221
- package/src/contract/lock-reader.ts +34 -34
- package/src/contract/schema.ts +147 -147
- package/src/graph/cluster-detector.ts +312 -312
- package/src/graph/graph-builder.ts +211 -211
- package/src/graph/impact-analyzer.ts +55 -55
- package/src/graph/index.ts +4 -4
- package/src/graph/types.ts +59 -59
- package/src/hash/file-hasher.ts +30 -30
- package/src/hash/hash-store.ts +119 -119
- package/src/hash/index.ts +3 -3
- package/src/hash/tree-hasher.ts +20 -20
- package/src/index.ts +12 -12
- package/src/parser/base-parser.ts +16 -16
- package/src/parser/boundary-checker.ts +211 -211
- package/src/parser/index.ts +46 -46
- package/src/parser/types.ts +90 -90
- package/src/parser/typescript/ts-extractor.ts +543 -543
- package/src/parser/typescript/ts-parser.ts +41 -41
- package/src/parser/typescript/ts-resolver.ts +86 -86
- package/src/utils/errors.ts +42 -42
- package/src/utils/fs.ts +75 -75
- package/src/utils/fuzzy-match.ts +186 -186
- package/src/utils/logger.ts +36 -36
- package/src/utils/minimatch.ts +19 -19
- package/tests/contract.test.ts +134 -134
- package/tests/fixtures/simple-api/package.json +5 -5
- package/tests/fixtures/simple-api/src/auth/middleware.ts +9 -9
- package/tests/fixtures/simple-api/src/auth/verify.ts +6 -6
- package/tests/fixtures/simple-api/src/index.ts +9 -9
- package/tests/fixtures/simple-api/src/utils/jwt.ts +3 -3
- package/tests/fixtures/simple-api/tsconfig.json +8 -8
- package/tests/fuzzy-match.test.ts +142 -142
- package/tests/graph.test.ts +169 -169
- package/tests/hash.test.ts +49 -49
- package/tests/helpers.ts +83 -83
- package/tests/parser.test.ts +218 -218
- package/tsconfig.json +15 -15
|
@@ -1,312 +1,312 @@
|
|
|
1
|
-
import * as path from 'node:path'
|
|
2
|
-
import type { DependencyGraph, ModuleCluster, GraphNode } from './types.js'
|
|
3
|
-
|
|
4
|
-
/**
 * ClusterDetector — analyzes the dependency graph and groups files
 * into natural module clusters using greedy agglomeration with coupling scores.
 *
 * Algorithm (Section 4 of Mikk Technical Reference):
 * 1. Build a coupling matrix:
 *    coupling(A,B) = (edges between A,B * 2) / (total edges of A + total edges of B)
 * 2. Sort files by total edge count (most connected first)
 * 3. Greedy agglomeration: seed clusters from most connected files,
 *    expand by pulling in strongly-coupled neighbors
 * 4. Files that did not end up in a cluster of at least `minClusterSize` files
 *    become single-file clusters with low confidence
 */
export class ClusterDetector {
    /**
     * @param graph Dependency graph to analyze.
     * @param minClusterSize Minimum number of files a grown cluster needs in order
     *   to be reported as a multi-file cluster.
     * @param minCouplingScore Minimum pairwise coupling score for a neighbor to be
     *   considered as an expansion candidate.
     */
    constructor(
        private graph: DependencyGraph,
        private minClusterSize: number = 2,
        private minCouplingScore: number = 0.15
    ) { }

    /**
     * Returns groups of files that naturally belong together, sorted by confidence
     * (highest first). Every file node of the graph appears in exactly one cluster.
     */
    detect(): ModuleCluster[] {
        const fileNodes = [...this.graph.nodes.values()].filter(n => n.type === 'file')
        if (fileNodes.length === 0) return []

        const files = fileNodes.map(n => n.id)
        const couplingMatrix = this.computeCouplingMatrix(files)
        // Files claimed by some seed's expansion; a claimed file is never a seed or candidate again.
        const assigned = new Set<string>()
        // Files that were actually reported as part of a grown cluster.
        const clustered = new Set<string>()
        const clusters: ModuleCluster[] = []

        // Sort files by total edge count (most connected first)
        const sortedFiles = [...files].sort((a, b) =>
            this.getTotalEdges(b) - this.getTotalEdges(a)
        )

        for (const seedFile of sortedFiles) {
            if (assigned.has(seedFile)) continue

            const cluster = this.growCluster(seedFile, couplingMatrix, assigned)
            if (cluster.length < this.minClusterSize) continue

            for (const member of cluster) clustered.add(member)
            const filePathsForCluster = cluster.map(id => this.getNodeFile(id))
            clusters.push({
                id: this.inferClusterId(filePathsForCluster),
                files: filePathsForCluster,
                confidence: this.computeClusterConfidence(cluster),
                suggestedName: this.inferClusterName(filePathsForCluster),
                functions: this.getFunctionIdsForFiles(cluster),
            })
        }

        // Orphan files get their own single-file clusters. This must be keyed on
        // `clustered`, not `assigned`: every seed is marked assigned before it grows,
        // so checking `assigned` would silently drop all files of undersized clusters.
        for (const file of files) {
            if (clustered.has(file)) continue
            const filePath = this.getNodeFile(file)
            clusters.push({
                id: this.inferClusterId([filePath]),
                files: [filePath],
                confidence: 0.3,
                suggestedName: this.inferClusterName([filePath]),
                functions: this.getFunctionIdsForFiles([file]),
            })
        }

        return clusters.sort((a, b) => b.confidence - a.confidence)
    }

    /**
     * Grow a cluster from `seedFile` by repeatedly pulling in unassigned neighbors
     * whose coupling score reaches `minCouplingScore` and whose average affinity to
     * the cluster exceeds their best affinity to any unassigned outside file.
     * Marks the seed and every pulled-in file in `assigned`.
     */
    private growCluster(
        seedFile: string,
        couplingMatrix: Map<string, Map<string, number>>,
        assigned: Set<string>
    ): string[] {
        const cluster: string[] = [seedFile]
        assigned.add(seedFile)

        let expanded = true
        while (expanded) {
            expanded = false

            // Iterate over a snapshot: members added during this pass are visited in the next one.
            for (const clusterFile of [...cluster]) {
                const partners = couplingMatrix.get(clusterFile)
                if (!partners) continue

                for (const [candidate, score] of partners) {
                    if (assigned.has(candidate)) continue
                    if (score < this.minCouplingScore) continue

                    // Is this candidate more coupled to this cluster than to others?
                    const clusterAffinity = this.computeClusterAffinity(
                        candidate, cluster, couplingMatrix
                    )
                    const bestOutsideAffinity = this.computeBestOutsideAffinity(
                        candidate, cluster, couplingMatrix, assigned
                    )

                    if (clusterAffinity > bestOutsideAffinity) {
                        cluster.push(candidate)
                        assigned.add(candidate)
                        expanded = true
                    }
                }
            }
        }

        return cluster
    }

    // ─── Coupling Matrix ──────────────────────────────────────────

    /**
     * Build coupling matrix: for every pair of files connected by at least one
     * 'imports' or 'calls' edge, compute
     * coupling(A,B) = (edges between A,B * 2) / (totalEdges(A) + totalEdges(B))
     * where totalEdges counts the in + out edges recorded on the file node itself.
     * The matrix is symmetric; files without cross-file edges have no entry.
     */
    private computeCouplingMatrix(files: string[]): Map<string, Map<string, number>> {
        const matrix = new Map<string, Map<string, number>>()
        const fileEdgeCounts = new Map<string, number>()

        // Count total edges per file
        for (const fileId of files) {
            fileEdgeCounts.set(fileId, this.getTotalEdges(fileId))
        }

        const fileSet = new Set(files)

        // Count edges between each pair of files (file-level imports + function-level calls)
        for (const edge of this.graph.edges) {
            if (edge.type !== 'imports' && edge.type !== 'calls') continue

            const sourceFile = this.getFileForNode(edge.source)
            const targetFile = this.getFileForNode(edge.target)

            if (!sourceFile || !targetFile || sourceFile === targetFile) continue
            if (!fileSet.has(sourceFile) || !fileSet.has(targetFile)) continue

            // Increment pair count for both directions
            this.incrementPair(matrix, sourceFile, targetFile)
            this.incrementPair(matrix, targetFile, sourceFile)
        }

        // Normalize raw pair counts to coupling scores (overwrites existing keys in place)
        for (const [file, partners] of matrix) {
            // `||` on purpose: a count of 0 must become 1 to keep the denominator non-zero
            const totalEdges = fileEdgeCounts.get(file) || 1
            for (const [partner, edgeCount] of partners) {
                const partnerEdges = fileEdgeCounts.get(partner) || 1
                partners.set(partner, (edgeCount * 2) / (totalEdges + partnerEdges))
            }
        }

        return matrix
    }

    /** Add one to the directed pair counter matrix[a][b], creating the row if needed */
    private incrementPair(matrix: Map<string, Map<string, number>>, a: string, b: string): void {
        let partners = matrix.get(a)
        if (!partners) {
            partners = new Map<string, number>()
            matrix.set(a, partners)
        }
        partners.set(b, (partners.get(b) ?? 0) + 1)
    }

    // ─── Affinity Computation ─────────────────────────────────────

    /**
     * Average coupling score of a candidate to all files in the cluster.
     * Cluster files the candidate has no coupling with contribute 0.
     */
    private computeClusterAffinity(
        candidate: string,
        cluster: string[],
        couplingMatrix: Map<string, Map<string, number>>
    ): number {
        if (cluster.length === 0) return 0
        const partners = couplingMatrix.get(candidate)
        let totalScore = 0
        for (const clusterFile of cluster) {
            totalScore += partners?.get(clusterFile) ?? 0
        }
        return totalScore / cluster.length
    }

    /** Best coupling score of a candidate to any file NOT in the cluster and not yet assigned */
    private computeBestOutsideAffinity(
        candidate: string,
        cluster: string[],
        couplingMatrix: Map<string, Map<string, number>>,
        assigned: Set<string>
    ): number {
        const partners = couplingMatrix.get(candidate)
        if (!partners) return 0

        const clusterSet = new Set(cluster)
        let best = 0
        for (const [partner, score] of partners) {
            if (clusterSet.has(partner) || assigned.has(partner)) continue
            if (score > best) best = score
        }
        return best
    }

    // ─── Confidence ───────────────────────────────────────────────

    /**
     * Confidence = (internal edges) / (internal edges + external edges)
     * Score of 1.0 = perfectly self-contained
     * Score of 0.0 = all edges go outside
     * Returns 0.5 when the files have no counted edges at all.
     *
     * Counted edges are the outgoing 'imports' edges of each file plus the outgoing
     * 'calls' edges of every node the file 'contains'.
     *
     * @param files File node IDs that make up the cluster.
     */
    computeClusterConfidence(files: string[]): number {
        const fileSet = new Set(files)
        let internalEdges = 0
        let externalEdges = 0

        for (const file of files) {
            for (const edge of this.graph.outEdges.get(file) ?? []) {
                if (edge.type === 'imports') {
                    // File-level import: internal when the target is a cluster member
                    if (fileSet.has(edge.target)) {
                        internalEdges++
                    } else {
                        externalEdges++
                    }
                } else if (edge.type === 'contains') {
                    // Function-level calls made by a node contained in this file
                    for (const callEdge of this.graph.outEdges.get(edge.target) ?? []) {
                        if (callEdge.type !== 'calls') continue
                        const targetNode = this.graph.nodes.get(callEdge.target)
                        // Calls to nodes missing from the graph are not counted at all
                        if (!targetNode) continue
                        if (fileSet.has(targetNode.file)) {
                            internalEdges++
                        } else {
                            externalEdges++
                        }
                    }
                }
            }
        }

        const total = internalEdges + externalEdges
        if (total === 0) return 0.5
        return internalEdges / total
    }

    // ─── Helpers ──────────────────────────────────────────────────

    /** Total edges (in + out) for a node */
    private getTotalEdges(nodeId: string): number {
        const outCount = (this.graph.outEdges.get(nodeId) ?? []).length
        const inCount = (this.graph.inEdges.get(nodeId) ?? []).length
        return outCount + inCount
    }

    /**
     * Get the file a node belongs to: the node ID itself for file nodes, the node's
     * `.file` for any other node, or null when the node is unknown or has no file.
     */
    private getFileForNode(nodeId: string): string | null {
        const node = this.graph.nodes.get(nodeId)
        if (!node) return null
        if (node.type === 'file') return nodeId
        // For function/class/generic nodes, find their parent file
        return node.file || null
    }

    /** Get the file path from a file node ID (the node's .file property, else the ID itself) */
    private getNodeFile(fileNodeId: string): string {
        const node = this.graph.nodes.get(fileNodeId)
        return node?.file || fileNodeId
    }

    /** Get the targets of all 'contains' edges leaving the given file node IDs */
    private getFunctionIdsForFiles(fileNodeIds: string[]): string[] {
        return fileNodeIds.flatMap(f =>
            (this.graph.outEdges.get(f) ?? [])
                .filter(e => e.type === 'contains')
                .map(e => e.target)
        )
    }

    /** Infer a module ID from file paths (common directory prefix) */
    private inferClusterId(filePaths: string[]): string {
        if (filePaths.length === 0) return 'unknown'
        if (filePaths.length === 1) {
            return this.getDirSegment(filePaths[0])
        }
        // Find the longest common directory prefix; the last segment of the first
        // path is its file name and is never part of the prefix.
        const segments = filePaths.map(f => f.split('/'))
        const firstSegments = segments[0]
        let commonLen = 0
        for (let i = 0; i < firstSegments.length - 1; i++) {
            if (!segments.every(s => s[i] === firstSegments[i])) break
            commonLen = i + 1
        }
        const commonPath = firstSegments.slice(0, commonLen).join('/')
        // Without any shared directory, fall back to the first file's own segment
        return this.getDirSegment(commonPath || filePaths[0])
    }

    /**
     * Get the most meaningful directory segment from a '/'-separated path: the
     * segment after a leading 'src', otherwise the first segment. When the chosen
     * segment is the final path component, its extension is stripped so that
     * 'src/index.ts' yields 'index' exactly like a bare 'index.ts' does.
     */
    private getDirSegment(filePath: string): string {
        const parts = filePath.split('/')
        // Skip 'src' if present and followed by something
        const index = parts[0] === 'src' && parts.length >= 2 ? 1 : 0
        const segment = parts[index]
        if (index === parts.length - 1) {
            return path.basename(segment, path.extname(segment))
        }
        return segment
    }

    /** Infer a human-readable cluster name: separators become spaces, words are capitalized */
    private inferClusterName(filePaths: string[]): string {
        return this.inferClusterId(filePaths)
            .replace(/[-_]/g, ' ')
            .replace(/\b\w/g, c => c.toUpperCase())
    }
}
|
|
1
|
+
import * as path from 'node:path'
|
|
2
|
+
import type { DependencyGraph, ModuleCluster, GraphNode } from './types.js'
|
|
3
|
+
|
|
4
|
+
/**
 * ClusterDetector — analyzes the dependency graph and groups files
 * into natural module clusters using greedy agglomeration with coupling scores.
 *
 * Algorithm (Section 4 of Mikk Technical Reference):
 * 1. Build a coupling matrix:
 *    coupling(A,B) = (edges between A,B * 2) / (total edges of A + total edges of B)
 * 2. Sort files by total edge count (most connected first)
 * 3. Greedy agglomeration: seed clusters from most connected files,
 *    expand by pulling in strongly-coupled neighbors
 * 4. Files that did not end up in a cluster of at least `minClusterSize` files
 *    become single-file clusters with low confidence
 */
export class ClusterDetector {
    /**
     * @param graph Dependency graph to analyze.
     * @param minClusterSize Minimum number of files a grown cluster needs in order
     *   to be reported as a multi-file cluster.
     * @param minCouplingScore Minimum pairwise coupling score for a neighbor to be
     *   considered as an expansion candidate.
     */
    constructor(
        private graph: DependencyGraph,
        private minClusterSize: number = 2,
        private minCouplingScore: number = 0.15
    ) { }

    /**
     * Returns groups of files that naturally belong together, sorted by confidence
     * (highest first). Every file node of the graph appears in exactly one cluster.
     */
    detect(): ModuleCluster[] {
        const fileNodes = [...this.graph.nodes.values()].filter(n => n.type === 'file')
        if (fileNodes.length === 0) return []

        const files = fileNodes.map(n => n.id)
        const couplingMatrix = this.computeCouplingMatrix(files)
        // Files claimed by some seed's expansion; a claimed file is never a seed or candidate again.
        const assigned = new Set<string>()
        // Files that were actually reported as part of a grown cluster.
        const clustered = new Set<string>()
        const clusters: ModuleCluster[] = []

        // Sort files by total edge count (most connected first)
        const sortedFiles = [...files].sort((a, b) =>
            this.getTotalEdges(b) - this.getTotalEdges(a)
        )

        for (const seedFile of sortedFiles) {
            if (assigned.has(seedFile)) continue

            const cluster = this.growCluster(seedFile, couplingMatrix, assigned)
            if (cluster.length < this.minClusterSize) continue

            for (const member of cluster) clustered.add(member)
            const filePathsForCluster = cluster.map(id => this.getNodeFile(id))
            clusters.push({
                id: this.inferClusterId(filePathsForCluster),
                files: filePathsForCluster,
                confidence: this.computeClusterConfidence(cluster),
                suggestedName: this.inferClusterName(filePathsForCluster),
                functions: this.getFunctionIdsForFiles(cluster),
            })
        }

        // Orphan files get their own single-file clusters. This must be keyed on
        // `clustered`, not `assigned`: every seed is marked assigned before it grows,
        // so checking `assigned` would silently drop all files of undersized clusters.
        for (const file of files) {
            if (clustered.has(file)) continue
            const filePath = this.getNodeFile(file)
            clusters.push({
                id: this.inferClusterId([filePath]),
                files: [filePath],
                confidence: 0.3,
                suggestedName: this.inferClusterName([filePath]),
                functions: this.getFunctionIdsForFiles([file]),
            })
        }

        return clusters.sort((a, b) => b.confidence - a.confidence)
    }

    /**
     * Grow a cluster from `seedFile` by repeatedly pulling in unassigned neighbors
     * whose coupling score reaches `minCouplingScore` and whose average affinity to
     * the cluster exceeds their best affinity to any unassigned outside file.
     * Marks the seed and every pulled-in file in `assigned`.
     */
    private growCluster(
        seedFile: string,
        couplingMatrix: Map<string, Map<string, number>>,
        assigned: Set<string>
    ): string[] {
        const cluster: string[] = [seedFile]
        assigned.add(seedFile)

        let expanded = true
        while (expanded) {
            expanded = false

            // Iterate over a snapshot: members added during this pass are visited in the next one.
            for (const clusterFile of [...cluster]) {
                const partners = couplingMatrix.get(clusterFile)
                if (!partners) continue

                for (const [candidate, score] of partners) {
                    if (assigned.has(candidate)) continue
                    if (score < this.minCouplingScore) continue

                    // Is this candidate more coupled to this cluster than to others?
                    const clusterAffinity = this.computeClusterAffinity(
                        candidate, cluster, couplingMatrix
                    )
                    const bestOutsideAffinity = this.computeBestOutsideAffinity(
                        candidate, cluster, couplingMatrix, assigned
                    )

                    if (clusterAffinity > bestOutsideAffinity) {
                        cluster.push(candidate)
                        assigned.add(candidate)
                        expanded = true
                    }
                }
            }
        }

        return cluster
    }

    // ─── Coupling Matrix ──────────────────────────────────────────

    /**
     * Build coupling matrix: for every pair of files connected by at least one
     * 'imports' or 'calls' edge, compute
     * coupling(A,B) = (edges between A,B * 2) / (totalEdges(A) + totalEdges(B))
     * where totalEdges counts the in + out edges recorded on the file node itself.
     * The matrix is symmetric; files without cross-file edges have no entry.
     */
    private computeCouplingMatrix(files: string[]): Map<string, Map<string, number>> {
        const matrix = new Map<string, Map<string, number>>()
        const fileEdgeCounts = new Map<string, number>()

        // Count total edges per file
        for (const fileId of files) {
            fileEdgeCounts.set(fileId, this.getTotalEdges(fileId))
        }

        const fileSet = new Set(files)

        // Count edges between each pair of files (file-level imports + function-level calls)
        for (const edge of this.graph.edges) {
            if (edge.type !== 'imports' && edge.type !== 'calls') continue

            const sourceFile = this.getFileForNode(edge.source)
            const targetFile = this.getFileForNode(edge.target)

            if (!sourceFile || !targetFile || sourceFile === targetFile) continue
            if (!fileSet.has(sourceFile) || !fileSet.has(targetFile)) continue

            // Increment pair count for both directions
            this.incrementPair(matrix, sourceFile, targetFile)
            this.incrementPair(matrix, targetFile, sourceFile)
        }

        // Normalize raw pair counts to coupling scores (overwrites existing keys in place)
        for (const [file, partners] of matrix) {
            // `||` on purpose: a count of 0 must become 1 to keep the denominator non-zero
            const totalEdges = fileEdgeCounts.get(file) || 1
            for (const [partner, edgeCount] of partners) {
                const partnerEdges = fileEdgeCounts.get(partner) || 1
                partners.set(partner, (edgeCount * 2) / (totalEdges + partnerEdges))
            }
        }

        return matrix
    }

    /** Add one to the directed pair counter matrix[a][b], creating the row if needed */
    private incrementPair(matrix: Map<string, Map<string, number>>, a: string, b: string): void {
        let partners = matrix.get(a)
        if (!partners) {
            partners = new Map<string, number>()
            matrix.set(a, partners)
        }
        partners.set(b, (partners.get(b) ?? 0) + 1)
    }

    // ─── Affinity Computation ─────────────────────────────────────

    /**
     * Average coupling score of a candidate to all files in the cluster.
     * Cluster files the candidate has no coupling with contribute 0.
     */
    private computeClusterAffinity(
        candidate: string,
        cluster: string[],
        couplingMatrix: Map<string, Map<string, number>>
    ): number {
        if (cluster.length === 0) return 0
        const partners = couplingMatrix.get(candidate)
        let totalScore = 0
        for (const clusterFile of cluster) {
            totalScore += partners?.get(clusterFile) ?? 0
        }
        return totalScore / cluster.length
    }

    /** Best coupling score of a candidate to any file NOT in the cluster and not yet assigned */
    private computeBestOutsideAffinity(
        candidate: string,
        cluster: string[],
        couplingMatrix: Map<string, Map<string, number>>,
        assigned: Set<string>
    ): number {
        const partners = couplingMatrix.get(candidate)
        if (!partners) return 0

        const clusterSet = new Set(cluster)
        let best = 0
        for (const [partner, score] of partners) {
            if (clusterSet.has(partner) || assigned.has(partner)) continue
            if (score > best) best = score
        }
        return best
    }

    // ─── Confidence ───────────────────────────────────────────────

    /**
     * Confidence = (internal edges) / (internal edges + external edges)
     * Score of 1.0 = perfectly self-contained
     * Score of 0.0 = all edges go outside
     * Returns 0.5 when the files have no counted edges at all.
     *
     * Counted edges are the outgoing 'imports' edges of each file plus the outgoing
     * 'calls' edges of every node the file 'contains'.
     *
     * @param files File node IDs that make up the cluster.
     */
    computeClusterConfidence(files: string[]): number {
        const fileSet = new Set(files)
        let internalEdges = 0
        let externalEdges = 0

        for (const file of files) {
            for (const edge of this.graph.outEdges.get(file) ?? []) {
                if (edge.type === 'imports') {
                    // File-level import: internal when the target is a cluster member
                    if (fileSet.has(edge.target)) {
                        internalEdges++
                    } else {
                        externalEdges++
                    }
                } else if (edge.type === 'contains') {
                    // Function-level calls made by a node contained in this file
                    for (const callEdge of this.graph.outEdges.get(edge.target) ?? []) {
                        if (callEdge.type !== 'calls') continue
                        const targetNode = this.graph.nodes.get(callEdge.target)
                        // Calls to nodes missing from the graph are not counted at all
                        if (!targetNode) continue
                        if (fileSet.has(targetNode.file)) {
                            internalEdges++
                        } else {
                            externalEdges++
                        }
                    }
                }
            }
        }

        const total = internalEdges + externalEdges
        if (total === 0) return 0.5
        return internalEdges / total
    }

    // ─── Helpers ──────────────────────────────────────────────────

    /** Total edges (in + out) for a node */
    private getTotalEdges(nodeId: string): number {
        const outCount = (this.graph.outEdges.get(nodeId) ?? []).length
        const inCount = (this.graph.inEdges.get(nodeId) ?? []).length
        return outCount + inCount
    }

    /**
     * Get the file a node belongs to: the node ID itself for file nodes, the node's
     * `.file` for any other node, or null when the node is unknown or has no file.
     */
    private getFileForNode(nodeId: string): string | null {
        const node = this.graph.nodes.get(nodeId)
        if (!node) return null
        if (node.type === 'file') return nodeId
        // For function/class/generic nodes, find their parent file
        return node.file || null
    }

    /** Get the file path from a file node ID (the node's .file property, else the ID itself) */
    private getNodeFile(fileNodeId: string): string {
        const node = this.graph.nodes.get(fileNodeId)
        return node?.file || fileNodeId
    }

    /** Get the targets of all 'contains' edges leaving the given file node IDs */
    private getFunctionIdsForFiles(fileNodeIds: string[]): string[] {
        return fileNodeIds.flatMap(f =>
            (this.graph.outEdges.get(f) ?? [])
                .filter(e => e.type === 'contains')
                .map(e => e.target)
        )
    }

    /** Infer a module ID from file paths (common directory prefix) */
    private inferClusterId(filePaths: string[]): string {
        if (filePaths.length === 0) return 'unknown'
        if (filePaths.length === 1) {
            return this.getDirSegment(filePaths[0])
        }
        // Find the longest common directory prefix; the last segment of the first
        // path is its file name and is never part of the prefix.
        const segments = filePaths.map(f => f.split('/'))
        const firstSegments = segments[0]
        let commonLen = 0
        for (let i = 0; i < firstSegments.length - 1; i++) {
            if (!segments.every(s => s[i] === firstSegments[i])) break
            commonLen = i + 1
        }
        const commonPath = firstSegments.slice(0, commonLen).join('/')
        // Without any shared directory, fall back to the first file's own segment
        return this.getDirSegment(commonPath || filePaths[0])
    }

    /**
     * Get the most meaningful directory segment from a '/'-separated path: the
     * segment after a leading 'src', otherwise the first segment. When the chosen
     * segment is the final path component, its extension is stripped so that
     * 'src/index.ts' yields 'index' exactly like a bare 'index.ts' does.
     */
    private getDirSegment(filePath: string): string {
        const parts = filePath.split('/')
        // Skip 'src' if present and followed by something
        const index = parts[0] === 'src' && parts.length >= 2 ? 1 : 0
        const segment = parts[index]
        if (index === parts.length - 1) {
            return path.basename(segment, path.extname(segment))
        }
        return segment
    }

    /** Infer a human-readable cluster name: separators become spaces, words are capitalized */
    private inferClusterName(filePaths: string[]): string {
        return this.inferClusterId(filePaths)
            .replace(/[-_]/g, ' ')
            .replace(/\b\w/g, c => c.toUpperCase())
    }
}
|