@aiready/context-analyzer 0.9.4 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +10 -10
- package/.turbo/turbo-test.log +21 -9
- package/README.md +8 -0
- package/dist/__tests__/analyzer.test.d.ts +2 -0
- package/dist/__tests__/analyzer.test.d.ts.map +1 -0
- package/dist/__tests__/analyzer.test.js +157 -0
- package/dist/__tests__/analyzer.test.js.map +1 -0
- package/dist/__tests__/auto-detection.test.d.ts +2 -0
- package/dist/__tests__/auto-detection.test.d.ts.map +1 -0
- package/dist/__tests__/auto-detection.test.js +132 -0
- package/dist/__tests__/auto-detection.test.js.map +1 -0
- package/dist/__tests__/enhanced-cohesion.test.d.ts +2 -0
- package/dist/__tests__/enhanced-cohesion.test.d.ts.map +1 -0
- package/dist/__tests__/enhanced-cohesion.test.js +109 -0
- package/dist/__tests__/enhanced-cohesion.test.js.map +1 -0
- package/dist/__tests__/fragmentation-advanced.test.d.ts +2 -0
- package/dist/__tests__/fragmentation-advanced.test.d.ts.map +1 -0
- package/dist/__tests__/fragmentation-advanced.test.js +50 -0
- package/dist/__tests__/fragmentation-advanced.test.js.map +1 -0
- package/dist/__tests__/fragmentation-coupling.test.d.ts +2 -0
- package/dist/__tests__/fragmentation-coupling.test.d.ts.map +1 -0
- package/dist/__tests__/fragmentation-coupling.test.js +52 -0
- package/dist/__tests__/fragmentation-coupling.test.js.map +1 -0
- package/dist/__tests__/fragmentation-log.test.d.ts +2 -0
- package/dist/__tests__/fragmentation-log.test.d.ts.map +1 -0
- package/dist/__tests__/fragmentation-log.test.js +33 -0
- package/dist/__tests__/fragmentation-log.test.js.map +1 -0
- package/dist/__tests__/scoring.test.d.ts +2 -0
- package/dist/__tests__/scoring.test.d.ts.map +1 -0
- package/dist/__tests__/scoring.test.js +118 -0
- package/dist/__tests__/scoring.test.js.map +1 -0
- package/dist/__tests__/structural-cohesion.test.d.ts +2 -0
- package/dist/__tests__/structural-cohesion.test.d.ts.map +1 -0
- package/dist/__tests__/structural-cohesion.test.js +29 -0
- package/dist/__tests__/structural-cohesion.test.js.map +1 -0
- package/dist/analyzer.d.ts +100 -0
- package/dist/analyzer.d.ts.map +1 -0
- package/dist/analyzer.js +701 -0
- package/dist/analyzer.js.map +1 -0
- package/dist/analyzers/python-context.d.ts +38 -0
- package/dist/analyzers/python-context.d.ts.map +1 -0
- package/dist/analyzers/python-context.js +232 -0
- package/dist/analyzers/python-context.js.map +1 -0
- package/dist/chunk-BD4NWUVG.mjs +1242 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +139 -13
- package/dist/cli.js.map +1 -0
- package/dist/cli.mjs +1 -1
- package/dist/index.d.mts +3 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +139 -13
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +1 -1
- package/dist/scoring.d.ts +13 -0
- package/dist/scoring.d.ts.map +1 -0
- package/dist/scoring.js +133 -0
- package/dist/scoring.js.map +1 -0
- package/dist/semantic-analysis.d.ts +44 -0
- package/dist/semantic-analysis.d.ts.map +1 -0
- package/dist/semantic-analysis.js +241 -0
- package/dist/semantic-analysis.js.map +1 -0
- package/dist/types.d.ts +117 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +2 -2
- package/src/__tests__/fragmentation-advanced.test.ts +60 -0
- package/src/__tests__/fragmentation-coupling.test.ts +62 -0
- package/src/__tests__/fragmentation-log.test.ts +38 -0
- package/src/__tests__/structural-cohesion.test.ts +32 -0
- package/src/analyzer.ts +193 -18
- package/src/index.ts +34 -2
- package/src/types.ts +3 -0
- package/tsconfig.tsbuildinfo +1 -0
package/dist/analyzer.js
ADDED
|
@@ -0,0 +1,701 @@
|
|
|
1
|
+
import { estimateTokens, parseFileExports } from '@aiready/core';
|
|
2
|
+
import { buildCoUsageMatrix, buildTypeGraph, inferDomainFromSemantics } from './semantic-analysis';
|
|
3
|
+
/**
 * Auto-detect candidate domain keywords from the workspace folder structure.
 *
 * Walks every file path, collects folder-name segments (skipping common
 * infrastructure directories and any segment containing a dot, i.e. file
 * names), and singularizes each candidate so plural folder names ("orders")
 * can match singular keywords ("order") later on.
 *
 * @param {Array<{file: string}>} files - Entries whose `file` is a /-separated path.
 * @returns {string[]} Unique, lower-cased, singularized folder names.
 */
function extractDomainKeywordsFromPaths(files) {
    // Folders that carry no domain meaning and would pollute keyword detection.
    const IGNORED_FOLDERS = new Set([
        'src', 'lib', 'dist', 'build', 'node_modules', 'test', 'tests',
        '__tests__', 'spec', 'e2e', 'scripts', 'components', 'utils',
        'helpers', 'util', 'helper', 'api', 'apis',
    ]);
    const keywords = new Set();
    for (const { file } of files) {
        for (const rawSegment of file.split('/')) {
            const segment = rawSegment.toLowerCase();
            // Segments with a '.' are treated as file names, not folders.
            if (!segment || IGNORED_FOLDERS.has(segment) || segment.includes('.')) {
                continue;
            }
            keywords.add(singularize(segment));
        }
    }
    return Array.from(keywords);
}
|
|
24
|
+
/**
 * Best-effort singularization of common English plurals.
 *
 * Checks a small irregular-plural table first, then applies pattern rules:
 * "-ies" -> "-y" (categories -> category), "-ses" -> drop "es"
 * (classes -> class), and a generic trailing "-s" for words longer than
 * 3 characters (orders -> order) so short words like "gas" are untouched.
 * Words that match no rule are returned unchanged.
 *
 * @param {string} word - Lower-cased word to singularize.
 * @returns {string} Singular form (heuristic; e.g. "cases" becomes "cas").
 */
function singularize(word) {
    // Handle irregular plurals
    const irregulars = {
        people: 'person',
        children: 'child',
        men: 'man',
        women: 'woman',
    };
    // Object.hasOwn guards against inherited keys: a plain truthiness check
    // on irregulars[word] would return Object.prototype members for words
    // like "constructor", corrupting the result.
    if (Object.hasOwn(irregulars, word)) {
        return irregulars[word];
    }
    // Common plural patterns
    if (word.endsWith('ies')) {
        return word.slice(0, -3) + 'y'; // categories -> category
    }
    if (word.endsWith('ses')) {
        return word.slice(0, -2); // classes -> class
    }
    if (word.endsWith('s') && word.length > 3) {
        return word.slice(0, -1); // orders -> order
    }
    return word;
}
|
|
50
|
+
/**
 * Build a dependency graph from file contents.
 *
 * Three passes:
 *   1. Create one node per file (imports, exports, token cost, LOC) and an
 *      edge set of that file's import specifiers.
 *   2. Derive semantic structures (co-usage matrix, type graph) from the
 *      assembled graph and attach them to it.
 *   3. Re-infer per-export domains using the semantic analysis, storing the
 *      full ranked list on `exp.domains` and the top hit on
 *      `exp.inferredDomain` for backwards compatibility.
 *
 * NOTE(review): `edges` stores raw import specifiers ('./x', '@scope/pkg'),
 * not resolved file paths — lookups like `edges.get(dep)` elsewhere only hit
 * when a specifier happens to equal a node key; confirm resolution is done
 * upstream if path-keyed traversal is expected.
 *
 * @param {Array<{file: string, content: string}>} files - Workspace files to analyze.
 * @returns {{nodes: Map, edges: Map, coUsageMatrix: Map, typeGraph: *}} The annotated graph.
 */
export function buildDependencyGraph(files) {
    const nodes = new Map();
    const edges = new Map();
    // Auto-detect domain keywords from workspace folder structure
    const autoDetectedKeywords = extractDomainKeywordsFromPaths(files);
    // First pass: Create nodes with folder-based domain inference
    for (const { file, content } of files) {
        const imports = extractImportsFromContent(content);
        // Use AST-based extraction for better accuracy, fallback to regex
        const exports = extractExportsWithAST(content, file, { domainKeywords: autoDetectedKeywords }, imports);
        const tokenCost = estimateTokens(content);
        const linesOfCode = content.split('\n').length;
        nodes.set(file, {
            file,
            imports,
            exports,
            tokenCost,
            linesOfCode,
        });
        edges.set(file, new Set(imports));
    }
    // Second pass: Build semantic analysis graphs
    const graph = { nodes, edges };
    const coUsageMatrix = buildCoUsageMatrix(graph);
    const typeGraph = buildTypeGraph(graph);
    // Add semantic data to graph
    graph.coUsageMatrix = coUsageMatrix;
    graph.typeGraph = typeGraph;
    // Third pass: Enhance domain assignments with semantic analysis
    for (const [file, node] of nodes) {
        for (const exp of node.exports) {
            // Get semantic domain assignments
            const semanticAssignments = inferDomainFromSemantics(file, exp.name, graph, coUsageMatrix, typeGraph, exp.typeReferences);
            // Add multi-domain assignments with confidence scores
            exp.domains = semanticAssignments;
            // Keep inferredDomain for backwards compatibility (use highest confidence)
            if (semanticAssignments.length > 0) {
                exp.inferredDomain = semanticAssignments[0].domain;
            }
        }
    }
    return graph;
}
|
|
96
|
+
/**
 * Extract import specifiers from file content using regexes.
 *
 * Recognizes `import ... from '...'`, bare side-effect `import '...'`, and
 * CommonJS `require('...')`. Specifiers using the `node:` scheme (Node
 * built-ins) are excluded; all local, aliased, and package imports are kept.
 * The result is de-duplicated preserving first-seen order.
 *
 * NOTE: regex-based, so specifiers inside comments or string literals are
 * also picked up, and imports spanning multiple lines may be missed.
 *
 * @param {string} content - Raw source text.
 * @returns {string[]} Unique import specifiers.
 */
function extractImportsFromContent(content) {
    const IMPORT_PATTERNS = [
        /import\s+.*?\s+from\s+['"](.+?)['"]/g, // import ... from '...'
        /import\s+['"](.+?)['"]/g, // side-effect import '...'
        /require\(['"](.+?)['"]\)/g, // require('...')
    ];
    const found = new Set();
    for (const pattern of IMPORT_PATTERNS) {
        for (const match of content.matchAll(pattern)) {
            const specifier = match[1];
            if (specifier && !specifier.startsWith('node:')) {
                found.add(specifier);
            }
        }
    }
    return [...found];
}
|
|
120
|
+
/**
 * Calculate the maximum depth of the import tree rooted at `file`.
 *
 * Performs a depth-first walk over `graph.edges`. Nodes on the current
 * traversal path are tracked in `visited` and removed on backtrack, so the
 * same node may be explored again along a different branch; re-entering a
 * node on the current path (a cycle) terminates that branch at its depth.
 *
 * @param {string} file - Starting node key.
 * @param {{edges: Map<string, Set<string>>}} graph - Dependency graph.
 * @param {Set<string>} [visited] - Nodes on the current path (internal).
 * @param {number} [depth] - Depth of `file` itself (internal).
 * @returns {number} Deepest chain length reachable from `file`.
 */
export function calculateImportDepth(file, graph, visited = new Set(), depth = 0) {
    // Cycle: this node is already on the current path — stop here.
    if (visited.has(file)) {
        return depth;
    }
    const deps = graph.edges.get(file);
    if (!deps || deps.size === 0) {
        return depth; // Leaf: nothing further to import.
    }
    visited.add(file);
    const deepest = [...deps].reduce(
        (best, dep) => Math.max(best, calculateImportDepth(dep, graph, visited, depth + 1)),
        depth
    );
    visited.delete(file); // Backtrack so sibling branches may revisit this node.
    return deepest;
}
|
|
140
|
+
/**
 * Collect all transitive dependencies of `file` (depth-first, deduplicated).
 *
 * `visited` prevents infinite recursion on cycles: once a file has been
 * expanded it contributes nothing on subsequent visits.
 *
 * @param {string} file - Starting node key.
 * @param {{edges: Map<string, Set<string>>}} graph - Dependency graph.
 * @param {Set<string>} [visited] - Already-expanded nodes (internal).
 * @returns {string[]} Unique dependency keys in discovery order.
 */
export function getTransitiveDependencies(file, graph, visited = new Set()) {
    if (visited.has(file)) {
        return []; // Already expanded (or currently expanding): avoid infinite recursion.
    }
    visited.add(file);
    const directDeps = graph.edges.get(file);
    if (!directDeps || directDeps.size === 0) {
        return [];
    }
    // A Set keeps first-occurrence order while deduplicating.
    const collected = new Set();
    for (const dep of directDeps) {
        collected.add(dep);
        for (const transitive of getTransitiveDependencies(dep, graph, visited)) {
            collected.add(transitive);
        }
    }
    return [...collected];
}
|
|
159
|
+
/**
 * Total context budget for `file`: its own token cost plus the token cost of
 * every transitive dependency that has a node in the graph. Dependencies
 * without a node (e.g. unresolved specifiers) contribute nothing. Unknown
 * files cost 0.
 *
 * @param {string} file - Node key to budget.
 * @param {{nodes: Map, edges: Map}} graph - Dependency graph.
 * @returns {number} Token total.
 */
export function calculateContextBudget(file, graph) {
    const root = graph.nodes.get(file);
    if (!root) {
        return 0;
    }
    let budget = root.tokenCost;
    for (const dep of getTransitiveDependencies(file, graph)) {
        const depNode = graph.nodes.get(dep);
        if (depNode) {
            budget += depNode.tokenCost;
        }
    }
    return budget;
}
|
|
176
|
+
/**
 * Detect circular import chains via depth-first search.
 *
 * Classic colored DFS: `seen` marks nodes that have been entered at least
 * once, `onStack` marks nodes on the current traversal path. Hitting a node
 * already on the path records the sub-path from that node onward, closed by
 * repeating the node itself. Nodes already seen via another path are skipped,
 * so not every distinct cycle is necessarily reported.
 *
 * @param {{nodes: Map, edges: Map<string, Set<string>>}} graph - Dependency graph.
 * @returns {string[][]} Cycles as node-key paths, each ending where it begins.
 */
export function detectCircularDependencies(graph) {
    const cycles = [];
    const seen = new Set();
    const onStack = new Set();
    const walk = (file, path) => {
        if (onStack.has(file)) {
            // Found a cycle: capture the loop portion of the path.
            const start = path.indexOf(file);
            if (start !== -1) {
                cycles.push([...path.slice(start), file]);
            }
            return;
        }
        if (seen.has(file)) {
            return;
        }
        seen.add(file);
        onStack.add(file);
        path.push(file);
        const deps = graph.edges.get(file);
        if (deps) {
            for (const dep of deps) {
                walk(dep, [...path]); // Copy so sibling branches keep their own path.
            }
        }
        onStack.delete(file);
    };
    for (const file of graph.nodes.keys()) {
        if (!seen.has(file)) {
            walk(file, []);
        }
    }
    return cycles;
}
|
|
213
|
+
/**
 * Calculate cohesion score (how related are exports in a file).
 * Thin delegate kept for backwards compatibility: forwards all arguments
 * verbatim to calculateEnhancedCohesion, which combines domain-based,
 * import-based, and co-usage signals.
 * @param exports - Array of export information
 * @param filePath - Optional file path for context-aware scoring
 * @param options - Optional settings (e.g. coUsageMatrix, signal weights), forwarded unchanged
 * @returns Cohesion score in [0, 1]
 */
export function calculateCohesion(exports, filePath, options) {
    return calculateEnhancedCohesion(exports, filePath, options);
}
|
|
222
|
+
/**
 * Heuristically decide whether a path belongs to test/mock/fixture code.
 *
 * Matches case-insensitively on a fixed set of substrings.
 * NOTE(review): the bare 'test'/'spec' markers are broad — paths like
 * 'src/latest.js' or 'inspector.ts' also match (which subsumes the more
 * specific '__tests__', '.test.', '.spec.' markers); confirm this breadth is
 * intended before narrowing.
 *
 * @param {string} filePath - Path to classify.
 * @returns {boolean} True when any marker appears in the path.
 */
function isTestFile(filePath) {
    const TEST_MARKERS = ['test', 'spec', 'mock', 'fixture', '__tests__', '.test.', '.spec.'];
    const lower = filePath.toLowerCase();
    return TEST_MARKERS.some((marker) => lower.includes(marker));
}
|
|
235
|
+
/**
 * Fragmentation score for a domain's files: how scattered they are across
 * directories, in [0, 1].
 *
 * Linear mode (default): (uniqueDirs - 1) / (totalFiles - 1).
 * Log mode (options.useLogScale): log(uniqueDirs) / log(totalFiles) in the
 * requested base (options.logBase, default e), so early scattering
 * (1 -> 2 directories) is penalized far more than later scattering
 * (10 -> 11 directories).
 *
 * @param {string[]} files - /-separated file paths.
 * @param {string} domain - Domain label (unused here; part of the public signature).
 * @param {{useLogScale?: boolean, logBase?: number}} [options]
 * @returns {number} Fragmentation in [0, 1]; 0 for a single file.
 */
export function calculateFragmentation(files, domain, options) {
    // A lone file cannot be fragmented.
    if (files.length <= 1) {
        return 0;
    }
    const dirOf = (path) => path.split('/').slice(0, -1).join('/');
    const uniqueDirs = new Set(files.map(dirOf)).size;
    if (options?.useLogScale) {
        if (uniqueDirs <= 1) {
            return 0;
        }
        const base = options.logBase || Math.E;
        const numerator = Math.log(uniqueDirs) / Math.log(base);
        const denominator = Math.log(files.length) / Math.log(base);
        return denominator > 0 ? numerator / denominator : 0;
    }
    // Default linear normalization.
    return (uniqueDirs - 1) / (files.length - 1);
}
|
|
259
|
+
/**
 * Normalized Shannon entropy of the directory distribution of `files`.
 *
 * Returns a value in [0, 1]: 0 when every file lives in one directory (or
 * the input is empty), 1 when files are spread evenly across directories.
 * Root-level files (no '/') are bucketed under '.'.
 *
 * @param {string[]} files - /-separated file paths.
 * @returns {number} Entropy normalized by log2(directory count).
 */
export function calculatePathEntropy(files) {
    if (!files || files.length === 0) {
        return 0;
    }
    // Count files per directory.
    const perDir = new Map();
    for (const path of files) {
        const dir = path.split('/').slice(0, -1).join('/') || '.';
        perDir.set(dir, (perDir.get(dir) || 0) + 1);
    }
    const counts = [...perDir.values()];
    if (counts.length <= 1) {
        return 0; // Single directory -> zero entropy.
    }
    const total = counts.reduce((sum, c) => sum + c, 0);
    const entropy = counts.reduce((h, c) => {
        const p = c / total;
        return h - p * Math.log2(p);
    }, 0);
    // Normalize by the maximum possible entropy for this many directories.
    const maxEntropy = Math.log2(counts.length);
    return maxEntropy > 0 ? entropy / maxEntropy : 0;
}
|
|
284
|
+
/**
 * Directory-distance metric based on common-ancestor depth, in [0, 1].
 *
 * For every file pair, the number of shared leading path segments is
 * normalized by the deeper path's segment count; the pair's distance is
 * 1 minus that ratio. The result is the mean over all pairs: 0 means all
 * pairs share a deep common ancestor (low fragmentation), values near 1
 * mean they share only the root (high fragmentation).
 *
 * @param {string[]} files - /-separated file paths.
 * @returns {number} Mean pairwise distance; 0 for fewer than two files.
 */
export function calculateDirectoryDistance(files) {
    if (!files || files.length <= 1) {
        return 0;
    }
    const toSegments = (p) => p.split('/').filter(Boolean);
    const sharedPrefixLength = (a, b) => {
        const limit = Math.min(a.length, b.length);
        let n = 0;
        while (n < limit && a[n] === b[n]) {
            n += 1;
        }
        return n; // Shared segments counted from the root.
    };
    // Pre-split every path once instead of per pair.
    const segments = files.map(toSegments);
    let distanceSum = 0;
    let pairs = 0;
    for (let i = 0; i < segments.length; i++) {
        for (let j = i + 1; j < segments.length; j++) {
            const shared = sharedPrefixLength(segments[i], segments[j]);
            const deepest = Math.max(segments[i].length, segments[j].length);
            // Distance is the inverse of the normalized shared depth.
            distanceSum += 1 - (deepest > 0 ? shared / deepest : 0);
            pairs += 1;
        }
    }
    return pairs > 0 ? distanceSum / pairs : 0;
}
|
|
320
|
+
/**
 * Group files by their primary inferred domain and score each multi-file
 * group ("cluster") for fragmentation and cohesion.
 *
 * A cluster is only emitted for domains spanning at least two files. Its
 * fragmentation score is the base fragmentation discounted by up to 20%
 * proportionally to import cohesion (tightly-coupled files are penalized
 * less for being spread out). Clusters are returned most-fragmented first.
 *
 * NOTE(review): a file's "primary domain" is taken from its FIRST export
 * only (`domains[0]`); files whose later exports belong to other domains are
 * still assigned wholesale to the first — confirm this is intended.
 *
 * @param {{nodes: Map, edges: Map, coUsageMatrix?: Map}} graph - Annotated dependency graph.
 * @param {{useLogScale?: boolean}} [options] - Forwarded to calculateFragmentation.
 * @returns {Array<object>} Clusters sorted by descending fragmentationScore.
 */
export function detectModuleClusters(graph, options) {
    const domainMap = new Map();
    // Group files by their primary domain
    for (const [file, node] of graph.nodes.entries()) {
        const domains = node.exports.map((e) => e.inferredDomain || 'unknown');
        const primaryDomain = domains[0] || 'unknown';
        if (!domainMap.has(primaryDomain)) {
            domainMap.set(primaryDomain, []);
        }
        domainMap.get(primaryDomain).push(file);
    }
    const clusters = [];
    for (const [domain, files] of domainMap.entries()) {
        if (files.length < 2)
            continue; // Skip single-file domains
        // Sum token costs of all member files (missing nodes count as 0).
        const totalTokens = files.reduce((sum, file) => {
            const node = graph.nodes.get(file);
            return sum + (node?.tokenCost || 0);
        }, 0);
        const baseFragmentation = calculateFragmentation(files, domain, { useLogScale: !!options?.useLogScale });
        // Compute import-based cohesion across files in this domain cluster.
        // This measures how much the files actually "talk" to each other.
        // We'll compute average pairwise Jaccard similarity between each file's import lists.
        let importSimilarityTotal = 0;
        let importComparisons = 0;
        for (let i = 0; i < files.length; i++) {
            for (let j = i + 1; j < files.length; j++) {
                const f1 = files[i];
                const f2 = files[j];
                const n1 = graph.nodes.get(f1)?.imports || [];
                const n2 = graph.nodes.get(f2)?.imports || [];
                // Treat two empty import lists as not coupled (similarity 0)
                const similarity = (n1.length === 0 && n2.length === 0)
                    ? 0
                    : calculateJaccardSimilarity(n1, n2);
                importSimilarityTotal += similarity;
                importComparisons++;
            }
        }
        const importCohesion = importComparisons > 0 ? importSimilarityTotal / importComparisons : 0;
        // Coupling discount: if files are heavily importing each other, reduce fragmentation penalty.
        // Following recommendation: up to 20% discount proportional to import cohesion.
        const couplingDiscountFactor = 1 - 0.2 * importCohesion;
        const fragmentationScore = baseFragmentation * couplingDiscountFactor;
        // Additional metrics for richer reporting
        const pathEntropy = calculatePathEntropy(files);
        const directoryDistance = calculateDirectoryDistance(files);
        // Mean per-file cohesion; files without a node contribute 0 to the mean.
        const avgCohesion = files.reduce((sum, file) => {
            const node = graph.nodes.get(file);
            return sum + (node ? calculateCohesion(node.exports, file, { coUsageMatrix: graph.coUsageMatrix }) : 0);
        }, 0) / files.length;
        // Generate consolidation plan
        const targetFiles = Math.max(1, Math.ceil(files.length / 3)); // Aim to reduce by ~66%
        const consolidationPlan = generateConsolidationPlan(domain, files, targetFiles);
        clusters.push({
            domain,
            files,
            totalTokens,
            fragmentationScore,
            pathEntropy,
            directoryDistance,
            importCohesion,
            avgCohesion,
            suggestedStructure: {
                targetFiles,
                consolidationPlan,
            },
        });
    }
    // Sort by fragmentation score (most fragmented first)
    return clusters.sort((a, b) => b.fragmentationScore - a.fragmentationScore);
}
|
|
395
|
+
/**
 * Extract export information from file content with regexes.
 * Used as the fallback when AST parsing (extractExportsWithAST) fails.
 *
 * Recognized forms: `export [async] function`, `export [abstract] class`,
 * `export const|let|var` (all labeled 'const' as a coarse value-binding
 * label), `export type`, `export interface`, and `export default`
 * (named 'default').
 *
 * @param {string} content - Raw source text.
 * @param {string} [filePath] - Path of the file, for path-based domain inference.
 * @param {{domainKeywords?: string[]}} [domainOptions] - Extra domain keywords.
 * @param {string[]} [fileImports] - Import specifiers, for import-based domain inference.
 * @returns {Array<{name: string, type: string, inferredDomain: string}>}
 */
function extractExports(content, filePath, domainOptions, fileImports) {
    const exports = [];
    // Pattern/type pairs keep each label aligned with its regex — the
    // previous parallel arrays were easy to de-synchronize. The optional
    // `async`/`abstract` groups catch forms the old patterns missed entirely.
    const extractors = [
        { pattern: /export\s+(?:async\s+)?function\s+(\w+)/g, type: 'function' },
        { pattern: /export\s+(?:abstract\s+)?class\s+(\w+)/g, type: 'class' },
        { pattern: /export\s+(?:const|let|var)\s+(\w+)/g, type: 'const' },
        { pattern: /export\s+type\s+(\w+)/g, type: 'type' },
        { pattern: /export\s+interface\s+(\w+)/g, type: 'interface' },
        { pattern: /export\s+default/g, type: 'default' },
    ];
    for (const { pattern, type } of extractors) {
        let match;
        while ((match = pattern.exec(content)) !== null) {
            // The default-export pattern has no capture group -> name 'default'.
            const name = match[1] || 'default';
            const inferredDomain = inferDomain(name, filePath, domainOptions, fileImports);
            exports.push({ name, type, inferredDomain });
        }
    }
    return exports;
}
|
|
429
|
+
/**
 * Infer a domain label for an exported symbol.
 *
 * Resolution order (most to least reliable):
 *   1. Word-boundary match of domain keywords against the identifier's
 *      tokens (camelCase / snake_case / kebab-case aware).
 *   2. Substring match against the whole lower-cased identifier.
 *   3. Domain keywords found in the file's import-path segments.
 *   4. Domain keywords found in the file's own path segments.
 * Returns 'unknown' when nothing matches.
 *
 * @param {string} name - Exported identifier.
 * @param {string} [filePath] - Path of the declaring file.
 * @param {{domainKeywords?: string[]}} [domainOptions] - Extra keywords tried before the defaults.
 * @param {string[]} [fileImports] - Import specifiers of the declaring file.
 * @returns {string} Matched keyword or 'unknown'.
 */
function inferDomain(name, filePath, domainOptions, fileImports) {
    const lower = name.toLowerCase();
    // Tokenize identifier: split camelCase BEFORE lower-casing — lower-casing
    // first erases the case boundaries so the split regex can never match,
    // leaving one giant token. Then split snake_case/kebab-case/digit runs.
    const tokens = Array.from(new Set(name
        .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
        .toLowerCase()
        .replace(/[^a-z0-9]+/gi, ' ')
        .split(' ')
        .filter(Boolean)));
    // Domain keywords ordered from most specific to most general
    // This prevents generic terms like 'util' from matching before specific domains
    // NOTE: 'api', 'util', 'helper' are intentionally excluded as they are too generic
    const defaultKeywords = [
        'authentication',
        'authorization',
        'payment',
        'invoice',
        'customer',
        'product',
        'order',
        'cart',
        'user',
        'admin',
        'repository',
        'controller',
        'service',
        'config',
        'model',
        'view',
        'auth',
    ];
    // Caller-supplied keywords take priority over the defaults.
    const domainKeywords = domainOptions?.domainKeywords && domainOptions.domainKeywords.length
        ? [...domainOptions.domainKeywords, ...defaultKeywords]
        : defaultKeywords;
    // Word-boundary pass first: avoids false hits like 'cartesian' -> 'cart'.
    for (const keyword of domainKeywords) {
        if (tokens.includes(keyword)) {
            return keyword;
        }
    }
    // Fallback to substring matching for compound words the tokenizer
    // cannot split (e.g. all-lowercase concatenations).
    for (const keyword of domainKeywords) {
        if (lower.includes(keyword)) {
            return keyword;
        }
    }
    // Import-path domain inference: analyze import statements for domain hints
    if (fileImports && fileImports.length > 0) {
        for (const importPath of fileImports) {
            // Parse all segments, including those after '@' or '.'
            // e.g., '@/orders/service' -> ['orders', 'service']
            // '../payments/processor' -> ['payments', 'processor']
            const allSegments = importPath.split('/');
            const relevantSegments = allSegments.filter(s => {
                if (!s)
                    return false;
                // Skip '.' and '..' but keep everything else
                if (s === '.' || s === '..')
                    return false;
                // Skip a bare '@' segment but keep the path after it
                if (s.startsWith('@') && s.length === 1)
                    return false;
                return true;
            }).map(s => s.startsWith('@') ? s.slice(1) : s); // Strip '@' from scoped segments
            for (const segment of relevantSegments) {
                const segLower = segment.toLowerCase();
                const singularSegment = singularize(segLower);
                // Check if any domain keyword matches the import path segment (with singularization)
                for (const keyword of domainKeywords) {
                    if (singularSegment === keyword || segLower === keyword || segLower.includes(keyword)) {
                        return keyword;
                    }
                }
            }
        }
    }
    // Path-based fallback: check file path segments
    if (filePath) {
        // Auto-detect from path by checking against domain keywords (with singularization)
        const pathSegments = filePath.toLowerCase().split('/');
        for (const segment of pathSegments) {
            const singularSegment = singularize(segment);
            for (const keyword of domainKeywords) {
                if (singularSegment === keyword || segment === keyword || segment.includes(keyword)) {
                    return keyword;
                }
            }
        }
    }
    return 'unknown';
}
|
|
524
|
+
/**
 * Generate a human-readable consolidation plan for a fragmented domain.
 *
 * @param {string} domain - Domain label the files belong to.
 * @param {string[]} files - Paths of the fragmented files.
 * @param {number} targetFiles - Desired file count after consolidation.
 * @returns {string[]} Plan steps, or a single "no consolidation" line when
 *   the file count is already at or below the target.
 */
function generateConsolidationPlan(domain, files, targetFiles) {
    if (files.length <= targetFiles) {
        return [`No consolidation needed for ${domain}`];
    }
    // (A per-directory grouping used to be computed here but was never read;
    // the dead computation has been removed.)
    return [
        `Consolidate ${files.length} ${domain} files into ${targetFiles} cohesive file(s):`,
        `1. Create unified ${domain} module file`,
        `2. Move related functionality from ${files.length} scattered files`,
        `3. Update imports in dependent files`,
        `4. Remove old files after consolidation (verify with tests first)`,
    ];
}
|
|
548
|
+
/**
 * Extract exports using AST parsing (via @aiready/core's parseFileExports),
 * enriching each export with an inferred domain plus the per-export imports
 * and dependencies the parser reports.
 *
 * Falls back to regex-based extraction (extractExports) on ANY parse error;
 * the error itself is intentionally discarded — this is a best-effort path,
 * not a validation step.
 *
 * @param {string} content - Raw source text.
 * @param {string} filePath - Path of the file (used by parser and domain inference).
 * @param {{domainKeywords?: string[]}} [domainOptions] - Extra domain keywords.
 * @param {string[]} [fileImports] - File-level import specifiers for domain inference.
 * @returns {Array<{name, type, inferredDomain, imports?, dependencies?}>}
 */
export function extractExportsWithAST(content, filePath, domainOptions, fileImports) {
    try {
        const { exports: astExports } = parseFileExports(content, filePath);
        return astExports.map(exp => ({
            name: exp.name,
            type: exp.type,
            inferredDomain: inferDomain(exp.name, filePath, domainOptions, fileImports),
            imports: exp.imports,
            dependencies: exp.dependencies,
        }));
    }
    catch (error) {
        // Fallback to regex-based extraction (deliberate silent fallback;
        // `error` is unused by design).
        return extractExports(content, filePath, domainOptions, fileImports);
    }
}
|
|
568
|
+
/**
 * Calculate an enhanced cohesion score for a file's exports, in [0, 1].
 *
 * Combines up to three signals, with weights renormalized over whichever
 * signals are actually available:
 *   1. Import-based cohesion (Jaccard similarity of shared imports) —
 *      default weight 0.5; only used when at least one export has imports.
 *   2. Structural cohesion (co-usage entropy) — default weight 0.3; only
 *      used when both filePath and options.coUsageMatrix are provided.
 *   3. Domain-based cohesion (entropy of inferred domains) — default
 *      weight 0.2; always available.
 * Default weights can be overridden via options.weights.
 *
 * Empty or single-export files and test/mock/fixture files score a perfect 1.
 *
 * @param exports - Array of export information
 * @param filePath - Optional file path for context-aware scoring
 * @param options - Optional { coUsageMatrix, weights: { importBased, structural, domainBased } }
 * @returns Weighted cohesion score in [0, 1]
 */
export function calculateEnhancedCohesion(exports, filePath, options) {
    if (exports.length === 0)
        return 1;
    if (exports.length === 1)
        return 1;
    // Special case for test files
    if (filePath && isTestFile(filePath)) {
        return 1;
    }
    // Calculate domain-based cohesion (existing method)
    const domainCohesion = calculateDomainCohesion(exports);
    // Calculate import-based cohesion if imports are available
    const hasImportData = exports.some((e) => e.imports && e.imports.length > 0);
    const importCohesion = hasImportData ? calculateImportBasedCohesion(exports) : undefined;
    // Calculate structural cohesion (co-usage) if coUsageMatrix and filePath available
    const coUsageMatrix = options?.coUsageMatrix;
    const structuralCohesion = filePath && coUsageMatrix ? calculateStructuralCohesionFromCoUsage(filePath, coUsageMatrix) : undefined;
    // Default weights (can be overridden via options)
    const defaultWeights = { importBased: 0.5, structural: 0.3, domainBased: 0.2 };
    const weights = { ...defaultWeights, ...(options?.weights || {}) };
    // Collect available signals and normalize weights
    const signals = [];
    if (importCohesion !== undefined)
        signals.push({ score: importCohesion, weight: weights.importBased });
    if (structuralCohesion !== undefined)
        signals.push({ score: structuralCohesion, weight: weights.structural });
    // domain cohesion is always available
    signals.push({ score: domainCohesion, weight: weights.domainBased });
    const totalWeight = signals.reduce((s, el) => s + el.weight, 0);
    if (totalWeight === 0)
        return domainCohesion;
    // Weighted average over the available signals only.
    const combined = signals.reduce((sum, el) => sum + el.score * (el.weight / totalWeight), 0);
    return combined;
}
|
|
611
|
+
/**
 * Structural cohesion for a file derived from co-usage patterns, in [0, 1].
 *
 * Builds a probability distribution over the files commonly imported
 * alongside `file` and returns 1 minus its normalized entropy:
 *   - 1 => highly cohesive (co-usage concentrated on a small set, or no data)
 *   - 0 => maximally fragmented (co-usage spread uniformly across partners)
 *
 * @param {string} file - Node key to score.
 * @param {Map<string, Map<string, number>>} [coUsageMatrix] - Per-file co-usage counts.
 * @returns {number} Cohesion in [0, 1]; defaults to 1 when data is absent.
 */
export function calculateStructuralCohesionFromCoUsage(file, coUsageMatrix) {
    if (!coUsageMatrix) {
        return 1;
    }
    const partnerCounts = coUsageMatrix.get(file);
    if (!partnerCounts || partnerCounts.size === 0) {
        return 1;
    }
    let total = 0;
    for (const count of partnerCounts.values()) {
        total += count;
    }
    if (total === 0) {
        return 1;
    }
    // Probability distribution over co-imported files (zero counts dropped).
    const probabilities = [...partnerCounts.values()]
        .filter((count) => count > 0)
        .map((count) => count / total);
    if (probabilities.length <= 1) {
        return 1;
    }
    let entropy = 0;
    for (const p of probabilities) {
        entropy -= p * Math.log2(p);
    }
    // Invert normalized entropy: concentrated usage -> high cohesion.
    const maxEntropy = Math.log2(probabilities.length);
    return maxEntropy > 0 ? 1 - entropy / maxEntropy : 1;
}
|
|
645
|
+
/**
 * Cohesion of a file's exports based on shared imports, in [0, 1].
 *
 * Averages pairwise Jaccard similarity of the import lists over all export
 * pairs that actually have imports. Returns 1 when fewer than two exports
 * carry import data (not enough evidence to penalize).
 *
 * @param {Array<{imports?: string[]}>} exports - Export records.
 * @returns {number} Mean pairwise similarity.
 */
function calculateImportBasedCohesion(exports) {
    const withImports = exports.filter((exp) => exp.imports && exp.imports.length > 0);
    if (withImports.length < 2) {
        return 1; // Not enough data
    }
    // Average similarity over every unordered pair.
    let similaritySum = 0;
    let pairs = 0;
    for (let i = 0; i < withImports.length; i++) {
        for (let j = i + 1; j < withImports.length; j++) {
            similaritySum += calculateJaccardSimilarity(withImports[i].imports, withImports[j].imports);
            pairs += 1;
        }
    }
    return pairs > 0 ? similaritySum / pairs : 1;
}
|
|
667
|
+
/**
 * Jaccard similarity of two arrays, treated as sets: |A ∩ B| / |A ∪ B|.
 * Two empty inputs are conventionally identical (1); one empty input yields 0.
 * Duplicates within an input are ignored.
 *
 * @param {Array} arr1
 * @param {Array} arr2
 * @returns {number} Similarity in [0, 1].
 */
function calculateJaccardSimilarity(arr1, arr2) {
    const set1 = new Set(arr1);
    const set2 = new Set(arr2);
    if (set1.size === 0 && set2.size === 0) {
        return 1;
    }
    if (set1.size === 0 || set2.size === 0) {
        return 0;
    }
    let shared = 0;
    for (const item of set1) {
        if (set2.has(item)) {
            shared += 1;
        }
    }
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    const unionSize = set1.size + set2.size - shared;
    return shared / unionSize;
}
|
|
681
|
+
/**
 * Domain-based cohesion: 1 minus the normalized Shannon entropy of the
 * exports' inferred domains.
 *
 * Returns 1 when all exports share one domain (or there are <= 1 exports)
 * and 0 when every export belongs to a distinct domain. Normalization is
 * against log2(total exports), matching the original metric.
 *
 * @param {Array<{inferredDomain?: string}>} exports - Export records
 *   ('unknown' is used when no domain was inferred).
 * @returns {number} Cohesion in [0, 1].
 */
function calculateDomainCohesion(exports) {
    // Tally exports per domain.
    const domainCounts = new Map();
    for (const exp of exports) {
        const domain = exp.inferredDomain || 'unknown';
        domainCounts.set(domain, (domainCounts.get(domain) || 0) + 1);
    }
    const total = exports.length;
    let entropy = 0;
    for (const count of domainCounts.values()) {
        const p = count / total;
        if (p > 0) {
            entropy -= p * Math.log2(p);
        }
    }
    const maxEntropy = Math.log2(total);
    return maxEntropy > 0 ? 1 - entropy / maxEntropy : 1;
}
|
|
701
|
+
//# sourceMappingURL=analyzer.js.map
|