@aiready/context-analyzer 0.5.3 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +8 -8
- package/.turbo/turbo-test.log +10 -28
- package/COHESION-IMPROVEMENTS.md +202 -0
- package/dist/chunk-DD7UVNE3.mjs +678 -0
- package/dist/cli.js +74 -21
- package/dist/cli.mjs +1 -1
- package/dist/index.js +74 -21
- package/dist/index.mjs +1 -1
- package/package.json +2 -2
- package/src/__tests__/enhanced-cohesion.test.ts +126 -0
- package/src/analyzer.ts +135 -33
- package/src/types.ts +3 -0
package/dist/cli.js
CHANGED
|
@@ -36,7 +36,7 @@ function buildDependencyGraph(files) {
|
|
|
36
36
|
const edges = /* @__PURE__ */ new Map();
|
|
37
37
|
for (const { file, content } of files) {
|
|
38
38
|
const imports = extractImportsFromContent(content);
|
|
39
|
-
const exports2 = extractExports(content);
|
|
39
|
+
const exports2 = extractExportsWithAST(content, file);
|
|
40
40
|
const tokenCost = (0, import_core.estimateTokens)(content);
|
|
41
41
|
const linesOfCode = content.split("\n").length;
|
|
42
42
|
nodes.set(file, {
|
|
@@ -151,26 +151,7 @@ function detectCircularDependencies(graph) {
|
|
|
151
151
|
return cycles;
|
|
152
152
|
}
|
|
153
153
|
function calculateCohesion(exports2, filePath) {
|
|
154
|
-
|
|
155
|
-
if (exports2.length === 1) return 1;
|
|
156
|
-
if (filePath && isTestFile(filePath)) {
|
|
157
|
-
return 1;
|
|
158
|
-
}
|
|
159
|
-
const domains = exports2.map((e) => e.inferredDomain || "unknown");
|
|
160
|
-
const domainCounts = /* @__PURE__ */ new Map();
|
|
161
|
-
for (const domain of domains) {
|
|
162
|
-
domainCounts.set(domain, (domainCounts.get(domain) || 0) + 1);
|
|
163
|
-
}
|
|
164
|
-
const total = domains.length;
|
|
165
|
-
let entropy = 0;
|
|
166
|
-
for (const count of domainCounts.values()) {
|
|
167
|
-
const p = count / total;
|
|
168
|
-
if (p > 0) {
|
|
169
|
-
entropy -= p * Math.log2(p);
|
|
170
|
-
}
|
|
171
|
-
}
|
|
172
|
-
const maxEntropy = Math.log2(total);
|
|
173
|
-
return maxEntropy > 0 ? 1 - entropy / maxEntropy : 1;
|
|
154
|
+
return calculateEnhancedCohesion(exports2, filePath);
|
|
174
155
|
}
|
|
175
156
|
function isTestFile(filePath) {
|
|
176
157
|
const lower = filePath.toLowerCase();
|
|
@@ -315,6 +296,78 @@ function generateConsolidationPlan(domain, files, targetFiles) {
|
|
|
315
296
|
);
|
|
316
297
|
return plan;
|
|
317
298
|
}
|
|
299
|
+
function extractExportsWithAST(content, filePath) {
|
|
300
|
+
try {
|
|
301
|
+
const { exports: astExports } = (0, import_core.parseFileExports)(content, filePath);
|
|
302
|
+
return astExports.map((exp) => ({
|
|
303
|
+
name: exp.name,
|
|
304
|
+
type: exp.type,
|
|
305
|
+
inferredDomain: inferDomain(exp.name),
|
|
306
|
+
imports: exp.imports,
|
|
307
|
+
dependencies: exp.dependencies
|
|
308
|
+
}));
|
|
309
|
+
} catch (error) {
|
|
310
|
+
return extractExports(content);
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
function calculateEnhancedCohesion(exports2, filePath) {
|
|
314
|
+
if (exports2.length === 0) return 1;
|
|
315
|
+
if (exports2.length === 1) return 1;
|
|
316
|
+
if (filePath && isTestFile(filePath)) {
|
|
317
|
+
return 1;
|
|
318
|
+
}
|
|
319
|
+
const domainCohesion = calculateDomainCohesion(exports2);
|
|
320
|
+
const hasImportData = exports2.some((e) => e.imports && e.imports.length > 0);
|
|
321
|
+
if (!hasImportData) {
|
|
322
|
+
return domainCohesion;
|
|
323
|
+
}
|
|
324
|
+
const importCohesion = calculateImportBasedCohesion(exports2);
|
|
325
|
+
return importCohesion * 0.6 + domainCohesion * 0.4;
|
|
326
|
+
}
|
|
327
|
+
function calculateImportBasedCohesion(exports2) {
|
|
328
|
+
const exportsWithImports = exports2.filter((e) => e.imports && e.imports.length > 0);
|
|
329
|
+
if (exportsWithImports.length < 2) {
|
|
330
|
+
return 1;
|
|
331
|
+
}
|
|
332
|
+
let totalSimilarity = 0;
|
|
333
|
+
let comparisons = 0;
|
|
334
|
+
for (let i = 0; i < exportsWithImports.length; i++) {
|
|
335
|
+
for (let j = i + 1; j < exportsWithImports.length; j++) {
|
|
336
|
+
const exp1 = exportsWithImports[i];
|
|
337
|
+
const exp2 = exportsWithImports[j];
|
|
338
|
+
const similarity = calculateJaccardSimilarity(exp1.imports, exp2.imports);
|
|
339
|
+
totalSimilarity += similarity;
|
|
340
|
+
comparisons++;
|
|
341
|
+
}
|
|
342
|
+
}
|
|
343
|
+
return comparisons > 0 ? totalSimilarity / comparisons : 1;
|
|
344
|
+
}
|
|
345
|
+
function calculateJaccardSimilarity(arr1, arr2) {
|
|
346
|
+
if (arr1.length === 0 && arr2.length === 0) return 1;
|
|
347
|
+
if (arr1.length === 0 || arr2.length === 0) return 0;
|
|
348
|
+
const set1 = new Set(arr1);
|
|
349
|
+
const set2 = new Set(arr2);
|
|
350
|
+
const intersection = new Set([...set1].filter((x) => set2.has(x)));
|
|
351
|
+
const union = /* @__PURE__ */ new Set([...set1, ...set2]);
|
|
352
|
+
return intersection.size / union.size;
|
|
353
|
+
}
|
|
354
|
+
function calculateDomainCohesion(exports2) {
|
|
355
|
+
const domains = exports2.map((e) => e.inferredDomain || "unknown");
|
|
356
|
+
const domainCounts = /* @__PURE__ */ new Map();
|
|
357
|
+
for (const domain of domains) {
|
|
358
|
+
domainCounts.set(domain, (domainCounts.get(domain) || 0) + 1);
|
|
359
|
+
}
|
|
360
|
+
const total = domains.length;
|
|
361
|
+
let entropy = 0;
|
|
362
|
+
for (const count of domainCounts.values()) {
|
|
363
|
+
const p = count / total;
|
|
364
|
+
if (p > 0) {
|
|
365
|
+
entropy -= p * Math.log2(p);
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
const maxEntropy = Math.log2(total);
|
|
369
|
+
return maxEntropy > 0 ? 1 - entropy / maxEntropy : 1;
|
|
370
|
+
}
|
|
318
371
|
|
|
319
372
|
// src/index.ts
|
|
320
373
|
async function analyzeContext(options) {
|
package/dist/cli.mjs
CHANGED
package/dist/index.js
CHANGED
|
@@ -34,7 +34,7 @@ function buildDependencyGraph(files) {
|
|
|
34
34
|
const edges = /* @__PURE__ */ new Map();
|
|
35
35
|
for (const { file, content } of files) {
|
|
36
36
|
const imports = extractImportsFromContent(content);
|
|
37
|
-
const exports2 = extractExports(content);
|
|
37
|
+
const exports2 = extractExportsWithAST(content, file);
|
|
38
38
|
const tokenCost = (0, import_core.estimateTokens)(content);
|
|
39
39
|
const linesOfCode = content.split("\n").length;
|
|
40
40
|
nodes.set(file, {
|
|
@@ -149,26 +149,7 @@ function detectCircularDependencies(graph) {
|
|
|
149
149
|
return cycles;
|
|
150
150
|
}
|
|
151
151
|
function calculateCohesion(exports2, filePath) {
|
|
152
|
-
|
|
153
|
-
if (exports2.length === 1) return 1;
|
|
154
|
-
if (filePath && isTestFile(filePath)) {
|
|
155
|
-
return 1;
|
|
156
|
-
}
|
|
157
|
-
const domains = exports2.map((e) => e.inferredDomain || "unknown");
|
|
158
|
-
const domainCounts = /* @__PURE__ */ new Map();
|
|
159
|
-
for (const domain of domains) {
|
|
160
|
-
domainCounts.set(domain, (domainCounts.get(domain) || 0) + 1);
|
|
161
|
-
}
|
|
162
|
-
const total = domains.length;
|
|
163
|
-
let entropy = 0;
|
|
164
|
-
for (const count of domainCounts.values()) {
|
|
165
|
-
const p = count / total;
|
|
166
|
-
if (p > 0) {
|
|
167
|
-
entropy -= p * Math.log2(p);
|
|
168
|
-
}
|
|
169
|
-
}
|
|
170
|
-
const maxEntropy = Math.log2(total);
|
|
171
|
-
return maxEntropy > 0 ? 1 - entropy / maxEntropy : 1;
|
|
152
|
+
return calculateEnhancedCohesion(exports2, filePath);
|
|
172
153
|
}
|
|
173
154
|
function isTestFile(filePath) {
|
|
174
155
|
const lower = filePath.toLowerCase();
|
|
@@ -313,6 +294,78 @@ function generateConsolidationPlan(domain, files, targetFiles) {
|
|
|
313
294
|
);
|
|
314
295
|
return plan;
|
|
315
296
|
}
|
|
297
|
+
function extractExportsWithAST(content, filePath) {
|
|
298
|
+
try {
|
|
299
|
+
const { exports: astExports } = (0, import_core.parseFileExports)(content, filePath);
|
|
300
|
+
return astExports.map((exp) => ({
|
|
301
|
+
name: exp.name,
|
|
302
|
+
type: exp.type,
|
|
303
|
+
inferredDomain: inferDomain(exp.name),
|
|
304
|
+
imports: exp.imports,
|
|
305
|
+
dependencies: exp.dependencies
|
|
306
|
+
}));
|
|
307
|
+
} catch (error) {
|
|
308
|
+
return extractExports(content);
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
function calculateEnhancedCohesion(exports2, filePath) {
|
|
312
|
+
if (exports2.length === 0) return 1;
|
|
313
|
+
if (exports2.length === 1) return 1;
|
|
314
|
+
if (filePath && isTestFile(filePath)) {
|
|
315
|
+
return 1;
|
|
316
|
+
}
|
|
317
|
+
const domainCohesion = calculateDomainCohesion(exports2);
|
|
318
|
+
const hasImportData = exports2.some((e) => e.imports && e.imports.length > 0);
|
|
319
|
+
if (!hasImportData) {
|
|
320
|
+
return domainCohesion;
|
|
321
|
+
}
|
|
322
|
+
const importCohesion = calculateImportBasedCohesion(exports2);
|
|
323
|
+
return importCohesion * 0.6 + domainCohesion * 0.4;
|
|
324
|
+
}
|
|
325
|
+
function calculateImportBasedCohesion(exports2) {
|
|
326
|
+
const exportsWithImports = exports2.filter((e) => e.imports && e.imports.length > 0);
|
|
327
|
+
if (exportsWithImports.length < 2) {
|
|
328
|
+
return 1;
|
|
329
|
+
}
|
|
330
|
+
let totalSimilarity = 0;
|
|
331
|
+
let comparisons = 0;
|
|
332
|
+
for (let i = 0; i < exportsWithImports.length; i++) {
|
|
333
|
+
for (let j = i + 1; j < exportsWithImports.length; j++) {
|
|
334
|
+
const exp1 = exportsWithImports[i];
|
|
335
|
+
const exp2 = exportsWithImports[j];
|
|
336
|
+
const similarity = calculateJaccardSimilarity(exp1.imports, exp2.imports);
|
|
337
|
+
totalSimilarity += similarity;
|
|
338
|
+
comparisons++;
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
return comparisons > 0 ? totalSimilarity / comparisons : 1;
|
|
342
|
+
}
|
|
343
|
+
function calculateJaccardSimilarity(arr1, arr2) {
|
|
344
|
+
if (arr1.length === 0 && arr2.length === 0) return 1;
|
|
345
|
+
if (arr1.length === 0 || arr2.length === 0) return 0;
|
|
346
|
+
const set1 = new Set(arr1);
|
|
347
|
+
const set2 = new Set(arr2);
|
|
348
|
+
const intersection = new Set([...set1].filter((x) => set2.has(x)));
|
|
349
|
+
const union = /* @__PURE__ */ new Set([...set1, ...set2]);
|
|
350
|
+
return intersection.size / union.size;
|
|
351
|
+
}
|
|
352
|
+
function calculateDomainCohesion(exports2) {
|
|
353
|
+
const domains = exports2.map((e) => e.inferredDomain || "unknown");
|
|
354
|
+
const domainCounts = /* @__PURE__ */ new Map();
|
|
355
|
+
for (const domain of domains) {
|
|
356
|
+
domainCounts.set(domain, (domainCounts.get(domain) || 0) + 1);
|
|
357
|
+
}
|
|
358
|
+
const total = domains.length;
|
|
359
|
+
let entropy = 0;
|
|
360
|
+
for (const count of domainCounts.values()) {
|
|
361
|
+
const p = count / total;
|
|
362
|
+
if (p > 0) {
|
|
363
|
+
entropy -= p * Math.log2(p);
|
|
364
|
+
}
|
|
365
|
+
}
|
|
366
|
+
const maxEntropy = Math.log2(total);
|
|
367
|
+
return maxEntropy > 0 ? 1 - entropy / maxEntropy : 1;
|
|
368
|
+
}
|
|
316
369
|
|
|
317
370
|
// src/index.ts
|
|
318
371
|
async function getSmartDefaults(directory, userOptions) {
|
package/dist/index.mjs
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aiready/context-analyzer",
|
|
3
|
-
"version": "0.5.3",
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"description": "AI context window cost analysis - detect fragmented code, deep import chains, and expensive context budgets",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
"commander": "^12.1.0",
|
|
51
51
|
"chalk": "^5.3.0",
|
|
52
52
|
"prompts": "^2.4.2",
|
|
53
|
-
"@aiready/core": "0.
|
|
53
|
+
"@aiready/core": "0.6.0"
|
|
54
54
|
},
|
|
55
55
|
"devDependencies": {
|
|
56
56
|
"@types/node": "^22.10.2",
|
|
package/src/__tests__/enhanced-cohesion.test.ts
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { calculateCohesion } from '../analyzer';
|
|
3
|
+
import type { ExportInfo } from '../types';
|
|
4
|
+
|
|
5
|
+
describe('Enhanced Cohesion Calculation', () => {
|
|
6
|
+
it('should use domain-based cohesion when no import data available', () => {
|
|
7
|
+
const exports: ExportInfo[] = [
|
|
8
|
+
{ name: 'getUserData', type: 'function', inferredDomain: 'user' },
|
|
9
|
+
{ name: 'getProductData', type: 'function', inferredDomain: 'product' },
|
|
10
|
+
];
|
|
11
|
+
|
|
12
|
+
const cohesion = calculateCohesion(exports);
|
|
13
|
+
|
|
14
|
+
// With mixed domains (user, product) and no import data, should use domain-based calculation
|
|
15
|
+
// Domain entropy for 2 different domains = low cohesion
|
|
16
|
+
expect(cohesion).toBeLessThan(0.5);
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
it('should use import-based cohesion when import data available', () => {
|
|
20
|
+
const exports: ExportInfo[] = [
|
|
21
|
+
{
|
|
22
|
+
name: 'getUserData',
|
|
23
|
+
type: 'function',
|
|
24
|
+
inferredDomain: 'user',
|
|
25
|
+
imports: ['react', 'axios', 'lodash'],
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
name: 'getProductData',
|
|
29
|
+
type: 'function',
|
|
30
|
+
inferredDomain: 'product',
|
|
31
|
+
imports: ['react', 'axios', 'lodash'], // Same imports!
|
|
32
|
+
},
|
|
33
|
+
];
|
|
34
|
+
|
|
35
|
+
const cohesion = calculateCohesion(exports);
|
|
36
|
+
|
|
37
|
+
// Even though domains differ, imports are identical (Jaccard = 1.0)
|
|
38
|
+
// Enhanced cohesion = 0.6 * 1.0 + 0.4 * 0.0 (different domains) = 0.6
|
|
39
|
+
// Should be >= 0.6 (import-based weight)
|
|
40
|
+
expect(cohesion).toBeGreaterThanOrEqual(0.6);
|
|
41
|
+
});
|
|
42
|
+
|
|
43
|
+
it('should weight import-based similarity higher than domain-based', () => {
|
|
44
|
+
const exportsWithSharedImports: ExportInfo[] = [
|
|
45
|
+
{
|
|
46
|
+
name: 'getUserData',
|
|
47
|
+
type: 'function',
|
|
48
|
+
inferredDomain: 'user',
|
|
49
|
+
imports: ['react', 'axios'],
|
|
50
|
+
},
|
|
51
|
+
{
|
|
52
|
+
name: 'getProductData',
|
|
53
|
+
type: 'function',
|
|
54
|
+
inferredDomain: 'product',
|
|
55
|
+
imports: ['react', 'axios'],
|
|
56
|
+
},
|
|
57
|
+
];
|
|
58
|
+
|
|
59
|
+
const exportsWithoutSharedImports: ExportInfo[] = [
|
|
60
|
+
{
|
|
61
|
+
name: 'getUserData',
|
|
62
|
+
type: 'function',
|
|
63
|
+
inferredDomain: 'user',
|
|
64
|
+
imports: ['react', 'axios'],
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
name: 'getProductData',
|
|
68
|
+
type: 'function',
|
|
69
|
+
inferredDomain: 'product',
|
|
70
|
+
imports: ['lodash', 'moment'],
|
|
71
|
+
},
|
|
72
|
+
];
|
|
73
|
+
|
|
74
|
+
const cohesionWithShared = calculateCohesion(exportsWithSharedImports);
|
|
75
|
+
const cohesionWithoutShared = calculateCohesion(exportsWithoutSharedImports);
|
|
76
|
+
|
|
77
|
+
// Shared imports should result in higher cohesion
|
|
78
|
+
expect(cohesionWithShared).toBeGreaterThan(cohesionWithoutShared);
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
it('should handle mixed case: some exports with imports, some without', () => {
|
|
82
|
+
const exports: ExportInfo[] = [
|
|
83
|
+
{
|
|
84
|
+
name: 'getUserData',
|
|
85
|
+
type: 'function',
|
|
86
|
+
inferredDomain: 'user',
|
|
87
|
+
imports: ['react', 'axios'],
|
|
88
|
+
},
|
|
89
|
+
{
|
|
90
|
+
name: 'getProductData',
|
|
91
|
+
type: 'function',
|
|
92
|
+
inferredDomain: 'product',
|
|
93
|
+
// No imports field
|
|
94
|
+
},
|
|
95
|
+
];
|
|
96
|
+
|
|
97
|
+
const cohesion = calculateCohesion(exports);
|
|
98
|
+
|
|
99
|
+
// Only one export has import data, so the import component defaults to 1 and is still blended with domain cohesion: 0.6 * 1 + 0.4 * 0 = 0.6
|
|
100
|
+
expect(cohesion).toBeGreaterThan(0);
|
|
101
|
+
expect(cohesion).toBeLessThan(1);
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
it('should return 1 for single export', () => {
|
|
105
|
+
const exports: ExportInfo[] = [
|
|
106
|
+
{
|
|
107
|
+
name: 'getUserData',
|
|
108
|
+
type: 'function',
|
|
109
|
+
inferredDomain: 'user',
|
|
110
|
+
imports: ['react'],
|
|
111
|
+
},
|
|
112
|
+
];
|
|
113
|
+
|
|
114
|
+
expect(calculateCohesion(exports)).toBe(1);
|
|
115
|
+
});
|
|
116
|
+
|
|
117
|
+
it('should return 1 for test files regardless of domains or imports', () => {
|
|
118
|
+
const exports: ExportInfo[] = [
|
|
119
|
+
{ name: 'testUserLogin', type: 'function', inferredDomain: 'user', imports: ['react'] },
|
|
120
|
+
{ name: 'testProductView', type: 'function', inferredDomain: 'product', imports: [] },
|
|
121
|
+
];
|
|
122
|
+
|
|
123
|
+
const cohesion = calculateCohesion(exports, 'src/utils/test-helpers.ts');
|
|
124
|
+
expect(cohesion).toBe(1);
|
|
125
|
+
});
|
|
126
|
+
});
|
package/src/analyzer.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { estimateTokens } from '@aiready/core';
|
|
1
|
+
import { estimateTokens, parseFileExports, calculateImportSimilarity, type ExportWithImports } from '@aiready/core';
|
|
2
2
|
import type {
|
|
3
3
|
ContextAnalysisResult,
|
|
4
4
|
DependencyGraph,
|
|
@@ -24,7 +24,10 @@ export function buildDependencyGraph(
|
|
|
24
24
|
// First pass: Create nodes
|
|
25
25
|
for (const { file, content } of files) {
|
|
26
26
|
const imports = extractImportsFromContent(content);
|
|
27
|
-
|
|
27
|
+
|
|
28
|
+
// Use AST-based extraction for better accuracy, fallback to regex
|
|
29
|
+
const exports = extractExportsWithAST(content, file);
|
|
30
|
+
|
|
28
31
|
const tokenCost = estimateTokens(content);
|
|
29
32
|
const linesOfCode = content.split('\n').length;
|
|
30
33
|
|
|
@@ -199,41 +202,12 @@ export function detectCircularDependencies(
|
|
|
199
202
|
|
|
200
203
|
/**
|
|
201
204
|
* Calculate cohesion score (how related are exports in a file)
|
|
202
|
-
* Uses
|
|
205
|
+
* Uses enhanced calculation combining domain-based and import-based analysis
|
|
203
206
|
* @param exports - Array of export information
|
|
204
207
|
* @param filePath - Optional file path for context-aware scoring
|
|
205
208
|
*/
|
|
206
209
|
export function calculateCohesion(exports: ExportInfo[], filePath?: string): number {
|
|
207
|
-
|
|
208
|
-
if (exports.length === 1) return 1; // Single export = perfect cohesion
|
|
209
|
-
|
|
210
|
-
// Special case: Test/mock/fixture files are expected to have multi-domain exports
|
|
211
|
-
// They serve a single purpose (testing) even if they mock different domains
|
|
212
|
-
if (filePath && isTestFile(filePath)) {
|
|
213
|
-
return 1; // Test utilities are inherently cohesive despite mixed domains
|
|
214
|
-
}
|
|
215
|
-
|
|
216
|
-
const domains = exports.map((e) => e.inferredDomain || 'unknown');
|
|
217
|
-
const domainCounts = new Map<string, number>();
|
|
218
|
-
|
|
219
|
-
for (const domain of domains) {
|
|
220
|
-
domainCounts.set(domain, (domainCounts.get(domain) || 0) + 1);
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
// Calculate Shannon entropy
|
|
224
|
-
const total = domains.length;
|
|
225
|
-
let entropy = 0;
|
|
226
|
-
|
|
227
|
-
for (const count of domainCounts.values()) {
|
|
228
|
-
const p = count / total;
|
|
229
|
-
if (p > 0) {
|
|
230
|
-
entropy -= p * Math.log2(p);
|
|
231
|
-
}
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
// Normalize to 0-1 (higher = better cohesion)
|
|
235
|
-
const maxEntropy = Math.log2(total);
|
|
236
|
-
return maxEntropy > 0 ? 1 - entropy / maxEntropy : 1;
|
|
210
|
+
return calculateEnhancedCohesion(exports, filePath);
|
|
237
211
|
}
|
|
238
212
|
|
|
239
213
|
/**
|
|
@@ -460,3 +434,131 @@ function generateConsolidationPlan(
|
|
|
460
434
|
|
|
461
435
|
return plan;
|
|
462
436
|
}
|
|
437
|
+
|
|
438
|
+
/**
 * Extract a file's exports using the AST parser (`parseFileExports`).
 *
 * Every parsed export is mapped to an `ExportInfo`: `name`, `type`, `imports`
 * and `dependencies` are passed through from the parser result, and
 * `inferredDomain` is derived from the export name via `inferDomain`.
 *
 * If parsing or mapping throws, the error is discarded and the result of the
 * regex-based `extractExports(content)` is returned instead.
 * NOTE(review): exports produced by the fallback presumably carry no
 * `imports`/`dependencies` — confirm against `extractExports`.
 *
 * @param content - Source text of the file
 * @param filePath - Path of the file, forwarded to `parseFileExports`
 * @returns Export descriptors for the file
 */
export function extractExportsWithAST(content: string, filePath: string): ExportInfo[] {
  try {
    const { exports: astExports } = parseFileExports(content, filePath);

    return astExports.map(({ name, type, imports, dependencies }) => ({
      name,
      type,
      inferredDomain: inferDomain(name),
      imports,
      dependencies,
    }));
  } catch {
    // Best-effort: any AST failure falls back to regex-based extraction.
    return extractExports(content);
  }
}
|
|
458
|
+
|
|
459
|
+
/**
 * Calculate an enhanced cohesion score using both domain inference and
 * import similarity.
 *
 * Rules, in order:
 * 1. Zero or one export scores 1 (nothing to compare).
 * 2. A file recognised by `isTestFile` scores 1.
 * 3. If no export has a non-empty `imports` list, the score is the
 *    domain-based cohesion (entropy of inferred domains) alone.
 * 4. Otherwise the score is 60% import-based (Jaccard similarity of imports)
 *    plus 40% domain-based.
 *
 * Note: rule 4 applies as soon as ANY export has import data. When fewer than
 * two exports have imports, `calculateImportBasedCohesion` returns 1, so the
 * result is `0.6 + 0.4 * domainCohesion` in that case.
 *
 * @param exports - Array of export information
 * @param filePath - Optional file path, used only for the test-file check
 * @returns Cohesion score (higher = more cohesive)
 */
export function calculateEnhancedCohesion(
  exports: ExportInfo[],
  filePath?: string
): number {
  // Import analysis is weighted higher than name-based domain inference.
  const IMPORT_WEIGHT = 0.6;
  const DOMAIN_WEIGHT = 0.4;

  if (exports.length <= 1) return 1;

  // Special case for test files
  if (filePath && isTestFile(filePath)) return 1;

  const domainCohesion = calculateDomainCohesion(exports);

  const hasImportData = exports.some((e) => e.imports && e.imports.length > 0);
  if (!hasImportData) {
    // No import data available, use domain-based only
    return domainCohesion;
  }

  const importCohesion = calculateImportBasedCohesion(exports);

  return importCohesion * IMPORT_WEIGHT + domainCohesion * DOMAIN_WEIGHT;
}
|
|
496
|
+
|
|
497
|
+
/**
 * Calculate cohesion based on shared imports.
 *
 * Exports without a non-empty `imports` list are ignored. The score is the
 * mean Jaccard similarity over every unordered pair of the remaining exports.
 *
 * @param exports - Array of export information
 * @returns Mean pairwise import similarity, or 1 when fewer than two exports
 *          have import data (not enough data to compare)
 */
function calculateImportBasedCohesion(exports: ExportInfo[]): number {
  // The predicate narrows `imports` to a defined array, so no assertions are needed below.
  const exportsWithImports = exports.filter(
    (e): e is ExportInfo & { imports: string[] } => !!e.imports && e.imports.length > 0
  );

  if (exportsWithImports.length < 2) {
    return 1; // Not enough data
  }

  // Accumulate similarity over each pair (i, j) with i < j.
  let totalSimilarity = 0;
  let comparisons = 0;

  exportsWithImports.forEach((left, i) => {
    for (const right of exportsWithImports.slice(i + 1)) {
      totalSimilarity += calculateJaccardSimilarity(left.imports, right.imports);
      comparisons++;
    }
  });

  // At least two entries remain here, so there is at least one comparison.
  return totalSimilarity / comparisons;
}
|
|
524
|
+
|
|
525
|
+
/**
 * Calculate the Jaccard similarity between two string arrays, treated as sets
 * (duplicates within an array are ignored): size of the intersection divided
 * by size of the union.
 *
 * @param arr1 - First list of items
 * @param arr2 - Second list of items
 * @returns 1 when both arrays are empty, 0 when exactly one is empty,
 *          otherwise a ratio between 0 and 1
 */
function calculateJaccardSimilarity(arr1: readonly string[], arr2: readonly string[]): number {
  if (arr1.length === 0 && arr2.length === 0) return 1;
  if (arr1.length === 0 || arr2.length === 0) return 0;

  const set1 = new Set(arr1);
  const set2 = new Set(arr2);

  // Count shared items directly instead of building intersection/union sets.
  let shared = 0;
  for (const item of set1) {
    if (set2.has(item)) shared++;
  }

  // Union size by inclusion-exclusion: |A| + |B| - |A and B|.
  return shared / (set1.size + set2.size - shared);
}
|
|
540
|
+
|
|
541
|
+
/**
 * Calculate domain-based cohesion from the Shannon entropy of the exports'
 * inferred domains, normalised by the maximum possible entropy
 * (`log2` of the export count) and inverted so that higher = more cohesive.
 *
 * Exports with a missing or empty `inferredDomain` are counted under
 * `'unknown'`.
 *
 * @param exports - Array of export information
 * @returns Score in [0, 1]; 1 when there are fewer than two exports or all
 *          exports share one domain, 0 when every export has its own domain
 */
function calculateDomainCohesion(exports: ExportInfo[]): number {
  const domainCounts = new Map<string, number>();
  for (const { inferredDomain } of exports) {
    const domain = inferredDomain || 'unknown';
    domainCounts.set(domain, (domainCounts.get(domain) ?? 0) + 1);
  }

  const total = exports.length;
  // log2(total) is not positive for 0 or 1 exports, so there is nothing to normalise by.
  if (total <= 1) return 1;

  // Every count is at least 1, so p > 0 and log2(p) is finite.
  let entropy = 0;
  for (const count of domainCounts.values()) {
    const p = count / total;
    entropy -= p * Math.log2(p);
  }

  const maxEntropy = Math.log2(total);
  // Clamp: floating-point rounding can push the ratio marginally past 1.
  return Math.min(1, Math.max(0, 1 - entropy / maxEntropy));
}
|
package/src/types.ts
CHANGED
|
@@ -101,4 +101,7 @@ export interface ExportInfo {
|
|
|
101
101
|
name: string;
|
|
102
102
|
type: 'function' | 'class' | 'const' | 'type' | 'interface' | 'default';
|
|
103
103
|
inferredDomain?: string; // Inferred from name/usage
|
|
104
|
+
imports?: string[]; // Imports used by this export (for import-based cohesion)
|
|
105
|
+
dependencies?: string[]; // Other exports from same file this depends on
|
|
104
106
|
}
|
|
107
|
+
|