@aiready/context-analyzer 0.9.4 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +10 -10
- package/.turbo/turbo-test.log +21 -9
- package/README.md +8 -0
- package/dist/__tests__/analyzer.test.d.ts +2 -0
- package/dist/__tests__/analyzer.test.d.ts.map +1 -0
- package/dist/__tests__/analyzer.test.js +157 -0
- package/dist/__tests__/analyzer.test.js.map +1 -0
- package/dist/__tests__/auto-detection.test.d.ts +2 -0
- package/dist/__tests__/auto-detection.test.d.ts.map +1 -0
- package/dist/__tests__/auto-detection.test.js +132 -0
- package/dist/__tests__/auto-detection.test.js.map +1 -0
- package/dist/__tests__/enhanced-cohesion.test.d.ts +2 -0
- package/dist/__tests__/enhanced-cohesion.test.d.ts.map +1 -0
- package/dist/__tests__/enhanced-cohesion.test.js +109 -0
- package/dist/__tests__/enhanced-cohesion.test.js.map +1 -0
- package/dist/__tests__/fragmentation-advanced.test.d.ts +2 -0
- package/dist/__tests__/fragmentation-advanced.test.d.ts.map +1 -0
- package/dist/__tests__/fragmentation-advanced.test.js +50 -0
- package/dist/__tests__/fragmentation-advanced.test.js.map +1 -0
- package/dist/__tests__/fragmentation-coupling.test.d.ts +2 -0
- package/dist/__tests__/fragmentation-coupling.test.d.ts.map +1 -0
- package/dist/__tests__/fragmentation-coupling.test.js +52 -0
- package/dist/__tests__/fragmentation-coupling.test.js.map +1 -0
- package/dist/__tests__/fragmentation-log.test.d.ts +2 -0
- package/dist/__tests__/fragmentation-log.test.d.ts.map +1 -0
- package/dist/__tests__/fragmentation-log.test.js +33 -0
- package/dist/__tests__/fragmentation-log.test.js.map +1 -0
- package/dist/__tests__/scoring.test.d.ts +2 -0
- package/dist/__tests__/scoring.test.d.ts.map +1 -0
- package/dist/__tests__/scoring.test.js +118 -0
- package/dist/__tests__/scoring.test.js.map +1 -0
- package/dist/__tests__/structural-cohesion.test.d.ts +2 -0
- package/dist/__tests__/structural-cohesion.test.d.ts.map +1 -0
- package/dist/__tests__/structural-cohesion.test.js +29 -0
- package/dist/__tests__/structural-cohesion.test.js.map +1 -0
- package/dist/analyzer.d.ts +100 -0
- package/dist/analyzer.d.ts.map +1 -0
- package/dist/analyzer.js +701 -0
- package/dist/analyzer.js.map +1 -0
- package/dist/analyzers/python-context.d.ts +38 -0
- package/dist/analyzers/python-context.d.ts.map +1 -0
- package/dist/analyzers/python-context.js +232 -0
- package/dist/analyzers/python-context.js.map +1 -0
- package/dist/chunk-BD4NWUVG.mjs +1242 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +139 -13
- package/dist/cli.js.map +1 -0
- package/dist/cli.mjs +1 -1
- package/dist/index.d.mts +3 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +139 -13
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +1 -1
- package/dist/scoring.d.ts +13 -0
- package/dist/scoring.d.ts.map +1 -0
- package/dist/scoring.js +133 -0
- package/dist/scoring.js.map +1 -0
- package/dist/semantic-analysis.d.ts +44 -0
- package/dist/semantic-analysis.d.ts.map +1 -0
- package/dist/semantic-analysis.js +241 -0
- package/dist/semantic-analysis.js.map +1 -0
- package/dist/types.d.ts +117 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/package.json +2 -2
- package/src/__tests__/fragmentation-advanced.test.ts +60 -0
- package/src/__tests__/fragmentation-coupling.test.ts +62 -0
- package/src/__tests__/fragmentation-log.test.ts +38 -0
- package/src/__tests__/structural-cohesion.test.ts +32 -0
- package/src/analyzer.ts +193 -18
- package/src/index.ts +34 -2
- package/src/types.ts +3 -0
- package/tsconfig.tsbuildinfo +1 -0
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import {
|
|
3
|
+
buildDependencyGraph,
|
|
4
|
+
detectModuleClusters,
|
|
5
|
+
calculateFragmentation,
|
|
6
|
+
} from '../analyzer';
|
|
7
|
+
|
|
8
|
+
describe('fragmentation coupling discount', () => {
  // Builds the dependency graph for the given sources, runs cluster detection,
  // and returns the cluster whose domain is 'billing' (or undefined).
  const billingClusterOf = (sources: Parameters<typeof buildDependencyGraph>[0]) => {
    const detected = detectModuleClusters(buildDependencyGraph(sources));
    return detected.find((candidate) => candidate.domain === 'billing');
  };

  it('does not apply discount when files have no shared imports', () => {
    // Three billing files spread over three directories, none with imports.
    const files = [
      { file: 'src/billing/a.ts', content: `export const getBillingA = 1;` },
      { file: 'src/api/billing/b.ts', content: `export const getBillingB = 2;` },
      { file: 'lib/billing/c.ts', content: `export const getBillingC = 3;` },
    ];

    const cluster = billingClusterOf(files);
    expect(cluster).toBeDefined();

    // Without any import similarity the discount factor is 1, so the cluster
    // score must equal the raw fragmentation of the same paths.
    const paths = files.map((entry) => entry.file);
    const undiscounted = calculateFragmentation(paths, 'billing');
    expect(cluster!.fragmentationScore).toBeCloseTo(undiscounted, 6);
  });

  it('applies up-to-20% discount when files share identical imports', () => {
    // Same layout as above, but every file imports the same module.
    const files = [
      {
        file: 'src/billing/a.ts',
        content: `import { shared } from 'shared/module';\nexport const getBillingA = 1;`,
      },
      {
        file: 'src/api/billing/b.ts',
        content: `import { shared } from 'shared/module';\nexport const getBillingB = 2;`,
      },
      {
        file: 'lib/billing/c.ts',
        content: `import { shared } from 'shared/module';\nexport const getBillingC = 3;`,
      },
    ];

    const cluster = billingClusterOf(files);
    expect(cluster).toBeDefined();

    // Identical import lists => pairwise similarity 1 => the full 20% discount.
    const paths = files.map((entry) => entry.file);
    const discounted = calculateFragmentation(paths, 'billing') * 0.8;

    // Compare with a small floating-point tolerance.
    expect(cluster!.fragmentationScore).toBeCloseTo(discounted, 6);
  });
});
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest';
|
|
2
|
+
import { calculateFragmentation } from '../analyzer';
|
|
3
|
+
|
|
4
|
+
describe('calculateFragmentation (log scale option)', () => {
  it('returns 0 for single file regardless of option', () => {
    // A lone file cannot be scattered, whichever scale is selected.
    const files = ['src/user/user.ts'];
    const linearResult = calculateFragmentation(files, 'user');
    const logResult = calculateFragmentation(files, 'user', { useLogScale: true });
    expect(linearResult).toBe(0);
    expect(logResult).toBe(0);
  });

  it('matches linear formula when not using log scale', () => {
    // Four files, each in its own directory.
    const files = ['a/one.ts', 'b/two.ts', 'c/three.ts', 'd/four.ts'];

    // Linear definition: (unique directories - 1) / (total files - 1)
    const uniqueDirs = 4;
    const linear = (uniqueDirs - 1) / (files.length - 1);

    expect(calculateFragmentation(files, 'domain')).toBeCloseTo(linear);
  });

  it('computes normalized log-based fragmentation when requested', () => {
    // Five files across three distinct directories.
    const files = [
      'src/group/a.ts',
      'src/group/b.ts',
      'src/group/c.ts',
      'lib/other/d.ts',
      'tools/x/e.ts',
    ];

    // Count distinct parent directories the same way the analyzer does.
    const parents = new Set<string>();
    for (const path of files) {
      parents.add(path.split('/').slice(0, -1).join('/'));
    }
    const expected = Math.log(parents.size) / Math.log(files.length);

    const actual = calculateFragmentation(files, 'domain', { useLogScale: true });
    expect(actual).toBeCloseTo(expected, 6);
  });
});
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { describe, it, expect } from 'vitest'
|
|
2
|
+
import { calculateStructuralCohesionFromCoUsage } from '../analyzer'
|
|
3
|
+
|
|
4
|
+
describe('calculateStructuralCohesionFromCoUsage', () => {
  it('returns 1 when no co-usage data present', () => {
    // No matrix at all is treated as "nothing contradicts cohesion".
    const score = calculateStructuralCohesionFromCoUsage('missing', undefined)
    expect(score).toBe(1)
  })

  it('returns 1 when co-usage only with a single file', () => {
    // A single partner carries no distribution to measure, so cohesion is 1.
    const coUsage = new Map<string, Map<string, number>>()
    coUsage.set('a', new Map([['b', 10]]))
    const score = calculateStructuralCohesionFromCoUsage('a', coUsage)
    expect(score).toBe(1)
  })

  it('returns ~0 when co-usage is perfectly balanced across two files', () => {
    const coUsage = new Map<string, Map<string, number>>()
    coUsage.set('a', new Map([['b', 5], ['c', 5]]))
    const score = calculateStructuralCohesionFromCoUsage('a', coUsage)
    // Balanced distribution => entropy == 1 (for 2 items) => cohesion ~= 0
    expect(score).toBeCloseTo(0, 3)
  })

  it('returns intermediate value for skewed distribution', () => {
    const coUsage = new Map<string, Map<string, number>>()
    coUsage.set('a', new Map([['b', 8], ['c', 2]]))
    const score = calculateStructuralCohesionFromCoUsage('a', coUsage)
    // Derive the expectation instead of hard-coding it: for shares 0.8 / 0.2
    // the Shannon entropy is ~0.7219 bits and the maximum for two partners is
    // log2(2) = 1, so cohesion = 1 - 0.7219 ~= 0.2781. The previous literal
    // (0.279 at precision 2) was slightly off and only passed because of the
    // loose tolerance.
    const entropy = -(0.8 * Math.log2(0.8) + 0.2 * Math.log2(0.2))
    const expected = 1 - entropy / Math.log2(2)
    expect(score).toBeCloseTo(expected, 6)
  })
})
|
package/src/analyzer.ts
CHANGED
|
@@ -297,8 +297,12 @@ export function detectCircularDependencies(
|
|
|
297
297
|
* @param exports - Array of export information
|
|
298
298
|
* @param filePath - Optional file path for context-aware scoring
|
|
299
299
|
*/
|
|
300
|
-
export function calculateCohesion(
|
|
301
|
-
|
|
300
|
+
export function calculateCohesion(
  exports: ExportInfo[],
  filePath?: string,
  options?: {
    coUsageMatrix?: Map<string, Map<string, number>>;
    weights?: { importBased?: number; structural?: number; domainBased?: number };
  }
): number {
  // Thin wrapper: every argument is forwarded unchanged to
  // calculateEnhancedCohesion, which produces the score.
  const cohesion = calculateEnhancedCohesion(exports, filePath, options);
  return cohesion;
}
|
|
303
307
|
|
|
304
308
|
/**
|
|
@@ -322,23 +326,105 @@ function isTestFile(filePath: string): boolean {
|
|
|
322
326
|
*/
|
|
323
327
|
export function calculateFragmentation(
  files: string[],
  domain: string,
  options?: { useLogScale?: boolean; logBase?: number }
): number {
  // Returns 0 when the files cannot be scattered (zero or one file, or a
  // single directory in log mode) and 1 when every file lives in its own
  // directory. `domain` is part of the public signature but is not read here.
  if (files.length <= 1) return 0; // Single file = no fragmentation

  // Count the distinct parent directories (everything before the last '/').
  const directories = new Set(files.map((f) => f.split('/').slice(0, -1).join('/')));
  const uniqueDirs = directories.size;

  // Log scaling: going from 1 -> 2 directories is a large jump while
  // 10 -> 11 is relatively small. Normalized value is
  // log(uniqueDirs) / log(totalFiles).
  if (options?.useLogScale) {
    if (uniqueDirs <= 1) return 0;

    // A ratio of two logarithms is independent of the base (change-of-base
    // rule), so `options.logBase` cannot affect the result. Dividing both
    // terms by Math.log(base), as was done before, only introduced failure
    // modes: base 1 gave Infinity / Infinity = NaN, and a base in (0, 1) made
    // both terms negative so the old `den > 0` guard returned 0. The option
    // is still accepted for backward compatibility but is deliberately unused.
    // files.length >= 2 here, so the denominator is strictly positive.
    return Math.log(uniqueDirs) / Math.log(files.length);
  }

  // Default (linear) Fragmentation = (unique directories - 1) / (total files - 1)
  return (uniqueDirs - 1) / (files.length - 1);
}
|
|
353
|
+
|
|
354
|
+
/**
 * Measure how evenly a set of files is spread over its parent directories.
 *
 * Files are bucketed by the part of the path before the last '/', with '.'
 * used when there is none. The Shannon entropy of the bucket sizes is divided
 * by log2(number of buckets), the value reached by a perfectly even spread.
 *
 * @param files - File paths using '/' as the separator
 * @returns 0 for empty input or a single directory; otherwise the normalized
 *          entropy, which is 1 when all directories hold the same number of files
 */
export function calculatePathEntropy(files: string[]): number {
  if (!files || files.length === 0) return 0;

  // Tally files per parent directory.
  const perDirectory = new Map<string, number>();
  files.forEach((filePath) => {
    const parent = filePath.split('/').slice(0, -1).join('/') || '.';
    perDirectory.set(parent, (perDirectory.get(parent) || 0) + 1);
  });

  const bucketSizes = [...perDirectory.values()];
  if (bucketSizes.length <= 1) return 0; // single directory -> zero entropy

  const fileCount = bucketSizes.reduce((acc, size) => acc + size, 0);

  // Shannon entropy (bits) of the directory distribution.
  const entropy = bucketSizes.reduce((acc, size) => {
    const share = size / fileCount;
    return acc - share * Math.log2(share);
  }, 0);

  // Normalize by the entropy of a uniform spread over the same buckets.
  const ceiling = Math.log2(bucketSizes.length);
  if (ceiling > 0) {
    return entropy / ceiling;
  }
  return 0;
}
|
|
381
|
+
|
|
382
|
+
/**
 * Average pairwise path distance for a set of files.
 *
 * Each path is split on '/' (empty segments dropped). For every unordered
 * pair the number of leading segments the two paths share is divided by the
 * longer path's segment count, and the pair's distance is one minus that
 * ratio. The result is the mean distance over all pairs.
 *
 * Note that the file name itself counts as a segment, so two different files
 * in the same directory still have a non-zero distance (1 / depth); a pair
 * that shares no leading segment has distance 1.
 *
 * @param files - File paths using '/' as the separator
 * @returns 0 for fewer than two files, otherwise the mean pair distance in [0,1]
 */
export function calculateDirectoryDistance(files: string[]): number {
  if (!files || files.length <= 1) return 0;

  // Split every path once up front; the pair loop below is O(n^2) and used to
  // re-split both paths on every iteration.
  const segmented = files.map((p) => p.split('/').filter(Boolean));

  let totalNormalized = 0;
  let comparisons = 0;

  for (let i = 0; i < segmented.length; i++) {
    for (let j = i + 1; j < segmented.length; j++) {
      const segA = segmented[i];
      const segB = segmented[j];

      // Number of shared segments counted from the root.
      const minLen = Math.min(segA.length, segB.length);
      let shared = 0;
      while (shared < minLen && segA[shared] === segB[shared]) shared++;

      const maxDepth = Math.max(segA.length, segB.length);
      const normalizedShared = maxDepth > 0 ? shared / maxDepth : 0;

      // Distance is the inverse of the normalized shared depth.
      totalNormalized += 1 - normalizedShared;
      comparisons++;
    }
  }

  return comparisons > 0 ? totalNormalized / comparisons : 0;
}
|
|
336
421
|
|
|
337
422
|
/**
|
|
338
423
|
* Group files by domain to detect module clusters
|
|
339
424
|
*/
|
|
340
425
|
export function detectModuleClusters(
|
|
341
|
-
graph: DependencyGraph
|
|
426
|
+
graph: DependencyGraph,
|
|
427
|
+
options?: { useLogScale?: boolean }
|
|
342
428
|
): ModuleCluster[] {
|
|
343
429
|
const domainMap = new Map<string, string[]>();
|
|
344
430
|
|
|
@@ -363,12 +449,47 @@ export function detectModuleClusters(
|
|
|
363
449
|
return sum + (node?.tokenCost || 0);
|
|
364
450
|
}, 0);
|
|
365
451
|
|
|
366
|
-
const
|
|
452
|
+
const baseFragmentation = calculateFragmentation(files, domain, { useLogScale: !!options?.useLogScale });
|
|
453
|
+
|
|
454
|
+
// Compute import-based cohesion across files in this domain cluster.
|
|
455
|
+
// This measures how much the files actually "talk" to each other.
|
|
456
|
+
// We'll compute average pairwise Jaccard similarity between each file's import lists.
|
|
457
|
+
let importSimilarityTotal = 0;
|
|
458
|
+
let importComparisons = 0;
|
|
459
|
+
|
|
460
|
+
for (let i = 0; i < files.length; i++) {
|
|
461
|
+
for (let j = i + 1; j < files.length; j++) {
|
|
462
|
+
const f1 = files[i];
|
|
463
|
+
const f2 = files[j];
|
|
464
|
+
const n1 = graph.nodes.get(f1)?.imports || [];
|
|
465
|
+
const n2 = graph.nodes.get(f2)?.imports || [];
|
|
466
|
+
|
|
467
|
+
// Treat two empty import lists as not coupled (similarity 0)
|
|
468
|
+
const similarity = (n1.length === 0 && n2.length === 0)
|
|
469
|
+
? 0
|
|
470
|
+
: calculateJaccardSimilarity(n1, n2);
|
|
471
|
+
|
|
472
|
+
importSimilarityTotal += similarity;
|
|
473
|
+
importComparisons++;
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
const importCohesion = importComparisons > 0 ? importSimilarityTotal / importComparisons : 0;
|
|
478
|
+
|
|
479
|
+
// Coupling discount: if files are heavily importing each other, reduce fragmentation penalty.
|
|
480
|
+
// Following recommendation: up to 20% discount proportional to import cohesion.
|
|
481
|
+
const couplingDiscountFactor = 1 - 0.2 * importCohesion;
|
|
482
|
+
|
|
483
|
+
const fragmentationScore = baseFragmentation * couplingDiscountFactor;
|
|
484
|
+
|
|
485
|
+
// Additional metrics for richer reporting
|
|
486
|
+
const pathEntropy = calculatePathEntropy(files);
|
|
487
|
+
const directoryDistance = calculateDirectoryDistance(files);
|
|
367
488
|
|
|
368
489
|
const avgCohesion =
|
|
369
490
|
files.reduce((sum, file) => {
|
|
370
491
|
const node = graph.nodes.get(file);
|
|
371
|
-
return sum + (node ? calculateCohesion(node.exports, file) : 0);
|
|
492
|
+
return sum + (node ? calculateCohesion(node.exports, file, { coUsageMatrix: graph.coUsageMatrix }) : 0);
|
|
372
493
|
}, 0) / files.length;
|
|
373
494
|
|
|
374
495
|
// Generate consolidation plan
|
|
@@ -384,6 +505,9 @@ export function detectModuleClusters(
|
|
|
384
505
|
files,
|
|
385
506
|
totalTokens,
|
|
386
507
|
fragmentationScore,
|
|
508
|
+
pathEntropy,
|
|
509
|
+
directoryDistance,
|
|
510
|
+
importCohesion,
|
|
387
511
|
avgCohesion,
|
|
388
512
|
suggestedStructure: {
|
|
389
513
|
targetFiles,
|
|
@@ -631,7 +755,8 @@ export function extractExportsWithAST(
|
|
|
631
755
|
*/
|
|
632
756
|
export function calculateEnhancedCohesion(
|
|
633
757
|
exports: ExportInfo[],
|
|
634
|
-
filePath?: string
|
|
758
|
+
filePath?: string,
|
|
759
|
+
options?: { coUsageMatrix?: Map<string, Map<string, number>>; weights?: { importBased?: number; structural?: number; domainBased?: number } }
|
|
635
760
|
): number {
|
|
636
761
|
if (exports.length === 0) return 1;
|
|
637
762
|
if (exports.length === 1) return 1;
|
|
@@ -645,17 +770,67 @@ export function calculateEnhancedCohesion(
|
|
|
645
770
|
const domainCohesion = calculateDomainCohesion(exports);
|
|
646
771
|
|
|
647
772
|
// Calculate import-based cohesion if imports are available
|
|
648
|
-
const hasImportData = exports.some(e => e.imports && e.imports.length > 0);
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
773
|
+
const hasImportData = exports.some((e) => e.imports && e.imports.length > 0);
|
|
774
|
+
const importCohesion = hasImportData ? calculateImportBasedCohesion(exports) : undefined;
|
|
775
|
+
|
|
776
|
+
// Calculate structural cohesion (co-usage) if coUsageMatrix and filePath available
|
|
777
|
+
const coUsageMatrix = options?.coUsageMatrix;
|
|
778
|
+
const structuralCohesion = filePath && coUsageMatrix ? calculateStructuralCohesionFromCoUsage(filePath, coUsageMatrix) : undefined;
|
|
779
|
+
|
|
780
|
+
// Default weights (can be overridden via options)
|
|
781
|
+
const defaultWeights = { importBased: 0.5, structural: 0.3, domainBased: 0.2 };
|
|
782
|
+
const weights = { ...defaultWeights, ...(options?.weights || {}) };
|
|
783
|
+
|
|
784
|
+
// Collect available signals and normalize weights
|
|
785
|
+
const signals: Array<{ score: number; weight: number }> = [];
|
|
786
|
+
if (importCohesion !== undefined) signals.push({ score: importCohesion, weight: weights.importBased });
|
|
787
|
+
if (structuralCohesion !== undefined) signals.push({ score: structuralCohesion, weight: weights.structural });
|
|
788
|
+
// domain cohesion is always available
|
|
789
|
+
signals.push({ score: domainCohesion, weight: weights.domainBased });
|
|
790
|
+
|
|
791
|
+
const totalWeight = signals.reduce((s, el) => s + el.weight, 0);
|
|
792
|
+
if (totalWeight === 0) return domainCohesion;
|
|
793
|
+
|
|
794
|
+
const combined = signals.reduce((sum, el) => sum + el.score * (el.weight / totalWeight), 0);
|
|
795
|
+
return combined;
|
|
796
|
+
}
|
|
797
|
+
|
|
798
|
+
/**
 * Entropy-based structural cohesion of a file, derived from its co-usage row.
 *
 * The row `coUsageMatrix.get(file)` maps partner files to counts. Positive
 * counts are turned into a probability distribution, and the score is
 * `1 - H / log2(k)`, where `H` is the Shannon entropy of that distribution and
 * `k` the number of partners with a positive count.
 *
 * - 1 => co-usage is concentrated (or there is no usable data: missing matrix,
 *        missing/empty row, zero total, or at most one positive partner)
 * - 0 => co-usage is spread uniformly over all partners
 *
 * @param file - Key of the file to look up in the matrix
 * @param coUsageMatrix - Optional map of file -> (partner file -> count)
 * @returns Cohesion score as described above
 */
export function calculateStructuralCohesionFromCoUsage(
  file: string,
  coUsageMatrix?: Map<string, Map<string, number>>
): number {
  const partners = coUsageMatrix?.get(file);
  if (!partners || partners.size === 0) return 1;

  const counts = Array.from(partners.values());
  const sum = counts.reduce((acc, count) => acc + count, 0);
  if (sum === 0) return 1;

  // Probability distribution over partners that were actually co-used.
  const shares = counts.filter((count) => count > 0).map((count) => count / sum);
  if (shares.length <= 1) return 1;

  // Shannon entropy in bits.
  const entropy = shares.reduce((acc, share) => acc - share * Math.log2(share), 0);

  // Normalize against the uniform distribution over the same partners.
  const ceiling = Math.log2(shares.length);
  if (ceiling > 0) {
    return 1 - entropy / ceiling;
  }
  return 1;
}
|
|
660
835
|
|
|
661
836
|
/**
|
package/src/index.ts
CHANGED
|
@@ -9,6 +9,8 @@ import {
|
|
|
9
9
|
calculateCohesion,
|
|
10
10
|
calculateFragmentation,
|
|
11
11
|
detectModuleClusters,
|
|
12
|
+
calculatePathEntropy,
|
|
13
|
+
calculateDirectoryDistance,
|
|
12
14
|
} from './analyzer';
|
|
13
15
|
import { calculateContextScore } from './scoring';
|
|
14
16
|
import type {
|
|
@@ -206,7 +208,9 @@ export async function analyzeContext(
|
|
|
206
208
|
const circularDeps = detectCircularDependencies(graph);
|
|
207
209
|
|
|
208
210
|
// Detect module clusters for fragmentation analysis
|
|
209
|
-
|
|
211
|
+
// Enable log-scaling for fragmentation by default on medium+ repos
|
|
212
|
+
const useLogScale = files.length >= 500; // medium and larger projects
|
|
213
|
+
const clusters = detectModuleClusters(graph, { useLogScale });
|
|
210
214
|
const fragmentationMap = new Map<string, number>();
|
|
211
215
|
for (const cluster of clusters) {
|
|
212
216
|
for (const file of cluster.files) {
|
|
@@ -374,7 +378,6 @@ export function generateSummary(
|
|
|
374
378
|
const fragmentedModules: ModuleCluster[] = [];
|
|
375
379
|
for (const [domain, files] of moduleMap.entries()) {
|
|
376
380
|
if (files.length < 2) continue;
|
|
377
|
-
|
|
378
381
|
const fragmentationScore =
|
|
379
382
|
files.reduce((sum, f) => sum + f.fragmentationScore, 0) / files.length;
|
|
380
383
|
if (fragmentationScore < 0.3) continue; // Skip well-organized modules
|
|
@@ -384,11 +387,40 @@ export function generateSummary(
|
|
|
384
387
|
files.reduce((sum, f) => sum + f.cohesionScore, 0) / files.length;
|
|
385
388
|
const targetFiles = Math.max(1, Math.ceil(files.length / 3));
|
|
386
389
|
|
|
390
|
+
// Compute path entropy and directory distance for reporting
|
|
391
|
+
const filePaths = files.map((f) => f.file);
|
|
392
|
+
const pathEntropy = calculatePathEntropy(filePaths);
|
|
393
|
+
const directoryDistance = calculateDirectoryDistance(filePaths);
|
|
394
|
+
|
|
395
|
+
// Compute import cohesion based on dependency lists (Jaccard similarity)
|
|
396
|
+
// Jaccard similarity |A ∩ B| / |A ∪ B| of two string lists, treated as sets.
// Two empty lists are defined as similarity 0 (not coupled).
function jaccard(a: string[], b: string[]) {
  const left = new Set(a || []);
  const right = new Set(b || []);
  if (left.size === 0 && right.size === 0) return 0;

  // Count the members present on both sides.
  let shared = 0;
  for (const item of left) {
    if (right.has(item)) shared++;
  }

  // |A ∪ B| = |A| + |B| - |A ∩ B|
  const unionSize = left.size + right.size - shared;
  return unionSize === 0 ? 0 : shared / unionSize;
}
|
|
404
|
+
|
|
405
|
+
let importSimTotal = 0;
|
|
406
|
+
let importPairs = 0;
|
|
407
|
+
for (let i = 0; i < files.length; i++) {
|
|
408
|
+
for (let j = i + 1; j < files.length; j++) {
|
|
409
|
+
importSimTotal += jaccard(files[i].dependencyList || [], files[j].dependencyList || []);
|
|
410
|
+
importPairs++;
|
|
411
|
+
}
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
const importCohesion = importPairs > 0 ? importSimTotal / importPairs : 0;
|
|
415
|
+
|
|
387
416
|
fragmentedModules.push({
|
|
388
417
|
domain,
|
|
389
418
|
files: files.map((f) => f.file),
|
|
390
419
|
totalTokens,
|
|
391
420
|
fragmentationScore,
|
|
421
|
+
pathEntropy,
|
|
422
|
+
directoryDistance,
|
|
423
|
+
importCohesion,
|
|
392
424
|
avgCohesion,
|
|
393
425
|
suggestedStructure: {
|
|
394
426
|
targetFiles,
|
package/src/types.ts
CHANGED
|
@@ -44,6 +44,9 @@ export interface ModuleCluster {
|
|
|
44
44
|
files: string[];
|
|
45
45
|
totalTokens: number;
|
|
46
46
|
fragmentationScore: number; // 0-1, higher = more scattered
|
|
47
|
+
pathEntropy?: number; // normalized [0-1] Shannon entropy of directory distribution
|
|
48
|
+
directoryDistance?: number; // normalized [0-1] based on common ancestor depth
|
|
49
|
+
importCohesion?: number; // 0-1 average pairwise Jaccard similarity of imports
|
|
47
50
|
avgCohesion: number; // Average cohesion across files in cluster
|
|
48
51
|
suggestedStructure: {
|
|
49
52
|
targetFiles: number; // Recommended number of files
|