@aiready/context-analyzer 0.9.4 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/.turbo/turbo-build.log +10 -10
  2. package/.turbo/turbo-test.log +21 -9
  3. package/README.md +8 -0
  4. package/dist/__tests__/analyzer.test.d.ts +2 -0
  5. package/dist/__tests__/analyzer.test.d.ts.map +1 -0
  6. package/dist/__tests__/analyzer.test.js +157 -0
  7. package/dist/__tests__/analyzer.test.js.map +1 -0
  8. package/dist/__tests__/auto-detection.test.d.ts +2 -0
  9. package/dist/__tests__/auto-detection.test.d.ts.map +1 -0
  10. package/dist/__tests__/auto-detection.test.js +132 -0
  11. package/dist/__tests__/auto-detection.test.js.map +1 -0
  12. package/dist/__tests__/enhanced-cohesion.test.d.ts +2 -0
  13. package/dist/__tests__/enhanced-cohesion.test.d.ts.map +1 -0
  14. package/dist/__tests__/enhanced-cohesion.test.js +109 -0
  15. package/dist/__tests__/enhanced-cohesion.test.js.map +1 -0
  16. package/dist/__tests__/fragmentation-advanced.test.d.ts +2 -0
  17. package/dist/__tests__/fragmentation-advanced.test.d.ts.map +1 -0
  18. package/dist/__tests__/fragmentation-advanced.test.js +50 -0
  19. package/dist/__tests__/fragmentation-advanced.test.js.map +1 -0
  20. package/dist/__tests__/fragmentation-coupling.test.d.ts +2 -0
  21. package/dist/__tests__/fragmentation-coupling.test.d.ts.map +1 -0
  22. package/dist/__tests__/fragmentation-coupling.test.js +52 -0
  23. package/dist/__tests__/fragmentation-coupling.test.js.map +1 -0
  24. package/dist/__tests__/fragmentation-log.test.d.ts +2 -0
  25. package/dist/__tests__/fragmentation-log.test.d.ts.map +1 -0
  26. package/dist/__tests__/fragmentation-log.test.js +33 -0
  27. package/dist/__tests__/fragmentation-log.test.js.map +1 -0
  28. package/dist/__tests__/scoring.test.d.ts +2 -0
  29. package/dist/__tests__/scoring.test.d.ts.map +1 -0
  30. package/dist/__tests__/scoring.test.js +118 -0
  31. package/dist/__tests__/scoring.test.js.map +1 -0
  32. package/dist/__tests__/structural-cohesion.test.d.ts +2 -0
  33. package/dist/__tests__/structural-cohesion.test.d.ts.map +1 -0
  34. package/dist/__tests__/structural-cohesion.test.js +29 -0
  35. package/dist/__tests__/structural-cohesion.test.js.map +1 -0
  36. package/dist/analyzer.d.ts +100 -0
  37. package/dist/analyzer.d.ts.map +1 -0
  38. package/dist/analyzer.js +701 -0
  39. package/dist/analyzer.js.map +1 -0
  40. package/dist/analyzers/python-context.d.ts +38 -0
  41. package/dist/analyzers/python-context.d.ts.map +1 -0
  42. package/dist/analyzers/python-context.js +232 -0
  43. package/dist/analyzers/python-context.js.map +1 -0
  44. package/dist/chunk-BD4NWUVG.mjs +1242 -0
  45. package/dist/cli.d.ts.map +1 -0
  46. package/dist/cli.js +139 -13
  47. package/dist/cli.js.map +1 -0
  48. package/dist/cli.mjs +1 -1
  49. package/dist/index.d.mts +3 -0
  50. package/dist/index.d.ts +3 -0
  51. package/dist/index.d.ts.map +1 -0
  52. package/dist/index.js +139 -13
  53. package/dist/index.js.map +1 -0
  54. package/dist/index.mjs +1 -1
  55. package/dist/scoring.d.ts +13 -0
  56. package/dist/scoring.d.ts.map +1 -0
  57. package/dist/scoring.js +133 -0
  58. package/dist/scoring.js.map +1 -0
  59. package/dist/semantic-analysis.d.ts +44 -0
  60. package/dist/semantic-analysis.d.ts.map +1 -0
  61. package/dist/semantic-analysis.js +241 -0
  62. package/dist/semantic-analysis.js.map +1 -0
  63. package/dist/types.d.ts +117 -0
  64. package/dist/types.d.ts.map +1 -0
  65. package/dist/types.js +2 -0
  66. package/dist/types.js.map +1 -0
  67. package/package.json +2 -2
  68. package/src/__tests__/fragmentation-advanced.test.ts +60 -0
  69. package/src/__tests__/fragmentation-coupling.test.ts +62 -0
  70. package/src/__tests__/fragmentation-log.test.ts +38 -0
  71. package/src/__tests__/structural-cohesion.test.ts +32 -0
  72. package/src/analyzer.ts +193 -18
  73. package/src/index.ts +34 -2
  74. package/src/types.ts +3 -0
  75. package/tsconfig.tsbuildinfo +1 -0
@@ -0,0 +1,62 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import {
3
+ buildDependencyGraph,
4
+ detectModuleClusters,
5
+ calculateFragmentation,
6
+ } from '../analyzer';
7
+
8
+ describe('fragmentation coupling discount', () => {
9
+ it('does not apply discount when files have no shared imports', () => {
10
+ const files = [
11
+ {
12
+ file: 'src/billing/a.ts',
13
+ content: `export const getBillingA = 1;`,
14
+ },
15
+ {
16
+ file: 'src/api/billing/b.ts',
17
+ content: `export const getBillingB = 2;`,
18
+ },
19
+ {
20
+ file: 'lib/billing/c.ts',
21
+ content: `export const getBillingC = 3;`,
22
+ },
23
+ ];
24
+
25
+ const graph = buildDependencyGraph(files);
26
+ const clusters = detectModuleClusters(graph);
27
+ const cluster = clusters.find((c) => c.domain === 'billing');
28
+ expect(cluster).toBeDefined();
29
+
30
+ const base = calculateFragmentation(files.map(f => f.file), 'billing');
31
+ // With no import similarity the coupling discount should be 0 -> fragmentation unchanged
32
+ expect(cluster!.fragmentationScore).toBeCloseTo(base, 6);
33
+ });
34
+
35
+ it('applies up-to-20% discount when files share identical imports', () => {
36
+ const files = [
37
+ {
38
+ file: 'src/billing/a.ts',
39
+ content: `import { shared } from 'shared/module';\nexport const getBillingA = 1;`,
40
+ },
41
+ {
42
+ file: 'src/api/billing/b.ts',
43
+ content: `import { shared } from 'shared/module';\nexport const getBillingB = 2;`,
44
+ },
45
+ {
46
+ file: 'lib/billing/c.ts',
47
+ content: `import { shared } from 'shared/module';\nexport const getBillingC = 3;`,
48
+ },
49
+ ];
50
+
51
+ const graph = buildDependencyGraph(files);
52
+ const clusters = detectModuleClusters(graph);
53
+ const cluster = clusters.find((c) => c.domain === 'billing');
54
+ expect(cluster).toBeDefined();
55
+
56
+ const base = calculateFragmentation(files.map(f => f.file), 'billing');
57
+ const expected = base * 0.8; // full cohesion => 20% discount
58
+
59
+ // Allow small FP tolerance
60
+ expect(cluster!.fragmentationScore).toBeCloseTo(expected, 6);
61
+ });
62
+ });
@@ -0,0 +1,38 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import { calculateFragmentation } from '../analyzer';
3
+
4
+ describe('calculateFragmentation (log scale option)', () => {
5
+ it('returns 0 for single file regardless of option', () => {
6
+ const files = ['src/user/user.ts'];
7
+ expect(calculateFragmentation(files, 'user')).toBe(0);
8
+ expect(calculateFragmentation(files, 'user', { useLogScale: true })).toBe(0);
9
+ });
10
+
11
+ it('matches linear formula when not using log scale', () => {
12
+ const files = [
13
+ 'a/one.ts',
14
+ 'b/two.ts',
15
+ 'c/three.ts',
16
+ 'd/four.ts',
17
+ ];
18
+
19
+ const uniqueDirs = 4;
20
+ const linear = (uniqueDirs - 1) / (files.length - 1);
21
+ expect(calculateFragmentation(files, 'domain')).toBeCloseTo(linear);
22
+ });
23
+
24
+ it('computes normalized log-based fragmentation when requested', () => {
25
+ const files = [
26
+ 'src/group/a.ts',
27
+ 'src/group/b.ts',
28
+ 'src/group/c.ts',
29
+ 'lib/other/d.ts',
30
+ 'tools/x/e.ts',
31
+ ];
32
+
33
+ const dirs = new Set(files.map((f) => f.split('/').slice(0, -1).join('/'))).size;
34
+ const expected = Math.log(dirs) / Math.log(files.length);
35
+
36
+ expect(calculateFragmentation(files, 'domain', { useLogScale: true })).toBeCloseTo(expected, 6);
37
+ });
38
+ });
@@ -0,0 +1,32 @@
1
+ import { describe, it, expect } from 'vitest'
2
+ import { calculateStructuralCohesionFromCoUsage } from '../analyzer'
3
+
4
+ describe('calculateStructuralCohesionFromCoUsage', () => {
5
+ it('returns 1 when no co-usage data present', () => {
6
+ const score = calculateStructuralCohesionFromCoUsage('missing', undefined)
7
+ expect(score).toBe(1)
8
+ })
9
+
10
+ it('returns 1 when co-usage only with a single file', () => {
11
+ const coUsage = new Map<string, Map<string, number>>()
12
+ coUsage.set('a', new Map([['b', 10]]))
13
+ const score = calculateStructuralCohesionFromCoUsage('a', coUsage)
14
+ expect(score).toBe(1)
15
+ })
16
+
17
+ it('returns ~0 when co-usage is perfectly balanced across two files', () => {
18
+ const coUsage = new Map<string, Map<string, number>>()
19
+ coUsage.set('a', new Map([['b', 5], ['c', 5]]))
20
+ const score = calculateStructuralCohesionFromCoUsage('a', coUsage)
21
+ // Balanced distribution => entropy == 1 (for 2 items) => cohesion ~= 0
22
+ expect(score).toBeCloseTo(0, 3)
23
+ })
24
+
25
+ it('returns intermediate value for skewed distribution', () => {
26
+ const coUsage = new Map<string, Map<string, number>>()
27
+ coUsage.set('a', new Map([['b', 8], ['c', 2]]))
28
+ const score = calculateStructuralCohesionFromCoUsage('a', coUsage)
29
+ // Expected approx 0.278 (= 1 - H(0.8, 0.2) with entropy normalized by log2(2))
30
+ expect(score).toBeCloseTo(0.279, 2)
31
+ })
32
+ })
package/src/analyzer.ts CHANGED
@@ -297,8 +297,12 @@ export function detectCircularDependencies(
297
297
  * @param exports - Array of export information
298
298
  * @param filePath - Optional file path for context-aware scoring
299
299
  */
300
- export function calculateCohesion(exports: ExportInfo[], filePath?: string): number {
301
- return calculateEnhancedCohesion(exports, filePath);
300
+ export function calculateCohesion(
301
+ exports: ExportInfo[],
302
+ filePath?: string,
303
+ options?: { coUsageMatrix?: Map<string, Map<string, number>>; weights?: { importBased?: number; structural?: number; domainBased?: number } }
304
+ ): number {
305
+ return calculateEnhancedCohesion(exports, filePath, options);
302
306
  }
303
307
 
304
308
  /**
@@ -322,23 +326,105 @@ function isTestFile(filePath: string): boolean {
322
326
  */
323
327
  export function calculateFragmentation(
324
328
  files: string[],
325
- domain: string
329
+ domain: string,
330
+ options?: { useLogScale?: boolean; logBase?: number }
326
331
  ): number {
327
332
  if (files.length <= 1) return 0; // Single file = no fragmentation
328
333
 
329
334
  // Calculate how many different directories contain these files
330
335
  const directories = new Set(files.map((f) => f.split('/').slice(0, -1).join('/')));
336
+ const uniqueDirs = directories.size;
337
+
338
+ // If log-scaling requested, normalize using logarithms so that
339
+ // going from 1 -> 2 directories shows a large jump while 10 -> 11
340
+ // is relatively small. Normalized value is log(uniqueDirs)/log(totalFiles).
341
+ if (options?.useLogScale) {
342
+ if (uniqueDirs <= 1) return 0;
343
+ const total = files.length;
344
+ const base = options.logBase || Math.E;
345
+ const num = Math.log(uniqueDirs) / Math.log(base);
346
+ const den = Math.log(total) / Math.log(base);
347
+ return den > 0 ? num / den : 0;
348
+ }
349
+
350
+ // Default (linear) Fragmentation = (unique directories - 1) / (total files - 1)
351
+ return (uniqueDirs - 1) / (files.length - 1);
352
+ }
353
+
354
+ /**
355
+ * Calculate path entropy for a set of files.
356
+ * Returns a normalized entropy in [0,1], where 0 = all files in one directory,
357
+ * and 1 = files are evenly distributed across directories.
358
+ */
359
+ export function calculatePathEntropy(files: string[]): number {
360
+ if (!files || files.length === 0) return 0;
331
361
 
332
- // Fragmentation = unique directories / total files
333
- // 0 = all in same dir, 1 = all in different dirs
334
- return (directories.size - 1) / (files.length - 1);
362
+ const dirCounts = new Map<string, number>();
363
+ for (const f of files) {
364
+ const dir = f.split('/').slice(0, -1).join('/') || '.';
365
+ dirCounts.set(dir, (dirCounts.get(dir) || 0) + 1);
366
+ }
367
+
368
+ const counts = Array.from(dirCounts.values());
369
+ if (counts.length <= 1) return 0; // single directory -> zero entropy
370
+
371
+ const total = counts.reduce((s, v) => s + v, 0);
372
+ let entropy = 0;
373
+ for (const c of counts) {
374
+ const p = c / total;
375
+ entropy -= p * Math.log2(p);
376
+ }
377
+
378
+ const maxEntropy = Math.log2(counts.length);
379
+ return maxEntropy > 0 ? entropy / maxEntropy : 0;
380
+ }
381
+
382
+ /**
383
+ * Calculate directory-distance metric based on common ancestor depth.
384
+ * For each file pair compute depth(commonAncestor) and normalize by the
385
+ * maximum path depth between the two files. Returns value in [0,1] where
386
+ * 0 means all pairs share a deep common ancestor (low fragmentation) and
387
+ * 1 means they share only the root (high fragmentation).
388
+ */
389
+ export function calculateDirectoryDistance(files: string[]): number {
390
+ if (!files || files.length <= 1) return 0;
391
+
392
+ function pathSegments(p: string) {
393
+ return p.split('/').filter(Boolean);
394
+ }
395
+
396
+ function commonAncestorDepth(a: string[], b: string[]) {
397
+ const minLen = Math.min(a.length, b.length);
398
+ let i = 0;
399
+ while (i < minLen && a[i] === b[i]) i++;
400
+ return i; // number of shared segments from root
401
+ }
402
+
403
+ let totalNormalized = 0;
404
+ let comparisons = 0;
405
+
406
+ for (let i = 0; i < files.length; i++) {
407
+ for (let j = i + 1; j < files.length; j++) {
408
+ const segA = pathSegments(files[i]);
409
+ const segB = pathSegments(files[j]);
410
+ const shared = commonAncestorDepth(segA, segB);
411
+ const maxDepth = Math.max(segA.length, segB.length);
412
+ const normalizedShared = maxDepth > 0 ? shared / maxDepth : 0;
413
+ // distance is inverse of normalized shared depth
414
+ totalNormalized += 1 - normalizedShared;
415
+ comparisons++;
416
+ }
417
+ }
418
+
419
+ return comparisons > 0 ? totalNormalized / comparisons : 0;
335
420
  }
336
421
 
337
422
  /**
338
423
  * Group files by domain to detect module clusters
339
424
  */
340
425
  export function detectModuleClusters(
341
- graph: DependencyGraph
426
+ graph: DependencyGraph,
427
+ options?: { useLogScale?: boolean }
342
428
  ): ModuleCluster[] {
343
429
  const domainMap = new Map<string, string[]>();
344
430
 
@@ -363,12 +449,47 @@ export function detectModuleClusters(
363
449
  return sum + (node?.tokenCost || 0);
364
450
  }, 0);
365
451
 
366
- const fragmentationScore = calculateFragmentation(files, domain);
452
+ const baseFragmentation = calculateFragmentation(files, domain, { useLogScale: !!options?.useLogScale });
453
+
454
+ // Compute import-based cohesion across files in this domain cluster.
455
+ // This measures how similar the files' dependency sets are (shared imports),
456
+ // We'll compute average pairwise Jaccard similarity between each file's import lists.
457
+ let importSimilarityTotal = 0;
458
+ let importComparisons = 0;
459
+
460
+ for (let i = 0; i < files.length; i++) {
461
+ for (let j = i + 1; j < files.length; j++) {
462
+ const f1 = files[i];
463
+ const f2 = files[j];
464
+ const n1 = graph.nodes.get(f1)?.imports || [];
465
+ const n2 = graph.nodes.get(f2)?.imports || [];
466
+
467
+ // Treat two empty import lists as not coupled (similarity 0)
468
+ const similarity = (n1.length === 0 && n2.length === 0)
469
+ ? 0
470
+ : calculateJaccardSimilarity(n1, n2);
471
+
472
+ importSimilarityTotal += similarity;
473
+ importComparisons++;
474
+ }
475
+ }
476
+
477
+ const importCohesion = importComparisons > 0 ? importSimilarityTotal / importComparisons : 0;
478
+
479
+ // Coupling discount: if files share largely the same imports (high import cohesion), reduce the fragmentation penalty.
480
+ // Following recommendation: up to 20% discount proportional to import cohesion.
481
+ const couplingDiscountFactor = 1 - 0.2 * importCohesion;
482
+
483
+ const fragmentationScore = baseFragmentation * couplingDiscountFactor;
484
+
485
+ // Additional metrics for richer reporting
486
+ const pathEntropy = calculatePathEntropy(files);
487
+ const directoryDistance = calculateDirectoryDistance(files);
367
488
 
368
489
  const avgCohesion =
369
490
  files.reduce((sum, file) => {
370
491
  const node = graph.nodes.get(file);
371
- return sum + (node ? calculateCohesion(node.exports, file) : 0);
492
+ return sum + (node ? calculateCohesion(node.exports, file, { coUsageMatrix: graph.coUsageMatrix }) : 0);
372
493
  }, 0) / files.length;
373
494
 
374
495
  // Generate consolidation plan
@@ -384,6 +505,9 @@ export function detectModuleClusters(
384
505
  files,
385
506
  totalTokens,
386
507
  fragmentationScore,
508
+ pathEntropy,
509
+ directoryDistance,
510
+ importCohesion,
387
511
  avgCohesion,
388
512
  suggestedStructure: {
389
513
  targetFiles,
@@ -631,7 +755,8 @@ export function extractExportsWithAST(
631
755
  */
632
756
  export function calculateEnhancedCohesion(
633
757
  exports: ExportInfo[],
634
- filePath?: string
758
+ filePath?: string,
759
+ options?: { coUsageMatrix?: Map<string, Map<string, number>>; weights?: { importBased?: number; structural?: number; domainBased?: number } }
635
760
  ): number {
636
761
  if (exports.length === 0) return 1;
637
762
  if (exports.length === 1) return 1;
@@ -645,17 +770,67 @@ export function calculateEnhancedCohesion(
645
770
  const domainCohesion = calculateDomainCohesion(exports);
646
771
 
647
772
  // Calculate import-based cohesion if imports are available
648
- const hasImportData = exports.some(e => e.imports && e.imports.length > 0);
649
-
650
- if (!hasImportData) {
651
- // No import data available, use domain-based only
652
- return domainCohesion;
773
+ const hasImportData = exports.some((e) => e.imports && e.imports.length > 0);
774
+ const importCohesion = hasImportData ? calculateImportBasedCohesion(exports) : undefined;
775
+
776
+ // Calculate structural cohesion (co-usage) if coUsageMatrix and filePath available
777
+ const coUsageMatrix = options?.coUsageMatrix;
778
+ const structuralCohesion = filePath && coUsageMatrix ? calculateStructuralCohesionFromCoUsage(filePath, coUsageMatrix) : undefined;
779
+
780
+ // Default weights (can be overridden via options)
781
+ const defaultWeights = { importBased: 0.5, structural: 0.3, domainBased: 0.2 };
782
+ const weights = { ...defaultWeights, ...(options?.weights || {}) };
783
+
784
+ // Collect available signals and normalize weights
785
+ const signals: Array<{ score: number; weight: number }> = [];
786
+ if (importCohesion !== undefined) signals.push({ score: importCohesion, weight: weights.importBased });
787
+ if (structuralCohesion !== undefined) signals.push({ score: structuralCohesion, weight: weights.structural });
788
+ // domain cohesion is always available
789
+ signals.push({ score: domainCohesion, weight: weights.domainBased });
790
+
791
+ const totalWeight = signals.reduce((s, el) => s + el.weight, 0);
792
+ if (totalWeight === 0) return domainCohesion;
793
+
794
+ const combined = signals.reduce((sum, el) => sum + el.score * (el.weight / totalWeight), 0);
795
+ return combined;
796
+ }
797
+
798
+ /**
799
+ * Calculate structural cohesion for a file based on co-usage patterns.
800
+ * Uses the co-usage distribution (files commonly imported alongside this file)
801
+ * and computes an entropy-based cohesion score in [0,1].
802
+ * - 1 => highly cohesive (imports mostly appear together with a small set)
803
+ * - 0 => maximally fragmented (imports appear uniformly across many partners)
804
+ */
805
+ export function calculateStructuralCohesionFromCoUsage(
806
+ file: string,
807
+ coUsageMatrix?: Map<string, Map<string, number>>
808
+ ): number {
809
+ if (!coUsageMatrix) return 1;
810
+
811
+ const coUsages = coUsageMatrix.get(file);
812
+ if (!coUsages || coUsages.size === 0) return 1;
813
+
814
+ // Build probability distribution over co-imported files
815
+ let total = 0;
816
+ for (const count of coUsages.values()) total += count;
817
+ if (total === 0) return 1;
818
+
819
+ const probs: number[] = [];
820
+ for (const count of coUsages.values()) {
821
+ if (count > 0) probs.push(count / total);
653
822
  }
654
823
 
655
- const importCohesion = calculateImportBasedCohesion(exports);
824
+ if (probs.length <= 1) return 1;
656
825
 
657
- // Weighted combination: 60% import-based, 40% domain-based
658
- return importCohesion * 0.6 + domainCohesion * 0.4;
826
+ // Calculate entropy
827
+ let entropy = 0;
828
+ for (const p of probs) {
829
+ entropy -= p * Math.log2(p);
830
+ }
831
+
832
+ const maxEntropy = Math.log2(probs.length);
833
+ return maxEntropy > 0 ? 1 - entropy / maxEntropy : 1;
659
834
  }
660
835
 
661
836
  /**
package/src/index.ts CHANGED
@@ -9,6 +9,8 @@ import {
9
9
  calculateCohesion,
10
10
  calculateFragmentation,
11
11
  detectModuleClusters,
12
+ calculatePathEntropy,
13
+ calculateDirectoryDistance,
12
14
  } from './analyzer';
13
15
  import { calculateContextScore } from './scoring';
14
16
  import type {
@@ -206,7 +208,9 @@ export async function analyzeContext(
206
208
  const circularDeps = detectCircularDependencies(graph);
207
209
 
208
210
  // Detect module clusters for fragmentation analysis
209
- const clusters = detectModuleClusters(graph);
211
+ // Enable log-scaling for fragmentation by default on medium+ repos
212
+ const useLogScale = files.length >= 500; // medium and larger projects
213
+ const clusters = detectModuleClusters(graph, { useLogScale });
210
214
  const fragmentationMap = new Map<string, number>();
211
215
  for (const cluster of clusters) {
212
216
  for (const file of cluster.files) {
@@ -374,7 +378,6 @@ export function generateSummary(
374
378
  const fragmentedModules: ModuleCluster[] = [];
375
379
  for (const [domain, files] of moduleMap.entries()) {
376
380
  if (files.length < 2) continue;
377
-
378
381
  const fragmentationScore =
379
382
  files.reduce((sum, f) => sum + f.fragmentationScore, 0) / files.length;
380
383
  if (fragmentationScore < 0.3) continue; // Skip well-organized modules
@@ -384,11 +387,40 @@ export function generateSummary(
384
387
  files.reduce((sum, f) => sum + f.cohesionScore, 0) / files.length;
385
388
  const targetFiles = Math.max(1, Math.ceil(files.length / 3));
386
389
 
390
+ // Compute path entropy and directory distance for reporting
391
+ const filePaths = files.map((f) => f.file);
392
+ const pathEntropy = calculatePathEntropy(filePaths);
393
+ const directoryDistance = calculateDirectoryDistance(filePaths);
394
+
395
+ // Compute import cohesion based on dependency lists (Jaccard similarity)
396
+ function jaccard(a: string[], b: string[]) {
397
+ const s1 = new Set(a || []);
398
+ const s2 = new Set(b || []);
399
+ if (s1.size === 0 && s2.size === 0) return 0;
400
+ const inter = new Set([...s1].filter((x) => s2.has(x)));
401
+ const uni = new Set([...s1, ...s2]);
402
+ return uni.size === 0 ? 0 : inter.size / uni.size;
403
+ }
404
+
405
+ let importSimTotal = 0;
406
+ let importPairs = 0;
407
+ for (let i = 0; i < files.length; i++) {
408
+ for (let j = i + 1; j < files.length; j++) {
409
+ importSimTotal += jaccard(files[i].dependencyList || [], files[j].dependencyList || []);
410
+ importPairs++;
411
+ }
412
+ }
413
+
414
+ const importCohesion = importPairs > 0 ? importSimTotal / importPairs : 0;
415
+
387
416
  fragmentedModules.push({
388
417
  domain,
389
418
  files: files.map((f) => f.file),
390
419
  totalTokens,
391
420
  fragmentationScore,
421
+ pathEntropy,
422
+ directoryDistance,
423
+ importCohesion,
392
424
  avgCohesion,
393
425
  suggestedStructure: {
394
426
  targetFiles,
package/src/types.ts CHANGED
@@ -44,6 +44,9 @@ export interface ModuleCluster {
44
44
  files: string[];
45
45
  totalTokens: number;
46
46
  fragmentationScore: number; // 0-1, higher = more scattered
47
+ pathEntropy?: number; // normalized [0-1] Shannon entropy of directory distribution
48
+ directoryDistance?: number; // normalized [0-1] based on common ancestor depth
49
+ importCohesion?: number; // 0-1 average pairwise Jaccard similarity of imports
47
50
  avgCohesion: number; // Average cohesion across files in cluster
48
51
  suggestedStructure: {
49
52
  targetFiles: number; // Recommended number of files