@aiready/context-analyzer 0.5.3 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,126 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import { calculateCohesion } from '../analyzer';
3
+ import type { ExportInfo } from '../types';
4
+
5
+ describe('Enhanced Cohesion Calculation', () => {
6
+ it('should use domain-based cohesion when no import data available', () => {
7
+ const exports: ExportInfo[] = [
8
+ { name: 'getUserData', type: 'function', inferredDomain: 'user' },
9
+ { name: 'getProductData', type: 'function', inferredDomain: 'product' },
10
+ ];
11
+
12
+ const cohesion = calculateCohesion(exports);
13
+
14
+ // With mixed domains (user, product) and no import data, should use domain-based calculation
15
+ // Domain entropy for 2 different domains = low cohesion
16
+ expect(cohesion).toBeLessThan(0.5);
17
+ });
18
+
19
+ it('should use import-based cohesion when import data available', () => {
20
+ const exports: ExportInfo[] = [
21
+ {
22
+ name: 'getUserData',
23
+ type: 'function',
24
+ inferredDomain: 'user',
25
+ imports: ['react', 'axios', 'lodash'],
26
+ },
27
+ {
28
+ name: 'getProductData',
29
+ type: 'function',
30
+ inferredDomain: 'product',
31
+ imports: ['react', 'axios', 'lodash'], // Same imports!
32
+ },
33
+ ];
34
+
35
+ const cohesion = calculateCohesion(exports);
36
+
37
+ // Even though domains differ, imports are identical (Jaccard = 1.0)
38
+ // Enhanced cohesion = 0.6 * 1.0 + 0.4 * 0.0 (different domains) = 0.6
39
+ // Should be >= 0.6 (import-based weight)
40
+ expect(cohesion).toBeGreaterThanOrEqual(0.6);
41
+ });
42
+
43
+ it('should weight import-based similarity higher than domain-based', () => {
44
+ const exportsWithSharedImports: ExportInfo[] = [
45
+ {
46
+ name: 'getUserData',
47
+ type: 'function',
48
+ inferredDomain: 'user',
49
+ imports: ['react', 'axios'],
50
+ },
51
+ {
52
+ name: 'getProductData',
53
+ type: 'function',
54
+ inferredDomain: 'product',
55
+ imports: ['react', 'axios'],
56
+ },
57
+ ];
58
+
59
+ const exportsWithoutSharedImports: ExportInfo[] = [
60
+ {
61
+ name: 'getUserData',
62
+ type: 'function',
63
+ inferredDomain: 'user',
64
+ imports: ['react', 'axios'],
65
+ },
66
+ {
67
+ name: 'getProductData',
68
+ type: 'function',
69
+ inferredDomain: 'product',
70
+ imports: ['lodash', 'moment'],
71
+ },
72
+ ];
73
+
74
+ const cohesionWithShared = calculateCohesion(exportsWithSharedImports);
75
+ const cohesionWithoutShared = calculateCohesion(exportsWithoutSharedImports);
76
+
77
+ // Shared imports should result in higher cohesion
78
+ expect(cohesionWithShared).toBeGreaterThan(cohesionWithoutShared);
79
+ });
80
+
81
+ it('should handle mixed case: some exports with imports, some without', () => {
82
+ const exports: ExportInfo[] = [
83
+ {
84
+ name: 'getUserData',
85
+ type: 'function',
86
+ inferredDomain: 'user',
87
+ imports: ['react', 'axios'],
88
+ },
89
+ {
90
+ name: 'getProductData',
91
+ type: 'function',
92
+ inferredDomain: 'product',
93
+ // No imports field
94
+ },
95
+ ];
96
+
97
+ const cohesion = calculateCohesion(exports);
98
+
99
+ // Should fall back to domain-based when not all exports have import data
100
+ expect(cohesion).toBeGreaterThan(0);
101
+ expect(cohesion).toBeLessThan(1);
102
+ });
103
+
104
+ it('should return 1 for single export', () => {
105
+ const exports: ExportInfo[] = [
106
+ {
107
+ name: 'getUserData',
108
+ type: 'function',
109
+ inferredDomain: 'user',
110
+ imports: ['react'],
111
+ },
112
+ ];
113
+
114
+ expect(calculateCohesion(exports)).toBe(1);
115
+ });
116
+
117
+ it('should return 1 for test files regardless of domains or imports', () => {
118
+ const exports: ExportInfo[] = [
119
+ { name: 'testUserLogin', type: 'function', inferredDomain: 'user', imports: ['react'] },
120
+ { name: 'testProductView', type: 'function', inferredDomain: 'product', imports: [] },
121
+ ];
122
+
123
+ const cohesion = calculateCohesion(exports, 'src/utils/test-helpers.ts');
124
+ expect(cohesion).toBe(1);
125
+ });
126
+ });
package/src/analyzer.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { estimateTokens } from '@aiready/core';
1
+ import { estimateTokens, parseFileExports, calculateImportSimilarity, type ExportWithImports } from '@aiready/core';
2
2
  import type {
3
3
  ContextAnalysisResult,
4
4
  DependencyGraph,
@@ -6,25 +6,87 @@ import type {
6
6
  ExportInfo,
7
7
  ModuleCluster,
8
8
  } from './types';
9
+ import { buildCoUsageMatrix, buildTypeGraph, inferDomainFromSemantics } from './semantic-analysis';
9
10
 
10
11
  interface FileContent {
11
12
  file: string;
12
13
  content: string;
13
14
  }
14
15
 
16
+ /**
17
+ * Auto-detect domain keywords from workspace folder structure
18
+ * Extracts unique folder names from file paths as potential domain keywords
19
+ */
20
+ function extractDomainKeywordsFromPaths(files: FileContent[]): string[] {
21
+ const folderNames = new Set<string>();
22
+
23
+ for (const { file } of files) {
24
+ const segments = file.split('/');
25
+ // Extract meaningful folder names (skip common infrastructure folders)
26
+ const skipFolders = new Set(['src', 'lib', 'dist', 'build', 'node_modules', 'test', 'tests', '__tests__', 'spec', 'e2e', 'scripts', 'components', 'utils', 'helpers', 'util', 'helper', 'api', 'apis']);
27
+
28
+ for (const segment of segments) {
29
+ const normalized = segment.toLowerCase();
30
+ if (normalized && !skipFolders.has(normalized) && !normalized.includes('.')) {
31
+ // Singularize common plural forms for better matching
32
+ const singular = singularize(normalized);
33
+ folderNames.add(singular);
34
+ }
35
+ }
36
+ }
37
+
38
+ return Array.from(folderNames);
39
+ }
40
+
41
+ /**
42
+ * Simple singularization for common English plurals
43
+ */
44
+ function singularize(word: string): string {
45
+ // Handle irregular plurals
46
+ const irregulars: Record<string, string> = {
47
+ people: 'person',
48
+ children: 'child',
49
+ men: 'man',
50
+ women: 'woman',
51
+ };
52
+
53
+ if (irregulars[word]) {
54
+ return irregulars[word];
55
+ }
56
+
57
+ // Common plural patterns
58
+ if (word.endsWith('ies')) {
59
+ return word.slice(0, -3) + 'y'; // categories -> category
60
+ }
61
+ if (word.endsWith('ses')) {
62
+ return word.slice(0, -2); // classes -> class
63
+ }
64
+ if (word.endsWith('s') && word.length > 3) {
65
+ return word.slice(0, -1); // orders -> order
66
+ }
67
+
68
+ return word;
69
+ }
70
+
15
71
  /**
16
72
  * Build a dependency graph from file contents
17
73
  */
18
74
  export function buildDependencyGraph(
19
- files: FileContent[]
75
+ files: FileContent[],
20
76
  ): DependencyGraph {
21
77
  const nodes = new Map<string, DependencyNode>();
22
78
  const edges = new Map<string, Set<string>>();
23
79
 
24
- // First pass: Create nodes
80
+ // Auto-detect domain keywords from workspace folder structure
81
+ const autoDetectedKeywords = extractDomainKeywordsFromPaths(files);
82
+
83
+ // First pass: Create nodes with folder-based domain inference
25
84
  for (const { file, content } of files) {
26
85
  const imports = extractImportsFromContent(content);
27
- const exports = extractExports(content);
86
+
87
+ // Use AST-based extraction for better accuracy, fallback to regex
88
+ const exports = extractExportsWithAST(content, file, { domainKeywords: autoDetectedKeywords }, imports);
89
+
28
90
  const tokenCost = estimateTokens(content);
29
91
  const linesOfCode = content.split('\n').length;
30
92
 
@@ -39,7 +101,39 @@ export function buildDependencyGraph(
39
101
  edges.set(file, new Set(imports));
40
102
  }
41
103
 
42
- return { nodes, edges };
104
+ // Second pass: Build semantic analysis graphs
105
+ const graph: DependencyGraph = { nodes, edges };
106
+ const coUsageMatrix = buildCoUsageMatrix(graph);
107
+ const typeGraph = buildTypeGraph(graph);
108
+
109
+ // Add semantic data to graph
110
+ graph.coUsageMatrix = coUsageMatrix;
111
+ graph.typeGraph = typeGraph;
112
+
113
+ // Third pass: Enhance domain assignments with semantic analysis
114
+ for (const [file, node] of nodes) {
115
+ for (const exp of node.exports) {
116
+ // Get semantic domain assignments
117
+ const semanticAssignments = inferDomainFromSemantics(
118
+ file,
119
+ exp.name,
120
+ graph,
121
+ coUsageMatrix,
122
+ typeGraph,
123
+ exp.typeReferences
124
+ );
125
+
126
+ // Add multi-domain assignments with confidence scores
127
+ exp.domains = semanticAssignments;
128
+
129
+ // Keep inferredDomain for backwards compatibility (use highest confidence)
130
+ if (semanticAssignments.length > 0) {
131
+ exp.inferredDomain = semanticAssignments[0].domain;
132
+ }
133
+ }
134
+ }
135
+
136
+ return graph;
43
137
  }
44
138
 
45
139
  /**
@@ -60,8 +154,8 @@ function extractImportsFromContent(content: string): string[] {
60
154
  let match;
61
155
  while ((match = pattern.exec(content)) !== null) {
62
156
  const importPath = match[1];
63
- if (importPath && !importPath.startsWith('@') && !importPath.startsWith('node:')) {
64
- // Only include relative/local imports
157
+ // Exclude only node built-ins (node:), include all local and aliased imports
158
+ if (importPath && !importPath.startsWith('node:')) {
65
159
  imports.push(importPath);
66
160
  }
67
161
  }
@@ -199,41 +293,12 @@ export function detectCircularDependencies(
199
293
 
200
294
  /**
201
295
  * Calculate cohesion score (how related are exports in a file)
202
- * Uses entropy: low entropy = high cohesion
296
+ * Uses enhanced calculation combining domain-based and import-based analysis
203
297
  * @param exports - Array of export information
204
298
  * @param filePath - Optional file path for context-aware scoring
205
299
  */
206
300
export function calculateCohesion(exports: ExportInfo[], filePath?: string): number {
  // Thin delegate kept for backwards compatibility with existing callers:
  // all scoring now lives in calculateEnhancedCohesion, which blends
  // import-based similarity with the original domain-entropy method.
  return calculateEnhancedCohesion(exports, filePath);
}
238
303
 
239
304
  /**
@@ -335,7 +400,12 @@ export function detectModuleClusters(
335
400
  * Extract export information from file content
336
401
  * TODO: Use proper AST parsing for better accuracy
337
402
  */
338
- function extractExports(content: string): ExportInfo[] {
403
+ function extractExports(
404
+ content: string,
405
+ filePath?: string,
406
+ domainOptions?: { domainKeywords?: string[]; domainPatterns?: string[]; pathDomainMap?: Record<string, string> },
407
+ fileImports?: string[]
408
+ ): ExportInfo[] {
339
409
  const exports: ExportInfo[] = [];
340
410
 
341
411
  // Simple regex-based extraction (improve with AST later)
@@ -362,7 +432,7 @@ function extractExports(content: string): ExportInfo[] {
362
432
  while ((match = pattern.exec(content)) !== null) {
363
433
  const name = match[1] || 'default';
364
434
  const type = types[index];
365
- const inferredDomain = inferDomain(name);
435
+ const inferredDomain = inferDomain(name, filePath, domainOptions, fileImports);
366
436
 
367
437
  exports.push({ name, type, inferredDomain });
368
438
  }
@@ -375,12 +445,29 @@ function extractExports(content: string): ExportInfo[] {
375
445
  * Infer domain from export name
376
446
  * Uses common naming patterns with word boundary matching
377
447
  */
378
- function inferDomain(name: string): string {
448
+ function inferDomain(
449
+ name: string,
450
+ filePath?: string,
451
+ domainOptions?: { domainKeywords?: string[] },
452
+ fileImports?: string[]
453
+ ): string {
379
454
  const lower = name.toLowerCase();
380
455
 
456
+ // Tokenize identifier: split camelCase, snake_case, kebab-case, and numbers
457
+ const tokens = Array.from(
458
+ new Set(
459
+ lower
460
+ .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
461
+ .replace(/[^a-z0-9]+/gi, ' ')
462
+ .split(' ')
463
+ .filter(Boolean)
464
+ )
465
+ );
466
+
381
467
  // Domain keywords ordered from most specific to most general
382
468
  // This prevents generic terms like 'util' from matching before specific domains
383
- const domainKeywords = [
469
+ // NOTE: 'api', 'util', 'helper' are intentionally excluded as they are too generic
470
+ const defaultKeywords = [
384
471
  'authentication',
385
472
  'authorization',
386
473
  'payment',
@@ -398,15 +485,15 @@ function inferDomain(name: string): string {
398
485
  'model',
399
486
  'view',
400
487
  'auth',
401
- 'api',
402
- 'helper',
403
- 'util',
404
488
  ];
405
489
 
490
+ const domainKeywords = domainOptions?.domainKeywords && domainOptions.domainKeywords.length
491
+ ? [...domainOptions.domainKeywords, ...defaultKeywords]
492
+ : defaultKeywords;
493
+
406
494
  // Try word boundary matching first for more accurate detection
407
495
  for (const keyword of domainKeywords) {
408
- const wordBoundaryPattern = new RegExp(`\\b${keyword}\\b`, 'i');
409
- if (wordBoundaryPattern.test(name)) {
496
+ if (tokens.includes(keyword)) {
410
497
  return keyword;
411
498
  }
412
499
  }
@@ -418,6 +505,52 @@ function inferDomain(name: string): string {
418
505
  }
419
506
  }
420
507
 
508
+ // Import-path domain inference: analyze import statements for domain hints
509
+ if (fileImports && fileImports.length > 0) {
510
+ for (const importPath of fileImports) {
511
+ // Parse all segments, including those after '@' or '.'
512
+ // e.g., '@/orders/service' -> ['orders', 'service']
513
+ // '../payments/processor' -> ['payments', 'processor']
514
+ const allSegments = importPath.split('/');
515
+ const relevantSegments = allSegments.filter(s => {
516
+ if (!s) return false;
517
+ // Skip '.' and '..' but keep everything else
518
+ if (s === '.' || s === '..') return false;
519
+ // Skip '@' prefix but keep the path after it
520
+ if (s.startsWith('@') && s.length === 1) return false;
521
+ // Remove '@' prefix from scoped imports like '@/orders'
522
+ return true;
523
+ }).map(s => s.startsWith('@') ? s.slice(1) : s);
524
+
525
+ for (const segment of relevantSegments) {
526
+ const segLower = segment.toLowerCase();
527
+ const singularSegment = singularize(segLower);
528
+
529
+ // Check if any domain keyword matches the import path segment (with singularization)
530
+ for (const keyword of domainKeywords) {
531
+ if (singularSegment === keyword || segLower === keyword || segLower.includes(keyword)) {
532
+ return keyword;
533
+ }
534
+ }
535
+ }
536
+ }
537
+ }
538
+
539
+ // Path-based fallback: check file path segments
540
+ if (filePath) {
541
+ // Auto-detect from path by checking against domain keywords (with singularization)
542
+ const pathSegments = filePath.toLowerCase().split('/');
543
+ for (const segment of pathSegments) {
544
+ const singularSegment = singularize(segment);
545
+
546
+ for (const keyword of domainKeywords) {
547
+ if (singularSegment === keyword || segment === keyword || segment.includes(keyword)) {
548
+ return keyword;
549
+ }
550
+ }
551
+ }
552
+ }
553
+
421
554
  return 'unknown';
422
555
  }
423
556
 
@@ -460,3 +593,136 @@ function generateConsolidationPlan(
460
593
 
461
594
  return plan;
462
595
  }
596
+
597
+ /**
598
+ * Extract exports using AST parsing (enhanced version)
599
+ * Falls back to regex if AST parsing fails
600
+ */
601
+ export function extractExportsWithAST(
602
+ content: string,
603
+ filePath: string,
604
+ domainOptions?: { domainKeywords?: string[] },
605
+ fileImports?: string[]
606
+ ): ExportInfo[] {
607
+ try {
608
+ const { exports: astExports } = parseFileExports(content, filePath);
609
+
610
+ return astExports.map(exp => ({
611
+ name: exp.name,
612
+ type: exp.type,
613
+ inferredDomain: inferDomain(exp.name, filePath, domainOptions, fileImports),
614
+ imports: exp.imports,
615
+ dependencies: exp.dependencies,
616
+ }));
617
+ } catch (error) {
618
+ // Fallback to regex-based extraction
619
+ return extractExports(content, filePath, domainOptions, fileImports);
620
+ }
621
+ }
622
+
623
/**
 * Calculate enhanced cohesion score using both domain inference and import similarity.
 *
 * This combines:
 * 1. Domain-based cohesion (normalized Shannon entropy of inferred domains)
 * 2. Import-based cohesion (mean pairwise Jaccard similarity of export imports)
 *
 * Weight: 60% import-based, 40% domain-based (import analysis is more reliable).
 *
 * @param exports - Exports of the file being scored
 * @param filePath - Optional path; test/mock/fixture files short-circuit to 1
 * @returns Cohesion score in [0, 1]; higher means more related exports
 */
export function calculateEnhancedCohesion(
  exports: ExportInfo[],
  filePath?: string
): number {
  if (exports.length === 0) return 1;
  if (exports.length === 1) return 1; // a single export is trivially cohesive

  // Special case for test files: they serve one purpose (testing) even when
  // their exports span several domains.
  if (filePath && isTestFile(filePath)) {
    return 1;
  }

  // Calculate domain-based cohesion (entropy method).
  const domainCohesion = calculateDomainCohesion(exports);

  // NOTE(review): `some` (not `every`) gates the import branch, so a file
  // where only one export carries imports still takes the weighted blend
  // below (calculateImportBasedCohesion returns 1 with <2 import lists) —
  // confirm this inflation is intended rather than a pure domain fallback.
  const hasImportData = exports.some(e => e.imports && e.imports.length > 0);

  if (!hasImportData) {
    // No import data available, use domain-based only.
    return domainCohesion;
  }

  const importCohesion = calculateImportBasedCohesion(exports);

  // Weighted combination: 60% import-based, 40% domain-based.
  return importCohesion * 0.6 + domainCohesion * 0.4;
}
660
+
661
+ /**
662
+ * Calculate cohesion based on shared imports (Jaccard similarity)
663
+ */
664
+ function calculateImportBasedCohesion(exports: ExportInfo[]): number {
665
+ const exportsWithImports = exports.filter(e => e.imports && e.imports.length > 0);
666
+
667
+ if (exportsWithImports.length < 2) {
668
+ return 1; // Not enough data
669
+ }
670
+
671
+ // Calculate pairwise import similarity
672
+ let totalSimilarity = 0;
673
+ let comparisons = 0;
674
+
675
+ for (let i = 0; i < exportsWithImports.length; i++) {
676
+ for (let j = i + 1; j < exportsWithImports.length; j++) {
677
+ const exp1 = exportsWithImports[i] as ExportInfo & { imports: string[] };
678
+ const exp2 = exportsWithImports[j] as ExportInfo & { imports: string[] };
679
+
680
+ const similarity = calculateJaccardSimilarity(exp1.imports, exp2.imports);
681
+ totalSimilarity += similarity;
682
+ comparisons++;
683
+ }
684
+ }
685
+
686
+ return comparisons > 0 ? totalSimilarity / comparisons : 1;
687
+ }
688
+
689
+ /**
690
+ * Calculate Jaccard similarity between two arrays
691
+ */
692
+ function calculateJaccardSimilarity(arr1: string[], arr2: string[]): number {
693
+ if (arr1.length === 0 && arr2.length === 0) return 1;
694
+ if (arr1.length === 0 || arr2.length === 0) return 0;
695
+
696
+ const set1 = new Set(arr1);
697
+ const set2 = new Set(arr2);
698
+
699
+ const intersection = new Set([...set1].filter(x => set2.has(x)));
700
+ const union = new Set([...set1, ...set2]);
701
+
702
+ return intersection.size / union.size;
703
+ }
704
+
705
+ /**
706
+ * Calculate domain-based cohesion (existing entropy method)
707
+ */
708
+ function calculateDomainCohesion(exports: ExportInfo[]): number {
709
+ const domains = exports.map((e) => e.inferredDomain || 'unknown');
710
+ const domainCounts = new Map<string, number>();
711
+
712
+ for (const domain of domains) {
713
+ domainCounts.set(domain, (domainCounts.get(domain) || 0) + 1);
714
+ }
715
+
716
+ const total = domains.length;
717
+ let entropy = 0;
718
+
719
+ for (const count of domainCounts.values()) {
720
+ const p = count / total;
721
+ if (p > 0) {
722
+ entropy -= p * Math.log2(p);
723
+ }
724
+ }
725
+
726
+ const maxEntropy = Math.log2(total);
727
+ return maxEntropy > 0 ? 1 - entropy / maxEntropy : 1;
728
+ }
package/src/index.ts CHANGED
@@ -15,9 +15,41 @@ import type {
15
15
  ContextAnalysisResult,
16
16
  ContextSummary,
17
17
  ModuleCluster,
18
+ DomainAssignment,
19
+ DomainSignals,
20
+ CoUsageData,
21
+ TypeDependency,
18
22
  } from './types';
19
-
20
- export type { ContextAnalyzerOptions, ContextAnalysisResult, ContextSummary, ModuleCluster };
23
+ import {
24
+ buildCoUsageMatrix,
25
+ buildTypeGraph,
26
+ findSemanticClusters,
27
+ calculateDomainConfidence,
28
+ inferDomainFromSemantics,
29
+ getCoUsageData,
30
+ findConsolidationCandidates,
31
+ } from './semantic-analysis';
32
+
33
+ export type {
34
+ ContextAnalyzerOptions,
35
+ ContextAnalysisResult,
36
+ ContextSummary,
37
+ ModuleCluster,
38
+ DomainAssignment,
39
+ DomainSignals,
40
+ CoUsageData,
41
+ TypeDependency,
42
+ };
43
+
44
+ export {
45
+ buildCoUsageMatrix,
46
+ buildTypeGraph,
47
+ findSemanticClusters,
48
+ calculateDomainConfidence,
49
+ inferDomainFromSemantics,
50
+ getCoUsageData,
51
+ findConsolidationCandidates,
52
+ };
21
53
 
22
54
  /**
23
55
  * Generate smart defaults for context analysis based on repository size