@aiready/context-analyzer 0.5.3 → 0.7.0
This diff shows the changes between package versions as published to their respective public registries, and is provided for informational purposes only.
- package/.turbo/turbo-build.log +10 -10
- package/.turbo/turbo-test.log +12 -28
- package/README.md +5 -3
- package/SEMANTIC-VALIDATION.md +235 -0
- package/dist/chunk-AEK3MZC5.mjs +709 -0
- package/dist/chunk-DD7UVNE3.mjs +678 -0
- package/dist/chunk-DMRZMS2U.mjs +964 -0
- package/dist/chunk-HQNHM2X7.mjs +997 -0
- package/dist/chunk-I54HL4FZ.mjs +781 -0
- package/dist/chunk-IRWCPDWD.mjs +779 -0
- package/dist/chunk-PVVCCE6W.mjs +755 -0
- package/dist/chunk-RYIB5CWD.mjs +781 -0
- package/dist/cli.js +304 -33
- package/dist/cli.mjs +1 -1
- package/dist/index.d.mts +90 -1
- package/dist/index.d.ts +90 -1
- package/dist/index.js +381 -35
- package/dist/index.mjs +17 -3
- package/package.json +2 -2
- package/src/__tests__/auto-detection.test.ts +156 -0
- package/src/__tests__/enhanced-cohesion.test.ts +126 -0
- package/src/analyzer.ts +313 -47
- package/src/index.ts +34 -2
- package/src/semantic-analysis.ts +287 -0
- package/src/types.ts +36 -1
package/src/__tests__/enhanced-cohesion.test.ts
ADDED
@@ -0,0 +1,126 @@
+import { describe, it, expect } from 'vitest';
+import { calculateCohesion } from '../analyzer';
+import type { ExportInfo } from '../types';
+
+describe('Enhanced Cohesion Calculation', () => {
+  it('should use domain-based cohesion when no import data available', () => {
+    const exports: ExportInfo[] = [
+      { name: 'getUserData', type: 'function', inferredDomain: 'user' },
+      { name: 'getProductData', type: 'function', inferredDomain: 'product' },
+    ];
+
+    const cohesion = calculateCohesion(exports);
+
+    // With mixed domains (user, product) and no import data, should use domain-based calculation
+    // Domain entropy for 2 different domains = low cohesion
+    expect(cohesion).toBeLessThan(0.5);
+  });
+
+  it('should use import-based cohesion when import data available', () => {
+    const exports: ExportInfo[] = [
+      {
+        name: 'getUserData',
+        type: 'function',
+        inferredDomain: 'user',
+        imports: ['react', 'axios', 'lodash'],
+      },
+      {
+        name: 'getProductData',
+        type: 'function',
+        inferredDomain: 'product',
+        imports: ['react', 'axios', 'lodash'], // Same imports!
+      },
+    ];
+
+    const cohesion = calculateCohesion(exports);
+
+    // Even though domains differ, imports are identical (Jaccard = 1.0)
+    // Enhanced cohesion = 0.6 * 1.0 + 0.4 * 0.0 (different domains) = 0.6
+    // Should be >= 0.6 (import-based weight)
+    expect(cohesion).toBeGreaterThanOrEqual(0.6);
+  });
+
+  it('should weight import-based similarity higher than domain-based', () => {
+    const exportsWithSharedImports: ExportInfo[] = [
+      {
+        name: 'getUserData',
+        type: 'function',
+        inferredDomain: 'user',
+        imports: ['react', 'axios'],
+      },
+      {
+        name: 'getProductData',
+        type: 'function',
+        inferredDomain: 'product',
+        imports: ['react', 'axios'],
+      },
+    ];
+
+    const exportsWithoutSharedImports: ExportInfo[] = [
+      {
+        name: 'getUserData',
+        type: 'function',
+        inferredDomain: 'user',
+        imports: ['react', 'axios'],
+      },
+      {
+        name: 'getProductData',
+        type: 'function',
+        inferredDomain: 'product',
+        imports: ['lodash', 'moment'],
+      },
+    ];
+
+    const cohesionWithShared = calculateCohesion(exportsWithSharedImports);
+    const cohesionWithoutShared = calculateCohesion(exportsWithoutSharedImports);
+
+    // Shared imports should result in higher cohesion
+    expect(cohesionWithShared).toBeGreaterThan(cohesionWithoutShared);
+  });
+
+  it('should handle mixed case: some exports with imports, some without', () => {
+    const exports: ExportInfo[] = [
+      {
+        name: 'getUserData',
+        type: 'function',
+        inferredDomain: 'user',
+        imports: ['react', 'axios'],
+      },
+      {
+        name: 'getProductData',
+        type: 'function',
+        inferredDomain: 'product',
+        // No imports field
+      },
+    ];
+
+    const cohesion = calculateCohesion(exports);
+
+    // Should fall back to domain-based when not all exports have import data
+    expect(cohesion).toBeGreaterThan(0);
+    expect(cohesion).toBeLessThan(1);
+  });
+
+  it('should return 1 for single export', () => {
+    const exports: ExportInfo[] = [
+      {
+        name: 'getUserData',
+        type: 'function',
+        inferredDomain: 'user',
+        imports: ['react'],
+      },
+    ];
+
+    expect(calculateCohesion(exports)).toBe(1);
+  });
+
+  it('should return 1 for test files regardless of domains or imports', () => {
+    const exports: ExportInfo[] = [
+      { name: 'testUserLogin', type: 'function', inferredDomain: 'user', imports: ['react'] },
+      { name: 'testProductView', type: 'function', inferredDomain: 'product', imports: [] },
+    ];
+
+    const cohesion = calculateCohesion(exports, 'src/utils/test-helpers.ts');
+    expect(cohesion).toBe(1);
+  });
+});
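Note: the `>= 0.6` bound in the second test falls straight out of the 60/40 weighting documented on `calculateEnhancedCohesion` in the analyzer.ts diff below. A minimal standalone sketch of that arithmetic (not package code):

```ts
// Two exports in distinct domains with identical three-package import lists.

// Domain term: counts {user: 1, product: 1} -> p = 0.5 each
const entropy = -(0.5 * Math.log2(0.5) + 0.5 * Math.log2(0.5)); // 1 bit
const domainCohesion = 1 - entropy / Math.log2(2);              // 0

// Import term: identical sets -> Jaccard = |A ∩ B| / |A ∪ B| = 3 / 3
const importCohesion = 1;

// Weighted combination used by the analyzer: 60% imports, 40% domains
const enhanced = 0.6 * importCohesion + 0.4 * domainCohesion;   // 0.6
```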
package/src/analyzer.ts
CHANGED
@@ -1,4 +1,4 @@
-import { estimateTokens } from '@aiready/core';
+import { estimateTokens, parseFileExports, calculateImportSimilarity, type ExportWithImports } from '@aiready/core';
 import type {
   ContextAnalysisResult,
   DependencyGraph,
@@ -6,25 +6,87 @@ import type {
   ExportInfo,
   ModuleCluster,
 } from './types';
+import { buildCoUsageMatrix, buildTypeGraph, inferDomainFromSemantics } from './semantic-analysis';

 interface FileContent {
   file: string;
   content: string;
 }

+/**
+ * Auto-detect domain keywords from workspace folder structure
+ * Extracts unique folder names from file paths as potential domain keywords
+ */
+function extractDomainKeywordsFromPaths(files: FileContent[]): string[] {
+  const folderNames = new Set<string>();
+
+  for (const { file } of files) {
+    const segments = file.split('/');
+    // Extract meaningful folder names (skip common infrastructure folders)
+    const skipFolders = new Set(['src', 'lib', 'dist', 'build', 'node_modules', 'test', 'tests', '__tests__', 'spec', 'e2e', 'scripts', 'components', 'utils', 'helpers', 'util', 'helper', 'api', 'apis']);
+
+    for (const segment of segments) {
+      const normalized = segment.toLowerCase();
+      if (normalized && !skipFolders.has(normalized) && !normalized.includes('.')) {
+        // Singularize common plural forms for better matching
+        const singular = singularize(normalized);
+        folderNames.add(singular);
+      }
+    }
+  }
+
+  return Array.from(folderNames);
+}
+
+/**
+ * Simple singularization for common English plurals
+ */
+function singularize(word: string): string {
+  // Handle irregular plurals
+  const irregulars: Record<string, string> = {
+    people: 'person',
+    children: 'child',
+    men: 'man',
+    women: 'woman',
+  };
+
+  if (irregulars[word]) {
+    return irregulars[word];
+  }
+
+  // Common plural patterns
+  if (word.endsWith('ies')) {
+    return word.slice(0, -3) + 'y'; // categories -> category
+  }
+  if (word.endsWith('ses')) {
+    return word.slice(0, -2); // classes -> class
+  }
+  if (word.endsWith('s') && word.length > 3) {
+    return word.slice(0, -1); // orders -> order
+  }
+
+  return word;
+}
+
 /**
  * Build a dependency graph from file contents
  */
 export function buildDependencyGraph(
-  files: FileContent[]
+  files: FileContent[],
 ): DependencyGraph {
   const nodes = new Map<string, DependencyNode>();
   const edges = new Map<string, Set<string>>();

-  //
+  // Auto-detect domain keywords from workspace folder structure
+  const autoDetectedKeywords = extractDomainKeywordsFromPaths(files);
+
+  // First pass: Create nodes with folder-based domain inference
   for (const { file, content } of files) {
     const imports = extractImportsFromContent(content);
-
+
+    // Use AST-based extraction for better accuracy, fallback to regex
+    const exports = extractExportsWithAST(content, file, { domainKeywords: autoDetectedKeywords }, imports);
+
     const tokenCost = estimateTokens(content);
     const linesOfCode = content.split('\n').length;

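To make the auto-detection concrete, here is a hand trace of `extractDomainKeywordsFromPaths` on a hypothetical file set (traced against the logic above, not output from running the package):

```ts
const files = [
  { file: 'src/orders/invoice.ts', content: '' },
  { file: 'src/payments/stripe/charge.ts', content: '' },
  { file: 'src/utils/format.ts', content: '' },
];

// 'src' and 'utils' are in skipFolders; 'invoice.ts', 'charge.ts' and
// 'format.ts' contain '.' and are skipped. The remaining folder names are
// lowercased and singularized: 'orders' -> 'order', 'payments' -> 'payment',
// 'stripe' -> 'stripe'.
// extractDomainKeywordsFromPaths(files) => ['order', 'payment', 'stripe']
```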
@@ -39,7 +101,39 @@ export function buildDependencyGraph(
     edges.set(file, new Set(imports));
   }

-
+  // Second pass: Build semantic analysis graphs
+  const graph: DependencyGraph = { nodes, edges };
+  const coUsageMatrix = buildCoUsageMatrix(graph);
+  const typeGraph = buildTypeGraph(graph);
+
+  // Add semantic data to graph
+  graph.coUsageMatrix = coUsageMatrix;
+  graph.typeGraph = typeGraph;
+
+  // Third pass: Enhance domain assignments with semantic analysis
+  for (const [file, node] of nodes) {
+    for (const exp of node.exports) {
+      // Get semantic domain assignments
+      const semanticAssignments = inferDomainFromSemantics(
+        file,
+        exp.name,
+        graph,
+        coUsageMatrix,
+        typeGraph,
+        exp.typeReferences
+      );
+
+      // Add multi-domain assignments with confidence scores
+      exp.domains = semanticAssignments;
+
+      // Keep inferredDomain for backwards compatibility (use highest confidence)
+      if (semanticAssignments.length > 0) {
+        exp.inferredDomain = semanticAssignments[0].domain;
+      }
+    }
+  }
+
+  return graph;
 }

 /**
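The `DomainAssignment` shape lives in package/src/types.ts, which this diff only summarizes (+36 -1). Judging from the `semanticAssignments[0].domain` usage above and the type names re-exported in the index.ts diff below, an export record after the third pass plausibly looks like this (fields and values are illustrative assumptions, not confirmed by the diff):

```ts
const exp = {
  name: 'createOrderInvoice', // hypothetical export name
  type: 'function',
  // Multi-domain assignments with confidence scores, highest first:
  domains: [
    { domain: 'order', confidence: 0.8 },
    { domain: 'invoice', confidence: 0.5 },
  ],
  // Backwards-compatible single domain = domains[0].domain:
  inferredDomain: 'order',
};
```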
@@ -60,8 +154,8 @@ function extractImportsFromContent(content: string): string[] {
   let match;
   while ((match = pattern.exec(content)) !== null) {
     const importPath = match[1];
-
-
+    // Exclude only node built-ins (node:), include all local and aliased imports
+    if (importPath && !importPath.startsWith('node:')) {
       imports.push(importPath);
     }
   }
@@ -199,41 +293,12 @@ export function detectCircularDependencies(

 /**
  * Calculate cohesion score (how related are exports in a file)
- * Uses
+ * Uses enhanced calculation combining domain-based and import-based analysis
  * @param exports - Array of export information
  * @param filePath - Optional file path for context-aware scoring
  */
 export function calculateCohesion(exports: ExportInfo[], filePath?: string): number {
-
-  if (exports.length === 1) return 1; // Single export = perfect cohesion
-
-  // Special case: Test/mock/fixture files are expected to have multi-domain exports
-  // They serve a single purpose (testing) even if they mock different domains
-  if (filePath && isTestFile(filePath)) {
-    return 1; // Test utilities are inherently cohesive despite mixed domains
-  }
-
-  const domains = exports.map((e) => e.inferredDomain || 'unknown');
-  const domainCounts = new Map<string, number>();
-
-  for (const domain of domains) {
-    domainCounts.set(domain, (domainCounts.get(domain) || 0) + 1);
-  }
-
-  // Calculate Shannon entropy
-  const total = domains.length;
-  let entropy = 0;
-
-  for (const count of domainCounts.values()) {
-    const p = count / total;
-    if (p > 0) {
-      entropy -= p * Math.log2(p);
-    }
-  }
-
-  // Normalize to 0-1 (higher = better cohesion)
-  const maxEntropy = Math.log2(total);
-  return maxEntropy > 0 ? 1 - entropy / maxEntropy : 1;
+  return calculateEnhancedCohesion(exports, filePath);
 }

 /**
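The Shannon-entropy body removed here is not gone: it reappears at the end of this file as `calculateDomainCohesion`. For reference, a hand-worked example of what it computes:

```ts
// Domains ['user', 'user', 'product']: counts user = 2, product = 1, total = 3
const entropy = -((2 / 3) * Math.log2(2 / 3) + (1 / 3) * Math.log2(1 / 3)); // ≈ 0.918
const cohesion = 1 - entropy / Math.log2(3);                                // ≈ 0.42

// Two exports in two distinct domains give entropy = maxEntropy, so the
// score bottoms out at 0, which is why the first test above expects < 0.5.
```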
@@ -335,7 +400,12 @@ export function detectModuleClusters(
  * Extract export information from file content
  * TODO: Use proper AST parsing for better accuracy
  */
-function extractExports(
+function extractExports(
+  content: string,
+  filePath?: string,
+  domainOptions?: { domainKeywords?: string[]; domainPatterns?: string[]; pathDomainMap?: Record<string, string> },
+  fileImports?: string[]
+): ExportInfo[] {
   const exports: ExportInfo[] = [];

   // Simple regex-based extraction (improve with AST later)
@@ -362,7 +432,7 @@ function extractExports(content: string): ExportInfo[] {
   while ((match = pattern.exec(content)) !== null) {
     const name = match[1] || 'default';
     const type = types[index];
-    const inferredDomain = inferDomain(name);
+    const inferredDomain = inferDomain(name, filePath, domainOptions, fileImports);

     exports.push({ name, type, inferredDomain });
   }
@@ -375,12 +445,29 @@ function extractExports(content: string): ExportInfo[] {
  * Infer domain from export name
  * Uses common naming patterns with word boundary matching
  */
-function inferDomain(
+function inferDomain(
+  name: string,
+  filePath?: string,
+  domainOptions?: { domainKeywords?: string[] },
+  fileImports?: string[]
+): string {
   const lower = name.toLowerCase();

+  // Tokenize identifier: split camelCase, snake_case, kebab-case, and numbers
+  const tokens = Array.from(
+    new Set(
+      lower
+        .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
+        .replace(/[^a-z0-9]+/gi, ' ')
+        .split(' ')
+        .filter(Boolean)
+    )
+  );
+
   // Domain keywords ordered from most specific to most general
   // This prevents generic terms like 'util' from matching before specific domains
-
+  // NOTE: 'api', 'util', 'helper' are intentionally excluded as they are too generic
+  const defaultKeywords = [
     'authentication',
     'authorization',
     'payment',
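A standalone restatement of the tokenizer for hand testing (it mirrors the two regexes above; the results are traces, not package output):

```ts
const tokenize = (name: string) =>
  Array.from(new Set(
    name.toLowerCase()
      .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
      .replace(/[^a-z0-9]+/gi, ' ')
      .split(' ')
      .filter(Boolean)
  ));

tokenize('process_payment_intent'); // ['process', 'payment', 'intent'] -> matches 'payment'
tokenize('payment-gateway');        // ['payment', 'gateway']
tokenize('processPaymentIntent');   // ['processpaymentintent']
// The last case collapses to one token: the input is lowercased before the
// camelCase regex runs, so /([a-z0-9])([A-Z])/ never fires. Such names fall
// through to the import-path and file-path checks added further below.
```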
@@ -398,15 +485,15 @@ function inferDomain(name: string): string {
     'model',
     'view',
     'auth',
-    'api',
-    'helper',
-    'util',
   ];

+  const domainKeywords = domainOptions?.domainKeywords && domainOptions.domainKeywords.length
+    ? [...domainOptions.domainKeywords, ...defaultKeywords]
+    : defaultKeywords;
+
   // Try word boundary matching first for more accurate detection
   for (const keyword of domainKeywords) {
-
-    if (wordBoundaryPattern.test(name)) {
+    if (tokens.includes(keyword)) {
       return keyword;
     }
   }
@@ -418,6 +505,52 @@ function inferDomain(name: string): string {
     }
   }

+  // Import-path domain inference: analyze import statements for domain hints
+  if (fileImports && fileImports.length > 0) {
+    for (const importPath of fileImports) {
+      // Parse all segments, including those after '@' or '.'
+      // e.g., '@/orders/service' -> ['orders', 'service']
+      //       '../payments/processor' -> ['payments', 'processor']
+      const allSegments = importPath.split('/');
+      const relevantSegments = allSegments.filter(s => {
+        if (!s) return false;
+        // Skip '.' and '..' but keep everything else
+        if (s === '.' || s === '..') return false;
+        // Skip '@' prefix but keep the path after it
+        if (s.startsWith('@') && s.length === 1) return false;
+        // Remove '@' prefix from scoped imports like '@/orders'
+        return true;
+      }).map(s => s.startsWith('@') ? s.slice(1) : s);
+
+      for (const segment of relevantSegments) {
+        const segLower = segment.toLowerCase();
+        const singularSegment = singularize(segLower);
+
+        // Check if any domain keyword matches the import path segment (with singularization)
+        for (const keyword of domainKeywords) {
+          if (singularSegment === keyword || segLower === keyword || segLower.includes(keyword)) {
+            return keyword;
+          }
+        }
+      }
+    }
+  }
+
+  // Path-based fallback: check file path segments
+  if (filePath) {
+    // Auto-detect from path by checking against domain keywords (with singularization)
+    const pathSegments = filePath.toLowerCase().split('/');
+    for (const segment of pathSegments) {
+      const singularSegment = singularize(segment);
+
+      for (const keyword of domainKeywords) {
+        if (singularSegment === keyword || segment === keyword || segment.includes(keyword)) {
+          return keyword;
+        }
+      }
+    }
+  }
+
   return 'unknown';
 }

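Tracing the new import-path fallback on a hypothetical alias import (keyword availability depends on the workspace):

```ts
// Assume 'order' was auto-detected as a keyword from the workspace folders.
// '@/orders/service'.split('/')      -> ['@', 'orders', 'service']
// the filter drops the bare '@'      -> ['orders', 'service']
// singularize('orders') === 'order'  -> matches keyword 'order'
//
// So an export whose name carries no domain token still gets a domain:
// inferDomain('formatRow', 'src/shared/table.ts',
//             { domainKeywords: ['order'] }, ['@/orders/service']) === 'order'
```

The file-path fallback then covers exports whose names and imports both yield nothing.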
@@ -460,3 +593,136 @@ function generateConsolidationPlan(

   return plan;
 }
+
+/**
+ * Extract exports using AST parsing (enhanced version)
+ * Falls back to regex if AST parsing fails
+ */
+export function extractExportsWithAST(
+  content: string,
+  filePath: string,
+  domainOptions?: { domainKeywords?: string[] },
+  fileImports?: string[]
+): ExportInfo[] {
+  try {
+    const { exports: astExports } = parseFileExports(content, filePath);
+
+    return astExports.map(exp => ({
+      name: exp.name,
+      type: exp.type,
+      inferredDomain: inferDomain(exp.name, filePath, domainOptions, fileImports),
+      imports: exp.imports,
+      dependencies: exp.dependencies,
+    }));
+  } catch (error) {
+    // Fallback to regex-based extraction
+    return extractExports(content, filePath, domainOptions, fileImports);
+  }
+}
+
+/**
+ * Calculate enhanced cohesion score using both domain inference and import similarity
+ *
+ * This combines:
+ * 1. Domain-based cohesion (entropy of inferred domains)
+ * 2. Import-based cohesion (Jaccard similarity of shared imports)
+ *
+ * Weight: 60% import-based, 40% domain-based (import analysis is more reliable)
+ */
+export function calculateEnhancedCohesion(
+  exports: ExportInfo[],
+  filePath?: string
+): number {
+  if (exports.length === 0) return 1;
+  if (exports.length === 1) return 1;
+
+  // Special case for test files
+  if (filePath && isTestFile(filePath)) {
+    return 1;
+  }
+
+  // Calculate domain-based cohesion (existing method)
+  const domainCohesion = calculateDomainCohesion(exports);
+
+  // Calculate import-based cohesion if imports are available
+  const hasImportData = exports.some(e => e.imports && e.imports.length > 0);
+
+  if (!hasImportData) {
+    // No import data available, use domain-based only
+    return domainCohesion;
+  }
+
+  const importCohesion = calculateImportBasedCohesion(exports);
+
+  // Weighted combination: 60% import-based, 40% domain-based
+  return importCohesion * 0.6 + domainCohesion * 0.4;
+}
+
+/**
+ * Calculate cohesion based on shared imports (Jaccard similarity)
+ */
+function calculateImportBasedCohesion(exports: ExportInfo[]): number {
+  const exportsWithImports = exports.filter(e => e.imports && e.imports.length > 0);
+
+  if (exportsWithImports.length < 2) {
+    return 1; // Not enough data
+  }
+
+  // Calculate pairwise import similarity
+  let totalSimilarity = 0;
+  let comparisons = 0;
+
+  for (let i = 0; i < exportsWithImports.length; i++) {
+    for (let j = i + 1; j < exportsWithImports.length; j++) {
+      const exp1 = exportsWithImports[i] as ExportInfo & { imports: string[] };
+      const exp2 = exportsWithImports[j] as ExportInfo & { imports: string[] };
+
+      const similarity = calculateJaccardSimilarity(exp1.imports, exp2.imports);
+      totalSimilarity += similarity;
+      comparisons++;
+    }
+  }
+
+  return comparisons > 0 ? totalSimilarity / comparisons : 1;
+}
+
+/**
+ * Calculate Jaccard similarity between two arrays
+ */
+function calculateJaccardSimilarity(arr1: string[], arr2: string[]): number {
+  if (arr1.length === 0 && arr2.length === 0) return 1;
+  if (arr1.length === 0 || arr2.length === 0) return 0;
+
+  const set1 = new Set(arr1);
+  const set2 = new Set(arr2);
+
+  const intersection = new Set([...set1].filter(x => set2.has(x)));
+  const union = new Set([...set1, ...set2]);
+
+  return intersection.size / union.size;
+}
+
+/**
+ * Calculate domain-based cohesion (existing entropy method)
+ */
+function calculateDomainCohesion(exports: ExportInfo[]): number {
+  const domains = exports.map((e) => e.inferredDomain || 'unknown');
+  const domainCounts = new Map<string, number>();
+
+  for (const domain of domains) {
+    domainCounts.set(domain, (domainCounts.get(domain) || 0) + 1);
+  }
+
+  const total = domains.length;
+  let entropy = 0;
+
+  for (const count of domainCounts.values()) {
+    const p = count / total;
+    if (p > 0) {
+      entropy -= p * Math.log2(p);
+    }
+  }
+
+  const maxEntropy = Math.log2(total);
+  return maxEntropy > 0 ? 1 - entropy / maxEntropy : 1;
+}
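Taken together, these helpers reproduce the expectations in enhanced-cohesion.test.ts. A standalone restatement of the Jaccard step for hand checking (illustrative, not imported from the package):

```ts
function jaccard(a: string[], b: string[]): number {
  if (a.length === 0 && b.length === 0) return 1;
  const s1 = new Set(a);
  const s2 = new Set(b);
  const inter = [...s1].filter((x) => s2.has(x)).length;
  const union = new Set([...s1, ...s2]).size;
  return union === 0 ? 0 : inter / union;
}

jaccard(['react', 'axios'], ['react', 'axios']);   // 1   -> 0.6 * 1 + 0.4 * 0 = 0.6
jaccard(['react', 'axios'], ['lodash', 'moment']); // 0   -> 0.6 * 0 + 0.4 * 0 = 0
jaccard(['react', 'axios'], ['react', 'vue']);     // 1/3 (one shared of three total)
```

The first two calls are exactly the shared-imports and disjoint-imports fixtures from the third test, with both exports in distinct domains (domain term 0), which is why `cohesionWithShared > cohesionWithoutShared` holds.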
package/src/index.ts
CHANGED
@@ -15,9 +15,41 @@ import type {
   ContextAnalysisResult,
   ContextSummary,
   ModuleCluster,
+  DomainAssignment,
+  DomainSignals,
+  CoUsageData,
+  TypeDependency,
 } from './types';
-
-
+import {
+  buildCoUsageMatrix,
+  buildTypeGraph,
+  findSemanticClusters,
+  calculateDomainConfidence,
+  inferDomainFromSemantics,
+  getCoUsageData,
+  findConsolidationCandidates,
+} from './semantic-analysis';
+
+export type {
+  ContextAnalyzerOptions,
+  ContextAnalysisResult,
+  ContextSummary,
+  ModuleCluster,
+  DomainAssignment,
+  DomainSignals,
+  CoUsageData,
+  TypeDependency,
+};
+
+export {
+  buildCoUsageMatrix,
+  buildTypeGraph,
+  findSemanticClusters,
+  calculateDomainConfidence,
+  inferDomainFromSemantics,
+  getCoUsageData,
+  findConsolidationCandidates,
+};

 /**
  * Generate smart defaults for context analysis based on repository size
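With these re-exports the semantic-analysis helpers become part of the package's public surface. A hypothetical consumer sketch follows; only the names come from this diff, and the signatures are assumptions based on how `buildCoUsageMatrix` and `buildTypeGraph` are called inside `buildDependencyGraph` above:

```ts
import { buildCoUsageMatrix, buildTypeGraph } from '@aiready/context-analyzer';

declare const graph: any; // a DependencyGraph produced by the package's own builder

const coUsage = buildCoUsageMatrix(graph); // stored as graph.coUsageMatrix internally
const types = buildTypeGraph(graph);       // stored as graph.typeGraph internally
```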
|