@aiready/context-analyzer 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +10 -10
- package/.turbo/turbo-test.log +11 -6
- package/README.md +5 -3
- package/SEMANTIC-VALIDATION.md +235 -0
- package/dist/chunk-AEK3MZC5.mjs +709 -0
- package/dist/chunk-DMRZMS2U.mjs +964 -0
- package/dist/chunk-HQNHM2X7.mjs +997 -0
- package/dist/chunk-I54HL4FZ.mjs +781 -0
- package/dist/chunk-IRWCPDWD.mjs +779 -0
- package/dist/chunk-PVVCCE6W.mjs +755 -0
- package/dist/chunk-RYIB5CWD.mjs +781 -0
- package/dist/cli.js +234 -16
- package/dist/cli.mjs +1 -1
- package/dist/index.d.mts +90 -1
- package/dist/index.d.ts +90 -1
- package/dist/index.js +311 -18
- package/dist/index.mjs +17 -3
- package/package.json +2 -2
- package/src/__tests__/auto-detection.test.ts +156 -0
- package/src/analyzer.ts +182 -18
- package/src/index.ts +34 -2
- package/src/semantic-analysis.ts +287 -0
- package/src/types.ts +33 -1
- package/COHESION-IMPROVEMENTS.md +0 -202
package/src/analyzer.ts
CHANGED
@@ -6,27 +6,86 @@ import type {
   ExportInfo,
   ModuleCluster,
 } from './types';
+import { buildCoUsageMatrix, buildTypeGraph, inferDomainFromSemantics } from './semantic-analysis';
 
 interface FileContent {
   file: string;
   content: string;
 }
 
+/**
+ * Auto-detect domain keywords from workspace folder structure
+ * Extracts unique folder names from file paths as potential domain keywords
+ */
+function extractDomainKeywordsFromPaths(files: FileContent[]): string[] {
+  const folderNames = new Set<string>();
+
+  for (const { file } of files) {
+    const segments = file.split('/');
+    // Extract meaningful folder names (skip common infrastructure folders)
+    const skipFolders = new Set(['src', 'lib', 'dist', 'build', 'node_modules', 'test', 'tests', '__tests__', 'spec', 'e2e', 'scripts', 'components', 'utils', 'helpers', 'util', 'helper', 'api', 'apis']);
+
+    for (const segment of segments) {
+      const normalized = segment.toLowerCase();
+      if (normalized && !skipFolders.has(normalized) && !normalized.includes('.')) {
+        // Singularize common plural forms for better matching
+        const singular = singularize(normalized);
+        folderNames.add(singular);
+      }
+    }
+  }
+
+  return Array.from(folderNames);
+}
+
+/**
+ * Simple singularization for common English plurals
+ */
+function singularize(word: string): string {
+  // Handle irregular plurals
+  const irregulars: Record<string, string> = {
+    people: 'person',
+    children: 'child',
+    men: 'man',
+    women: 'woman',
+  };
+
+  if (irregulars[word]) {
+    return irregulars[word];
+  }
+
+  // Common plural patterns
+  if (word.endsWith('ies')) {
+    return word.slice(0, -3) + 'y'; // categories -> category
+  }
+  if (word.endsWith('ses')) {
+    return word.slice(0, -2); // classes -> class
+  }
+  if (word.endsWith('s') && word.length > 3) {
+    return word.slice(0, -1); // orders -> order
+  }
+
+  return word;
+}
+
 /**
  * Build a dependency graph from file contents
  */
 export function buildDependencyGraph(
-  files: FileContent[]
+  files: FileContent[],
 ): DependencyGraph {
   const nodes = new Map<string, DependencyNode>();
   const edges = new Map<string, Set<string>>();
 
-  //
+  // Auto-detect domain keywords from workspace folder structure
+  const autoDetectedKeywords = extractDomainKeywordsFromPaths(files);
+
+  // First pass: Create nodes with folder-based domain inference
   for (const { file, content } of files) {
     const imports = extractImportsFromContent(content);
 
     // Use AST-based extraction for better accuracy, fallback to regex
-    const exports = extractExportsWithAST(content, file);
+    const exports = extractExportsWithAST(content, file, { domainKeywords: autoDetectedKeywords }, imports);
 
     const tokenCost = estimateTokens(content);
     const linesOfCode = content.split('\n').length;
@@ -42,7 +101,39 @@ export function buildDependencyGraph(
     edges.set(file, new Set(imports));
   }
 
-  return { nodes, edges };
+  // Second pass: Build semantic analysis graphs
+  const graph: DependencyGraph = { nodes, edges };
+  const coUsageMatrix = buildCoUsageMatrix(graph);
+  const typeGraph = buildTypeGraph(graph);
+
+  // Add semantic data to graph
+  graph.coUsageMatrix = coUsageMatrix;
+  graph.typeGraph = typeGraph;
+
+  // Third pass: Enhance domain assignments with semantic analysis
+  for (const [file, node] of nodes) {
+    for (const exp of node.exports) {
+      // Get semantic domain assignments
+      const semanticAssignments = inferDomainFromSemantics(
+        file,
+        exp.name,
+        graph,
+        coUsageMatrix,
+        typeGraph,
+        exp.typeReferences
+      );
+
+      // Add multi-domain assignments with confidence scores
+      exp.domains = semanticAssignments;
+
+      // Keep inferredDomain for backwards compatibility (use highest confidence)
+      if (semanticAssignments.length > 0) {
+        exp.inferredDomain = semanticAssignments[0].domain;
+      }
+    }
+  }
+
+  return graph;
 }
 
 /**
@@ -63,8 +154,8 @@ function extractImportsFromContent(content: string): string[] {
   let match;
   while ((match = pattern.exec(content)) !== null) {
     const importPath = match[1];
-
-
+    // Exclude only node built-ins (node:), include all local and aliased imports
+    if (importPath && !importPath.startsWith('node:')) {
       imports.push(importPath);
     }
   }
@@ -309,7 +400,12 @@ export function detectModuleClusters(
  * Extract export information from file content
  * TODO: Use proper AST parsing for better accuracy
  */
-function extractExports(content: string): ExportInfo[] {
+function extractExports(
+  content: string,
+  filePath?: string,
+  domainOptions?: { domainKeywords?: string[]; domainPatterns?: string[]; pathDomainMap?: Record<string, string> },
+  fileImports?: string[]
+): ExportInfo[] {
   const exports: ExportInfo[] = [];
 
   // Simple regex-based extraction (improve with AST later)
@@ -336,7 +432,7 @@ function extractExports(content: string): ExportInfo[] {
   while ((match = pattern.exec(content)) !== null) {
     const name = match[1] || 'default';
     const type = types[index];
-    const inferredDomain = inferDomain(name);
+    const inferredDomain = inferDomain(name, filePath, domainOptions, fileImports);
 
     exports.push({ name, type, inferredDomain });
   }
@@ -349,12 +445,29 @@ function extractExports(content: string): ExportInfo[] {
  * Infer domain from export name
  * Uses common naming patterns with word boundary matching
  */
-function inferDomain(name: string): string {
+function inferDomain(
+  name: string,
+  filePath?: string,
+  domainOptions?: { domainKeywords?: string[] },
+  fileImports?: string[]
+): string {
   const lower = name.toLowerCase();
 
+  // Tokenize identifier: split camelCase, snake_case, kebab-case, and numbers
+  const tokens = Array.from(
+    new Set(
+      lower
+        .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
+        .replace(/[^a-z0-9]+/gi, ' ')
+        .split(' ')
+        .filter(Boolean)
+    )
+  );
+
   // Domain keywords ordered from most specific to most general
   // This prevents generic terms like 'util' from matching before specific domains
-  const domainKeywords = [
+  // NOTE: 'api', 'util', 'helper' are intentionally excluded as they are too generic
+  const defaultKeywords = [
     'authentication',
     'authorization',
     'payment',
@@ -372,15 +485,15 @@ function inferDomain(name: string): string {
     'model',
     'view',
     'auth',
-    'api',
-    'helper',
-    'util',
   ];
 
+  const domainKeywords = domainOptions?.domainKeywords && domainOptions.domainKeywords.length
+    ? [...domainOptions.domainKeywords, ...defaultKeywords]
+    : defaultKeywords;
+
   // Try word boundary matching first for more accurate detection
   for (const keyword of domainKeywords) {
-
-    if (wordBoundaryPattern.test(name)) {
+    if (tokens.includes(keyword)) {
       return keyword;
     }
   }
@@ -392,6 +505,52 @@ function inferDomain(name: string): string {
     }
   }
 
+  // Import-path domain inference: analyze import statements for domain hints
+  if (fileImports && fileImports.length > 0) {
+    for (const importPath of fileImports) {
+      // Parse all segments, including those after '@' or '.'
+      // e.g., '@/orders/service' -> ['orders', 'service']
+      //       '../payments/processor' -> ['payments', 'processor']
+      const allSegments = importPath.split('/');
+      const relevantSegments = allSegments.filter(s => {
+        if (!s) return false;
+        // Skip '.' and '..' but keep everything else
+        if (s === '.' || s === '..') return false;
+        // Skip '@' prefix but keep the path after it
+        if (s.startsWith('@') && s.length === 1) return false;
+        // Remove '@' prefix from scoped imports like '@/orders'
+        return true;
+      }).map(s => s.startsWith('@') ? s.slice(1) : s);
+
+      for (const segment of relevantSegments) {
+        const segLower = segment.toLowerCase();
+        const singularSegment = singularize(segLower);
+
+        // Check if any domain keyword matches the import path segment (with singularization)
+        for (const keyword of domainKeywords) {
+          if (singularSegment === keyword || segLower === keyword || segLower.includes(keyword)) {
+            return keyword;
+          }
+        }
+      }
+    }
+  }
+
+  // Path-based fallback: check file path segments
+  if (filePath) {
+    // Auto-detect from path by checking against domain keywords (with singularization)
+    const pathSegments = filePath.toLowerCase().split('/');
+    for (const segment of pathSegments) {
+      const singularSegment = singularize(segment);
+
+      for (const keyword of domainKeywords) {
+        if (singularSegment === keyword || segment === keyword || segment.includes(keyword)) {
+          return keyword;
+        }
+      }
+    }
+  }
+
   return 'unknown';
 }
 
@@ -439,20 +598,25 @@ function generateConsolidationPlan(
  * Extract exports using AST parsing (enhanced version)
  * Falls back to regex if AST parsing fails
  */
-export function extractExportsWithAST(content: string, filePath: string): ExportInfo[] {
+export function extractExportsWithAST(
+  content: string,
+  filePath: string,
+  domainOptions?: { domainKeywords?: string[] },
+  fileImports?: string[]
+): ExportInfo[] {
   try {
     const { exports: astExports } = parseFileExports(content, filePath);
 
     return astExports.map(exp => ({
       name: exp.name,
       type: exp.type,
-      inferredDomain: inferDomain(exp.name),
+      inferredDomain: inferDomain(exp.name, filePath, domainOptions, fileImports),
       imports: exp.imports,
       dependencies: exp.dependencies,
     }));
  } catch (error) {
     // Fallback to regex-based extraction
-    return extractExports(content);
+    return extractExports(content, filePath, domainOptions, fileImports);
   }
 }
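How these pieces compose: buildDependencyGraph now derives candidate domain keywords from workspace folder names, and inferDomain falls back from export-name tokens to import-path segments to file-path segments before returning 'unknown'. A minimal sketch of driving the new signature (the fixture files are invented, and buildDependencyGraph is assumed to be importable from this module):

    import { buildDependencyGraph } from './analyzer';

    // Hypothetical fixture: the 'orders' and 'payments' folders singularize
    // into the auto-detected keywords 'order' and 'payment'.
    const files = [
      { file: 'src/orders/service.ts', content: "import { charge } from '../payments/charge';\nexport function submitOrder() {}" },
      { file: 'src/payments/charge.ts', content: 'export function charge() {}' },
    ];

    const graph = buildDependencyGraph(files);
    for (const node of graph.nodes.values()) {
      for (const exp of node.exports) {
        // inferredDomain keeps the legacy single label; domains carries the
        // scored multi-domain assignments produced by the third pass.
        console.log(exp.name, exp.inferredDomain, exp.domains);
      }
    }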
package/src/index.ts
CHANGED
@@ -15,9 +15,41 @@ import type {
   ContextAnalysisResult,
   ContextSummary,
   ModuleCluster,
+  DomainAssignment,
+  DomainSignals,
+  CoUsageData,
+  TypeDependency,
 } from './types';
-
-
+import {
+  buildCoUsageMatrix,
+  buildTypeGraph,
+  findSemanticClusters,
+  calculateDomainConfidence,
+  inferDomainFromSemantics,
+  getCoUsageData,
+  findConsolidationCandidates,
+} from './semantic-analysis';
+
+export type {
+  ContextAnalyzerOptions,
+  ContextAnalysisResult,
+  ContextSummary,
+  ModuleCluster,
+  DomainAssignment,
+  DomainSignals,
+  CoUsageData,
+  TypeDependency,
+};
+
+export {
+  buildCoUsageMatrix,
+  buildTypeGraph,
+  findSemanticClusters,
+  calculateDomainConfidence,
+  inferDomainFromSemantics,
+  getCoUsageData,
+  findConsolidationCandidates,
+};
 
 /**
  * Generate smart defaults for context analysis based on repository size
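Assuming the published entry point republishes this source file as-is (not verified against the dist/ bundles), the semantic-analysis helpers and types become part of the package's public API:

    import {
      buildCoUsageMatrix,
      findConsolidationCandidates,
      type DomainAssignment,
    } from '@aiready/context-analyzer';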
package/src/semantic-analysis.ts
ADDED
@@ -0,0 +1,287 @@
+import type { DependencyGraph, CoUsageData, TypeDependency, DomainAssignment, DomainSignals } from './types';
+
+/**
+ * Build co-usage matrix: track which files are imported together
+ *
+ * Files frequently imported together likely belong to the same semantic domain
+ */
+export function buildCoUsageMatrix(graph: DependencyGraph): Map<string, Map<string, number>> {
+  const coUsageMatrix = new Map<string, Map<string, number>>();
+
+  // For each file, track which other files are imported alongside it
+  for (const [sourceFile, node] of graph.nodes) {
+    const imports = node.imports;
+
+    // For each pair of imports in this file, increment their co-usage count
+    for (let i = 0; i < imports.length; i++) {
+      const fileA = imports[i];
+
+      if (!coUsageMatrix.has(fileA)) {
+        coUsageMatrix.set(fileA, new Map());
+      }
+
+      for (let j = i + 1; j < imports.length; j++) {
+        const fileB = imports[j];
+
+        // Increment bidirectional co-usage count
+        const fileAUsage = coUsageMatrix.get(fileA)!;
+        fileAUsage.set(fileB, (fileAUsage.get(fileB) || 0) + 1);
+
+        if (!coUsageMatrix.has(fileB)) {
+          coUsageMatrix.set(fileB, new Map());
+        }
+        const fileBUsage = coUsageMatrix.get(fileB)!;
+        fileBUsage.set(fileA, (fileBUsage.get(fileA) || 0) + 1);
+      }
+    }
+  }
+
+  return coUsageMatrix;
+}
+
+/**
+ * Extract type dependencies from AST exports
+ *
+ * Files that share types are semantically related
+ */
+export function buildTypeGraph(graph: DependencyGraph): Map<string, Set<string>> {
+  const typeGraph = new Map<string, Set<string>>();
+
+  for (const [file, node] of graph.nodes) {
+    for (const exp of node.exports) {
+      if (exp.typeReferences) {
+        for (const typeRef of exp.typeReferences) {
+          if (!typeGraph.has(typeRef)) {
+            typeGraph.set(typeRef, new Set());
+          }
+          typeGraph.get(typeRef)!.add(file);
+        }
+      }
+    }
+  }
+
+  return typeGraph;
+}
+
+/**
+ * Find semantic clusters using co-usage patterns
+ *
+ * Files with high co-usage counts belong in the same cluster
+ */
+export function findSemanticClusters(
+  coUsageMatrix: Map<string, Map<string, number>>,
+  minCoUsage: number = 3
+): Map<string, string[]> {
+  const clusters = new Map<string, string[]>();
+  const visited = new Set<string>();
+
+  // Simple clustering: group files with high co-usage
+  for (const [file, coUsages] of coUsageMatrix) {
+    if (visited.has(file)) continue;
+
+    const cluster: string[] = [file];
+    visited.add(file);
+
+    // Find strongly related files (co-imported >= minCoUsage times)
+    for (const [relatedFile, count] of coUsages) {
+      if (count >= minCoUsage && !visited.has(relatedFile)) {
+        cluster.push(relatedFile);
+        visited.add(relatedFile);
+      }
+    }
+
+    if (cluster.length > 1) {
+      // Use first file as cluster ID
+      clusters.set(file, cluster);
+    }
+  }
+
+  return clusters;
+}
+
+/**
+ * Calculate confidence score for domain assignment based on multiple signals
+ */
+export function calculateDomainConfidence(signals: DomainSignals): number {
+  const weights = {
+    coUsage: 0.35,        // Strongest signal: actual usage patterns
+    typeReference: 0.30,  // Strong signal: shared types
+    exportName: 0.15,     // Medium signal: identifier semantics
+    importPath: 0.10,     // Weaker signal: path structure
+    folderStructure: 0.10 // Weakest signal: organization convention
+  };
+
+  let confidence = 0;
+  if (signals.coUsage) confidence += weights.coUsage;
+  if (signals.typeReference) confidence += weights.typeReference;
+  if (signals.exportName) confidence += weights.exportName;
+  if (signals.importPath) confidence += weights.importPath;
+  if (signals.folderStructure) confidence += weights.folderStructure;
+
+  return confidence;
+}
+
+/**
+ * Infer domain from semantic analysis (co-usage + types)
+ *
+ * This replaces the folder-based heuristic with actual code relationships
+ */
+export function inferDomainFromSemantics(
+  file: string,
+  exportName: string,
+  graph: DependencyGraph,
+  coUsageMatrix: Map<string, Map<string, number>>,
+  typeGraph: Map<string, Set<string>>,
+  exportTypeRefs?: string[]
+): DomainAssignment[] {
+  const assignments: DomainAssignment[] = [];
+  const domainSignals = new Map<string, DomainSignals>();
+
+  // 1. Check co-usage patterns
+  const coUsages = coUsageMatrix.get(file) || new Map();
+  const strongCoUsages = Array.from(coUsages.entries())
+    .filter(([_, count]) => count >= 3)
+    .map(([coFile]) => coFile);
+
+  // Extract domains from frequently co-imported files
+  for (const coFile of strongCoUsages) {
+    const coNode = graph.nodes.get(coFile);
+    if (coNode) {
+      for (const exp of coNode.exports) {
+        if (exp.inferredDomain && exp.inferredDomain !== 'unknown') {
+          const domain = exp.inferredDomain;
+          if (!domainSignals.has(domain)) {
+            domainSignals.set(domain, {
+              coUsage: false,
+              typeReference: false,
+              exportName: false,
+              importPath: false,
+              folderStructure: false
+            });
+          }
+          domainSignals.get(domain)!.coUsage = true;
+        }
+      }
+    }
+  }
+
+  // 2. Check type references
+  if (exportTypeRefs) {
+    for (const typeRef of exportTypeRefs) {
+      const filesWithType = typeGraph.get(typeRef);
+      if (filesWithType) {
+        for (const typeFile of filesWithType) {
+          if (typeFile !== file) {
+            const typeNode = graph.nodes.get(typeFile);
+            if (typeNode) {
+              for (const exp of typeNode.exports) {
+                if (exp.inferredDomain && exp.inferredDomain !== 'unknown') {
+                  const domain = exp.inferredDomain;
+                  if (!domainSignals.has(domain)) {
+                    domainSignals.set(domain, {
+                      coUsage: false,
+                      typeReference: false,
+                      exportName: false,
+                      importPath: false,
+                      folderStructure: false
+                    });
+                  }
+                  domainSignals.get(domain)!.typeReference = true;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // 3. Build domain assignments with confidence scores
+  for (const [domain, signals] of domainSignals) {
+    const confidence = calculateDomainConfidence(signals);
+    if (confidence >= 0.3) { // Minimum confidence threshold
+      assignments.push({ domain, confidence, signals });
+    }
+  }
+
+  // Sort by confidence (highest first)
+  assignments.sort((a, b) => b.confidence - a.confidence);
+
+  return assignments;
+}
+
+/**
+ * Get co-usage data for a specific file
+ */
+export function getCoUsageData(
+  file: string,
+  coUsageMatrix: Map<string, Map<string, number>>
+): CoUsageData {
+  const coImportedWith = coUsageMatrix.get(file) || new Map();
+
+  // Find files that import both this file and others
+  const sharedImporters: string[] = [];
+  // This would require inverse mapping from imports, simplified for now
+
+  return {
+    file,
+    coImportedWith,
+    sharedImporters
+  };
+}
+
+/**
+ * Find files that should be consolidated based on semantic similarity
+ *
+ * High co-usage + shared types = strong consolidation candidate
+ */
+export function findConsolidationCandidates(
+  graph: DependencyGraph,
+  coUsageMatrix: Map<string, Map<string, number>>,
+  typeGraph: Map<string, Set<string>>,
+  minCoUsage: number = 5,
+  minSharedTypes: number = 2
+): Array<{ files: string[]; reason: string; strength: number }> {
+  const candidates: Array<{ files: string[]; reason: string; strength: number }> = [];
+
+  // Find file pairs with both high co-usage AND shared types
+  for (const [fileA, coUsages] of coUsageMatrix) {
+    const nodeA = graph.nodes.get(fileA);
+    if (!nodeA) continue;
+
+    for (const [fileB, coUsageCount] of coUsages) {
+      if (fileB <= fileA) continue; // Avoid duplicates
+      if (coUsageCount < minCoUsage) continue;
+
+      const nodeB = graph.nodes.get(fileB);
+      if (!nodeB) continue;
+
+      // Count shared types
+      const typesA = new Set(nodeA.exports.flatMap(e => e.typeReferences || []));
+      const typesB = new Set(nodeB.exports.flatMap(e => e.typeReferences || []));
+      const sharedTypes = Array.from(typesA).filter(t => typesB.has(t));
+
+      if (sharedTypes.length >= minSharedTypes) {
+        const strength = (coUsageCount / 10) + (sharedTypes.length / 5);
+        candidates.push({
+          files: [fileA, fileB],
+          reason: `High co-usage (${coUsageCount}x) and ${sharedTypes.length} shared types`,
+          strength
+        });
+      } else if (coUsageCount >= minCoUsage * 2) {
+        // Very high co-usage alone is enough
+        const strength = coUsageCount / 10;
+        candidates.push({
+          files: [fileA, fileB],
+          reason: `Very high co-usage (${coUsageCount}x)`,
+          strength
+        });
+      }
+    }
+  }
+
+  // Sort by strength (highest first)
+  candidates.sort((a, b) => b.strength - a.strength);
+
+  return candidates;
+}
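The fixed weights make confidence scores easy to predict: co-usage plus a shared type yields 0.35 + 0.30 = 0.65, comfortably above the 0.3 cutoff applied in inferDomainFromSemantics, while folder structure alone (0.10) is filtered out. A small check against calculateDomainConfidence:

    import { calculateDomainConfidence } from './semantic-analysis';

    // Strong signals: 0.35 (coUsage) + 0.30 (typeReference) -> kept
    console.log(calculateDomainConfidence({
      coUsage: true,
      typeReference: true,
      exportName: false,
      importPath: false,
      folderStructure: false,
    })); // ~0.65 (floating-point sum of 0.35 + 0.30)

    // Weak signal alone: 0.10 -> dropped by the >= 0.3 threshold
    console.log(calculateDomainConfidence({
      coUsage: false,
      typeReference: false,
      exportName: false,
      importPath: false,
      folderStructure: true,
    })); // 0.1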
package/src/types.ts
CHANGED
@@ -87,6 +87,8 @@ export interface ContextSummary {
 export interface DependencyGraph {
   nodes: Map<string, DependencyNode>;
   edges: Map<string, Set<string>>; // file -> dependencies
+  coUsageMatrix?: Map<string, Map<string, number>>; // file -> file -> co-usage count
+  typeGraph?: Map<string, Set<string>>; // type -> files that reference it
 }
 
 export interface DependencyNode {
@@ -95,13 +97,43 @@ export interface DependencyNode {
   exports: ExportInfo[];
   tokenCost: number;
   linesOfCode: number;
+  exportedBy?: string[]; // Files that import exports from this file
+  sharedTypes?: string[]; // Types shared with other files
 }
 
 export interface ExportInfo {
   name: string;
   type: 'function' | 'class' | 'const' | 'type' | 'interface' | 'default';
-  inferredDomain?: string; // Inferred from name/usage
+  inferredDomain?: string; // Inferred from name/usage (legacy single domain)
+  domains?: DomainAssignment[]; // Multi-domain support with confidence scores
   imports?: string[]; // Imports used by this export (for import-based cohesion)
   dependencies?: string[]; // Other exports from same file this depends on
+  typeReferences?: string[]; // TypeScript types referenced by this export
+}
+
+export interface DomainAssignment {
+  domain: string;
+  confidence: number; // 0-1, how confident are we in this assignment
+  signals: DomainSignals; // Which signals contributed to this assignment
+}
+
+export interface DomainSignals {
+  folderStructure: boolean; // Matched from folder name
+  importPath: boolean; // Matched from import paths
+  typeReference: boolean; // Matched from TypeScript type usage
+  coUsage: boolean; // Matched from co-usage patterns
+  exportName: boolean; // Matched from export identifier name
+}
+
+export interface CoUsageData {
+  file: string;
+  coImportedWith: Map<string, number>; // file -> count of times imported together
+  sharedImporters: string[]; // files that import both this and another file
+}
+
+export interface TypeDependency {
+  typeName: string;
+  definedIn: string; // file where type is defined
+  usedBy: string[]; // files that reference this type
 }
 
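For orientation, here is what one ExportInfo might look like after the semantic pass (values are illustrative, not taken from a real run):

    import type { ExportInfo } from './types';

    const example: ExportInfo = {
      name: 'submitOrder',
      type: 'function',
      inferredDomain: 'order', // legacy field: the highest-confidence domain
      domains: [
        {
          domain: 'order',
          confidence: 0.65,
          signals: { coUsage: true, typeReference: true, exportName: false, importPath: false, folderStructure: false },
        },
      ],
      typeReferences: ['Order'],
    };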