@aiready/context-analyzer 0.9.4 → 0.9.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. package/.turbo/turbo-build.log +10 -10
  2. package/.turbo/turbo-test.log +21 -9
  3. package/README.md +8 -0
  4. package/dist/__tests__/analyzer.test.d.ts +2 -0
  5. package/dist/__tests__/analyzer.test.d.ts.map +1 -0
  6. package/dist/__tests__/analyzer.test.js +157 -0
  7. package/dist/__tests__/analyzer.test.js.map +1 -0
  8. package/dist/__tests__/auto-detection.test.d.ts +2 -0
  9. package/dist/__tests__/auto-detection.test.d.ts.map +1 -0
  10. package/dist/__tests__/auto-detection.test.js +132 -0
  11. package/dist/__tests__/auto-detection.test.js.map +1 -0
  12. package/dist/__tests__/enhanced-cohesion.test.d.ts +2 -0
  13. package/dist/__tests__/enhanced-cohesion.test.d.ts.map +1 -0
  14. package/dist/__tests__/enhanced-cohesion.test.js +109 -0
  15. package/dist/__tests__/enhanced-cohesion.test.js.map +1 -0
  16. package/dist/__tests__/fragmentation-advanced.test.d.ts +2 -0
  17. package/dist/__tests__/fragmentation-advanced.test.d.ts.map +1 -0
  18. package/dist/__tests__/fragmentation-advanced.test.js +50 -0
  19. package/dist/__tests__/fragmentation-advanced.test.js.map +1 -0
  20. package/dist/__tests__/fragmentation-coupling.test.d.ts +2 -0
  21. package/dist/__tests__/fragmentation-coupling.test.d.ts.map +1 -0
  22. package/dist/__tests__/fragmentation-coupling.test.js +52 -0
  23. package/dist/__tests__/fragmentation-coupling.test.js.map +1 -0
  24. package/dist/__tests__/fragmentation-log.test.d.ts +2 -0
  25. package/dist/__tests__/fragmentation-log.test.d.ts.map +1 -0
  26. package/dist/__tests__/fragmentation-log.test.js +33 -0
  27. package/dist/__tests__/fragmentation-log.test.js.map +1 -0
  28. package/dist/__tests__/scoring.test.d.ts +2 -0
  29. package/dist/__tests__/scoring.test.d.ts.map +1 -0
  30. package/dist/__tests__/scoring.test.js +118 -0
  31. package/dist/__tests__/scoring.test.js.map +1 -0
  32. package/dist/__tests__/structural-cohesion.test.d.ts +2 -0
  33. package/dist/__tests__/structural-cohesion.test.d.ts.map +1 -0
  34. package/dist/__tests__/structural-cohesion.test.js +29 -0
  35. package/dist/__tests__/structural-cohesion.test.js.map +1 -0
  36. package/dist/analyzer.d.ts +100 -0
  37. package/dist/analyzer.d.ts.map +1 -0
  38. package/dist/analyzer.js +701 -0
  39. package/dist/analyzer.js.map +1 -0
  40. package/dist/analyzers/python-context.d.ts +38 -0
  41. package/dist/analyzers/python-context.d.ts.map +1 -0
  42. package/dist/analyzers/python-context.js +232 -0
  43. package/dist/analyzers/python-context.js.map +1 -0
  44. package/dist/chunk-BD4NWUVG.mjs +1242 -0
  45. package/dist/cli.d.ts.map +1 -0
  46. package/dist/cli.js +139 -13
  47. package/dist/cli.js.map +1 -0
  48. package/dist/cli.mjs +1 -1
  49. package/dist/index.d.mts +3 -0
  50. package/dist/index.d.ts +3 -0
  51. package/dist/index.d.ts.map +1 -0
  52. package/dist/index.js +139 -13
  53. package/dist/index.js.map +1 -0
  54. package/dist/index.mjs +1 -1
  55. package/dist/scoring.d.ts +13 -0
  56. package/dist/scoring.d.ts.map +1 -0
  57. package/dist/scoring.js +133 -0
  58. package/dist/scoring.js.map +1 -0
  59. package/dist/semantic-analysis.d.ts +44 -0
  60. package/dist/semantic-analysis.d.ts.map +1 -0
  61. package/dist/semantic-analysis.js +241 -0
  62. package/dist/semantic-analysis.js.map +1 -0
  63. package/dist/types.d.ts +117 -0
  64. package/dist/types.d.ts.map +1 -0
  65. package/dist/types.js +2 -0
  66. package/dist/types.js.map +1 -0
  67. package/package.json +2 -2
  68. package/src/__tests__/fragmentation-advanced.test.ts +60 -0
  69. package/src/__tests__/fragmentation-coupling.test.ts +62 -0
  70. package/src/__tests__/fragmentation-log.test.ts +38 -0
  71. package/src/__tests__/structural-cohesion.test.ts +32 -0
  72. package/src/analyzer.ts +193 -18
  73. package/src/index.ts +34 -2
  74. package/src/types.ts +3 -0
  75. package/tsconfig.tsbuildinfo +1 -0
@@ -0,0 +1,701 @@
1
+ import { estimateTokens, parseFileExports } from '@aiready/core';
2
+ import { buildCoUsageMatrix, buildTypeGraph, inferDomainFromSemantics } from './semantic-analysis';
3
/**
 * Auto-detect domain keywords from the workspace folder structure.
 * Every path segment that is not a common infrastructure folder (and is not
 * a file name, i.e. contains no '.') is singularized and collected as a
 * candidate domain keyword.
 *
 * @param {Array<{file: string}>} files - workspace files with their paths
 * @returns {string[]} unique singularized folder names
 */
function extractDomainKeywordsFromPaths(files) {
    // Infrastructure folders that never represent a business domain.
    const skipFolders = new Set(['src', 'lib', 'dist', 'build', 'node_modules', 'test', 'tests', '__tests__', 'spec', 'e2e', 'scripts', 'components', 'utils', 'helpers', 'util', 'helper', 'api', 'apis']);
    const keywords = new Set();
    for (const { file } of files) {
        for (const segment of file.split('/')) {
            const lower = segment.toLowerCase();
            // Skip empty segments, infrastructure folders, and file names.
            if (!lower || skipFolders.has(lower) || lower.includes('.')) {
                continue;
            }
            // Singularize common plural forms for better matching.
            keywords.add(singularize(lower));
        }
    }
    return [...keywords];
}
24
/**
 * Simple singularization for common English plurals.
 *
 * @param {string} word - lowercase word to singularize
 * @returns {string} the singular form, or the word unchanged
 */
function singularize(word) {
    // Handle irregular plurals.
    const irregulars = {
        people: 'person',
        children: 'child',
        men: 'man',
        women: 'woman',
    };
    // Object.hasOwn guards against keys inherited from Object.prototype:
    // the previous truthiness check made singularize('constructor') return
    // the Object constructor function instead of a string.
    if (Object.hasOwn(irregulars, word)) {
        return irregulars[word];
    }
    // Common plural patterns.
    if (word.endsWith('ies')) {
        return word.slice(0, -3) + 'y'; // categories -> category
    }
    if (word.endsWith('ses')) {
        return word.slice(0, -2); // classes -> class
    }
    if (word.endsWith('s') && word.length > 3) {
        return word.slice(0, -1); // orders -> order
    }
    return word;
}
50
/**
 * Build a dependency graph from file contents.
 *
 * Three passes:
 *   1. create one node per file (imports, exports, token cost, LOC);
 *   2. build semantic co-usage / type graphs over the whole node set;
 *   3. refine each export's domain using the semantic graphs.
 *
 * @param {Array<{file: string, content: string}>} files - files to analyze
 * @returns {{nodes: Map, edges: Map, coUsageMatrix: Map, typeGraph: *}}
 *   graph keyed by file path; edges map each file to the Set of its
 *   import specifiers (NOTE(review): edge targets are raw import strings,
 *   not resolved file paths — confirm callers resolve them)
 */
export function buildDependencyGraph(files) {
    const nodes = new Map();
    const edges = new Map();
    // Auto-detect domain keywords from workspace folder structure
    const autoDetectedKeywords = extractDomainKeywordsFromPaths(files);
    // First pass: Create nodes with folder-based domain inference
    for (const { file, content } of files) {
        const imports = extractImportsFromContent(content);
        // Use AST-based extraction for better accuracy, fallback to regex
        const exports = extractExportsWithAST(content, file, { domainKeywords: autoDetectedKeywords }, imports);
        const tokenCost = estimateTokens(content);
        const linesOfCode = content.split('\n').length;
        nodes.set(file, {
            file,
            imports,
            exports,
            tokenCost,
            linesOfCode,
        });
        edges.set(file, new Set(imports));
    }
    // Second pass: Build semantic analysis graphs
    const graph = { nodes, edges };
    const coUsageMatrix = buildCoUsageMatrix(graph);
    const typeGraph = buildTypeGraph(graph);
    // Add semantic data to graph
    graph.coUsageMatrix = coUsageMatrix;
    graph.typeGraph = typeGraph;
    // Third pass: Enhance domain assignments with semantic analysis
    for (const [file, node] of nodes) {
        for (const exp of node.exports) {
            // Get semantic domain assignments
            const semanticAssignments = inferDomainFromSemantics(file, exp.name, graph, coUsageMatrix, typeGraph, exp.typeReferences);
            // Add multi-domain assignments with confidence scores
            exp.domains = semanticAssignments;
            // Keep inferredDomain for backwards compatibility (use highest confidence)
            // NOTE(review): assumes inferDomainFromSemantics returns results
            // sorted by descending confidence — confirm in semantic-analysis.
            if (semanticAssignments.length > 0) {
                exp.inferredDomain = semanticAssignments[0].domain;
            }
        }
    }
    return graph;
}
96
/**
 * Extract import specifiers from file content using regex.
 * Matches ES `import ... from`, bare `import '...'`, and `require('...')`.
 * Node built-ins under the 'node:' prefix are excluded; everything else
 * (relative, aliased, and package imports) is kept.
 *
 * @param {string} content - raw file text
 * @returns {string[]} deduplicated import specifiers in first-seen order
 */
function extractImportsFromContent(content) {
    const importPatterns = [
        /import\s+.*?\s+from\s+['"](.+?)['"]/g, // import ... from '...'
        /import\s+['"](.+?)['"]/g, // import '...'
        /require\(['"](.+?)['"]\)/g, // require('...')
    ];
    // A Set both deduplicates and preserves insertion order.
    const found = new Set();
    for (const pattern of importPatterns) {
        for (const match of content.matchAll(pattern)) {
            const specifier = match[1];
            // Exclude only node built-ins (node:).
            if (specifier && !specifier.startsWith('node:')) {
                found.add(specifier);
            }
        }
    }
    return [...found];
}
120
/**
 * Calculate the maximum depth of the import tree for a file.
 * Performs a depth-first walk with path-local cycle detection: revisiting a
 * file on the current path stops recursion and returns the depth reached.
 *
 * @param {string} file - starting file
 * @param {{edges: Map<string, Set<string>>}} graph - dependency graph
 * @param {Set<string>} [visited] - files on the current DFS path
 * @param {number} [depth] - depth accumulated so far
 * @returns {number} deepest chain length reachable from `file`
 */
export function calculateImportDepth(file, graph, visited = new Set(), depth = 0) {
    // Already on the current path: circular dependency, stop here.
    if (visited.has(file)) {
        return depth;
    }
    const deps = graph.edges.get(file);
    if (!deps || deps.size === 0) {
        return depth;
    }
    visited.add(file);
    let deepest = depth;
    for (const dependency of deps) {
        deepest = Math.max(deepest, calculateImportDepth(dependency, graph, visited, depth + 1));
    }
    // Backtrack so sibling branches may traverse this file again.
    visited.delete(file);
    return deepest;
}
140
/**
 * Get all transitive dependencies for a file.
 * Depth-first traversal; the shared `visited` set prevents infinite loops on
 * circular dependencies.
 *
 * @param {string} file - starting file
 * @param {{edges: Map<string, Set<string>>}} graph - dependency graph
 * @param {Set<string>} [visited] - files already expanded (shared across recursion)
 * @returns {string[]} deduplicated dependency list in discovery order
 */
export function getTransitiveDependencies(file, graph, visited = new Set()) {
    if (visited.has(file)) {
        return [];
    }
    visited.add(file);
    const direct = graph.edges.get(file);
    if (!direct || direct.size === 0) {
        return [];
    }
    const collected = [];
    for (const dep of direct) {
        collected.push(dep, ...getTransitiveDependencies(dep, graph, visited));
    }
    return [...new Set(collected)];
}
159
/**
 * Calculate total context budget: the tokens needed to understand a file,
 * i.e. its own token cost plus the token cost of every transitive
 * dependency that resolves to a node in the graph.
 *
 * @param {string} file - file to budget
 * @param {{nodes: Map, edges: Map}} graph - dependency graph
 * @returns {number} estimated token total; 0 when the file is unknown
 */
export function calculateContextBudget(file, graph) {
    const node = graph.nodes.get(file);
    if (!node) {
        return 0;
    }
    // Start from the file's own cost and fold in each resolvable dependency.
    return getTransitiveDependencies(file, graph).reduce((total, dep) => {
        const depNode = graph.nodes.get(dep);
        return depNode ? total + depNode.tokenCost : total;
    }, node.tokenCost);
}
176
/**
 * Detect circular dependencies in the graph.
 *
 * DFS with an explicit recursion stack; each recursive call receives a copy
 * of the current path so the cycle members can be reconstructed.
 *
 * @param {{nodes: Map, edges: Map<string, Set<string>>}} graph
 * @returns {string[][]} list of cycles; each cycle starts and ends with the
 *   same file (e.g. ['a', 'b', 'a']). The same cycle may be reported once
 *   per entry point that reaches it before `visited` prunes the branch.
 */
export function detectCircularDependencies(graph) {
    const cycles = [];
    const visited = new Set();
    const recursionStack = new Set();
    function dfs(file, path) {
        if (recursionStack.has(file)) {
            // Found a cycle: slice the path from the first occurrence of
            // `file` and close the loop by appending `file` again.
            const cycleStart = path.indexOf(file);
            if (cycleStart !== -1) {
                cycles.push([...path.slice(cycleStart), file]);
            }
            return;
        }
        if (visited.has(file)) {
            return;
        }
        visited.add(file);
        recursionStack.add(file);
        path.push(file);
        const dependencies = graph.edges.get(file);
        if (dependencies) {
            for (const dep of dependencies) {
                // Pass a copy so sibling branches do not see each other's path.
                dfs(dep, [...path]);
            }
        }
        // Unwind: `file` is no longer on the active path.
        recursionStack.delete(file);
    }
    for (const file of graph.nodes.keys()) {
        if (!visited.has(file)) {
            dfs(file, []);
        }
    }
    return cycles;
}
213
/**
 * Calculate cohesion score (how related are exports in a file).
 * Thin wrapper kept for API stability: delegates to calculateEnhancedCohesion,
 * which combines domain-based, import-based, and structural signals.
 *
 * @param exports - Array of export information
 * @param filePath - Optional file path for context-aware scoring
 * @param options - Optional settings (e.g. coUsageMatrix, weights) forwarded as-is
 * @returns cohesion score in [0, 1]; 1 = highly cohesive
 */
export function calculateCohesion(exports, filePath, options) {
    return calculateEnhancedCohesion(exports, filePath, options);
}
222
/**
 * Check if a file path looks like a test/mock/fixture file.
 * Matching is case-insensitive and substring-based, so e.g. 'latest.ts'
 * also matches via the 'test' marker.
 *
 * @param {string} filePath - path to classify
 * @returns {boolean} true when any test-related marker appears in the path
 */
function isTestFile(filePath) {
    const lower = filePath.toLowerCase();
    const markers = ['test', 'spec', 'mock', 'fixture', '__tests__', '.test.', '.spec.'];
    return markers.some((marker) => lower.includes(marker));
}
235
/**
 * Calculate fragmentation score (how scattered a domain's files are).
 *
 * Linear mode (default): (uniqueDirectories - 1) / (totalFiles - 1).
 * Log mode: log(uniqueDirectories) / log(totalFiles), so 1 -> 2 directories
 * is a large jump while 10 -> 11 is comparatively small.
 *
 * @param {string[]} files - file paths in the domain
 * @param {string} domain - domain name (unused; kept for API compatibility)
 * @param {{useLogScale?: boolean, logBase?: number}} [options]
 * @returns {number} fragmentation in [0, 1]
 */
export function calculateFragmentation(files, domain, options) {
    if (files.length <= 1) {
        return 0; // a single file cannot be fragmented
    }
    const dirOf = (path) => path.split('/').slice(0, -1).join('/');
    const uniqueDirs = new Set(files.map(dirOf)).size;
    if (options?.useLogScale) {
        if (uniqueDirs <= 1) {
            return 0;
        }
        const base = options.logBase || Math.E;
        const numerator = Math.log(uniqueDirs) / Math.log(base);
        const denominator = Math.log(files.length) / Math.log(base);
        return denominator > 0 ? numerator / denominator : 0;
    }
    return (uniqueDirs - 1) / (files.length - 1);
}
259
/**
 * Calculate path entropy for a set of files.
 * Returns a normalized Shannon entropy in [0,1]: 0 when all files share one
 * directory, 1 when files are evenly distributed across directories.
 *
 * @param {string[]} files - file paths
 * @returns {number} normalized entropy in [0, 1]
 */
export function calculatePathEntropy(files) {
    if (!files || files.length === 0) {
        return 0;
    }
    // Tally how many files live in each directory ('.' for bare filenames).
    const tally = new Map();
    for (const path of files) {
        const dir = path.split('/').slice(0, -1).join('/') || '.';
        tally.set(dir, (tally.get(dir) || 0) + 1);
    }
    if (tally.size <= 1) {
        return 0; // single directory -> zero entropy
    }
    const total = files.length;
    let entropy = 0;
    for (const count of tally.values()) {
        const p = count / total;
        entropy -= p * Math.log2(p);
    }
    // Normalize by the maximum possible entropy for this many directories.
    const maxEntropy = Math.log2(tally.size);
    return maxEntropy > 0 ? entropy / maxEntropy : 0;
}
284
/**
 * Calculate a directory-distance metric based on common ancestor depth.
 * For every file pair, the number of shared leading path segments is
 * normalized by the deeper path's length; the pair's distance is the
 * complement of that ratio. Returns the average over all pairs, in [0,1]:
 * 0 = pairs share deep common ancestors, 1 = pairs share only the root.
 *
 * @param {string[]} files - file paths
 * @returns {number} average pairwise directory distance in [0, 1]
 */
export function calculateDirectoryDistance(files) {
    if (!files || files.length <= 1) {
        return 0;
    }
    const toSegments = (p) => p.split('/').filter(Boolean);
    // Number of leading segments two paths share, counted from the root.
    const sharedDepth = (a, b) => {
        const limit = Math.min(a.length, b.length);
        let k = 0;
        while (k < limit && a[k] === b[k]) {
            k += 1;
        }
        return k;
    };
    let distanceSum = 0;
    let pairs = 0;
    for (let i = 0; i < files.length; i += 1) {
        const segA = toSegments(files[i]);
        for (let j = i + 1; j < files.length; j += 1) {
            const segB = toSegments(files[j]);
            const deepest = Math.max(segA.length, segB.length);
            const sharedRatio = deepest > 0 ? sharedDepth(segA, segB) / deepest : 0;
            // Distance is the inverse of the normalized shared depth.
            distanceSum += 1 - sharedRatio;
            pairs += 1;
        }
    }
    return pairs > 0 ? distanceSum / pairs : 0;
}
320
/**
 * Group files by domain to detect module clusters and score each cluster's
 * fragmentation, cohesion, and consolidation potential.
 *
 * @param {{nodes: Map, edges: Map, coUsageMatrix?: Map}} graph - dependency graph
 * @param {{useLogScale?: boolean}} [options] - forwarded to calculateFragmentation
 * @returns {Array<object>} clusters (>= 2 files each), sorted most-fragmented first
 */
export function detectModuleClusters(graph, options) {
    const domainMap = new Map();
    // Group files by their primary domain
    // NOTE(review): "primary" is simply the first export's inferredDomain —
    // assumes the first export is representative of the file.
    for (const [file, node] of graph.nodes.entries()) {
        const domains = node.exports.map((e) => e.inferredDomain || 'unknown');
        const primaryDomain = domains[0] || 'unknown';
        if (!domainMap.has(primaryDomain)) {
            domainMap.set(primaryDomain, []);
        }
        domainMap.get(primaryDomain).push(file);
    }
    const clusters = [];
    for (const [domain, files] of domainMap.entries()) {
        if (files.length < 2)
            continue; // Skip single-file domains
        // Total token cost of the cluster (missing nodes count as 0).
        const totalTokens = files.reduce((sum, file) => {
            const node = graph.nodes.get(file);
            return sum + (node?.tokenCost || 0);
        }, 0);
        const baseFragmentation = calculateFragmentation(files, domain, { useLogScale: !!options?.useLogScale });
        // Compute import-based cohesion across files in this domain cluster.
        // This measures how much the files actually "talk" to each other.
        // We'll compute average pairwise Jaccard similarity between each file's import lists.
        let importSimilarityTotal = 0;
        let importComparisons = 0;
        for (let i = 0; i < files.length; i++) {
            for (let j = i + 1; j < files.length; j++) {
                const f1 = files[i];
                const f2 = files[j];
                const n1 = graph.nodes.get(f1)?.imports || [];
                const n2 = graph.nodes.get(f2)?.imports || [];
                // Treat two empty import lists as not coupled (similarity 0)
                const similarity = (n1.length === 0 && n2.length === 0)
                    ? 0
                    : calculateJaccardSimilarity(n1, n2);
                importSimilarityTotal += similarity;
                importComparisons++;
            }
        }
        const importCohesion = importComparisons > 0 ? importSimilarityTotal / importComparisons : 0;
        // Coupling discount: if files are heavily importing each other, reduce fragmentation penalty.
        // Following recommendation: up to 20% discount proportional to import cohesion.
        const couplingDiscountFactor = 1 - 0.2 * importCohesion;
        const fragmentationScore = baseFragmentation * couplingDiscountFactor;
        // Additional metrics for richer reporting
        const pathEntropy = calculatePathEntropy(files);
        const directoryDistance = calculateDirectoryDistance(files);
        // Mean per-file cohesion; files missing from the node map contribute 0.
        const avgCohesion = files.reduce((sum, file) => {
            const node = graph.nodes.get(file);
            return sum + (node ? calculateCohesion(node.exports, file, { coUsageMatrix: graph.coUsageMatrix }) : 0);
        }, 0) / files.length;
        // Generate consolidation plan
        const targetFiles = Math.max(1, Math.ceil(files.length / 3)); // Aim to reduce by ~66%
        const consolidationPlan = generateConsolidationPlan(domain, files, targetFiles);
        clusters.push({
            domain,
            files,
            totalTokens,
            fragmentationScore,
            pathEntropy,
            directoryDistance,
            importCohesion,
            avgCohesion,
            suggestedStructure: {
                targetFiles,
                consolidationPlan,
            },
        });
    }
    // Sort by fragmentation score (most fragmented first)
    return clusters.sort((a, b) => b.fragmentationScore - a.fragmentationScore);
}
395
/**
 * Extract export information from file content with regex patterns.
 * Regex-based fallback used when AST parsing (extractExportsWithAST) fails.
 *
 * @param {string} content - raw file text
 * @param {string} filePath - path of the file (for domain inference)
 * @param {{domainKeywords?: string[]}} [domainOptions] - extra domain keywords
 * @param {string[]} [fileImports] - the file's import specifiers
 * @returns {Array<{name: string, type: string, inferredDomain: string}>}
 */
function extractExports(content, filePath, domainOptions, fileImports) {
    const exports = [];
    // Patterns are index-aligned with `types` below.
    // `(?:async\s+)?` / `(?:abstract\s+)?` additionally match
    // `export async function` and `export abstract class`, which the
    // previous patterns silently missed; all prior matches still match.
    const patterns = [
        /export\s+(?:async\s+)?function\s+(\w+)/g,
        /export\s+(?:abstract\s+)?class\s+(\w+)/g,
        /export\s+const\s+(\w+)/g,
        /export\s+type\s+(\w+)/g,
        /export\s+interface\s+(\w+)/g,
        /export\s+default/g,
    ];
    const types = [
        'function',
        'class',
        'const',
        'type',
        'interface',
        'default',
    ];
    patterns.forEach((pattern, index) => {
        let match;
        while ((match = pattern.exec(content)) !== null) {
            // The 'export default' pattern has no capture group -> 'default'.
            const name = match[1] || 'default';
            const type = types[index];
            const inferredDomain = inferDomain(name, filePath, domainOptions, fileImports);
            exports.push({ name, type, inferredDomain });
        }
    });
    return exports;
}
429
/**
 * Infer a domain keyword for an exported symbol name.
 *
 * Resolution order (first match wins):
 *   1. word-boundary match of identifier tokens against domain keywords;
 *   2. substring match of the whole lowercased identifier (compound words);
 *   3. import-path segments, with singularization;
 *   4. file-path segments, with singularization.
 * Returns 'unknown' when nothing matches.
 *
 * @param {string} name - exported identifier
 * @param {string} [filePath] - path of the file containing the export
 * @param {{domainKeywords?: string[]}} [domainOptions] - workspace keywords,
 *   checked before the built-in defaults
 * @param {string[]} [fileImports] - the file's import specifiers
 * @returns {string} matched domain keyword, or 'unknown'
 */
function inferDomain(name, filePath, domainOptions, fileImports) {
    const lower = name.toLowerCase();
    // Tokenize identifier: split camelCase BEFORE lowercasing — the previous
    // version lowercased first, so the case-boundary regex could never match
    // and camelCase identifiers stayed a single token. Then split
    // snake_case, kebab-case, and numbers.
    const tokens = Array.from(new Set(name
        .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
        .toLowerCase()
        .replace(/[^a-z0-9]+/gi, ' ')
        .split(' ')
        .filter(Boolean)));
    // Domain keywords ordered from most specific to most general
    // This prevents generic terms like 'util' from matching before specific domains
    // NOTE: 'api', 'util', 'helper' are intentionally excluded as they are too generic
    const defaultKeywords = [
        'authentication',
        'authorization',
        'payment',
        'invoice',
        'customer',
        'product',
        'order',
        'cart',
        'user',
        'admin',
        'repository',
        'controller',
        'service',
        'config',
        'model',
        'view',
        'auth',
    ];
    const domainKeywords = domainOptions?.domainKeywords && domainOptions.domainKeywords.length
        ? [...domainOptions.domainKeywords, ...defaultKeywords]
        : defaultKeywords;
    // Try word boundary matching first for more accurate detection
    for (const keyword of domainKeywords) {
        if (tokens.includes(keyword)) {
            return keyword;
        }
    }
    // Fallback to substring matching for compound words
    for (const keyword of domainKeywords) {
        if (lower.includes(keyword)) {
            return keyword;
        }
    }
    // Import-path domain inference: analyze import statements for domain hints
    if (fileImports && fileImports.length > 0) {
        for (const importPath of fileImports) {
            // Parse all segments, including those after '@' or '.'
            // e.g., '@/orders/service' -> ['orders', 'service']
            //       '../payments/processor' -> ['payments', 'processor']
            const allSegments = importPath.split('/');
            const relevantSegments = allSegments.filter(s => {
                if (!s)
                    return false;
                // Skip '.' and '..' but keep everything else
                if (s === '.' || s === '..')
                    return false;
                // Skip a bare '@' segment but keep the path after it
                if (s.startsWith('@') && s.length === 1)
                    return false;
                return true;
            }).map(s => s.startsWith('@') ? s.slice(1) : s); // strip '@' from scoped segments
            for (const segment of relevantSegments) {
                const segLower = segment.toLowerCase();
                const singularSegment = singularize(segLower);
                // Check if any domain keyword matches the import path segment (with singularization)
                for (const keyword of domainKeywords) {
                    if (singularSegment === keyword || segLower === keyword || segLower.includes(keyword)) {
                        return keyword;
                    }
                }
            }
        }
    }
    // Path-based fallback: check file path segments
    if (filePath) {
        // Auto-detect from path by checking against domain keywords (with singularization)
        const pathSegments = filePath.toLowerCase().split('/');
        for (const segment of pathSegments) {
            const singularSegment = singularize(segment);
            for (const keyword of domainKeywords) {
                if (singularSegment === keyword || segment === keyword || segment.includes(keyword)) {
                    return keyword;
                }
            }
        }
    }
    return 'unknown';
}
524
/**
 * Generate a step-by-step consolidation plan for a fragmented domain.
 *
 * @param {string} domain - domain name the files belong to
 * @param {string[]} files - files assigned to the domain
 * @param {number} targetFiles - desired file count after consolidation
 * @returns {string[]} human-readable plan lines; a single "no action" line
 *   when the domain already has <= targetFiles files
 */
function generateConsolidationPlan(domain, files, targetFiles) {
    if (files.length <= targetFiles) {
        return [`No consolidation needed for ${domain}`];
    }
    // NOTE: a previous revision also grouped the files by directory here,
    // but the grouping was never used in the emitted plan, so that dead
    // computation has been removed.
    return [
        `Consolidate ${files.length} ${domain} files into ${targetFiles} cohesive file(s):`,
        `1. Create unified ${domain} module file`,
        `2. Move related functionality from ${files.length} scattered files`,
        `3. Update imports in dependent files`,
        `4. Remove old files after consolidation (verify with tests first)`,
    ];
}
548
/**
 * Extract exports using AST parsing (enhanced version).
 * Falls back to regex-based extraction if AST parsing fails.
 *
 * @param {string} content - raw file text
 * @param {string} filePath - path of the file (for domain inference)
 * @param {{domainKeywords?: string[]}} [domainOptions] - extra domain keywords
 * @param {string[]} [fileImports] - the file's import specifiers
 * @returns {Array<object>} exports with name, type, inferredDomain, and —
 *   only on the AST path — per-export imports/dependencies
 */
export function extractExportsWithAST(content, filePath, domainOptions, fileImports) {
    try {
        const { exports: astExports } = parseFileExports(content, filePath);
        return astExports.map(exp => ({
            name: exp.name,
            type: exp.type,
            inferredDomain: inferDomain(exp.name, filePath, domainOptions, fileImports),
            imports: exp.imports,
            dependencies: exp.dependencies,
        }));
    }
    catch (error) {
        // Fallback to regex-based extraction. Deliberate best-effort: any
        // parse failure (syntax error, unsupported language) is swallowed.
        return extractExports(content, filePath, domainOptions, fileImports);
    }
}
568
/**
 * Calculate enhanced cohesion score combining up to three signals:
 * 1. Import-based cohesion (Jaccard similarity of shared imports)
 * 2. Structural cohesion (co-usage patterns, when a coUsageMatrix is given)
 * 3. Domain-based cohesion (entropy of inferred domains)
 *
 * Default weights: 50% import-based, 30% structural, 20% domain-based;
 * weights are renormalized over whichever signals are actually available.
 * (Overridable via options.weights.)
 *
 * @param exports - Array of export information
 * @param filePath - Optional file path for context-aware scoring
 * @param options - Optional { coUsageMatrix, weights }
 * @returns cohesion score in [0, 1]
 */
export function calculateEnhancedCohesion(exports, filePath, options) {
    if (exports.length === 0)
        return 1;
    if (exports.length === 1)
        return 1;
    // Special case: test/mock/fixture files are always treated as cohesive.
    if (filePath && isTestFile(filePath)) {
        return 1;
    }
    // Calculate domain-based cohesion (always available)
    const domainCohesion = calculateDomainCohesion(exports);
    // Calculate import-based cohesion if per-export import data exists
    const hasImportData = exports.some((e) => e.imports && e.imports.length > 0);
    const importCohesion = hasImportData ? calculateImportBasedCohesion(exports) : undefined;
    // Calculate structural cohesion (co-usage) if coUsageMatrix and filePath available
    const coUsageMatrix = options?.coUsageMatrix;
    const structuralCohesion = filePath && coUsageMatrix ? calculateStructuralCohesionFromCoUsage(filePath, coUsageMatrix) : undefined;
    // Default weights (can be overridden via options)
    const defaultWeights = { importBased: 0.5, structural: 0.3, domainBased: 0.2 };
    const weights = { ...defaultWeights, ...(options?.weights || {}) };
    // Collect available signals and normalize weights over them
    const signals = [];
    if (importCohesion !== undefined)
        signals.push({ score: importCohesion, weight: weights.importBased });
    if (structuralCohesion !== undefined)
        signals.push({ score: structuralCohesion, weight: weights.structural });
    // domain cohesion is always available
    signals.push({ score: domainCohesion, weight: weights.domainBased });
    const totalWeight = signals.reduce((s, el) => s + el.weight, 0);
    // Guard against an all-zero weight override.
    if (totalWeight === 0)
        return domainCohesion;
    const combined = signals.reduce((sum, el) => sum + el.score * (el.weight / totalWeight), 0);
    return combined;
}
611
/**
 * Calculate structural cohesion for a file based on co-usage patterns.
 * Uses the co-usage distribution (files commonly imported alongside this
 * file) and computes an entropy-based cohesion score in [0,1]:
 * - 1 => highly cohesive (co-usage concentrated on a small set of partners,
 *        or no co-usage data at all)
 * - 0 => maximally fragmented (co-usage spread uniformly across partners)
 *
 * @param {string} file - file to score
 * @param {Map<string, Map<string, number>>} [coUsageMatrix] - co-usage counts
 * @returns {number} cohesion in [0, 1]
 */
export function calculateStructuralCohesionFromCoUsage(file, coUsageMatrix) {
    const partners = coUsageMatrix?.get(file);
    if (!partners || partners.size === 0) {
        return 1; // no data -> assume cohesive
    }
    const counts = [...partners.values()];
    const total = counts.reduce((sum, count) => sum + count, 0);
    if (total === 0) {
        return 1;
    }
    // Probability distribution over co-imported files (zero counts dropped).
    const probs = counts.filter((count) => count > 0).map((count) => count / total);
    if (probs.length <= 1) {
        return 1;
    }
    // Normalized Shannon entropy; cohesion is its complement.
    const entropy = probs.reduce((acc, p) => acc - p * Math.log2(p), 0);
    const maxEntropy = Math.log2(probs.length);
    return maxEntropy > 0 ? 1 - entropy / maxEntropy : 1;
}
645
/**
 * Calculate cohesion based on shared imports: the average pairwise Jaccard
 * similarity between the import lists of exports that have import data.
 *
 * @param {Array<{imports?: string[]}>} exports - export records
 * @returns {number} average similarity in [0,1]; 1 when fewer than two
 *   exports carry import data
 */
function calculateImportBasedCohesion(exports) {
    const withImports = exports.filter((exp) => exp.imports && exp.imports.length > 0);
    if (withImports.length < 2) {
        return 1; // Not enough data
    }
    // Average similarity over all unordered pairs.
    let similaritySum = 0;
    let pairCount = 0;
    for (let i = 0; i < withImports.length; i += 1) {
        for (let j = i + 1; j < withImports.length; j += 1) {
            similaritySum += calculateJaccardSimilarity(withImports[i].imports, withImports[j].imports);
            pairCount += 1;
        }
    }
    return pairCount > 0 ? similaritySum / pairCount : 1;
}
667
/**
 * Calculate the Jaccard similarity (|intersection| / |union|) of two arrays,
 * treating each array as a set. Two empty arrays are considered identical.
 *
 * @param {Array} arr1 - first collection
 * @param {Array} arr2 - second collection
 * @returns {number} similarity in [0, 1]
 */
function calculateJaccardSimilarity(arr1, arr2) {
    if (arr1.length === 0 && arr2.length === 0) {
        return 1;
    }
    if (arr1.length === 0 || arr2.length === 0) {
        return 0;
    }
    const set1 = new Set(arr1);
    const set2 = new Set(arr2);
    let shared = 0;
    for (const item of set1) {
        if (set2.has(item)) {
            shared += 1;
        }
    }
    const unionSize = new Set([...arr1, ...arr2]).size;
    return shared / unionSize;
}
681
/**
 * Calculate domain-based cohesion via normalized Shannon entropy of each
 * export's inferred domain: 1 when all exports share one domain, lower as
 * domains diverge (0 when every export has a distinct domain).
 *
 * @param {Array<{inferredDomain?: string}>} exports - export records
 * @returns {number} cohesion in [0, 1]
 */
function calculateDomainCohesion(exports) {
    // Tally exports per domain; missing domains bucket under 'unknown'.
    const domainTally = new Map();
    for (const exp of exports) {
        const domain = exp.inferredDomain || 'unknown';
        domainTally.set(domain, (domainTally.get(domain) || 0) + 1);
    }
    const total = exports.length;
    let entropy = 0;
    for (const count of domainTally.values()) {
        const p = count / total;
        if (p > 0) {
            entropy -= p * Math.log2(p);
        }
    }
    // Normalize by the entropy of a uniform distribution over `total` items.
    const maxEntropy = Math.log2(total);
    return maxEntropy > 0 ? 1 - entropy / maxEntropy : 1;
}
701
+ //# sourceMappingURL=analyzer.js.map