@aiready/context-analyzer 0.5.3 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,126 @@
1
+ import { describe, it, expect } from 'vitest';
2
+ import { calculateCohesion } from '../analyzer';
3
+ import type { ExportInfo } from '../types';
4
+
5
+ describe('Enhanced Cohesion Calculation', () => {
6
+ it('should use domain-based cohesion when no import data available', () => {
7
+ const exports: ExportInfo[] = [
8
+ { name: 'getUserData', type: 'function', inferredDomain: 'user' },
9
+ { name: 'getProductData', type: 'function', inferredDomain: 'product' },
10
+ ];
11
+
12
+ const cohesion = calculateCohesion(exports);
13
+
14
+ // With mixed domains (user, product) and no import data, should use domain-based calculation
15
+ // Domain entropy for 2 different domains = low cohesion
16
+ expect(cohesion).toBeLessThan(0.5);
17
+ });
18
+
19
+ it('should use import-based cohesion when import data available', () => {
20
+ const exports: ExportInfo[] = [
21
+ {
22
+ name: 'getUserData',
23
+ type: 'function',
24
+ inferredDomain: 'user',
25
+ imports: ['react', 'axios', 'lodash'],
26
+ },
27
+ {
28
+ name: 'getProductData',
29
+ type: 'function',
30
+ inferredDomain: 'product',
31
+ imports: ['react', 'axios', 'lodash'], // Same imports!
32
+ },
33
+ ];
34
+
35
+ const cohesion = calculateCohesion(exports);
36
+
37
+ // Even though domains differ, imports are identical (Jaccard = 1.0)
38
+ // Enhanced cohesion = 0.6 * 1.0 + 0.4 * 0.0 (different domains) = 0.6
39
+ // Should be >= 0.6 (import-based weight)
40
+ expect(cohesion).toBeGreaterThanOrEqual(0.6);
41
+ });
42
+
43
+ it('should weight import-based similarity higher than domain-based', () => {
44
+ const exportsWithSharedImports: ExportInfo[] = [
45
+ {
46
+ name: 'getUserData',
47
+ type: 'function',
48
+ inferredDomain: 'user',
49
+ imports: ['react', 'axios'],
50
+ },
51
+ {
52
+ name: 'getProductData',
53
+ type: 'function',
54
+ inferredDomain: 'product',
55
+ imports: ['react', 'axios'],
56
+ },
57
+ ];
58
+
59
+ const exportsWithoutSharedImports: ExportInfo[] = [
60
+ {
61
+ name: 'getUserData',
62
+ type: 'function',
63
+ inferredDomain: 'user',
64
+ imports: ['react', 'axios'],
65
+ },
66
+ {
67
+ name: 'getProductData',
68
+ type: 'function',
69
+ inferredDomain: 'product',
70
+ imports: ['lodash', 'moment'],
71
+ },
72
+ ];
73
+
74
+ const cohesionWithShared = calculateCohesion(exportsWithSharedImports);
75
+ const cohesionWithoutShared = calculateCohesion(exportsWithoutSharedImports);
76
+
77
+ // Shared imports should result in higher cohesion
78
+ expect(cohesionWithShared).toBeGreaterThan(cohesionWithoutShared);
79
+ });
80
+
81
+ it('should handle mixed case: some exports with imports, some without', () => {
82
+ const exports: ExportInfo[] = [
83
+ {
84
+ name: 'getUserData',
85
+ type: 'function',
86
+ inferredDomain: 'user',
87
+ imports: ['react', 'axios'],
88
+ },
89
+ {
90
+ name: 'getProductData',
91
+ type: 'function',
92
+ inferredDomain: 'product',
93
+ // No imports field
94
+ },
95
+ ];
96
+
97
+ const cohesion = calculateCohesion(exports);
98
+
99
+ // Should fall back to domain-based when not all exports have import data
100
+ expect(cohesion).toBeGreaterThan(0);
101
+ expect(cohesion).toBeLessThan(1);
102
+ });
103
+
104
+ it('should return 1 for single export', () => {
105
+ const exports: ExportInfo[] = [
106
+ {
107
+ name: 'getUserData',
108
+ type: 'function',
109
+ inferredDomain: 'user',
110
+ imports: ['react'],
111
+ },
112
+ ];
113
+
114
+ expect(calculateCohesion(exports)).toBe(1);
115
+ });
116
+
117
+ it('should return 1 for test files regardless of domains or imports', () => {
118
+ const exports: ExportInfo[] = [
119
+ { name: 'testUserLogin', type: 'function', inferredDomain: 'user', imports: ['react'] },
120
+ { name: 'testProductView', type: 'function', inferredDomain: 'product', imports: [] },
121
+ ];
122
+
123
+ const cohesion = calculateCohesion(exports, 'src/utils/test-helpers.ts');
124
+ expect(cohesion).toBe(1);
125
+ });
126
+ });
package/src/analyzer.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { estimateTokens } from '@aiready/core';
1
+ import { estimateTokens, parseFileExports, calculateImportSimilarity, type ExportWithImports } from '@aiready/core';
2
2
  import type {
3
3
  ContextAnalysisResult,
4
4
  DependencyGraph,
@@ -6,25 +6,87 @@ import type {
6
6
  ExportInfo,
7
7
  ModuleCluster,
8
8
  } from './types';
9
+ import { buildCoUsageMatrix, buildTypeGraph, inferDomainFromSemantics } from './semantic-analysis';
9
10
 
10
11
  interface FileContent {
11
12
  file: string;
12
13
  content: string;
13
14
  }
14
15
 
16
+ /**
17
+ * Auto-detect domain keywords from workspace folder structure
18
+ * Extracts unique folder names from file paths as potential domain keywords
19
+ */
20
+ function extractDomainKeywordsFromPaths(files: FileContent[]): string[] {
21
+ const folderNames = new Set<string>();
22
+
23
+ for (const { file } of files) {
24
+ const segments = file.split('/');
25
+ // Extract meaningful folder names (skip common infrastructure folders)
26
+ const skipFolders = new Set(['src', 'lib', 'dist', 'build', 'node_modules', 'test', 'tests', '__tests__', 'spec', 'e2e', 'scripts', 'components', 'utils', 'helpers', 'util', 'helper', 'api', 'apis']);
27
+
28
+ for (const segment of segments) {
29
+ const normalized = segment.toLowerCase();
30
+ if (normalized && !skipFolders.has(normalized) && !normalized.includes('.')) {
31
+ // Singularize common plural forms for better matching
32
+ const singular = singularize(normalized);
33
+ folderNames.add(singular);
34
+ }
35
+ }
36
+ }
37
+
38
+ return Array.from(folderNames);
39
+ }
40
+
41
+ /**
42
+ * Simple singularization for common English plurals
43
+ */
44
+ function singularize(word: string): string {
45
+ // Handle irregular plurals
46
+ const irregulars: Record<string, string> = {
47
+ people: 'person',
48
+ children: 'child',
49
+ men: 'man',
50
+ women: 'woman',
51
+ };
52
+
53
+ if (irregulars[word]) {
54
+ return irregulars[word];
55
+ }
56
+
57
+ // Common plural patterns
58
+ if (word.endsWith('ies')) {
59
+ return word.slice(0, -3) + 'y'; // categories -> category
60
+ }
61
+ if (word.endsWith('ses')) {
62
+ return word.slice(0, -2); // classes -> class
63
+ }
64
+ if (word.endsWith('s') && word.length > 3) {
65
+ return word.slice(0, -1); // orders -> order
66
+ }
67
+
68
+ return word;
69
+ }
70
+
15
71
  /**
16
72
  * Build a dependency graph from file contents
17
73
  */
18
74
  export function buildDependencyGraph(
19
- files: FileContent[]
75
+ files: FileContent[],
20
76
  ): DependencyGraph {
21
77
  const nodes = new Map<string, DependencyNode>();
22
78
  const edges = new Map<string, Set<string>>();
23
79
 
24
- // First pass: Create nodes
80
+ // Auto-detect domain keywords from workspace folder structure
81
+ const autoDetectedKeywords = extractDomainKeywordsFromPaths(files);
82
+
83
+ // First pass: Create nodes with folder-based domain inference
25
84
  for (const { file, content } of files) {
26
85
  const imports = extractImportsFromContent(content);
27
- const exports = extractExports(content);
86
+
87
+ // Use AST-based extraction for better accuracy, fallback to regex
88
+ const exports = extractExportsWithAST(content, file, { domainKeywords: autoDetectedKeywords }, imports);
89
+
28
90
  const tokenCost = estimateTokens(content);
29
91
  const linesOfCode = content.split('\n').length;
30
92
 
@@ -39,7 +101,39 @@ export function buildDependencyGraph(
39
101
  edges.set(file, new Set(imports));
40
102
  }
41
103
 
42
- return { nodes, edges };
104
+ // Second pass: Build semantic analysis graphs
105
+ const graph: DependencyGraph = { nodes, edges };
106
+ const coUsageMatrix = buildCoUsageMatrix(graph);
107
+ const typeGraph = buildTypeGraph(graph);
108
+
109
+ // Add semantic data to graph
110
+ graph.coUsageMatrix = coUsageMatrix;
111
+ graph.typeGraph = typeGraph;
112
+
113
+ // Third pass: Enhance domain assignments with semantic analysis
114
+ for (const [file, node] of nodes) {
115
+ for (const exp of node.exports) {
116
+ // Get semantic domain assignments
117
+ const semanticAssignments = inferDomainFromSemantics(
118
+ file,
119
+ exp.name,
120
+ graph,
121
+ coUsageMatrix,
122
+ typeGraph,
123
+ exp.typeReferences
124
+ );
125
+
126
+ // Add multi-domain assignments with confidence scores
127
+ exp.domains = semanticAssignments;
128
+
129
+ // Keep inferredDomain for backwards compatibility (use highest confidence)
130
+ if (semanticAssignments.length > 0) {
131
+ exp.inferredDomain = semanticAssignments[0].domain;
132
+ }
133
+ }
134
+ }
135
+
136
+ return graph;
43
137
  }
44
138
 
45
139
  /**
@@ -60,8 +154,8 @@ function extractImportsFromContent(content: string): string[] {
60
154
  let match;
61
155
  while ((match = pattern.exec(content)) !== null) {
62
156
  const importPath = match[1];
63
- if (importPath && !importPath.startsWith('@') && !importPath.startsWith('node:')) {
64
- // Only include relative/local imports
157
+ // Exclude only node built-ins (node:), include all local and aliased imports
158
+ if (importPath && !importPath.startsWith('node:')) {
65
159
  imports.push(importPath);
66
160
  }
67
161
  }
@@ -199,41 +293,12 @@ export function detectCircularDependencies(
199
293
 
200
294
  /**
201
295
  * Calculate cohesion score (how related are exports in a file)
202
- * Uses entropy: low entropy = high cohesion
296
+ * Uses enhanced calculation combining domain-based and import-based analysis
203
297
  * @param exports - Array of export information
204
298
  * @param filePath - Optional file path for context-aware scoring
205
299
  */
206
300
export function calculateCohesion(exports: ExportInfo[], filePath?: string): number {
  // Thin delegate kept for backwards compatibility with existing callers:
  // all scoring now lives in calculateEnhancedCohesion, which blends
  // import-based similarity with the original domain-entropy method.
  return calculateEnhancedCohesion(exports, filePath);
}
238
303
 
239
304
  /**
@@ -335,7 +400,12 @@ export function detectModuleClusters(
335
400
  * Extract export information from file content
336
401
  * TODO: Use proper AST parsing for better accuracy
337
402
  */
338
- function extractExports(content: string): ExportInfo[] {
403
+ function extractExports(
404
+ content: string,
405
+ filePath?: string,
406
+ domainOptions?: { domainKeywords?: string[]; domainPatterns?: string[]; pathDomainMap?: Record<string, string> },
407
+ fileImports?: string[]
408
+ ): ExportInfo[] {
339
409
  const exports: ExportInfo[] = [];
340
410
 
341
411
  // Simple regex-based extraction (improve with AST later)
@@ -362,7 +432,7 @@ function extractExports(content: string): ExportInfo[] {
362
432
  while ((match = pattern.exec(content)) !== null) {
363
433
  const name = match[1] || 'default';
364
434
  const type = types[index];
365
- const inferredDomain = inferDomain(name);
435
+ const inferredDomain = inferDomain(name, filePath, domainOptions, fileImports);
366
436
 
367
437
  exports.push({ name, type, inferredDomain });
368
438
  }
@@ -375,12 +445,29 @@ function extractExports(content: string): ExportInfo[] {
375
445
  * Infer domain from export name
376
446
  * Uses common naming patterns with word boundary matching
377
447
  */
378
- function inferDomain(name: string): string {
448
+ function inferDomain(
449
+ name: string,
450
+ filePath?: string,
451
+ domainOptions?: { domainKeywords?: string[] },
452
+ fileImports?: string[]
453
+ ): string {
379
454
  const lower = name.toLowerCase();
380
455
 
456
+ // Tokenize identifier: split camelCase, snake_case, kebab-case, and numbers
457
+ const tokens = Array.from(
458
+ new Set(
459
+ lower
460
+ .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
461
+ .replace(/[^a-z0-9]+/gi, ' ')
462
+ .split(' ')
463
+ .filter(Boolean)
464
+ )
465
+ );
466
+
381
467
  // Domain keywords ordered from most specific to most general
382
468
  // This prevents generic terms like 'util' from matching before specific domains
383
- const domainKeywords = [
469
+ // NOTE: 'api', 'util', 'helper' are intentionally excluded as they are too generic
470
+ const defaultKeywords = [
384
471
  'authentication',
385
472
  'authorization',
386
473
  'payment',
@@ -398,15 +485,15 @@ function inferDomain(name: string): string {
398
485
  'model',
399
486
  'view',
400
487
  'auth',
401
- 'api',
402
- 'helper',
403
- 'util',
404
488
  ];
405
489
 
490
+ const domainKeywords = domainOptions?.domainKeywords && domainOptions.domainKeywords.length
491
+ ? [...domainOptions.domainKeywords, ...defaultKeywords]
492
+ : defaultKeywords;
493
+
406
494
  // Try word boundary matching first for more accurate detection
407
495
  for (const keyword of domainKeywords) {
408
- const wordBoundaryPattern = new RegExp(`\\b${keyword}\\b`, 'i');
409
- if (wordBoundaryPattern.test(name)) {
496
+ if (tokens.includes(keyword)) {
410
497
  return keyword;
411
498
  }
412
499
  }
@@ -418,6 +505,52 @@ function inferDomain(name: string): string {
418
505
  }
419
506
  }
420
507
 
508
+ // Import-path domain inference: analyze import statements for domain hints
509
+ if (fileImports && fileImports.length > 0) {
510
+ for (const importPath of fileImports) {
511
+ // Parse all segments, including those after '@' or '.'
512
+ // e.g., '@/orders/service' -> ['orders', 'service']
513
+ // '../payments/processor' -> ['payments', 'processor']
514
+ const allSegments = importPath.split('/');
515
+ const relevantSegments = allSegments.filter(s => {
516
+ if (!s) return false;
517
+ // Skip '.' and '..' but keep everything else
518
+ if (s === '.' || s === '..') return false;
519
+ // Skip '@' prefix but keep the path after it
520
+ if (s.startsWith('@') && s.length === 1) return false;
521
+ // Remove '@' prefix from scoped imports like '@/orders'
522
+ return true;
523
+ }).map(s => s.startsWith('@') ? s.slice(1) : s);
524
+
525
+ for (const segment of relevantSegments) {
526
+ const segLower = segment.toLowerCase();
527
+ const singularSegment = singularize(segLower);
528
+
529
+ // Check if any domain keyword matches the import path segment (with singularization)
530
+ for (const keyword of domainKeywords) {
531
+ if (singularSegment === keyword || segLower === keyword || segLower.includes(keyword)) {
532
+ return keyword;
533
+ }
534
+ }
535
+ }
536
+ }
537
+ }
538
+
539
+ // Path-based fallback: check file path segments
540
+ if (filePath) {
541
+ // Auto-detect from path by checking against domain keywords (with singularization)
542
+ const pathSegments = filePath.toLowerCase().split('/');
543
+ for (const segment of pathSegments) {
544
+ const singularSegment = singularize(segment);
545
+
546
+ for (const keyword of domainKeywords) {
547
+ if (singularSegment === keyword || segment === keyword || segment.includes(keyword)) {
548
+ return keyword;
549
+ }
550
+ }
551
+ }
552
+ }
553
+
421
554
  return 'unknown';
422
555
  }
423
556
 
@@ -460,3 +593,136 @@ function generateConsolidationPlan(
460
593
 
461
594
  return plan;
462
595
  }
596
+
597
+ /**
598
+ * Extract exports using AST parsing (enhanced version)
599
+ * Falls back to regex if AST parsing fails
600
+ */
601
+ export function extractExportsWithAST(
602
+ content: string,
603
+ filePath: string,
604
+ domainOptions?: { domainKeywords?: string[] },
605
+ fileImports?: string[]
606
+ ): ExportInfo[] {
607
+ try {
608
+ const { exports: astExports } = parseFileExports(content, filePath);
609
+
610
+ return astExports.map(exp => ({
611
+ name: exp.name,
612
+ type: exp.type,
613
+ inferredDomain: inferDomain(exp.name, filePath, domainOptions, fileImports),
614
+ imports: exp.imports,
615
+ dependencies: exp.dependencies,
616
+ }));
617
+ } catch (error) {
618
+ // Fallback to regex-based extraction
619
+ return extractExports(content, filePath, domainOptions, fileImports);
620
+ }
621
+ }
622
+
623
/**
 * Calculate enhanced cohesion score using both domain inference and import similarity.
 *
 * This combines:
 * 1. Domain-based cohesion (normalized Shannon entropy of inferred domains)
 * 2. Import-based cohesion (mean pairwise Jaccard similarity of export imports)
 *
 * Weight: 60% import-based, 40% domain-based (import analysis is more reliable).
 *
 * @param exports - Exports of the file being scored
 * @param filePath - Optional path; test/mock/fixture files short-circuit to 1
 * @returns Cohesion score in [0, 1]; higher means more related exports
 */
export function calculateEnhancedCohesion(
  exports: ExportInfo[],
  filePath?: string
): number {
  if (exports.length === 0) return 1;
  if (exports.length === 1) return 1; // a single export is trivially cohesive

  // Special case for test files: they serve one purpose (testing) even when
  // their exports span several domains.
  if (filePath && isTestFile(filePath)) {
    return 1;
  }

  // Calculate domain-based cohesion (entropy method).
  const domainCohesion = calculateDomainCohesion(exports);

  // NOTE(review): `some` (not `every`) gates the import branch, so a file
  // where only one export carries imports still takes the weighted blend
  // below (calculateImportBasedCohesion returns 1 with <2 import lists) —
  // confirm this inflation is intended rather than a pure domain fallback.
  const hasImportData = exports.some(e => e.imports && e.imports.length > 0);

  if (!hasImportData) {
    // No import data available, use domain-based only.
    return domainCohesion;
  }

  const importCohesion = calculateImportBasedCohesion(exports);

  // Weighted combination: 60% import-based, 40% domain-based.
  return importCohesion * 0.6 + domainCohesion * 0.4;
}
660
+
661
+ /**
662
+ * Calculate cohesion based on shared imports (Jaccard similarity)
663
+ */
664
+ function calculateImportBasedCohesion(exports: ExportInfo[]): number {
665
+ const exportsWithImports = exports.filter(e => e.imports && e.imports.length > 0);
666
+
667
+ if (exportsWithImports.length < 2) {
668
+ return 1; // Not enough data
669
+ }
670
+
671
+ // Calculate pairwise import similarity
672
+ let totalSimilarity = 0;
673
+ let comparisons = 0;
674
+
675
+ for (let i = 0; i < exportsWithImports.length; i++) {
676
+ for (let j = i + 1; j < exportsWithImports.length; j++) {
677
+ const exp1 = exportsWithImports[i] as ExportInfo & { imports: string[] };
678
+ const exp2 = exportsWithImports[j] as ExportInfo & { imports: string[] };
679
+
680
+ const similarity = calculateJaccardSimilarity(exp1.imports, exp2.imports);
681
+ totalSimilarity += similarity;
682
+ comparisons++;
683
+ }
684
+ }
685
+
686
+ return comparisons > 0 ? totalSimilarity / comparisons : 1;
687
+ }
688
+
689
+ /**
690
+ * Calculate Jaccard similarity between two arrays
691
+ */
692
+ function calculateJaccardSimilarity(arr1: string[], arr2: string[]): number {
693
+ if (arr1.length === 0 && arr2.length === 0) return 1;
694
+ if (arr1.length === 0 || arr2.length === 0) return 0;
695
+
696
+ const set1 = new Set(arr1);
697
+ const set2 = new Set(arr2);
698
+
699
+ const intersection = new Set([...set1].filter(x => set2.has(x)));
700
+ const union = new Set([...set1, ...set2]);
701
+
702
+ return intersection.size / union.size;
703
+ }
704
+
705
+ /**
706
+ * Calculate domain-based cohesion (existing entropy method)
707
+ */
708
+ function calculateDomainCohesion(exports: ExportInfo[]): number {
709
+ const domains = exports.map((e) => e.inferredDomain || 'unknown');
710
+ const domainCounts = new Map<string, number>();
711
+
712
+ for (const domain of domains) {
713
+ domainCounts.set(domain, (domainCounts.get(domain) || 0) + 1);
714
+ }
715
+
716
+ const total = domains.length;
717
+ let entropy = 0;
718
+
719
+ for (const count of domainCounts.values()) {
720
+ const p = count / total;
721
+ if (p > 0) {
722
+ entropy -= p * Math.log2(p);
723
+ }
724
+ }
725
+
726
+ const maxEntropy = Math.log2(total);
727
+ return maxEntropy > 0 ? 1 - entropy / maxEntropy : 1;
728
+ }
package/src/index.ts CHANGED
@@ -15,9 +15,41 @@ import type {
15
15
  ContextAnalysisResult,
16
16
  ContextSummary,
17
17
  ModuleCluster,
18
+ DomainAssignment,
19
+ DomainSignals,
20
+ CoUsageData,
21
+ TypeDependency,
18
22
  } from './types';
19
-
20
- export type { ContextAnalyzerOptions, ContextAnalysisResult, ContextSummary, ModuleCluster };
23
+ import {
24
+ buildCoUsageMatrix,
25
+ buildTypeGraph,
26
+ findSemanticClusters,
27
+ calculateDomainConfidence,
28
+ inferDomainFromSemantics,
29
+ getCoUsageData,
30
+ findConsolidationCandidates,
31
+ } from './semantic-analysis';
32
+
33
+ export type {
34
+ ContextAnalyzerOptions,
35
+ ContextAnalysisResult,
36
+ ContextSummary,
37
+ ModuleCluster,
38
+ DomainAssignment,
39
+ DomainSignals,
40
+ CoUsageData,
41
+ TypeDependency,
42
+ };
43
+
44
+ export {
45
+ buildCoUsageMatrix,
46
+ buildTypeGraph,
47
+ findSemanticClusters,
48
+ calculateDomainConfidence,
49
+ inferDomainFromSemantics,
50
+ getCoUsageData,
51
+ findConsolidationCandidates,
52
+ };
21
53
 
22
54
  /**
23
55
  * Generate smart defaults for context analysis based on repository size