@aiready/context-analyzer 0.6.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +10 -10
- package/.turbo/turbo-test.log +11 -6
- package/README.md +5 -3
- package/SEMANTIC-VALIDATION.md +235 -0
- package/dist/chunk-AEK3MZC5.mjs +709 -0
- package/dist/chunk-DMRZMS2U.mjs +964 -0
- package/dist/chunk-HQNHM2X7.mjs +997 -0
- package/dist/chunk-I54HL4FZ.mjs +781 -0
- package/dist/chunk-IRWCPDWD.mjs +779 -0
- package/dist/chunk-PVVCCE6W.mjs +755 -0
- package/dist/chunk-RYIB5CWD.mjs +781 -0
- package/dist/cli.js +234 -16
- package/dist/cli.mjs +1 -1
- package/dist/index.d.mts +90 -1
- package/dist/index.d.ts +90 -1
- package/dist/index.js +311 -18
- package/dist/index.mjs +17 -3
- package/package.json +2 -2
- package/src/__tests__/auto-detection.test.ts +156 -0
- package/src/analyzer.ts +182 -18
- package/src/index.ts +34 -2
- package/src/semantic-analysis.ts +287 -0
- package/src/types.ts +33 -1
- package/COHESION-IMPROVEMENTS.md +0 -202
package/src/analyzer.ts
CHANGED
@@ -6,27 +6,86 @@ import type {
   ExportInfo,
   ModuleCluster,
 } from './types';
+import { buildCoUsageMatrix, buildTypeGraph, inferDomainFromSemantics } from './semantic-analysis';
 
 interface FileContent {
   file: string;
   content: string;
 }
 
+/**
+ * Auto-detect domain keywords from workspace folder structure
+ * Extracts unique folder names from file paths as potential domain keywords
+ */
+function extractDomainKeywordsFromPaths(files: FileContent[]): string[] {
+  const folderNames = new Set<string>();
+
+  for (const { file } of files) {
+    const segments = file.split('/');
+    // Extract meaningful folder names (skip common infrastructure folders)
+    const skipFolders = new Set(['src', 'lib', 'dist', 'build', 'node_modules', 'test', 'tests', '__tests__', 'spec', 'e2e', 'scripts', 'components', 'utils', 'helpers', 'util', 'helper', 'api', 'apis']);
+
+    for (const segment of segments) {
+      const normalized = segment.toLowerCase();
+      if (normalized && !skipFolders.has(normalized) && !normalized.includes('.')) {
+        // Singularize common plural forms for better matching
+        const singular = singularize(normalized);
+        folderNames.add(singular);
+      }
+    }
+  }
+
+  return Array.from(folderNames);
+}
+
+/**
+ * Simple singularization for common English plurals
+ */
+function singularize(word: string): string {
+  // Handle irregular plurals
+  const irregulars: Record<string, string> = {
+    people: 'person',
+    children: 'child',
+    men: 'man',
+    women: 'woman',
+  };
+
+  if (irregulars[word]) {
+    return irregulars[word];
+  }
+
+  // Common plural patterns
+  if (word.endsWith('ies')) {
+    return word.slice(0, -3) + 'y'; // categories -> category
+  }
+  if (word.endsWith('ses')) {
+    return word.slice(0, -2); // classes -> class
+  }
+  if (word.endsWith('s') && word.length > 3) {
+    return word.slice(0, -1); // orders -> order
+  }
+
+  return word;
+}
+
 /**
  * Build a dependency graph from file contents
  */
 export function buildDependencyGraph(
-  files: FileContent[]
+  files: FileContent[],
 ): DependencyGraph {
   const nodes = new Map<string, DependencyNode>();
   const edges = new Map<string, Set<string>>();
 
-  //
+  // Auto-detect domain keywords from workspace folder structure
+  const autoDetectedKeywords = extractDomainKeywordsFromPaths(files);
+
+  // First pass: Create nodes with folder-based domain inference
   for (const { file, content } of files) {
     const imports = extractImportsFromContent(content);
 
     // Use AST-based extraction for better accuracy, fallback to regex
-    const exports = extractExportsWithAST(content, file);
+    const exports = extractExportsWithAST(content, file, { domainKeywords: autoDetectedKeywords }, imports);
 
     const tokenCost = estimateTokens(content);
     const linesOfCode = content.split('\n').length;
@@ -42,7 +101,39 @@ export function buildDependencyGraph(
     edges.set(file, new Set(imports));
   }
 
-  return { nodes, edges };
+  // Second pass: Build semantic analysis graphs
+  const graph: DependencyGraph = { nodes, edges };
+  const coUsageMatrix = buildCoUsageMatrix(graph);
+  const typeGraph = buildTypeGraph(graph);
+
+  // Add semantic data to graph
+  graph.coUsageMatrix = coUsageMatrix;
+  graph.typeGraph = typeGraph;
+
+  // Third pass: Enhance domain assignments with semantic analysis
+  for (const [file, node] of nodes) {
+    for (const exp of node.exports) {
+      // Get semantic domain assignments
+      const semanticAssignments = inferDomainFromSemantics(
+        file,
+        exp.name,
+        graph,
+        coUsageMatrix,
+        typeGraph,
+        exp.typeReferences
+      );
+
+      // Add multi-domain assignments with confidence scores
+      exp.domains = semanticAssignments;
+
+      // Keep inferredDomain for backwards compatibility (use highest confidence)
+      if (semanticAssignments.length > 0) {
+        exp.inferredDomain = semanticAssignments[0].domain;
+      }
+    }
+  }
+
+  return graph;
 }
 
 /**
@@ -63,8 +154,8 @@ function extractImportsFromContent(content: string): string[] {
   let match;
   while ((match = pattern.exec(content)) !== null) {
     const importPath = match[1];
-
-
+    // Exclude only node built-ins (node:), include all local and aliased imports
+    if (importPath && !importPath.startsWith('node:')) {
       imports.push(importPath);
     }
   }
@@ -309,7 +400,12 @@ export function detectModuleClusters(
  * Extract export information from file content
  * TODO: Use proper AST parsing for better accuracy
  */
-function extractExports(content: string): ExportInfo[] {
+function extractExports(
+  content: string,
+  filePath?: string,
+  domainOptions?: { domainKeywords?: string[]; domainPatterns?: string[]; pathDomainMap?: Record<string, string> },
+  fileImports?: string[]
+): ExportInfo[] {
   const exports: ExportInfo[] = [];
 
   // Simple regex-based extraction (improve with AST later)
@@ -336,7 +432,7 @@ function extractExports(content: string): ExportInfo[] {
   while ((match = pattern.exec(content)) !== null) {
     const name = match[1] || 'default';
     const type = types[index];
-    const inferredDomain = inferDomain(name);
+    const inferredDomain = inferDomain(name, filePath, domainOptions, fileImports);
 
     exports.push({ name, type, inferredDomain });
   }
@@ -349,12 +445,29 @@ function extractExports(content: string): ExportInfo[] {
  * Infer domain from export name
  * Uses common naming patterns with word boundary matching
  */
-function inferDomain(name: string): string {
+function inferDomain(
+  name: string,
+  filePath?: string,
+  domainOptions?: { domainKeywords?: string[] },
+  fileImports?: string[]
+): string {
   const lower = name.toLowerCase();
 
+  // Tokenize identifier: split camelCase, snake_case, kebab-case, and numbers
+  const tokens = Array.from(
+    new Set(
+      lower
+        .replace(/([a-z0-9])([A-Z])/g, '$1 $2')
+        .replace(/[^a-z0-9]+/gi, ' ')
+        .split(' ')
+        .filter(Boolean)
+    )
+  );
+
   // Domain keywords ordered from most specific to most general
   // This prevents generic terms like 'util' from matching before specific domains
-  const domainKeywords = [
+  // NOTE: 'api', 'util', 'helper' are intentionally excluded as they are too generic
+  const defaultKeywords = [
     'authentication',
     'authorization',
     'payment',
@@ -372,15 +485,15 @@ function inferDomain(name: string): string {
     'model',
     'view',
     'auth',
-    'api',
-    'helper',
-    'util',
   ];
 
+  const domainKeywords = domainOptions?.domainKeywords && domainOptions.domainKeywords.length
+    ? [...domainOptions.domainKeywords, ...defaultKeywords]
+    : defaultKeywords;
+
   // Try word boundary matching first for more accurate detection
   for (const keyword of domainKeywords) {
-
-    if (wordBoundaryPattern.test(name)) {
+    if (tokens.includes(keyword)) {
       return keyword;
     }
   }
@@ -392,6 +505,52 @@ function inferDomain(name: string): string {
     }
   }
 
+  // Import-path domain inference: analyze import statements for domain hints
+  if (fileImports && fileImports.length > 0) {
+    for (const importPath of fileImports) {
+      // Parse all segments, including those after '@' or '.'
+      // e.g., '@/orders/service' -> ['orders', 'service']
+      //       '../payments/processor' -> ['payments', 'processor']
+      const allSegments = importPath.split('/');
+      const relevantSegments = allSegments.filter(s => {
+        if (!s) return false;
+        // Skip '.' and '..' but keep everything else
+        if (s === '.' || s === '..') return false;
+        // Skip '@' prefix but keep the path after it
+        if (s.startsWith('@') && s.length === 1) return false;
+        // Remove '@' prefix from scoped imports like '@/orders'
+        return true;
+      }).map(s => s.startsWith('@') ? s.slice(1) : s);
+
+      for (const segment of relevantSegments) {
+        const segLower = segment.toLowerCase();
+        const singularSegment = singularize(segLower);
+
+        // Check if any domain keyword matches the import path segment (with singularization)
+        for (const keyword of domainKeywords) {
+          if (singularSegment === keyword || segLower === keyword || segLower.includes(keyword)) {
+            return keyword;
+          }
+        }
+      }
+    }
+  }
+
+  // Path-based fallback: check file path segments
+  if (filePath) {
+    // Auto-detect from path by checking against domain keywords (with singularization)
+    const pathSegments = filePath.toLowerCase().split('/');
+    for (const segment of pathSegments) {
+      const singularSegment = singularize(segment);
+
+      for (const keyword of domainKeywords) {
+        if (singularSegment === keyword || segment === keyword || segment.includes(keyword)) {
+          return keyword;
+        }
+      }
+    }
+  }
+
   return 'unknown';
 }
 
@@ -439,20 +598,25 @@ function generateConsolidationPlan(
  * Extract exports using AST parsing (enhanced version)
  * Falls back to regex if AST parsing fails
  */
-export function extractExportsWithAST(content: string, filePath: string): ExportInfo[] {
+export function extractExportsWithAST(
+  content: string,
+  filePath: string,
+  domainOptions?: { domainKeywords?: string[] },
+  fileImports?: string[]
+): ExportInfo[] {
   try {
     const { exports: astExports } = parseFileExports(content, filePath);
 
     return astExports.map(exp => ({
       name: exp.name,
       type: exp.type,
-      inferredDomain: inferDomain(exp.name),
+      inferredDomain: inferDomain(exp.name, filePath, domainOptions, fileImports),
       imports: exp.imports,
       dependencies: exp.dependencies,
     }));
  } catch (error) {
     // Fallback to regex-based extraction
-    return extractExports(content);
+    return extractExports(content, filePath, domainOptions, fileImports);
   }
 }
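How these pieces compose: buildDependencyGraph now derives candidate domain keywords from workspace folder names, and inferDomain falls back from export-name tokens to import-path segments to file-path segments before returning 'unknown'. A minimal sketch of driving the new signature (the fixture files are invented, and buildDependencyGraph is assumed to be importable from this module):

    import { buildDependencyGraph } from './analyzer';

    // Hypothetical fixture: the 'orders' and 'payments' folders singularize
    // into the auto-detected keywords 'order' and 'payment'.
    const files = [
      { file: 'src/orders/service.ts', content: "import { charge } from '../payments/charge';\nexport function submitOrder() {}" },
      { file: 'src/payments/charge.ts', content: 'export function charge() {}' },
    ];

    const graph = buildDependencyGraph(files);
    for (const node of graph.nodes.values()) {
      for (const exp of node.exports) {
        // inferredDomain keeps the legacy single label; domains carries the
        // scored multi-domain assignments produced by the third pass.
        console.log(exp.name, exp.inferredDomain, exp.domains);
      }
    }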
package/src/index.ts
CHANGED
@@ -15,9 +15,41 @@ import type {
   ContextAnalysisResult,
   ContextSummary,
   ModuleCluster,
+  DomainAssignment,
+  DomainSignals,
+  CoUsageData,
+  TypeDependency,
 } from './types';
-
-
+import {
+  buildCoUsageMatrix,
+  buildTypeGraph,
+  findSemanticClusters,
+  calculateDomainConfidence,
+  inferDomainFromSemantics,
+  getCoUsageData,
+  findConsolidationCandidates,
+} from './semantic-analysis';
+
+export type {
+  ContextAnalyzerOptions,
+  ContextAnalysisResult,
+  ContextSummary,
+  ModuleCluster,
+  DomainAssignment,
+  DomainSignals,
+  CoUsageData,
+  TypeDependency,
+};
+
+export {
+  buildCoUsageMatrix,
+  buildTypeGraph,
+  findSemanticClusters,
+  calculateDomainConfidence,
+  inferDomainFromSemantics,
+  getCoUsageData,
+  findConsolidationCandidates,
+};
 
 /**
  * Generate smart defaults for context analysis based on repository size
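Assuming the published entry point republishes this source file as-is (not verified against the dist/ bundles), the semantic-analysis helpers and types become part of the package's public API:

    import {
      buildCoUsageMatrix,
      findConsolidationCandidates,
      type DomainAssignment,
    } from '@aiready/context-analyzer';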
package/src/semantic-analysis.ts
ADDED
@@ -0,0 +1,287 @@
+import type { DependencyGraph, CoUsageData, TypeDependency, DomainAssignment, DomainSignals } from './types';
+
+/**
+ * Build co-usage matrix: track which files are imported together
+ *
+ * Files frequently imported together likely belong to the same semantic domain
+ */
+export function buildCoUsageMatrix(graph: DependencyGraph): Map<string, Map<string, number>> {
+  const coUsageMatrix = new Map<string, Map<string, number>>();
+
+  // For each file, track which other files are imported alongside it
+  for (const [sourceFile, node] of graph.nodes) {
+    const imports = node.imports;
+
+    // For each pair of imports in this file, increment their co-usage count
+    for (let i = 0; i < imports.length; i++) {
+      const fileA = imports[i];
+
+      if (!coUsageMatrix.has(fileA)) {
+        coUsageMatrix.set(fileA, new Map());
+      }
+
+      for (let j = i + 1; j < imports.length; j++) {
+        const fileB = imports[j];
+
+        // Increment bidirectional co-usage count
+        const fileAUsage = coUsageMatrix.get(fileA)!;
+        fileAUsage.set(fileB, (fileAUsage.get(fileB) || 0) + 1);
+
+        if (!coUsageMatrix.has(fileB)) {
+          coUsageMatrix.set(fileB, new Map());
+        }
+        const fileBUsage = coUsageMatrix.get(fileB)!;
+        fileBUsage.set(fileA, (fileBUsage.get(fileA) || 0) + 1);
+      }
+    }
+  }
+
+  return coUsageMatrix;
+}
+
+/**
+ * Extract type dependencies from AST exports
+ *
+ * Files that share types are semantically related
+ */
+export function buildTypeGraph(graph: DependencyGraph): Map<string, Set<string>> {
+  const typeGraph = new Map<string, Set<string>>();
+
+  for (const [file, node] of graph.nodes) {
+    for (const exp of node.exports) {
+      if (exp.typeReferences) {
+        for (const typeRef of exp.typeReferences) {
+          if (!typeGraph.has(typeRef)) {
+            typeGraph.set(typeRef, new Set());
+          }
+          typeGraph.get(typeRef)!.add(file);
+        }
+      }
+    }
+  }
+
+  return typeGraph;
+}
+
+/**
+ * Find semantic clusters using co-usage patterns
+ *
+ * Files with high co-usage counts belong in the same cluster
+ */
+export function findSemanticClusters(
+  coUsageMatrix: Map<string, Map<string, number>>,
+  minCoUsage: number = 3
+): Map<string, string[]> {
+  const clusters = new Map<string, string[]>();
+  const visited = new Set<string>();
+
+  // Simple clustering: group files with high co-usage
+  for (const [file, coUsages] of coUsageMatrix) {
+    if (visited.has(file)) continue;
+
+    const cluster: string[] = [file];
+    visited.add(file);
+
+    // Find strongly related files (co-imported >= minCoUsage times)
+    for (const [relatedFile, count] of coUsages) {
+      if (count >= minCoUsage && !visited.has(relatedFile)) {
+        cluster.push(relatedFile);
+        visited.add(relatedFile);
+      }
+    }
+
+    if (cluster.length > 1) {
+      // Use first file as cluster ID
+      clusters.set(file, cluster);
+    }
+  }
+
+  return clusters;
+}
+
+/**
+ * Calculate confidence score for domain assignment based on multiple signals
+ */
+export function calculateDomainConfidence(signals: DomainSignals): number {
+  const weights = {
+    coUsage: 0.35,        // Strongest signal: actual usage patterns
+    typeReference: 0.30,  // Strong signal: shared types
+    exportName: 0.15,     // Medium signal: identifier semantics
+    importPath: 0.10,     // Weaker signal: path structure
+    folderStructure: 0.10 // Weakest signal: organization convention
+  };
+
+  let confidence = 0;
+  if (signals.coUsage) confidence += weights.coUsage;
+  if (signals.typeReference) confidence += weights.typeReference;
+  if (signals.exportName) confidence += weights.exportName;
+  if (signals.importPath) confidence += weights.importPath;
+  if (signals.folderStructure) confidence += weights.folderStructure;
+
+  return confidence;
+}
+
+/**
+ * Infer domain from semantic analysis (co-usage + types)
+ *
+ * This replaces the folder-based heuristic with actual code relationships
+ */
+export function inferDomainFromSemantics(
+  file: string,
+  exportName: string,
+  graph: DependencyGraph,
+  coUsageMatrix: Map<string, Map<string, number>>,
+  typeGraph: Map<string, Set<string>>,
+  exportTypeRefs?: string[]
+): DomainAssignment[] {
+  const assignments: DomainAssignment[] = [];
+  const domainSignals = new Map<string, DomainSignals>();
+
+  // 1. Check co-usage patterns
+  const coUsages = coUsageMatrix.get(file) || new Map();
+  const strongCoUsages = Array.from(coUsages.entries())
+    .filter(([_, count]) => count >= 3)
+    .map(([coFile]) => coFile);
+
+  // Extract domains from frequently co-imported files
+  for (const coFile of strongCoUsages) {
+    const coNode = graph.nodes.get(coFile);
+    if (coNode) {
+      for (const exp of coNode.exports) {
+        if (exp.inferredDomain && exp.inferredDomain !== 'unknown') {
+          const domain = exp.inferredDomain;
+          if (!domainSignals.has(domain)) {
+            domainSignals.set(domain, {
+              coUsage: false,
+              typeReference: false,
+              exportName: false,
+              importPath: false,
+              folderStructure: false
+            });
+          }
+          domainSignals.get(domain)!.coUsage = true;
+        }
+      }
+    }
+  }
+
+  // 2. Check type references
+  if (exportTypeRefs) {
+    for (const typeRef of exportTypeRefs) {
+      const filesWithType = typeGraph.get(typeRef);
+      if (filesWithType) {
+        for (const typeFile of filesWithType) {
+          if (typeFile !== file) {
+            const typeNode = graph.nodes.get(typeFile);
+            if (typeNode) {
+              for (const exp of typeNode.exports) {
+                if (exp.inferredDomain && exp.inferredDomain !== 'unknown') {
+                  const domain = exp.inferredDomain;
+                  if (!domainSignals.has(domain)) {
+                    domainSignals.set(domain, {
+                      coUsage: false,
+                      typeReference: false,
+                      exportName: false,
+                      importPath: false,
+                      folderStructure: false
+                    });
+                  }
+                  domainSignals.get(domain)!.typeReference = true;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // 3. Build domain assignments with confidence scores
+  for (const [domain, signals] of domainSignals) {
+    const confidence = calculateDomainConfidence(signals);
+    if (confidence >= 0.3) { // Minimum confidence threshold
+      assignments.push({ domain, confidence, signals });
+    }
+  }
+
+  // Sort by confidence (highest first)
+  assignments.sort((a, b) => b.confidence - a.confidence);
+
+  return assignments;
+}
+
+/**
+ * Get co-usage data for a specific file
+ */
+export function getCoUsageData(
+  file: string,
+  coUsageMatrix: Map<string, Map<string, number>>
+): CoUsageData {
+  const coImportedWith = coUsageMatrix.get(file) || new Map();
+
+  // Find files that import both this file and others
+  const sharedImporters: string[] = [];
+  // This would require inverse mapping from imports, simplified for now
+
+  return {
+    file,
+    coImportedWith,
+    sharedImporters
+  };
+}
+
+/**
+ * Find files that should be consolidated based on semantic similarity
+ *
+ * High co-usage + shared types = strong consolidation candidate
+ */
+export function findConsolidationCandidates(
+  graph: DependencyGraph,
+  coUsageMatrix: Map<string, Map<string, number>>,
+  typeGraph: Map<string, Set<string>>,
+  minCoUsage: number = 5,
+  minSharedTypes: number = 2
+): Array<{ files: string[]; reason: string; strength: number }> {
+  const candidates: Array<{ files: string[]; reason: string; strength: number }> = [];
+
+  // Find file pairs with both high co-usage AND shared types
+  for (const [fileA, coUsages] of coUsageMatrix) {
+    const nodeA = graph.nodes.get(fileA);
+    if (!nodeA) continue;
+
+    for (const [fileB, coUsageCount] of coUsages) {
+      if (fileB <= fileA) continue; // Avoid duplicates
+      if (coUsageCount < minCoUsage) continue;
+
+      const nodeB = graph.nodes.get(fileB);
+      if (!nodeB) continue;
+
+      // Count shared types
+      const typesA = new Set(nodeA.exports.flatMap(e => e.typeReferences || []));
+      const typesB = new Set(nodeB.exports.flatMap(e => e.typeReferences || []));
+      const sharedTypes = Array.from(typesA).filter(t => typesB.has(t));
+
+      if (sharedTypes.length >= minSharedTypes) {
+        const strength = (coUsageCount / 10) + (sharedTypes.length / 5);
+        candidates.push({
+          files: [fileA, fileB],
+          reason: `High co-usage (${coUsageCount}x) and ${sharedTypes.length} shared types`,
+          strength
+        });
+      } else if (coUsageCount >= minCoUsage * 2) {
+        // Very high co-usage alone is enough
+        const strength = coUsageCount / 10;
+        candidates.push({
+          files: [fileA, fileB],
+          reason: `Very high co-usage (${coUsageCount}x)`,
+          strength
+        });
+      }
+    }
+  }
+
+  // Sort by strength (highest first)
+  candidates.sort((a, b) => b.strength - a.strength);
+
+  return candidates;
+}
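The fixed weights make confidence scores easy to predict: co-usage plus a shared type yields 0.35 + 0.30 = 0.65, comfortably above the 0.3 cutoff applied in inferDomainFromSemantics, while folder structure alone (0.10) is filtered out. A small check against calculateDomainConfidence:

    import { calculateDomainConfidence } from './semantic-analysis';

    // Strong signals: 0.35 (coUsage) + 0.30 (typeReference) -> kept
    console.log(calculateDomainConfidence({
      coUsage: true,
      typeReference: true,
      exportName: false,
      importPath: false,
      folderStructure: false,
    })); // ~0.65 (floating-point sum of 0.35 + 0.30)

    // Weak signal alone: 0.10 -> dropped by the >= 0.3 threshold
    console.log(calculateDomainConfidence({
      coUsage: false,
      typeReference: false,
      exportName: false,
      importPath: false,
      folderStructure: true,
    })); // 0.1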
package/src/types.ts
CHANGED
@@ -87,6 +87,8 @@ export interface ContextSummary {
 export interface DependencyGraph {
   nodes: Map<string, DependencyNode>;
   edges: Map<string, Set<string>>; // file -> dependencies
+  coUsageMatrix?: Map<string, Map<string, number>>; // file -> file -> co-usage count
+  typeGraph?: Map<string, Set<string>>; // type -> files that reference it
 }
 
 export interface DependencyNode {
@@ -95,13 +97,43 @@ export interface DependencyNode {
   exports: ExportInfo[];
   tokenCost: number;
   linesOfCode: number;
+  exportedBy?: string[]; // Files that import exports from this file
+  sharedTypes?: string[]; // Types shared with other files
 }
 
 export interface ExportInfo {
   name: string;
   type: 'function' | 'class' | 'const' | 'type' | 'interface' | 'default';
-  inferredDomain?: string; // Inferred from name/usage
+  inferredDomain?: string; // Inferred from name/usage (legacy single domain)
+  domains?: DomainAssignment[]; // Multi-domain support with confidence scores
   imports?: string[]; // Imports used by this export (for import-based cohesion)
   dependencies?: string[]; // Other exports from same file this depends on
+  typeReferences?: string[]; // TypeScript types referenced by this export
+}
+
+export interface DomainAssignment {
+  domain: string;
+  confidence: number; // 0-1, how confident are we in this assignment
+  signals: DomainSignals; // Which signals contributed to this assignment
+}
+
+export interface DomainSignals {
+  folderStructure: boolean; // Matched from folder name
+  importPath: boolean; // Matched from import paths
+  typeReference: boolean; // Matched from TypeScript type usage
+  coUsage: boolean; // Matched from co-usage patterns
+  exportName: boolean; // Matched from export identifier name
+}
+
+export interface CoUsageData {
+  file: string;
+  coImportedWith: Map<string, number>; // file -> count of times imported together
+  sharedImporters: string[]; // files that import both this and another file
+}
+
+export interface TypeDependency {
+  typeName: string;
+  definedIn: string; // file where type is defined
+  usedBy: string[]; // files that reference this type
 }
 
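For orientation, here is what one ExportInfo might look like after the semantic pass (values are illustrative, not taken from a real run):

    import type { ExportInfo } from './types';

    const example: ExportInfo = {
      name: 'submitOrder',
      type: 'function',
      inferredDomain: 'order', // legacy field: the highest-confidence domain
      domains: [
        {
          domain: 'order',
          confidence: 0.65,
          signals: { coUsage: true, typeReference: true, exportName: false, importPath: false, folderStructure: false },
        },
      ],
      typeReferences: ['Order'],
    };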