npm - @aiready/pattern-detect - Versions diffs - 0.11.31 → 0.11.34 - Mend

@aiready/pattern-detect 0.11.31 → 0.11.34

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (13)

package/CONTRIBUTING.md +8 -1
package/README.md +1 -1
package/dist/chunk-FWUKMJEQ.mjs +1133 -0
package/dist/chunk-SLDK5PQK.mjs +1129 -0
package/dist/chunk-YSDOUNJJ.mjs +1142 -0
package/dist/cli.js +269 -75
package/dist/cli.mjs +160 -36
package/dist/index.d.mts +2 -0
package/dist/index.d.ts +2 -0
package/dist/index.js +121 -42
package/dist/index.mjs +1 -1
package/dist/python-extractor-ELAKYK2W.mjs +140 -0
package/package.json +2 -2

package/dist/cli.mjs CHANGED Viewed

@@ -3,16 +3,65 @@ import {
   analyzePatterns,
   filterBySeverity,
   generateSummary
-} from "./chunk-WKBCNITM.mjs";
+} from "./chunk-YSDOUNJJ.mjs";
 // src/cli.ts
 import { Command } from "commander";
 import chalk from "chalk";
 import { writeFileSync, mkdirSync, existsSync } from "fs";
 import { dirname } from "path";
-import { loadConfig, mergeConfigWithDefaults, resolveOutputPath } from "@aiready/core";
+import {
+  loadConfig,
+  mergeConfigWithDefaults,
+  resolveOutputPath
+} from "@aiready/core";
 var program = new Command();
-program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").addHelpText("after", "\nCONFIGURATION:\n  Supports config files: aiready.json, aiready.config.json, .aiready.json, .aireadyrc.json, aiready.config.js, .aireadyrc.js\n  CLI options override config file settings\n\nPARAMETER TUNING:\n  If you get too few results: decrease --similarity, --min-lines, or --min-shared-tokens\n  If analysis is too slow: increase --min-lines, --min-shared-tokens, or decrease --max-candidates\n  If you get too many false positives: increase --similarity or --min-lines\n\nEXAMPLES:\n  aiready-patterns .                                    # Basic analysis with smart defaults\n  aiready-patterns . --similarity 0.3 --min-lines 3     # More sensitive detection\n  aiready-patterns . --max-candidates 50 --no-approx    # Slower but more thorough\n  aiready-patterns . --output json > report.json       # JSON export").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1). Lower = more results, higher = fewer but more accurate. Default: 0.4").option("-l, --min-lines <number>", "Minimum lines to consider. Lower = more results, higher = faster analysis. Default: 5").option("--batch-size <number>", "Batch size for comparisons. Higher = faster but more memory. Default: 100").option("--no-approx", "Disable approximate candidate selection. Slower but more thorough on small repos").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate. Higher = faster, fewer results. Default: 8").option("--max-candidates <number>", "Maximum candidates per block. Higher = more thorough but slower. Default: 100").option("--no-stream-results", "Disable incremental output (default: enabled)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option("--min-severity <level>", "Minimum severity to show: critical|major|minor|info. Default: minor").option("--exclude-test-fixtures", "Exclude test fixture duplication (beforeAll/afterAll)").option("--exclude-templates", "Exclude template file duplication").option("--include-tests", "Include test files in analysis (excluded by default)").option("--max-results <number>", "Maximum number of results to show in console output. Default: 10").option("--no-group-by-file-pair", "Disable grouping duplicates by file pair").option("--no-create-clusters", "Disable creating refactor clusters").option("--min-cluster-tokens <number>", "Minimum token cost for cluster reporting. Default: 1000").option("--min-cluster-files <number>", "Minimum files for cluster reporting. Default: 3").option("--show-raw-duplicates", "Show raw duplicates instead of grouped view").option(
+program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").addHelpText(
+  "after",
+  "\nCONFIGURATION:\n  Supports config files: aiready.json, aiready.config.json, .aiready.json, .aireadyrc.json, aiready.config.js, .aireadyrc.js\n  CLI options override config file settings\n\nPARAMETER TUNING:\n  If you get too few results: decrease --similarity, --min-lines, or --min-shared-tokens\n  If analysis is too slow: increase --min-lines, --min-shared-tokens, or decrease --max-candidates\n  If you get too many false positives: increase --similarity or --min-lines\n\nEXAMPLES:\n  aiready-patterns .                                    # Basic analysis with smart defaults\n  aiready-patterns . --similarity 0.3 --min-lines 3     # More sensitive detection\n  aiready-patterns . --max-candidates 50 --no-approx    # Slower but more thorough\n  aiready-patterns . --output json > report.json       # JSON export"
+).argument("<directory>", "Directory to analyze").option(
+  "-s, --similarity <number>",
+  "Minimum similarity score (0-1). Lower = more results, higher = fewer but more accurate. Default: 0.4"
+).option(
+  "-l, --min-lines <number>",
+  "Minimum lines to consider. Lower = more results, higher = faster analysis. Default: 5"
+).option(
+  "--batch-size <number>",
+  "Batch size for comparisons. Higher = faster but more memory. Default: 100"
+).option(
+  "--no-approx",
+  "Disable approximate candidate selection. Slower but more thorough on small repos"
+).option(
+  "--min-shared-tokens <number>",
+  "Minimum shared tokens to consider a candidate. Higher = faster, fewer results. Default: 8"
+).option(
+  "--max-candidates <number>",
+  "Maximum candidates per block. Higher = more thorough but slower. Default: 100"
+).option(
+  "--no-stream-results",
+  "Disable incremental output (default: enabled)"
+).option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
+  "--min-severity <level>",
+  "Minimum severity to show: critical|major|minor|info. Default: minor"
+).option(
+  "--exclude-test-fixtures",
+  "Exclude test fixture duplication (beforeAll/afterAll)"
+).option("--exclude-templates", "Exclude template file duplication").option(
+  "--include-tests",
+  "Include test files in analysis (excluded by default)"
+).option(
+  "--max-results <number>",
+  "Maximum number of results to show in console output. Default: 10"
+).option("--no-group-by-file-pair", "Disable grouping duplicates by file pair").option("--no-create-clusters", "Disable creating refactor clusters").option(
+  "--min-cluster-tokens <number>",
+  "Minimum token cost for cluster reporting. Default: 1000"
+).option(
+  "--min-cluster-files <number>",
+  "Minimum files for cluster reporting. Default: 3"
+).option(
+  "--show-raw-duplicates",
+  "Show raw duplicates instead of grouped view"
+).option(
   "-o, --output <format>",
   "Output format: console, json, html",
   "console"
@@ -77,16 +126,29 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
       (pattern) => !testPatterns.includes(pattern)
     );
   }
-  const { results, duplicates: rawDuplicates, files, groups, clusters } = await analyzePatterns(finalOptions);
+  const {
+    results,
+    duplicates: rawDuplicates,
+    files,
+    groups,
+    clusters
+  } = await analyzePatterns(finalOptions);
   let filteredDuplicates = rawDuplicates;
   if (finalOptions.minSeverity) {
-    filteredDuplicates = filterBySeverity(filteredDuplicates, finalOptions.minSeverity);
+    filteredDuplicates = filterBySeverity(
+      filteredDuplicates,
+      finalOptions.minSeverity
+    );
   }
   if (finalOptions.excludeTestFixtures) {
-    filteredDuplicates = filteredDuplicates.filter((d) => d.matchedRule !== "test-fixtures");
+    filteredDuplicates = filteredDuplicates.filter(
+      (d) => d.matchedRule !== "test-fixtures"
+    );
   }
   if (finalOptions.excludeTemplates) {
-    filteredDuplicates = filteredDuplicates.filter((d) => d.matchedRule !== "templates");
+    filteredDuplicates = filteredDuplicates.filter(
+      (d) => d.matchedRule !== "templates"
+    );
   }
   const elapsedTime = ((Date.now() - startTime) / 1e3).toFixed(2);
   const summary = generateSummary(results);
@@ -140,7 +202,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
     chalk.white(`\u{1F4C1} Files analyzed: ${chalk.bold(results.length)}`)
   );
   console.log(
-    chalk.yellow(`\u26A0  AI confusion patterns detected: ${chalk.bold(totalIssues)}`)
+    chalk.yellow(
+      `\u26A0  AI confusion patterns detected: ${chalk.bold(totalIssues)}`
+    )
   );
   console.log(
     chalk.red(
@@ -157,12 +221,16 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
     console.log(chalk.cyan(divider) + "\n");
     sortedTypes.forEach(([type, count]) => {
       const icon = getPatternIcon(type);
-      console.log(`${icon} ${chalk.white(type.padEnd(15))} ${chalk.bold(count)}`);
+      console.log(
+        `${icon} ${chalk.white(type.padEnd(15))} ${chalk.bold(count)}`
+      );
     });
   }
   if (!finalOptions.showRawDuplicates && groups && groups.length > 0) {
     console.log(chalk.cyan("\n" + divider));
-    console.log(chalk.bold.white(`  \u{1F4E6} DUPLICATE GROUPS (${groups.length} file pairs)`));
+    console.log(
+      chalk.bold.white(`  \u{1F4E6} DUPLICATE GROUPS (${groups.length} file pairs)`)
+    );
     console.log(chalk.cyan(divider) + "\n");
     const severityOrder = {
       critical: 4,
@@ -180,39 +248,63 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
       const [file1, file2] = group.filePair.split("::");
       const file1Name = file1.split("/").pop() || file1;
       const file2Name = file2.split("/").pop() || file2;
-      console.log(`${idx + 1}. ${severityBadge} ${chalk.bold(file1Name)} \u2194 ${chalk.bold(file2Name)}`);
-      console.log(`   Occurrences: ${chalk.bold(group.occurrences)} | Total tokens: ${chalk.bold(group.totalTokenCost.toLocaleString())} | Avg similarity: ${chalk.bold(Math.round(group.averageSimilarity * 100) + "%")}`);
+      console.log(
+        `${idx + 1}. ${severityBadge} ${chalk.bold(file1Name)} \u2194 ${chalk.bold(file2Name)}`
+      );
+      console.log(
+        `   Occurrences: ${chalk.bold(group.occurrences)} | Total tokens: ${chalk.bold(group.totalTokenCost.toLocaleString())} | Avg similarity: ${chalk.bold(Math.round(group.averageSimilarity * 100) + "%")}`
+      );
       const displayRanges = group.lineRanges.slice(0, 3);
       displayRanges.forEach((range) => {
-        console.log(`   ${chalk.gray(file1)}:${chalk.cyan(`${range.file1.start}-${range.file1.end}`)} \u2194 ${chalk.gray(file2)}:${chalk.cyan(`${range.file2.start}-${range.file2.end}`)}`);
+        console.log(
+          `   ${chalk.gray(file1)}:${chalk.cyan(`${range.file1.start}-${range.file1.end}`)} \u2194 ${chalk.gray(file2)}:${chalk.cyan(`${range.file2.start}-${range.file2.end}`)}`
+        );
       });
       if (group.lineRanges.length > 3) {
-        console.log(`   ${chalk.gray(`... and ${group.lineRanges.length - 3} more ranges`)}`);
+        console.log(
+          `   ${chalk.gray(`... and ${group.lineRanges.length - 3} more ranges`)}`
+        );
       }
       console.log();
     });
     if (groups.length > topGroups.length) {
-      console.log(chalk.gray(`   ... and ${groups.length - topGroups.length} more file pairs`));
+      console.log(
+        chalk.gray(
+          `   ... and ${groups.length - topGroups.length} more file pairs`
+        )
+      );
     }
   }
   if (!finalOptions.showRawDuplicates && clusters && clusters.length > 0) {
     console.log(chalk.cyan("\n" + divider));
-    console.log(chalk.bold.white(`  \u{1F3AF} REFACTOR CLUSTERS (${clusters.length} patterns)`));
+    console.log(
+      chalk.bold.white(`  \u{1F3AF} REFACTOR CLUSTERS (${clusters.length} patterns)`)
+    );
     console.log(chalk.cyan(divider) + "\n");
     clusters.sort((a, b) => b.totalTokenCost - a.totalTokenCost).forEach((cluster, idx) => {
       const severityBadge = getSeverityBadge(cluster.severity);
-      console.log(`${idx + 1}. ${severityBadge} ${chalk.bold(cluster.name)}`);
-      console.log(`   Total tokens: ${chalk.bold(cluster.totalTokenCost.toLocaleString())} | Avg similarity: ${chalk.bold(Math.round(cluster.averageSimilarity * 100) + "%")} | Duplicates: ${chalk.bold(cluster.duplicateCount)}`);
+      console.log(
+        `${idx + 1}. ${severityBadge} ${chalk.bold(cluster.name)}`
+      );
+      console.log(
+        `   Total tokens: ${chalk.bold(cluster.totalTokenCost.toLocaleString())} | Avg similarity: ${chalk.bold(Math.round(cluster.averageSimilarity * 100) + "%")} | Duplicates: ${chalk.bold(cluster.duplicateCount)}`
+      );
       const displayFiles = cluster.files.slice(0, 5);
-      console.log(`   Files (${cluster.files.length}): ${displayFiles.map((f) => chalk.gray(f.split("/").pop() || f)).join(", ")}`);
+      console.log(
+        `   Files (${cluster.files.length}): ${displayFiles.map((f) => chalk.gray(f.split("/").pop() || f)).join(", ")}`
+      );
       if (cluster.files.length > 5) {
-        console.log(`   ${chalk.gray(`... and ${cluster.files.length - 5} more files`)}`);
+        console.log(
+          `   ${chalk.gray(`... and ${cluster.files.length - 5} more files`)}`
+        );
       }
       if (cluster.reason) {
         console.log(`   ${chalk.italic.gray(cluster.reason)}`);
       }
       if (cluster.suggestion) {
-        console.log(`   ${chalk.cyan("\u2192")} ${chalk.italic(cluster.suggestion)}`);
+        console.log(
+          `   ${chalk.cyan("\u2192")} ${chalk.italic(cluster.suggestion)}`
+        );
       }
       console.log();
     });
@@ -236,10 +328,18 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
       const severityBadge = getSeverityBadge(dup.severity);
       const file1Name = dup.file1.split("/").pop() || dup.file1;
       const file2Name = dup.file2.split("/").pop() || dup.file2;
-      console.log(`${severityBadge} ${chalk.bold(file1Name)} \u2194 ${chalk.bold(file2Name)}`);
-      console.log(`   Similarity: ${chalk.bold(Math.round(dup.similarity * 100) + "%")} | Pattern: ${dup.patternType} | Tokens: ${chalk.bold(dup.tokenCost.toLocaleString())}`);
-      console.log(`   ${chalk.gray(dup.file1)}:${chalk.cyan(dup.line1 + "-" + dup.endLine1)}`);
-      console.log(`   ${chalk.gray(dup.file2)}:${chalk.cyan(dup.line2 + "-" + dup.endLine2)}`);
+      console.log(
+        `${severityBadge} ${chalk.bold(file1Name)} \u2194 ${chalk.bold(file2Name)}`
+      );
+      console.log(
+        `   Similarity: ${chalk.bold(Math.round(dup.similarity * 100) + "%")} | Pattern: ${dup.patternType} | Tokens: ${chalk.bold(dup.tokenCost.toLocaleString())}`
+      );
+      console.log(
+        `   ${chalk.gray(dup.file1)}:${chalk.cyan(dup.line1 + "-" + dup.endLine1)}`
+      );
+      console.log(
+        `   ${chalk.gray(dup.file2)}:${chalk.cyan(dup.line2 + "-" + dup.endLine2)}`
+      );
       if (dup.reason) {
         console.log(`   ${chalk.italic.gray(dup.reason)}`);
       }
@@ -249,7 +349,11 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
       console.log();
     });
     if (filteredDuplicates.length > topDuplicates.length) {
-      console.log(chalk.gray(`   ... and ${filteredDuplicates.length - topDuplicates.length} more duplicates`));
+      console.log(
+        chalk.gray(
+          `   ... and ${filteredDuplicates.length - topDuplicates.length} more duplicates`
+        )
+      );
     }
   }
   const allIssues = results.flatMap(
@@ -263,27 +367,45 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
     console.log(chalk.bold.white("  CRITICAL ISSUES (>95% similar)"));
     console.log(chalk.cyan(divider) + "\n");
     criticalIssues.slice(0, 5).forEach((issue) => {
-      console.log(chalk.red("\u25CF ") + chalk.white(`${issue.file}:${issue.location.line}`));
+      console.log(
+        chalk.red("\u25CF ") + chalk.white(`${issue.file}:${issue.location.line}`)
+      );
       console.log(`  ${chalk.dim(issue.message)}`);
-      console.log(`  ${chalk.green("\u2192")} ${chalk.italic(issue.suggestion)}
-`);
+      console.log(
+        `  ${chalk.green("\u2192")} ${chalk.italic(issue.suggestion)}
+`
+      );
     });
   }
   if (totalIssues === 0) {
     console.log(chalk.green("\n\u2728 Great! No duplicate patterns detected.\n"));
-    console.log(chalk.yellow("\u{1F4A1} If you expected to find duplicates, try adjusting parameters:"));
-    console.log(chalk.dim("   \u2022 Lower similarity threshold: --similarity 0.3"));
+    console.log(
+      chalk.yellow(
+        "\u{1F4A1} If you expected to find duplicates, try adjusting parameters:"
+      )
+    );
+    console.log(
+      chalk.dim("   \u2022 Lower similarity threshold: --similarity 0.3")
+    );
     console.log(chalk.dim("   \u2022 Reduce minimum lines: --min-lines 3"));
     console.log(chalk.dim("   \u2022 Include test files: --include-tests"));
-    console.log(chalk.dim("   \u2022 Lower shared tokens threshold: --min-shared-tokens 5"));
+    console.log(
+      chalk.dim("   \u2022 Lower shared tokens threshold: --min-shared-tokens 5")
+    );
     console.log("");
   }
   if (totalIssues > 0 && totalIssues < 5) {
-    console.log(chalk.yellow("\n\u{1F4A1} Few results found. To find more duplicates, try:"));
-    console.log(chalk.dim("   \u2022 Lower similarity threshold: --similarity 0.3"));
+    console.log(
+      chalk.yellow("\n\u{1F4A1} Few results found. To find more duplicates, try:")
+    );
+    console.log(
+      chalk.dim("   \u2022 Lower similarity threshold: --similarity 0.3")
+    );
     console.log(chalk.dim("   \u2022 Reduce minimum lines: --min-lines 3"));
     console.log(chalk.dim("   \u2022 Include test files: --include-tests"));
-    console.log(chalk.dim("   \u2022 Lower shared tokens threshold: --min-shared-tokens 5"));
+    console.log(
+      chalk.dim("   \u2022 Lower shared tokens threshold: --min-shared-tokens 5")
+    );
     console.log("");
   }
   console.log(chalk.cyan(divider));
@@ -301,7 +423,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
     )
   );
   console.log(
-    chalk.dim("\u{1F41B} Found a bug? Report it: https://github.com/caopengau/aiready-pattern-detect/issues\n")
+    chalk.dim(
+      "\u{1F41B} Found a bug? Report it: https://github.com/caopengau/aiready-pattern-detect/issues\n"
+    )
   );
 });
 function getPatternIcon(type) {

package/dist/index.d.mts CHANGED Viewed

@@ -57,6 +57,7 @@ interface DetectionOptions {
     maxCandidatesPerBlock?: number;
     maxComparisons?: number;
     streamResults?: boolean;
+    onProgress?: (processed: number, total: number, message: string) => void;
 }
 /**
  * Detect duplicate patterns across files with enhanced analysis
@@ -130,6 +131,7 @@ interface PatternDetectOptions extends ScanOptions {
     createClusters?: boolean;
     minClusterTokenCost?: number;
     minClusterFiles?: number;
+    onProgress?: (processed: number, total: number, message: string) => void;
 }
 interface PatternSummary {
     totalPatterns: number;

package/dist/index.d.ts CHANGED Viewed

@@ -57,6 +57,7 @@ interface DetectionOptions {
     maxCandidatesPerBlock?: number;
     maxComparisons?: number;
     streamResults?: boolean;
+    onProgress?: (processed: number, total: number, message: string) => void;
 }
 /**
  * Detect duplicate patterns across files with enhanced analysis
@@ -130,6 +131,7 @@ interface PatternDetectOptions extends ScanOptions {
     createClusters?: boolean;
     minClusterTokenCost?: number;
     minClusterFiles?: number;
+    onProgress?: (processed: number, total: number, message: string) => void;
 }
 interface PatternSummary {
     totalPatterns: number;

package/dist/index.js CHANGED Viewed

@@ -113,8 +113,14 @@ function calculatePythonSimilarity(pattern1, pattern2) {
 }
 function calculateNameSimilarity(name1, name2) {
   if (name1 === name2) return 1;
-  const clean1 = name1.replace(/^(get|set|is|has|create|delete|update|fetch)_?/, "");
-  const clean2 = name2.replace(/^(get|set|is|has|create|delete|update|fetch)_?/, "");
+  const clean1 = name1.replace(
+    /^(get|set|is|has|create|delete|update|fetch)_?/,
+    ""
+  );
+  const clean2 = name2.replace(
+    /^(get|set|is|has|create|delete|update|fetch)_?/,
+    ""
+  );
   if (clean1 === clean2) return 0.9;
   if (clean1.includes(clean2) || clean2.includes(clean1)) {
     return 0.7;
@@ -146,7 +152,10 @@ function detectPythonAntiPatterns(patterns) {
   const antiPatterns = [];
   const nameGroups = /* @__PURE__ */ new Map();
   for (const pattern of patterns) {
-    const baseName = pattern.name.replace(/^(get|set|create|delete|update)_/, "");
+    const baseName = pattern.name.replace(
+      /^(get|set|create|delete|update)_/,
+      ""
+    );
     if (!nameGroups.has(baseName)) {
       nameGroups.set(baseName, []);
     }
@@ -437,11 +446,15 @@ async function detectDuplicatePatterns(files, options) {
       linesOfCode: block.linesOfCode
     }))
   );
-  console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
+  if (!options.onProgress) {
+    console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
+  }
   const pythonFiles = files.filter((f) => f.file.toLowerCase().endsWith(".py"));
   if (pythonFiles.length > 0) {
     const { extractPythonPatterns: extractPythonPatterns2 } = await Promise.resolve().then(() => (init_python_extractor(), python_extractor_exports));
-    const patterns = await extractPythonPatterns2(pythonFiles.map((f) => f.file));
+    const patterns = await extractPythonPatterns2(
+      pythonFiles.map((f) => f.file)
+    );
     const pythonBlocks = patterns.filter((p) => p.code && p.code.trim().length > 0).map((p) => ({
       content: p.code,
       startLine: p.startLine,
@@ -453,11 +466,17 @@ async function detectDuplicatePatterns(files, options) {
       linesOfCode: p.endLine - p.startLine + 1
     }));
     allBlocks.push(...pythonBlocks);
-    console.log(`Added ${pythonBlocks.length} Python patterns`);
+    if (!options.onProgress) {
+      console.log(`Added ${pythonBlocks.length} Python patterns`);
+    }
   }
   if (!approx && allBlocks.length > 500) {
-    console.log(`\u26A0\uFE0F  Using --no-approx mode with ${allBlocks.length} blocks may be slow (O(B\xB2) complexity).`);
-    console.log(`   Consider using approximate mode (default) for better performance.`);
+    console.log(
+      `\u26A0\uFE0F  Using --no-approx mode with ${allBlocks.length} blocks may be slow (O(B\xB2) complexity).`
+    );
+    console.log(
+      `   Consider using approximate mode (default) for better performance.`
+    );
   }
   const stopwords = /* @__PURE__ */ new Set([
     "return",
@@ -487,7 +506,11 @@ async function detectDuplicatePatterns(files, options) {
     "undefined",
     "this"
   ]);
-  const tokenize = (norm) => norm.split(/[\s(){}\[\];,\.]+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
+  const tokenize = (norm) => {
+    const punctuation = "(){}[];.,";
+    const cleaned = norm.split("").map((ch) => punctuation.includes(ch) ? " " : ch).join("");
+    return cleaned.split(/\s+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
+  };
   const blockTokens = allBlocks.map((b) => tokenize(b.normalized));
   const invertedIndex = /* @__PURE__ */ new Map();
   if (approx) {
@@ -504,9 +527,13 @@ async function detectDuplicatePatterns(files, options) {
   }
   const totalComparisons = approx ? void 0 : allBlocks.length * (allBlocks.length - 1) / 2;
   if (totalComparisons !== void 0) {
-    console.log(`Processing ${totalComparisons.toLocaleString()} comparisons in batches...`);
+    console.log(
+      `Processing ${totalComparisons.toLocaleString()} comparisons in batches...`
+    );
   } else {
-    console.log(`Using approximate candidate selection to reduce comparisons...`);
+    console.log(
+      `Using approximate candidate selection to reduce comparisons...`
+    );
   }
   let comparisonsProcessed = 0;
   let comparisonsBudgetExhausted = false;
@@ -517,16 +544,24 @@ async function detectDuplicatePatterns(files, options) {
       break;
     }
     if (i % batchSize === 0 && i > 0) {
-      const elapsed = ((Date.now() - startTime) / 1e3).toFixed(1);
-      const duplicatesFound = duplicates.length;
-      if (totalComparisons !== void 0) {
-        const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
-        const remaining = totalComparisons - comparisonsProcessed;
-        const rate = comparisonsProcessed / parseFloat(elapsed);
-        const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
-        console.log(`   ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`);
+      if (options.onProgress) {
+        options.onProgress(i, allBlocks.length, `pattern-detect: analyzing blocks`);
       } else {
-        console.log(`   Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`);
+        const elapsed = ((Date.now() - startTime) / 1e3).toFixed(1);
+        const duplicatesFound = duplicates.length;
+        if (totalComparisons !== void 0) {
+          const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
+          const remaining = totalComparisons - comparisonsProcessed;
+          const rate = comparisonsProcessed / parseFloat(elapsed);
+          const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
+          console.log(
+            `   ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`
+          );
+        } else {
+          console.log(
+            `   Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`
+          );
+        }
       }
       await new Promise((resolve) => setImmediate(resolve));
     }
@@ -560,8 +595,12 @@ async function detectDuplicatePatterns(files, options) {
     if (approx && candidates) {
       for (const { j } of candidates) {
         if (!approx && maxComparisons !== Infinity && comparisonsProcessed >= maxComparisons) {
-          console.log(`\u26A0\uFE0F  Comparison safety limit reached (${maxComparisons.toLocaleString()} comparisons in --no-approx mode).`);
-          console.log(`   This prevents excessive runtime on large repos. Consider using approximate mode (default) or --min-lines to reduce blocks.`);
+          console.log(
+            `\u26A0\uFE0F  Comparison safety limit reached (${maxComparisons.toLocaleString()} comparisons in --no-approx mode).`
+          );
+          console.log(
+            `   This prevents excessive runtime on large repos. Consider using approximate mode (default) or --min-lines to reduce blocks.`
+          );
           break;
         }
         comparisonsProcessed++;
@@ -594,10 +633,16 @@ async function detectDuplicatePatterns(files, options) {
           };
           duplicates.push(duplicate);
           if (streamResults) {
-            console.log(`
-   \u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
-            console.log(`      ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
-            console.log(`      Token cost: ${duplicate.tokenCost.toLocaleString()}`);
+            console.log(
+              `
+   \u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`
+            );
+            console.log(
+              `      ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`
+            );
+            console.log(
+              `      Token cost: ${duplicate.tokenCost.toLocaleString()}`
+            );
           }
         }
       }
@@ -635,17 +680,25 @@ async function detectDuplicatePatterns(files, options) {
           };
           duplicates.push(duplicate);
           if (streamResults) {
-            console.log(`
-   \u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
-            console.log(`      ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
-            console.log(`      Token cost: ${duplicate.tokenCost.toLocaleString()}`);
+            console.log(
+              `
+   \u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`
+            );
+            console.log(
+              `      ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`
+            );
+            console.log(
+              `      Token cost: ${duplicate.tokenCost.toLocaleString()}`
+            );
           }
         }
       }
     }
   }
   if (comparisonsBudgetExhausted) {
-    console.log(`\u26A0\uFE0F  Comparison budget exhausted (${maxComparisons.toLocaleString()} comparisons). Use --max-comparisons to increase.`);
+    console.log(
+      `\u26A0\uFE0F  Comparison budget exhausted (${maxComparisons.toLocaleString()} comparisons). Use --max-comparisons to increase.`
+    );
   }
   return duplicates.sort(
     (a, b) => b.similarity - a.similarity || b.tokenCost - a.tokenCost
@@ -671,7 +724,10 @@ function groupDuplicatesByFilePair(duplicates) {
   const result = [];
   for (const [filePair, groupDups] of groups.entries()) {
     const deduplicated = deduplicateOverlappingRanges(groupDups);
-    const totalTokenCost = deduplicated.reduce((sum, d) => sum + d.tokenCost, 0);
+    const totalTokenCost = deduplicated.reduce(
+      (sum, d) => sum + d.tokenCost,
+      0
+    );
     const averageSimilarity = deduplicated.reduce((sum, d) => sum + d.similarity, 0) / deduplicated.length;
     const maxSimilarity = Math.max(...deduplicated.map((d) => d.similarity));
     const severity = getHighestSeverity(deduplicated.map((d) => d.severity));
@@ -777,7 +833,9 @@ function identifyCluster(dup) {
   if ((file1.includes("/components/") || file1.startsWith("components/")) && (file2.includes("/components/") || file2.startsWith("components/")) && dup.patternType === "component") {
     const component1 = extractComponentName(dup.file1);
     const component2 = extractComponentName(dup.file2);
-    console.log(`Component check: ${dup.file1} -> ${component1}, ${dup.file2} -> ${component2}`);
+    console.log(
+      `Component check: ${dup.file1} -> ${component1}, ${dup.file2} -> ${component2}`
+    );
     if (component1 && component2 && areSimilarComponents(component1, component2)) {
       const category = getComponentCategory(component1);
       console.log(`Creating cluster: component-${category}`);
@@ -876,7 +934,7 @@ function getClusterInfo(clusterId, patternType, fileCount) {
       suggestion: "Extract common middleware, error handling, and response formatting",
       reason: "API handler duplication leads to inconsistent error handling and response formats"
     },
-    "validators": {
+    validators: {
       name: `Validator Patterns (${fileCount} files)`,
       suggestion: "Consolidate into shared schema validators (Zod/Yup) with reusable rules",
       reason: "Validator duplication causes inconsistent validation and harder maintenance"
@@ -929,7 +987,12 @@ function calculatePatternScore(duplicates, totalFilesAnalyzed, costConfig) {
     return {
       toolName: "pattern-detect",
       score: 100,
-      rawMetrics: { totalDuplicates: 0, totalTokenCost: 0, highImpactDuplicates: 0, totalFilesAnalyzed: 0 },
+      rawMetrics: {
+        totalDuplicates: 0,
+        totalTokenCost: 0,
+        highImpactDuplicates: 0,
+        totalFilesAnalyzed: 0
+      },
       factors: [],
       recommendations: []
     };
@@ -1053,13 +1116,22 @@ async function getSmartDefaults(directory, userOptions) {
   const { scanFiles: scanFiles2 } = await import("@aiready/core");
   const files = await scanFiles2(scanOptions);
   const estimatedBlocks = files.length * 3;
-  const maxCandidatesPerBlock = Math.max(3, Math.min(10, Math.floor(3e4 / estimatedBlocks)));
+  const maxCandidatesPerBlock = Math.max(
+    3,
+    Math.min(10, Math.floor(3e4 / estimatedBlocks))
+  );
   const minSimilarity = Math.min(0.75, 0.5 + estimatedBlocks / 1e4 * 0.25);
-  const minLines = Math.max(6, Math.min(12, 6 + Math.floor(estimatedBlocks / 2e3)));
-  const minSharedTokens = Math.max(10, Math.min(20, 10 + Math.floor(estimatedBlocks / 2e3)));
+  const minLines = Math.max(
+    6,
+    Math.min(12, 6 + Math.floor(estimatedBlocks / 2e3))
+  );
+  const minSharedTokens = Math.max(
+    10,
+    Math.min(20, 10 + Math.floor(estimatedBlocks / 2e3))
+  );
   const batchSize = estimatedBlocks > 1e3 ? 200 : 100;
   const severity = estimatedBlocks > 5e3 ? "high" : "all";
-  let defaults = {
+  const defaults = {
     rootDir: directory,
     minSimilarity,
     minLines,
@@ -1129,7 +1201,8 @@ async function analyzePatterns(options) {
     approx,
     minSharedTokens,
     maxCandidatesPerBlock,
-    streamResults
+    streamResults,
+    onProgress: options.onProgress
   });
   for (const file of files) {
     const fileDuplicates = duplicates.filter(
@@ -1157,7 +1230,9 @@ async function analyzePatterns(options) {
         medium: ["critical", "major", "minor"]
       };
       const allowedSeverities = severityMap[severity] || ["critical", "major", "minor"];
-      filteredIssues = issues.filter((issue) => allowedSeverities.includes(issue.severity));
+      filteredIssues = issues.filter(
+        (issue) => allowedSeverities.includes(issue.severity)
+      );
     }
     const totalTokenCost = fileDuplicates.reduce(
       (sum, dup) => sum + dup.tokenCost,
@@ -1179,7 +1254,11 @@ async function analyzePatterns(options) {
   }
   if (createClusters) {
     const allClusters = createRefactorClusters(duplicates);
-    clusters = filterClustersByImpact(allClusters, minClusterTokenCost, minClusterFiles);
+    clusters = filterClustersByImpact(
+      allClusters,
+      minClusterTokenCost,
+      minClusterFiles
+    );
   }
   return { results, duplicates, files, groups, clusters };
 }

package/dist/index.mjs CHANGED Viewed

@@ -7,7 +7,7 @@ import {
   generateSummary,
   getSeverityLabel,
   getSmartDefaults
-} from "./chunk-WKBCNITM.mjs";
+} from "./chunk-YSDOUNJJ.mjs";
 export {
   analyzePatterns,
   calculatePatternScore,