npm - @aiready/pattern-detect - Versions diffs - 0.16.5 → 0.16.7 - Mend

@aiready/pattern-detect 0.16.5 → 0.16.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/dist/cli.mjs CHANGED Viewed

@@ -3,10 +3,12 @@ import {
   analyzePatterns,
   filterBySeverity,
   generateSummary
-} from "./chunk-KPEK5REL.mjs";
+} from "./chunk-DR5W7S3Z.mjs";
 // src/cli.ts
 import { Command } from "commander";
+// src/cli-action.ts
 import chalk from "chalk";
 import { writeFileSync, mkdirSync, existsSync } from "fs";
 import { dirname } from "path";
@@ -118,80 +120,61 @@ function generateHTMLReport(results, summary) {
 </html>`;
 }
-// src/cli.ts
-var program = new Command();
-program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").addHelpText(
-  "after",
-  "\nCONFIGURATION:\n  Supports config files: aiready.json, aiready.config.json, .aiready.json, .aireadyrc.json, aiready.config.js, .aireadyrc.js\n  CLI options override config file settings\n\nPARAMETER TUNING:\n  If you get too few results: decrease --similarity, --min-lines, or --min-shared-tokens\n  If analysis is too slow: increase --min-lines, --min-shared-tokens, or decrease --max-candidates\n  If you get too many false positives: increase --similarity or --min-lines\n\nEXAMPLES:\n  aiready-patterns .                                    # Basic analysis with smart defaults\n  aiready-patterns . --similarity 0.3 --min-lines 3     # More sensitive detection\n  aiready-patterns . --max-candidates 50 --no-approx    # Slower but more thorough\n  aiready-patterns . --output json > report.json       # JSON export"
-).argument("<directory>", "Directory to analyze").option(
-  "-s, --similarity <number>",
-  "Minimum similarity score (0-1). Lower = more results, higher = fewer but more accurate. Default: 0.4"
-).option(
-  "-l, --min-lines <number>",
-  "Minimum lines to consider. Lower = more results, higher = faster analysis. Default: 5"
-).option(
-  "--batch-size <number>",
-  "Batch size for comparisons. Higher = faster but more memory. Default: 100"
-).option(
-  "--no-approx",
-  "Disable approximate candidate selection. Slower but more thorough on small repos"
-).option(
-  "--min-shared-tokens <number>",
-  "Minimum shared tokens to consider a candidate. Higher = faster, fewer results. Default: 8"
-).option(
-  "--max-candidates <number>",
-  "Maximum candidates per block. Higher = more thorough but slower. Default: 100"
-).option(
-  "--no-stream-results",
-  "Disable incremental output (default: enabled)"
-).option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
-  "--min-severity <level>",
-  "Minimum severity to show: critical|major|minor|info. Default: minor"
-).option(
-  "--exclude-test-fixtures",
-  "Exclude test fixture duplication (beforeAll/afterAll)"
-).option("--exclude-templates", "Exclude template file duplication").option(
-  "--include-tests",
-  "Include test files in analysis (excluded by default)"
-).option(
-  "--max-results <number>",
-  "Maximum number of results to show in console output. Default: 10"
-).option("--no-group-by-file-pair", "Disable grouping duplicates by file pair").option("--no-create-clusters", "Disable creating refactor clusters").option(
-  "--min-cluster-tokens <number>",
-  "Minimum token cost for cluster reporting. Default: 1000"
-).option(
-  "--min-cluster-files <number>",
-  "Minimum files for cluster reporting. Default: 3"
-).option(
-  "--show-raw-duplicates",
-  "Show raw duplicates instead of grouped view"
-).option(
-  "-o, --output <format>",
-  "Output format: console, json, html",
-  "console"
-).option("--output-file <path>", "Output file path (for json/html)").action(async (directory, options) => {
+// src/constants.ts -- CLI defaults, command identity and help text shared by the action handler and the option definitions. NOTE(review): as rendered here HELP_TEXT_AFTER has no blank line between its sections, whereas the inline string it replaces used "\n\n" before each heading -- confirm that is intended.
+var DEFAULT_MIN_SIMILARITY = 0.4; // fallback for minSimilarity; quoted in the --similarity help text (score range 0-1)
+var DEFAULT_MIN_LINES = 5; // fallback for minLines; quoted in the --min-lines help text
+var DEFAULT_BATCH_SIZE = 100; // fallback for batchSize; quoted in the --batch-size help text
+var DEFAULT_MIN_SHARED_TOKENS = 8; // fallback for minSharedTokens; quoted in the --min-shared-tokens help text
+var DEFAULT_MAX_CANDIDATES_PER_BLOCK = 100; // fallback for maxCandidatesPerBlock; quoted in the --max-candidates help text
+var DEFAULT_MAX_RESULTS = 10; // fallback for maxResults (console output limit); quoted in the --max-results help text
+var DEFAULT_MIN_CLUSTER_TOKEN_COST = 1e3; // fallback for minClusterTokenCost; quoted in the --min-cluster-tokens help text
+var DEFAULT_MIN_CLUSTER_FILES = 3; // fallback for minClusterFiles; quoted in the --min-cluster-files help text
+var COMMAND_NAME = "aiready-patterns"; // passed to program.name()
+var COMMAND_VERSION = "0.1.0"; // passed to program.version(). NOTE(review): differs from the package version shown for this release (0.16.7) -- confirm which one --version should report
+var DEFAULT_OUTPUT_FORMAT = "console"; // default value of -o/--output (other documented formats: json, html)
+var HELP_TEXT_AFTER = `
+CONFIGURATION:
+  Supports config files: aiready.json, aiready.config.json, .aiready.json, .aireadyrc.json, aiready.config.js, .aireadyrc.js
+  CLI options override config file settings
+PARAMETER TUNING:
+  If you get too few results: decrease --similarity, --min-lines, or --min-shared-tokens
+  If analysis is too slow: increase --min-lines, --min-shared-tokens, or decrease --max-candidates
+  If you get too many false positives: increase --similarity or --min-lines
+EXAMPLES:
+  aiready-patterns .                                    # Basic analysis with smart defaults
+  aiready-patterns . --similarity 0.3 --min-lines 3     # More sensitive detection
+  aiready-patterns . --max-candidates 50 --no-approx    # Slower but more thorough
+  aiready-patterns . --output json > report.json       # JSON export`;
+// src/cli-action.ts
+async function patternActionHandler(directory, options) {
   console.log(chalk.blue("\u{1F50D} Analyzing patterns...\n"));
   const startTime = Date.now();
   const config = await loadConfig(directory);
   const defaults = {
-    minSimilarity: 0.4,
-    minLines: 5,
-    batchSize: 100,
+    minSimilarity: DEFAULT_MIN_SIMILARITY,
+    minLines: DEFAULT_MIN_LINES,
+    batchSize: DEFAULT_BATCH_SIZE,
     approx: true,
-    minSharedTokens: 8,
-    maxCandidatesPerBlock: 100,
+    minSharedTokens: DEFAULT_MIN_SHARED_TOKENS,
+    maxCandidatesPerBlock: DEFAULT_MAX_CANDIDATES_PER_BLOCK,
     streamResults: true,
     include: void 0,
     exclude: void 0,
+    excludePatterns: void 0,
+    confidenceThreshold: 0,
+    ignoreWhitelist: void 0,
     minSeverity: Severity.Minor,
     excludeTestFixtures: false,
     excludeTemplates: false,
     includeTests: false,
-    maxResults: 10,
+    maxResults: DEFAULT_MAX_RESULTS,
     groupByFilePair: true,
     createClusters: true,
-    minClusterTokenCost: 1e3,
-    minClusterFiles: 3,
+    minClusterTokenCost: DEFAULT_MIN_CLUSTER_TOKEN_COST,
+    minClusterFiles: DEFAULT_MIN_CLUSTER_FILES,
     showRawDuplicates: false
   };
   const mergedConfig = mergeConfigWithDefaults(config, defaults);
@@ -207,15 +190,18 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
     streamResults: options.streamResults !== false && mergedConfig.streamResults,
     include: options.include?.split(",") || mergedConfig.include,
     exclude: options.exclude?.split(",") || mergedConfig.exclude,
+    excludePatterns: options.excludePatterns?.split(",") || mergedConfig.excludePatterns,
+    confidenceThreshold: options.confidenceThreshold ? parseFloat(options.confidenceThreshold) : mergedConfig.confidenceThreshold,
+    ignoreWhitelist: options.ignoreWhitelist?.split(",") || mergedConfig.ignoreWhitelist,
     minSeverity: options.minSeverity || mergedConfig.minSeverity,
     excludeTestFixtures: options.excludeTestFixtures || mergedConfig.excludeTestFixtures,
     excludeTemplates: options.excludeTemplates || mergedConfig.excludeTemplates,
     includeTests: options.includeTests || mergedConfig.includeTests,
     maxResults: options.maxResults ? parseInt(options.maxResults) : mergedConfig.maxResults,
-    groupByFilePair: options.groupBy_file_pair !== false && mergedConfig.groupByFilePair,
-    createClusters: options.create_clusters !== false && mergedConfig.createClusters,
-    minClusterTokenCost: options.min_cluster_tokens ? parseInt(options.min_cluster_tokens) : mergedConfig.minClusterTokenCost,
-    minClusterFiles: options.min_cluster_files ? parseInt(options.min_cluster_files) : mergedConfig.minClusterFiles,
+    groupByFilePair: options.groupByFilePair !== false && mergedConfig.groupByFilePair,
+    createClusters: options.createClusters !== false && mergedConfig.createClusters,
+    minClusterTokenCost: options.minClusterTokens ? parseInt(options.minClusterTokens) : mergedConfig.minClusterTokenCost,
+    minClusterFiles: options.minClusterFiles ? parseInt(options.minClusterFiles) : mergedConfig.minClusterFiles,
     showRawDuplicates: options.showRawDuplicates || mergedConfig.showRawDuplicates
   };
   if (finalOptions.includeTests && finalOptions.exclude) {
@@ -301,9 +287,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
   console.log(chalk.cyan(divider));
   console.log(chalk.bold.white("  PATTERN ANALYSIS SUMMARY"));
   console.log(chalk.cyan(divider) + "\n");
-  console.log(
-    chalk.white(`\u{1F4C1} Files analyzed: ${chalk.bold(results.length)}`)
-  );
+  console.log(chalk.white(`\u{1F4C1} Files analyzed: ${chalk.bold(results.length)}`));
   console.log(
     chalk.yellow(
       `\u26A0  AI confusion patterns detected: ${chalk.bold(totalIssues)}`
@@ -314,9 +298,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
       `\u{1F4B0} Token cost (wasted): ${chalk.bold(summary.totalTokenCost.toLocaleString())}`
     )
   );
-  console.log(
-    chalk.gray(`\u23F1  Analysis time: ${chalk.bold(elapsedTime + "s")}`)
-  );
+  console.log(chalk.gray(`\u23F1  Analysis time: ${chalk.bold(elapsedTime + "s")}`));
   const sortedTypes = Object.entries(summary.patternsByType).filter(([, count]) => count > 0).sort(([, a], [, b]) => b - a);
   if (sortedTypes.length > 0) {
     console.log(chalk.cyan("\n" + divider));
@@ -382,9 +364,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
     console.log(chalk.cyan(divider) + "\n");
     clusters.sort((a, b) => b.totalTokenCost - a.totalTokenCost).forEach((cluster, idx) => {
       const severityBadge = getSeverityBadge2(cluster.severity);
-      console.log(
-        `${idx + 1}. ${severityBadge} ${chalk.bold(cluster.name)}`
-      );
+      console.log(`${idx + 1}. ${severityBadge} ${chalk.bold(cluster.name)}`);
       console.log(
         `   Total tokens: ${chalk.bold(cluster.totalTokenCost.toLocaleString())} | Avg similarity: ${chalk.bold(Math.round(cluster.averageSimilarity * 100) + "%")} | Duplicates: ${chalk.bold(cluster.duplicateCount)}`
       );
@@ -466,10 +446,8 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
         chalk.red("\u25CF ") + chalk.white(`${issue.file}:${issue.location.line}`)
       );
       console.log(`  ${chalk.dim(issue.message)}`);
-      console.log(
-        `  ${chalk.green("\u2192")} ${chalk.italic(issue.suggestion)}
-`
-      );
+      console.log(`  ${chalk.green("\u2192")} ${chalk.italic(issue.suggestion)}
+`);
     });
   }
   if (totalIssues === 0) {
@@ -479,9 +457,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
         "\u{1F4A1} If you expected to find duplicates, try adjusting parameters:"
       )
     );
-    console.log(
-      chalk.dim("   \u2022 Lower similarity threshold: --similarity 0.3")
-    );
+    console.log(chalk.dim("   \u2022 Lower similarity threshold: --similarity 0.3"));
     console.log(chalk.dim("   \u2022 Reduce minimum lines: --min-lines 3"));
     console.log(chalk.dim("   \u2022 Include test files: --include-tests"));
     console.log(
@@ -493,9 +469,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
     console.log(
       chalk.yellow("\n\u{1F4A1} Few results found. To find more duplicates, try:")
     );
-    console.log(
-      chalk.dim("   \u2022 Lower similarity threshold: --similarity 0.3")
-    );
+    console.log(chalk.dim("   \u2022 Lower similarity threshold: --similarity 0.3"));
     console.log(chalk.dim("   \u2022 Reduce minimum lines: --min-lines 3"));
     console.log(chalk.dim("   \u2022 Include test files: --include-tests"));
     console.log(
@@ -522,5 +496,64 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
       "\u{1F41B} Found a bug? Report it: https://github.com/caopengau/aiready-pattern-detect/issues\n"
     )
   );
-});
+}
+// src/cli.ts
+var program = new Command(); // commander program describing the aiready-patterns CLI
+program.name(COMMAND_NAME).description("Detect duplicate patterns in your codebase").version(COMMAND_VERSION).addHelpText("after", HELP_TEXT_AFTER).argument("<directory>", "Directory to analyze").option( // the numeric options below are declared without a commander-level default: "Default: N" is help text only, and the fallback values come from the defaults object in patternActionHandler
+  "-s, --similarity <number>",
+  `Minimum similarity score (0-1). Default: ${DEFAULT_MIN_SIMILARITY}`
+).option(
+  "-l, --min-lines <number>",
+  `Minimum lines to consider. Default: ${DEFAULT_MIN_LINES}`
+).option(
+  "--batch-size <number>",
+  `Batch size for comparisons. Default: ${DEFAULT_BATCH_SIZE}`
+).option(
+  "--no-approx",
+  "Disable approximate candidate selection. Slower but more thorough on small repos"
+).option(
+  "--min-shared-tokens <number>",
+  `Minimum shared tokens to consider a candidate. Default: ${DEFAULT_MIN_SHARED_TOKENS}`
+).option(
+  "--max-candidates <number>",
+  `Maximum candidates per block. Default: ${DEFAULT_MAX_CANDIDATES_PER_BLOCK}`
+).option(
+  "--no-stream-results", // negatable flag: the handler keeps streaming on unless options.streamResults is exactly false
+  "Disable incremental output (default: enabled)"
+).option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
+  "--exclude-patterns <regexes>", // the handler splits this value on "," and detectDuplicatePatterns compiles each piece with new RegExp(p, "i"), so a regex that itself contains a comma cannot be passed this way
+  "Regex patterns to exclude specific code content (comma-separated)"
+).option(
+  "--confidence-threshold <number>",
+  "Minimum confidence score (0-1). Default: 0" // default is written as a literal here rather than taken from a DEFAULT_* constant
+).option(
+  "--ignore-whitelist <patterns>", // each entry is matched either as a substring of both file paths or as an exact "fileA::fileB" pair (either order)
+  "List of file pairs or patterns to ignore (comma-separated)"
+).option(
+  "--min-severity <level>",
+  "Minimum severity to show: critical|major|minor|info. Default: minor"
+).option(
+  "--exclude-test-fixtures",
+  "Exclude test fixture duplication (beforeAll/afterAll)"
+).option("--exclude-templates", "Exclude template file duplication").option(
+  "--include-tests",
+  "Include test files in analysis (excluded by default)"
+).option(
+  "--max-results <number>",
+  `Maximum number of results to show in console output. Default: ${DEFAULT_MAX_RESULTS}`
+).option("--no-group-by-file-pair", "Disable grouping duplicates by file pair").option("--no-create-clusters", "Disable creating refactor clusters").option( // the handler reads these as options.groupByFilePair / options.createClusters and only disables them when the value is exactly false
+  "--min-cluster-tokens <number>", // read by the handler as options.minClusterTokens
+  `Minimum token cost for cluster reporting. Default: ${DEFAULT_MIN_CLUSTER_TOKEN_COST}`
+).option(
+  "--min-cluster-files <number>", // read by the handler as options.minClusterFiles
+  `Minimum files for cluster reporting. Default: ${DEFAULT_MIN_CLUSTER_FILES}`
+).option(
+  "--show-raw-duplicates",
+  "Show raw duplicates instead of grouped view"
+).option(
+  "-o, --output <format>",
+  "Output format: console, json, html",
+  DEFAULT_OUTPUT_FORMAT // the only option in this chain that is given a default at the commander level
+).option("--output-file <path>", "Output file path (for json/html)").action(patternActionHandler); // patternActionHandler is declared async
 program.parse(); // NOTE(review): parse() is used together with an async action handler; commander documents parseAsync() for that case -- confirm that rejections from the handler are surfaced as intended

package/dist/index.d.mts CHANGED Viewed

@@ -17,6 +17,7 @@ interface DuplicatePattern {
     code1: string;
     code2: string;
     similarity: number;
+    confidence: number;
     patternType: PatternType;
     tokenCost: number;
     severity: Severity;
@@ -32,11 +33,18 @@ interface DetectionOptions {
     minSharedTokens: number;
     maxCandidatesPerBlock: number;
     streamResults: boolean;
+    excludePatterns?: string[];
+    confidenceThreshold?: number;
+    ignoreWhitelist?: string[];
     onProgress?: (processed: number, total: number, message: string) => void;
 }
 /**
  * Detect duplicate patterns across files
+ *
+ * @param fileContents - Array of file contents to analyze.
+ * @param options - Configuration for duplicate detection (thresholds, progress, etc).
+ * @returns Promise resolving to an array of detected duplicate patterns sorted by similarity.
  */
 declare function detectDuplicatePatterns(fileContents: FileContent[], options: DetectionOptions): Promise<DuplicatePattern[]>;
@@ -104,6 +112,9 @@ interface PatternDetectOptions extends ScanOptions {
     createClusters?: boolean;
     minClusterTokenCost?: number;
     minClusterFiles?: number;
+    excludePatterns?: string[];
+    confidenceThreshold?: number;
+    ignoreWhitelist?: string[];
     onProgress?: (processed: number, total: number, message: string) => void;
 }
 interface PatternSummary {
@@ -122,9 +133,20 @@ interface PatternSummary {
     }>;
 }
 /**
- * Determine smart defaults based on repository size estimation
+ * Determine smart defaults based on repository size estimation.
+ *
+ * @param directory - The directory to analyze for size.
+ * @param userOptions - User-provided option overrides.
+ * @returns Promise resolving to optimal detection options.
  */
 declare function getSmartDefaults(directory: string, userOptions: Partial<PatternDetectOptions>): Promise<PatternDetectOptions>;
+/**
+ * Main entry point for pattern detection analysis.
+ *
+ * @param options - Configuration including rootDir and detection parameters.
+ * @returns Promise resolving to the comprehensive pattern detect report.
+ * @lastUpdated 2026-03-18
+ */
 declare function analyzePatterns(options: PatternDetectOptions): Promise<{
     results: AnalysisResult[];
     duplicates: DuplicatePattern[];
@@ -133,6 +155,12 @@ declare function analyzePatterns(options: PatternDetectOptions): Promise<{
     clusters?: RefactorCluster[];
     config: PatternDetectOptions;
 }>;
+/**
+ * Generate a summary of pattern detection results.
+ *
+ * @param results - Array of file-level analysis results.
+ * @returns Consolidated pattern summary object.
+ */
 declare function generateSummary(results: AnalysisResult[]): PatternSummary;
 /**
@@ -146,6 +174,11 @@ declare function generateSummary(results: AnalysisResult[]): PatternSummary;
  * Includes business value metrics:
  * - Estimated monthly cost of token waste
  * - Estimated developer hours to fix
+ *
+ * @param duplicates - Array of detected duplicate patterns.
+ * @param totalFilesAnalyzed - Total count of files scanned.
+ * @param costConfig - Optional configuration for business value calculations.
+ * @returns Standardized scoring output for pattern detection.
  */
 declare function calculatePatternScore(duplicates: DuplicatePattern[], totalFilesAnalyzed: number, costConfig?: Partial<CostConfig>): ToolScoringOutput;
@@ -167,6 +200,13 @@ interface ContextRule {
 declare const CONTEXT_RULES: ContextRule[];
 /**
  * Calculate severity based on context rules and code characteristics
+ *
+ * @param file1 - First file path in the duplicate pair.
+ * @param file2 - Second file path in the duplicate pair.
+ * @param code - Snippet of the duplicated code.
+ * @param similarity - The calculated similarity score (0-1).
+ * @param linesOfCode - Number of lines in the duplicated block.
+ * @returns An object containing the severity level and reasoning.
  */
 declare function calculateSeverity(file1: string, file2: string, code: string, similarity: number, linesOfCode: number): {
     severity: Severity;
@@ -176,16 +216,26 @@ declare function calculateSeverity(file1: string, file2: string, code: string, s
 };
 /**
  * Get a human-readable severity label with emoji
+ *
+ * @param severity - The severity level to label.
+ * @returns Formatted label string for UI display.
  */
 declare function getSeverityLabel(severity: Severity): string;
 /**
  * Filter duplicates by minimum severity threshold
+ *
+ * @param duplicates - List of items with a severity property.
+ * @param minSeverity - Minimum threshold for inclusion.
+ * @returns Filtered list of items.
  */
 declare function filterBySeverity<T extends {
     severity: Severity;
 }>(duplicates: T[], minSeverity: Severity): T[];
 /**
- * Get severity threshold for filtering
+ * Get numerical similarity threshold associated with a severity level
+ *
+ * @param severity - The severity level to look up.
+ * @returns Minimum similarity value for this severity.
  */
 declare function getSeverityThreshold(severity: Severity): number;

package/dist/index.d.ts CHANGED Viewed

@@ -17,6 +17,7 @@ interface DuplicatePattern {
     code1: string;
     code2: string;
     similarity: number;
+    confidence: number;
     patternType: PatternType;
     tokenCost: number;
     severity: Severity;
@@ -32,11 +33,18 @@ interface DetectionOptions {
     minSharedTokens: number;
     maxCandidatesPerBlock: number;
     streamResults: boolean;
+    excludePatterns?: string[];
+    confidenceThreshold?: number;
+    ignoreWhitelist?: string[];
     onProgress?: (processed: number, total: number, message: string) => void;
 }
 /**
  * Detect duplicate patterns across files
+ *
+ * @param fileContents - Array of file contents to analyze.
+ * @param options - Configuration for duplicate detection (thresholds, progress, etc).
+ * @returns Promise resolving to an array of detected duplicate patterns sorted by similarity.
  */
 declare function detectDuplicatePatterns(fileContents: FileContent[], options: DetectionOptions): Promise<DuplicatePattern[]>;
@@ -104,6 +112,9 @@ interface PatternDetectOptions extends ScanOptions {
     createClusters?: boolean;
     minClusterTokenCost?: number;
     minClusterFiles?: number;
+    excludePatterns?: string[];
+    confidenceThreshold?: number;
+    ignoreWhitelist?: string[];
     onProgress?: (processed: number, total: number, message: string) => void;
 }
 interface PatternSummary {
@@ -122,9 +133,20 @@ interface PatternSummary {
     }>;
 }
 /**
- * Determine smart defaults based on repository size estimation
+ * Determine smart defaults based on repository size estimation.
+ *
+ * @param directory - The directory to analyze for size.
+ * @param userOptions - User-provided option overrides.
+ * @returns Promise resolving to optimal detection options.
  */
 declare function getSmartDefaults(directory: string, userOptions: Partial<PatternDetectOptions>): Promise<PatternDetectOptions>;
+/**
+ * Main entry point for pattern detection analysis.
+ *
+ * @param options - Configuration including rootDir and detection parameters.
+ * @returns Promise resolving to the comprehensive pattern detect report.
+ * @lastUpdated 2026-03-18
+ */
 declare function analyzePatterns(options: PatternDetectOptions): Promise<{
     results: AnalysisResult[];
     duplicates: DuplicatePattern[];
@@ -133,6 +155,12 @@ declare function analyzePatterns(options: PatternDetectOptions): Promise<{
     clusters?: RefactorCluster[];
     config: PatternDetectOptions;
 }>;
+/**
+ * Generate a summary of pattern detection results.
+ *
+ * @param results - Array of file-level analysis results.
+ * @returns Consolidated pattern summary object.
+ */
 declare function generateSummary(results: AnalysisResult[]): PatternSummary;
 /**
@@ -146,6 +174,11 @@ declare function generateSummary(results: AnalysisResult[]): PatternSummary;
  * Includes business value metrics:
  * - Estimated monthly cost of token waste
  * - Estimated developer hours to fix
+ *
+ * @param duplicates - Array of detected duplicate patterns.
+ * @param totalFilesAnalyzed - Total count of files scanned.
+ * @param costConfig - Optional configuration for business value calculations.
+ * @returns Standardized scoring output for pattern detection.
  */
 declare function calculatePatternScore(duplicates: DuplicatePattern[], totalFilesAnalyzed: number, costConfig?: Partial<CostConfig>): ToolScoringOutput;
@@ -167,6 +200,13 @@ interface ContextRule {
 declare const CONTEXT_RULES: ContextRule[];
 /**
  * Calculate severity based on context rules and code characteristics
+ *
+ * @param file1 - First file path in the duplicate pair.
+ * @param file2 - Second file path in the duplicate pair.
+ * @param code - Snippet of the duplicated code.
+ * @param similarity - The calculated similarity score (0-1).
+ * @param linesOfCode - Number of lines in the duplicated block.
+ * @returns An object containing the severity level and reasoning.
  */
 declare function calculateSeverity(file1: string, file2: string, code: string, similarity: number, linesOfCode: number): {
     severity: Severity;
@@ -176,16 +216,26 @@ declare function calculateSeverity(file1: string, file2: string, code: string, s
 };
 /**
  * Get a human-readable severity label with emoji
+ *
+ * @param severity - The severity level to label.
+ * @returns Formatted label string for UI display.
  */
 declare function getSeverityLabel(severity: Severity): string;
 /**
  * Filter duplicates by minimum severity threshold
+ *
+ * @param duplicates - List of items with a severity property.
+ * @param minSeverity - Minimum threshold for inclusion.
+ * @returns Filtered list of items.
  */
 declare function filterBySeverity<T extends {
     severity: Severity;
 }>(duplicates: T[], minSeverity: Severity): T[];
 /**
- * Get severity threshold for filtering
+ * Get numerical similarity threshold associated with a severity level
+ *
+ * @param severity - The severity level to look up.
+ * @returns Minimum similarity value for this severity.
  */
 declare function getSeverityThreshold(severity: Severity): number;

package/dist/index.js CHANGED Viewed

@@ -350,14 +350,33 @@ function calculateSimilarity(a, b) {
   const union = /* @__PURE__ */ new Set([...setA, ...setB]);
   return intersection.size / union.size;
 }
+function calculateConfidence(similarity, tokens, lines) { // Turn a similarity score into a confidence value in [0, 1], nudged by block size; the visible caller passes the first block's token count and line span
+  let confidence = similarity; // baseline is the raw similarity score
+  if (lines > 20) confidence += 0.05; // small bonus when the block spans more than 20 lines
+  if (tokens > 200) confidence += 0.05; // small bonus when the block has more than 200 tokens
+  if (lines < 5) confidence -= 0.1; // penalty when the block spans fewer than 5 lines
+  return Math.max(0, Math.min(1, confidence)); // clamp so the adjustments cannot push the result outside [0, 1]
+}
 async function detectDuplicatePatterns(fileContents, options) {
-  const { minSimilarity, minLines, streamResults, onProgress } = options;
+  const {
+    minSimilarity,
+    minLines,
+    streamResults,
+    onProgress,
+    excludePatterns = [],
+    confidenceThreshold = 0,
+    ignoreWhitelist = []
+  } = options;
   const allBlocks = [];
+  const excludeRegexes = excludePatterns.map((p) => new RegExp(p, "i"));
   for (const { file, content } of fileContents) {
     const blocks = extractBlocks(file, content);
-    allBlocks.push(
-      ...blocks.filter((b) => b.endLine - b.startLine + 1 >= minLines)
-    );
+    for (const b of blocks) {
+      if (b.endLine - b.startLine + 1 < minLines) continue;
+      const isExcluded = excludeRegexes.some((regex) => regex.test(b.code));
+      if (isExcluded) continue;
+      allBlocks.push(b);
+    }
   }
   const duplicates = [];
   const totalBlocks = allBlocks.length;
@@ -388,10 +407,20 @@ async function detectDuplicatePatterns(fileContents, options) {
       comparisons++;
       const b2 = allBlocks[j];
       if (b1.file === b2.file) continue;
+      const isWhitelisted = ignoreWhitelist.some((pattern) => {
+        return b1.file.includes(pattern) && b2.file.includes(pattern) || pattern === `${b1.file}::${b2.file}` || pattern === `${b2.file}::${b1.file}`;
+      });
+      if (isWhitelisted) continue;
       const isPython2 = b2.file.toLowerCase().endsWith(".py");
       const norm2 = normalizeCode(b2.code, isPython2);
       const sim = calculateSimilarity(norm1, norm2);
       if (sim >= minSimilarity) {
+        const confidence = calculateConfidence(
+          sim,
+          b1.tokens,
+          b1.endLine - b1.startLine + 1
+        );
+        if (confidence < confidenceThreshold) continue;
         const { severity, reason, suggestion, matchedRule } = calculateSeverity(
           b1.file,
           b2.file,
@@ -409,6 +438,7 @@ async function detectDuplicatePatterns(fileContents, options) {
           code1: b1.code,
           code2: b2.code,
           similarity: sim,
+          confidence,
           patternType: b1.patternType,
           tokenCost: b1.tokens + b2.tokens,
           severity,
@@ -419,7 +449,7 @@ async function detectDuplicatePatterns(fileContents, options) {
         duplicates.push(dup);
         if (streamResults)
           console.log(
-            `[DUPLICATE] ${dup.file1}:${dup.line1} <-> ${dup.file2}:${dup.line2} (${Math.round(sim * 100)}%)`
+            `[DUPLICATE] ${dup.file1}:${dup.line1} <-> ${dup.file2}:${dup.line2} (${Math.round(sim * 100)}%, conf: ${Math.round(confidence * 100)}%)`
           );
       }
     }
@@ -633,6 +663,17 @@ function logConfiguration(config, estimatedBlocks) {
   console.log(`   Min shared tokens: ${config.minSharedTokens}`);
   console.log(`   Severity filter: ${config.severity}`);
   console.log(`   Include tests: ${config.includeTests}`);
+  if (config.excludePatterns && config.excludePatterns.length > 0) {
+    console.log(`   Exclude patterns: ${config.excludePatterns.length} active`);
+  }
+  if (config.confidenceThreshold && config.confidenceThreshold > 0) {
+    console.log(`   Confidence threshold: ${config.confidenceThreshold}`);
+  }
+  if (config.ignoreWhitelist && config.ignoreWhitelist.length > 0) {
+    console.log(
+      `   Ignore whitelist: ${config.ignoreWhitelist.length} entries`
+    );
+  }
   console.log("");
 }
 async function analyzePatterns(options) {
@@ -651,6 +692,9 @@ async function analyzePatterns(options) {
     createClusters = true,
     minClusterTokenCost = 1e3,
     minClusterFiles = 3,
+    excludePatterns = [],
+    confidenceThreshold = 0,
+    ignoreWhitelist = [],
     ...scanOptions
   } = finalOptions;
   const files = await (0, import_core4.scanFiles)(scanOptions);
@@ -677,6 +721,9 @@ async function analyzePatterns(options) {
     minSharedTokens,
     maxCandidatesPerBlock,
     streamResults,
+    excludePatterns,
+    confidenceThreshold,
+    ignoreWhitelist,
     onProgress: options.onProgress
   });
   for (const file of files) {
@@ -778,6 +825,8 @@ function generateSummary(results) {
         }
       ],
       similarity: similarityMatch ? parseInt(similarityMatch[1]) / 100 : 0,
+      confidence: similarityMatch ? parseInt(similarityMatch[1]) / 100 : 0,
+      // Fallback for summary
       patternType: typeMatch?.[1] || "unknown",
       tokenCost: tokenMatch ? parseInt(tokenMatch[1]) : 0
     };

package/dist/index.mjs CHANGED Viewed

@@ -14,7 +14,7 @@ import {
   getSeverityThreshold,
   getSmartDefaults,
   groupDuplicatesByFilePair
-} from "./chunk-KPEK5REL.mjs";
+} from "./chunk-DR5W7S3Z.mjs";
 export {
   CONTEXT_RULES,
   PatternDetectProvider,