@aiready/pattern-detect 0.16.5 → 0.16.7

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -3,10 +3,12 @@ import {
3
3
  analyzePatterns,
4
4
  filterBySeverity,
5
5
  generateSummary
6
- } from "./chunk-KPEK5REL.mjs";
6
+ } from "./chunk-DR5W7S3Z.mjs";
7
7
 
8
8
  // src/cli.ts
9
9
  import { Command } from "commander";
10
+
11
+ // src/cli-action.ts
10
12
  import chalk from "chalk";
11
13
  import { writeFileSync, mkdirSync, existsSync } from "fs";
12
14
  import { dirname } from "path";
@@ -118,80 +120,61 @@ function generateHTMLReport(results, summary) {
118
120
  </html>`;
119
121
  }
120
122
 
121
- // src/cli.ts
122
- var program = new Command();
123
- program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").addHelpText(
124
- "after",
125
- "\nCONFIGURATION:\n Supports config files: aiready.json, aiready.config.json, .aiready.json, .aireadyrc.json, aiready.config.js, .aireadyrc.js\n CLI options override config file settings\n\nPARAMETER TUNING:\n If you get too few results: decrease --similarity, --min-lines, or --min-shared-tokens\n If analysis is too slow: increase --min-lines, --min-shared-tokens, or decrease --max-candidates\n If you get too many false positives: increase --similarity or --min-lines\n\nEXAMPLES:\n aiready-patterns . # Basic analysis with smart defaults\n aiready-patterns . --similarity 0.3 --min-lines 3 # More sensitive detection\n aiready-patterns . --max-candidates 50 --no-approx # Slower but more thorough\n aiready-patterns . --output json > report.json # JSON export"
126
- ).argument("<directory>", "Directory to analyze").option(
127
- "-s, --similarity <number>",
128
- "Minimum similarity score (0-1). Lower = more results, higher = fewer but more accurate. Default: 0.4"
129
- ).option(
130
- "-l, --min-lines <number>",
131
- "Minimum lines to consider. Lower = more results, higher = faster analysis. Default: 5"
132
- ).option(
133
- "--batch-size <number>",
134
- "Batch size for comparisons. Higher = faster but more memory. Default: 100"
135
- ).option(
136
- "--no-approx",
137
- "Disable approximate candidate selection. Slower but more thorough on small repos"
138
- ).option(
139
- "--min-shared-tokens <number>",
140
- "Minimum shared tokens to consider a candidate. Higher = faster, fewer results. Default: 8"
141
- ).option(
142
- "--max-candidates <number>",
143
- "Maximum candidates per block. Higher = more thorough but slower. Default: 100"
144
- ).option(
145
- "--no-stream-results",
146
- "Disable incremental output (default: enabled)"
147
- ).option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
148
- "--min-severity <level>",
149
- "Minimum severity to show: critical|major|minor|info. Default: minor"
150
- ).option(
151
- "--exclude-test-fixtures",
152
- "Exclude test fixture duplication (beforeAll/afterAll)"
153
- ).option("--exclude-templates", "Exclude template file duplication").option(
154
- "--include-tests",
155
- "Include test files in analysis (excluded by default)"
156
- ).option(
157
- "--max-results <number>",
158
- "Maximum number of results to show in console output. Default: 10"
159
- ).option("--no-group-by-file-pair", "Disable grouping duplicates by file pair").option("--no-create-clusters", "Disable creating refactor clusters").option(
160
- "--min-cluster-tokens <number>",
161
- "Minimum token cost for cluster reporting. Default: 1000"
162
- ).option(
163
- "--min-cluster-files <number>",
164
- "Minimum files for cluster reporting. Default: 3"
165
- ).option(
166
- "--show-raw-duplicates",
167
- "Show raw duplicates instead of grouped view"
168
- ).option(
169
- "-o, --output <format>",
170
- "Output format: console, json, html",
171
- "console"
172
- ).option("--output-file <path>", "Output file path (for json/html)").action(async (directory, options) => {
123
+ // src/constants.ts
124
+ var DEFAULT_MIN_SIMILARITY = 0.4;
125
+ var DEFAULT_MIN_LINES = 5;
126
+ var DEFAULT_BATCH_SIZE = 100;
127
+ var DEFAULT_MIN_SHARED_TOKENS = 8;
128
+ var DEFAULT_MAX_CANDIDATES_PER_BLOCK = 100;
129
+ var DEFAULT_MAX_RESULTS = 10;
130
+ var DEFAULT_MIN_CLUSTER_TOKEN_COST = 1e3;
131
+ var DEFAULT_MIN_CLUSTER_FILES = 3;
132
+ var COMMAND_NAME = "aiready-patterns";
133
+ var COMMAND_VERSION = "0.1.0";
134
+ var DEFAULT_OUTPUT_FORMAT = "console";
135
+ var HELP_TEXT_AFTER = `
136
+ CONFIGURATION:
137
+ Supports config files: aiready.json, aiready.config.json, .aiready.json, .aireadyrc.json, aiready.config.js, .aireadyrc.js
138
+ CLI options override config file settings
139
+
140
+ PARAMETER TUNING:
141
+ If you get too few results: decrease --similarity, --min-lines, or --min-shared-tokens
142
+ If analysis is too slow: increase --min-lines, --min-shared-tokens, or decrease --max-candidates
143
+ If you get too many false positives: increase --similarity or --min-lines
144
+
145
+ EXAMPLES:
146
+ aiready-patterns . # Basic analysis with smart defaults
147
+ aiready-patterns . --similarity 0.3 --min-lines 3 # More sensitive detection
148
+ aiready-patterns . --max-candidates 50 --no-approx # Slower but more thorough
149
+ aiready-patterns . --output json > report.json # JSON export`;
150
+
151
+ // src/cli-action.ts
152
+ async function patternActionHandler(directory, options) {
173
153
  console.log(chalk.blue("\u{1F50D} Analyzing patterns...\n"));
174
154
  const startTime = Date.now();
175
155
  const config = await loadConfig(directory);
176
156
  const defaults = {
177
- minSimilarity: 0.4,
178
- minLines: 5,
179
- batchSize: 100,
157
+ minSimilarity: DEFAULT_MIN_SIMILARITY,
158
+ minLines: DEFAULT_MIN_LINES,
159
+ batchSize: DEFAULT_BATCH_SIZE,
180
160
  approx: true,
181
- minSharedTokens: 8,
182
- maxCandidatesPerBlock: 100,
161
+ minSharedTokens: DEFAULT_MIN_SHARED_TOKENS,
162
+ maxCandidatesPerBlock: DEFAULT_MAX_CANDIDATES_PER_BLOCK,
183
163
  streamResults: true,
184
164
  include: void 0,
185
165
  exclude: void 0,
166
+ excludePatterns: void 0,
167
+ confidenceThreshold: 0,
168
+ ignoreWhitelist: void 0,
186
169
  minSeverity: Severity.Minor,
187
170
  excludeTestFixtures: false,
188
171
  excludeTemplates: false,
189
172
  includeTests: false,
190
- maxResults: 10,
173
+ maxResults: DEFAULT_MAX_RESULTS,
191
174
  groupByFilePair: true,
192
175
  createClusters: true,
193
- minClusterTokenCost: 1e3,
194
- minClusterFiles: 3,
176
+ minClusterTokenCost: DEFAULT_MIN_CLUSTER_TOKEN_COST,
177
+ minClusterFiles: DEFAULT_MIN_CLUSTER_FILES,
195
178
  showRawDuplicates: false
196
179
  };
197
180
  const mergedConfig = mergeConfigWithDefaults(config, defaults);
@@ -207,15 +190,18 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
207
190
  streamResults: options.streamResults !== false && mergedConfig.streamResults,
208
191
  include: options.include?.split(",") || mergedConfig.include,
209
192
  exclude: options.exclude?.split(",") || mergedConfig.exclude,
193
+ excludePatterns: options.excludePatterns?.split(",") || mergedConfig.excludePatterns,
194
+ confidenceThreshold: options.confidenceThreshold ? parseFloat(options.confidenceThreshold) : mergedConfig.confidenceThreshold,
195
+ ignoreWhitelist: options.ignoreWhitelist?.split(",") || mergedConfig.ignoreWhitelist,
210
196
  minSeverity: options.minSeverity || mergedConfig.minSeverity,
211
197
  excludeTestFixtures: options.excludeTestFixtures || mergedConfig.excludeTestFixtures,
212
198
  excludeTemplates: options.excludeTemplates || mergedConfig.excludeTemplates,
213
199
  includeTests: options.includeTests || mergedConfig.includeTests,
214
200
  maxResults: options.maxResults ? parseInt(options.maxResults) : mergedConfig.maxResults,
215
- groupByFilePair: options.groupBy_file_pair !== false && mergedConfig.groupByFilePair,
216
- createClusters: options.create_clusters !== false && mergedConfig.createClusters,
217
- minClusterTokenCost: options.min_cluster_tokens ? parseInt(options.min_cluster_tokens) : mergedConfig.minClusterTokenCost,
218
- minClusterFiles: options.min_cluster_files ? parseInt(options.min_cluster_files) : mergedConfig.minClusterFiles,
201
+ groupByFilePair: options.groupByFilePair !== false && mergedConfig.groupByFilePair,
202
+ createClusters: options.createClusters !== false && mergedConfig.createClusters,
203
+ minClusterTokenCost: options.minClusterTokens ? parseInt(options.minClusterTokens) : mergedConfig.minClusterTokenCost,
204
+ minClusterFiles: options.minClusterFiles ? parseInt(options.minClusterFiles) : mergedConfig.minClusterFiles,
219
205
  showRawDuplicates: options.showRawDuplicates || mergedConfig.showRawDuplicates
220
206
  };
221
207
  if (finalOptions.includeTests && finalOptions.exclude) {
@@ -301,9 +287,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
301
287
  console.log(chalk.cyan(divider));
302
288
  console.log(chalk.bold.white(" PATTERN ANALYSIS SUMMARY"));
303
289
  console.log(chalk.cyan(divider) + "\n");
304
- console.log(
305
- chalk.white(`\u{1F4C1} Files analyzed: ${chalk.bold(results.length)}`)
306
- );
290
+ console.log(chalk.white(`\u{1F4C1} Files analyzed: ${chalk.bold(results.length)}`));
307
291
  console.log(
308
292
  chalk.yellow(
309
293
  `\u26A0 AI confusion patterns detected: ${chalk.bold(totalIssues)}`
@@ -314,9 +298,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
314
298
  `\u{1F4B0} Token cost (wasted): ${chalk.bold(summary.totalTokenCost.toLocaleString())}`
315
299
  )
316
300
  );
317
- console.log(
318
- chalk.gray(`\u23F1 Analysis time: ${chalk.bold(elapsedTime + "s")}`)
319
- );
301
+ console.log(chalk.gray(`\u23F1 Analysis time: ${chalk.bold(elapsedTime + "s")}`));
320
302
  const sortedTypes = Object.entries(summary.patternsByType).filter(([, count]) => count > 0).sort(([, a], [, b]) => b - a);
321
303
  if (sortedTypes.length > 0) {
322
304
  console.log(chalk.cyan("\n" + divider));
@@ -382,9 +364,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
382
364
  console.log(chalk.cyan(divider) + "\n");
383
365
  clusters.sort((a, b) => b.totalTokenCost - a.totalTokenCost).forEach((cluster, idx) => {
384
366
  const severityBadge = getSeverityBadge2(cluster.severity);
385
- console.log(
386
- `${idx + 1}. ${severityBadge} ${chalk.bold(cluster.name)}`
387
- );
367
+ console.log(`${idx + 1}. ${severityBadge} ${chalk.bold(cluster.name)}`);
388
368
  console.log(
389
369
  ` Total tokens: ${chalk.bold(cluster.totalTokenCost.toLocaleString())} | Avg similarity: ${chalk.bold(Math.round(cluster.averageSimilarity * 100) + "%")} | Duplicates: ${chalk.bold(cluster.duplicateCount)}`
390
370
  );
@@ -466,10 +446,8 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
466
446
  chalk.red("\u25CF ") + chalk.white(`${issue.file}:${issue.location.line}`)
467
447
  );
468
448
  console.log(` ${chalk.dim(issue.message)}`);
469
- console.log(
470
- ` ${chalk.green("\u2192")} ${chalk.italic(issue.suggestion)}
471
- `
472
- );
449
+ console.log(` ${chalk.green("\u2192")} ${chalk.italic(issue.suggestion)}
450
+ `);
473
451
  });
474
452
  }
475
453
  if (totalIssues === 0) {
@@ -479,9 +457,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
479
457
  "\u{1F4A1} If you expected to find duplicates, try adjusting parameters:"
480
458
  )
481
459
  );
482
- console.log(
483
- chalk.dim(" \u2022 Lower similarity threshold: --similarity 0.3")
484
- );
460
+ console.log(chalk.dim(" \u2022 Lower similarity threshold: --similarity 0.3"));
485
461
  console.log(chalk.dim(" \u2022 Reduce minimum lines: --min-lines 3"));
486
462
  console.log(chalk.dim(" \u2022 Include test files: --include-tests"));
487
463
  console.log(
@@ -493,9 +469,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
493
469
  console.log(
494
470
  chalk.yellow("\n\u{1F4A1} Few results found. To find more duplicates, try:")
495
471
  );
496
- console.log(
497
- chalk.dim(" \u2022 Lower similarity threshold: --similarity 0.3")
498
- );
472
+ console.log(chalk.dim(" \u2022 Lower similarity threshold: --similarity 0.3"));
499
473
  console.log(chalk.dim(" \u2022 Reduce minimum lines: --min-lines 3"));
500
474
  console.log(chalk.dim(" \u2022 Include test files: --include-tests"));
501
475
  console.log(
@@ -522,5 +496,64 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
522
496
  "\u{1F41B} Found a bug? Report it: https://github.com/caopengau/aiready-pattern-detect/issues\n"
523
497
  )
524
498
  );
525
- });
499
+ }
500
+
501
+ // src/cli.ts
502
+ var program = new Command();
503
+ program.name(COMMAND_NAME).description("Detect duplicate patterns in your codebase").version(COMMAND_VERSION).addHelpText("after", HELP_TEXT_AFTER).argument("<directory>", "Directory to analyze").option(
504
+ "-s, --similarity <number>",
505
+ `Minimum similarity score (0-1). Default: ${DEFAULT_MIN_SIMILARITY}`
506
+ ).option(
507
+ "-l, --min-lines <number>",
508
+ `Minimum lines to consider. Default: ${DEFAULT_MIN_LINES}`
509
+ ).option(
510
+ "--batch-size <number>",
511
+ `Batch size for comparisons. Default: ${DEFAULT_BATCH_SIZE}`
512
+ ).option(
513
+ "--no-approx",
514
+ "Disable approximate candidate selection. Slower but more thorough on small repos"
515
+ ).option(
516
+ "--min-shared-tokens <number>",
517
+ `Minimum shared tokens to consider a candidate. Default: ${DEFAULT_MIN_SHARED_TOKENS}`
518
+ ).option(
519
+ "--max-candidates <number>",
520
+ `Maximum candidates per block. Default: ${DEFAULT_MAX_CANDIDATES_PER_BLOCK}`
521
+ ).option(
522
+ "--no-stream-results",
523
+ "Disable incremental output (default: enabled)"
524
+ ).option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
525
+ "--exclude-patterns <regexes>",
526
+ "Regex patterns to exclude specific code content (comma-separated)"
527
+ ).option(
528
+ "--confidence-threshold <number>",
529
+ "Minimum confidence score (0-1). Default: 0"
530
+ ).option(
531
+ "--ignore-whitelist <patterns>",
532
+ "List of file pairs or patterns to ignore (comma-separated)"
533
+ ).option(
534
+ "--min-severity <level>",
535
+ "Minimum severity to show: critical|major|minor|info. Default: minor"
536
+ ).option(
537
+ "--exclude-test-fixtures",
538
+ "Exclude test fixture duplication (beforeAll/afterAll)"
539
+ ).option("--exclude-templates", "Exclude template file duplication").option(
540
+ "--include-tests",
541
+ "Include test files in analysis (excluded by default)"
542
+ ).option(
543
+ "--max-results <number>",
544
+ `Maximum number of results to show in console output. Default: ${DEFAULT_MAX_RESULTS}`
545
+ ).option("--no-group-by-file-pair", "Disable grouping duplicates by file pair").option("--no-create-clusters", "Disable creating refactor clusters").option(
546
+ "--min-cluster-tokens <number>",
547
+ `Minimum token cost for cluster reporting. Default: ${DEFAULT_MIN_CLUSTER_TOKEN_COST}`
548
+ ).option(
549
+ "--min-cluster-files <number>",
550
+ `Minimum files for cluster reporting. Default: ${DEFAULT_MIN_CLUSTER_FILES}`
551
+ ).option(
552
+ "--show-raw-duplicates",
553
+ "Show raw duplicates instead of grouped view"
554
+ ).option(
555
+ "-o, --output <format>",
556
+ "Output format: console, json, html",
557
+ DEFAULT_OUTPUT_FORMAT
558
+ ).option("--output-file <path>", "Output file path (for json/html)").action(patternActionHandler);
526
559
  program.parse();
package/dist/index.d.mts CHANGED
@@ -17,6 +17,7 @@ interface DuplicatePattern {
17
17
  code1: string;
18
18
  code2: string;
19
19
  similarity: number;
20
+ confidence: number;
20
21
  patternType: PatternType;
21
22
  tokenCost: number;
22
23
  severity: Severity;
@@ -32,11 +33,18 @@ interface DetectionOptions {
32
33
  minSharedTokens: number;
33
34
  maxCandidatesPerBlock: number;
34
35
  streamResults: boolean;
36
+ excludePatterns?: string[];
37
+ confidenceThreshold?: number;
38
+ ignoreWhitelist?: string[];
35
39
  onProgress?: (processed: number, total: number, message: string) => void;
36
40
  }
37
41
 
38
42
  /**
39
43
  * Detect duplicate patterns across files
44
+ *
45
+ * @param fileContents - Array of file contents to analyze.
46
+ * @param options - Configuration for duplicate detection (thresholds, progress, etc).
47
+ * @returns Promise resolving to an array of detected duplicate patterns sorted by similarity.
40
48
  */
41
49
  declare function detectDuplicatePatterns(fileContents: FileContent[], options: DetectionOptions): Promise<DuplicatePattern[]>;
42
50
 
@@ -104,6 +112,9 @@ interface PatternDetectOptions extends ScanOptions {
104
112
  createClusters?: boolean;
105
113
  minClusterTokenCost?: number;
106
114
  minClusterFiles?: number;
115
+ excludePatterns?: string[];
116
+ confidenceThreshold?: number;
117
+ ignoreWhitelist?: string[];
107
118
  onProgress?: (processed: number, total: number, message: string) => void;
108
119
  }
109
120
  interface PatternSummary {
@@ -122,9 +133,20 @@ interface PatternSummary {
122
133
  }>;
123
134
  }
124
135
  /**
125
- * Determine smart defaults based on repository size estimation
136
+ * Determine smart defaults based on repository size estimation.
137
+ *
138
+ * @param directory - The directory to analyze for size.
139
+ * @param userOptions - User-provided option overrides.
140
+ * @returns Promise resolving to optimal detection options.
126
141
  */
127
142
  declare function getSmartDefaults(directory: string, userOptions: Partial<PatternDetectOptions>): Promise<PatternDetectOptions>;
143
+ /**
144
+ * Main entry point for pattern detection analysis.
145
+ *
146
+ * @param options - Configuration including rootDir and detection parameters.
147
+ * @returns Promise resolving to the comprehensive pattern detect report.
148
+ * @lastUpdated 2026-03-18
149
+ */
128
150
  declare function analyzePatterns(options: PatternDetectOptions): Promise<{
129
151
  results: AnalysisResult[];
130
152
  duplicates: DuplicatePattern[];
@@ -133,6 +155,12 @@ declare function analyzePatterns(options: PatternDetectOptions): Promise<{
133
155
  clusters?: RefactorCluster[];
134
156
  config: PatternDetectOptions;
135
157
  }>;
158
+ /**
159
+ * Generate a summary of pattern detection results.
160
+ *
161
+ * @param results - Array of file-level analysis results.
162
+ * @returns Consolidated pattern summary object.
163
+ */
136
164
  declare function generateSummary(results: AnalysisResult[]): PatternSummary;
137
165
 
138
166
  /**
@@ -146,6 +174,11 @@ declare function generateSummary(results: AnalysisResult[]): PatternSummary;
146
174
  * Includes business value metrics:
147
175
  * - Estimated monthly cost of token waste
148
176
  * - Estimated developer hours to fix
177
+ *
178
+ * @param duplicates - Array of detected duplicate patterns.
179
+ * @param totalFilesAnalyzed - Total count of files scanned.
180
+ * @param costConfig - Optional configuration for business value calculations.
181
+ * @returns Standardized scoring output for pattern detection.
149
182
  */
150
183
  declare function calculatePatternScore(duplicates: DuplicatePattern[], totalFilesAnalyzed: number, costConfig?: Partial<CostConfig>): ToolScoringOutput;
151
184
 
@@ -167,6 +200,13 @@ interface ContextRule {
167
200
  declare const CONTEXT_RULES: ContextRule[];
168
201
  /**
169
202
  * Calculate severity based on context rules and code characteristics
203
+ *
204
+ * @param file1 - First file path in the duplicate pair.
205
+ * @param file2 - Second file path in the duplicate pair.
206
+ * @param code - Snippet of the duplicated code.
207
+ * @param similarity - The calculated similarity score (0-1).
208
+ * @param linesOfCode - Number of lines in the duplicated block.
209
+ * @returns An object containing the severity level and reasoning.
170
210
  */
171
211
  declare function calculateSeverity(file1: string, file2: string, code: string, similarity: number, linesOfCode: number): {
172
212
  severity: Severity;
@@ -176,16 +216,26 @@ declare function calculateSeverity(file1: string, file2: string, code: string, s
176
216
  };
177
217
  /**
178
218
  * Get a human-readable severity label with emoji
219
+ *
220
+ * @param severity - The severity level to label.
221
+ * @returns Formatted label string for UI display.
179
222
  */
180
223
  declare function getSeverityLabel(severity: Severity): string;
181
224
  /**
182
225
  * Filter duplicates by minimum severity threshold
226
+ *
227
+ * @param duplicates - List of items with a severity property.
228
+ * @param minSeverity - Minimum threshold for inclusion.
229
+ * @returns Filtered list of items.
183
230
  */
184
231
  declare function filterBySeverity<T extends {
185
232
  severity: Severity;
186
233
  }>(duplicates: T[], minSeverity: Severity): T[];
187
234
  /**
188
- * Get severity threshold for filtering
235
+ * Get numerical similarity threshold associated with a severity level
236
+ *
237
+ * @param severity - The severity level to look up.
238
+ * @returns Minimum similarity value for this severity.
189
239
  */
190
240
  declare function getSeverityThreshold(severity: Severity): number;
191
241
 
package/dist/index.d.ts CHANGED
@@ -17,6 +17,7 @@ interface DuplicatePattern {
17
17
  code1: string;
18
18
  code2: string;
19
19
  similarity: number;
20
+ confidence: number;
20
21
  patternType: PatternType;
21
22
  tokenCost: number;
22
23
  severity: Severity;
@@ -32,11 +33,18 @@ interface DetectionOptions {
32
33
  minSharedTokens: number;
33
34
  maxCandidatesPerBlock: number;
34
35
  streamResults: boolean;
36
+ excludePatterns?: string[];
37
+ confidenceThreshold?: number;
38
+ ignoreWhitelist?: string[];
35
39
  onProgress?: (processed: number, total: number, message: string) => void;
36
40
  }
37
41
 
38
42
  /**
39
43
  * Detect duplicate patterns across files
44
+ *
45
+ * @param fileContents - Array of file contents to analyze.
46
+ * @param options - Configuration for duplicate detection (thresholds, progress, etc).
47
+ * @returns Promise resolving to an array of detected duplicate patterns sorted by similarity.
40
48
  */
41
49
  declare function detectDuplicatePatterns(fileContents: FileContent[], options: DetectionOptions): Promise<DuplicatePattern[]>;
42
50
 
@@ -104,6 +112,9 @@ interface PatternDetectOptions extends ScanOptions {
104
112
  createClusters?: boolean;
105
113
  minClusterTokenCost?: number;
106
114
  minClusterFiles?: number;
115
+ excludePatterns?: string[];
116
+ confidenceThreshold?: number;
117
+ ignoreWhitelist?: string[];
107
118
  onProgress?: (processed: number, total: number, message: string) => void;
108
119
  }
109
120
  interface PatternSummary {
@@ -122,9 +133,20 @@ interface PatternSummary {
122
133
  }>;
123
134
  }
124
135
  /**
125
- * Determine smart defaults based on repository size estimation
136
+ * Determine smart defaults based on repository size estimation.
137
+ *
138
+ * @param directory - The directory to analyze for size.
139
+ * @param userOptions - User-provided option overrides.
140
+ * @returns Promise resolving to optimal detection options.
126
141
  */
127
142
  declare function getSmartDefaults(directory: string, userOptions: Partial<PatternDetectOptions>): Promise<PatternDetectOptions>;
143
+ /**
144
+ * Main entry point for pattern detection analysis.
145
+ *
146
+ * @param options - Configuration including rootDir and detection parameters.
147
+ * @returns Promise resolving to the comprehensive pattern detect report.
148
+ * @lastUpdated 2026-03-18
149
+ */
128
150
  declare function analyzePatterns(options: PatternDetectOptions): Promise<{
129
151
  results: AnalysisResult[];
130
152
  duplicates: DuplicatePattern[];
@@ -133,6 +155,12 @@ declare function analyzePatterns(options: PatternDetectOptions): Promise<{
133
155
  clusters?: RefactorCluster[];
134
156
  config: PatternDetectOptions;
135
157
  }>;
158
+ /**
159
+ * Generate a summary of pattern detection results.
160
+ *
161
+ * @param results - Array of file-level analysis results.
162
+ * @returns Consolidated pattern summary object.
163
+ */
136
164
  declare function generateSummary(results: AnalysisResult[]): PatternSummary;
137
165
 
138
166
  /**
@@ -146,6 +174,11 @@ declare function generateSummary(results: AnalysisResult[]): PatternSummary;
146
174
  * Includes business value metrics:
147
175
  * - Estimated monthly cost of token waste
148
176
  * - Estimated developer hours to fix
177
+ *
178
+ * @param duplicates - Array of detected duplicate patterns.
179
+ * @param totalFilesAnalyzed - Total count of files scanned.
180
+ * @param costConfig - Optional configuration for business value calculations.
181
+ * @returns Standardized scoring output for pattern detection.
149
182
  */
150
183
  declare function calculatePatternScore(duplicates: DuplicatePattern[], totalFilesAnalyzed: number, costConfig?: Partial<CostConfig>): ToolScoringOutput;
151
184
 
@@ -167,6 +200,13 @@ interface ContextRule {
167
200
  declare const CONTEXT_RULES: ContextRule[];
168
201
  /**
169
202
  * Calculate severity based on context rules and code characteristics
203
+ *
204
+ * @param file1 - First file path in the duplicate pair.
205
+ * @param file2 - Second file path in the duplicate pair.
206
+ * @param code - Snippet of the duplicated code.
207
+ * @param similarity - The calculated similarity score (0-1).
208
+ * @param linesOfCode - Number of lines in the duplicated block.
209
+ * @returns An object containing the severity level and reasoning.
170
210
  */
171
211
  declare function calculateSeverity(file1: string, file2: string, code: string, similarity: number, linesOfCode: number): {
172
212
  severity: Severity;
@@ -176,16 +216,26 @@ declare function calculateSeverity(file1: string, file2: string, code: string, s
176
216
  };
177
217
  /**
178
218
  * Get a human-readable severity label with emoji
219
+ *
220
+ * @param severity - The severity level to label.
221
+ * @returns Formatted label string for UI display.
179
222
  */
180
223
  declare function getSeverityLabel(severity: Severity): string;
181
224
  /**
182
225
  * Filter duplicates by minimum severity threshold
226
+ *
227
+ * @param duplicates - List of items with a severity property.
228
+ * @param minSeverity - Minimum threshold for inclusion.
229
+ * @returns Filtered list of items.
183
230
  */
184
231
  declare function filterBySeverity<T extends {
185
232
  severity: Severity;
186
233
  }>(duplicates: T[], minSeverity: Severity): T[];
187
234
  /**
188
- * Get severity threshold for filtering
235
+ * Get numerical similarity threshold associated with a severity level
236
+ *
237
+ * @param severity - The severity level to look up.
238
+ * @returns Minimum similarity value for this severity.
189
239
  */
190
240
  declare function getSeverityThreshold(severity: Severity): number;
191
241
 
package/dist/index.js CHANGED
@@ -350,14 +350,33 @@ function calculateSimilarity(a, b) {
350
350
  const union = /* @__PURE__ */ new Set([...setA, ...setB]);
351
351
  return intersection.size / union.size;
352
352
  }
353
+ function calculateConfidence(similarity, tokens, lines) {
354
+ let confidence = similarity;
355
+ if (lines > 20) confidence += 0.05;
356
+ if (tokens > 200) confidence += 0.05;
357
+ if (lines < 5) confidence -= 0.1;
358
+ return Math.max(0, Math.min(1, confidence));
359
+ }
353
360
  async function detectDuplicatePatterns(fileContents, options) {
354
- const { minSimilarity, minLines, streamResults, onProgress } = options;
361
+ const {
362
+ minSimilarity,
363
+ minLines,
364
+ streamResults,
365
+ onProgress,
366
+ excludePatterns = [],
367
+ confidenceThreshold = 0,
368
+ ignoreWhitelist = []
369
+ } = options;
355
370
  const allBlocks = [];
371
+ const excludeRegexes = excludePatterns.map((p) => new RegExp(p, "i"));
356
372
  for (const { file, content } of fileContents) {
357
373
  const blocks = extractBlocks(file, content);
358
- allBlocks.push(
359
- ...blocks.filter((b) => b.endLine - b.startLine + 1 >= minLines)
360
- );
374
+ for (const b of blocks) {
375
+ if (b.endLine - b.startLine + 1 < minLines) continue;
376
+ const isExcluded = excludeRegexes.some((regex) => regex.test(b.code));
377
+ if (isExcluded) continue;
378
+ allBlocks.push(b);
379
+ }
361
380
  }
362
381
  const duplicates = [];
363
382
  const totalBlocks = allBlocks.length;
@@ -388,10 +407,20 @@ async function detectDuplicatePatterns(fileContents, options) {
388
407
  comparisons++;
389
408
  const b2 = allBlocks[j];
390
409
  if (b1.file === b2.file) continue;
410
+ const isWhitelisted = ignoreWhitelist.some((pattern) => {
411
+ return b1.file.includes(pattern) && b2.file.includes(pattern) || pattern === `${b1.file}::${b2.file}` || pattern === `${b2.file}::${b1.file}`;
412
+ });
413
+ if (isWhitelisted) continue;
391
414
  const isPython2 = b2.file.toLowerCase().endsWith(".py");
392
415
  const norm2 = normalizeCode(b2.code, isPython2);
393
416
  const sim = calculateSimilarity(norm1, norm2);
394
417
  if (sim >= minSimilarity) {
418
+ const confidence = calculateConfidence(
419
+ sim,
420
+ b1.tokens,
421
+ b1.endLine - b1.startLine + 1
422
+ );
423
+ if (confidence < confidenceThreshold) continue;
395
424
  const { severity, reason, suggestion, matchedRule } = calculateSeverity(
396
425
  b1.file,
397
426
  b2.file,
@@ -409,6 +438,7 @@ async function detectDuplicatePatterns(fileContents, options) {
409
438
  code1: b1.code,
410
439
  code2: b2.code,
411
440
  similarity: sim,
441
+ confidence,
412
442
  patternType: b1.patternType,
413
443
  tokenCost: b1.tokens + b2.tokens,
414
444
  severity,
@@ -419,7 +449,7 @@ async function detectDuplicatePatterns(fileContents, options) {
419
449
  duplicates.push(dup);
420
450
  if (streamResults)
421
451
  console.log(
422
- `[DUPLICATE] ${dup.file1}:${dup.line1} <-> ${dup.file2}:${dup.line2} (${Math.round(sim * 100)}%)`
452
+ `[DUPLICATE] ${dup.file1}:${dup.line1} <-> ${dup.file2}:${dup.line2} (${Math.round(sim * 100)}%, conf: ${Math.round(confidence * 100)}%)`
423
453
  );
424
454
  }
425
455
  }
@@ -633,6 +663,17 @@ function logConfiguration(config, estimatedBlocks) {
633
663
  console.log(` Min shared tokens: ${config.minSharedTokens}`);
634
664
  console.log(` Severity filter: ${config.severity}`);
635
665
  console.log(` Include tests: ${config.includeTests}`);
666
+ if (config.excludePatterns && config.excludePatterns.length > 0) {
667
+ console.log(` Exclude patterns: ${config.excludePatterns.length} active`);
668
+ }
669
+ if (config.confidenceThreshold && config.confidenceThreshold > 0) {
670
+ console.log(` Confidence threshold: ${config.confidenceThreshold}`);
671
+ }
672
+ if (config.ignoreWhitelist && config.ignoreWhitelist.length > 0) {
673
+ console.log(
674
+ ` Ignore whitelist: ${config.ignoreWhitelist.length} entries`
675
+ );
676
+ }
636
677
  console.log("");
637
678
  }
638
679
  async function analyzePatterns(options) {
@@ -651,6 +692,9 @@ async function analyzePatterns(options) {
651
692
  createClusters = true,
652
693
  minClusterTokenCost = 1e3,
653
694
  minClusterFiles = 3,
695
+ excludePatterns = [],
696
+ confidenceThreshold = 0,
697
+ ignoreWhitelist = [],
654
698
  ...scanOptions
655
699
  } = finalOptions;
656
700
  const files = await (0, import_core4.scanFiles)(scanOptions);
@@ -677,6 +721,9 @@ async function analyzePatterns(options) {
677
721
  minSharedTokens,
678
722
  maxCandidatesPerBlock,
679
723
  streamResults,
724
+ excludePatterns,
725
+ confidenceThreshold,
726
+ ignoreWhitelist,
680
727
  onProgress: options.onProgress
681
728
  });
682
729
  for (const file of files) {
@@ -778,6 +825,8 @@ function generateSummary(results) {
778
825
  }
779
826
  ],
780
827
  similarity: similarityMatch ? parseInt(similarityMatch[1]) / 100 : 0,
828
+ confidence: similarityMatch ? parseInt(similarityMatch[1]) / 100 : 0,
829
+ // Fallback for summary
781
830
  patternType: typeMatch?.[1] || "unknown",
782
831
  tokenCost: tokenMatch ? parseInt(tokenMatch[1]) : 0
783
832
  };
package/dist/index.mjs CHANGED
@@ -14,7 +14,7 @@ import {
14
14
  getSeverityThreshold,
15
15
  getSmartDefaults,
16
16
  groupDuplicatesByFilePair
17
- } from "./chunk-KPEK5REL.mjs";
17
+ } from "./chunk-DR5W7S3Z.mjs";
18
18
  export {
19
19
  CONTEXT_RULES,
20
20
  PatternDetectProvider,