@aiready/pattern-detect 0.11.31 → 0.11.34

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -3,16 +3,65 @@ import {
3
3
  analyzePatterns,
4
4
  filterBySeverity,
5
5
  generateSummary
6
- } from "./chunk-WKBCNITM.mjs";
6
+ } from "./chunk-YSDOUNJJ.mjs";
7
7
 
8
8
  // src/cli.ts
9
9
  import { Command } from "commander";
10
10
  import chalk from "chalk";
11
11
  import { writeFileSync, mkdirSync, existsSync } from "fs";
12
12
  import { dirname } from "path";
13
- import { loadConfig, mergeConfigWithDefaults, resolveOutputPath } from "@aiready/core";
13
+ import {
14
+ loadConfig,
15
+ mergeConfigWithDefaults,
16
+ resolveOutputPath
17
+ } from "@aiready/core";
14
18
  var program = new Command();
15
- program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").addHelpText("after", "\nCONFIGURATION:\n Supports config files: aiready.json, aiready.config.json, .aiready.json, .aireadyrc.json, aiready.config.js, .aireadyrc.js\n CLI options override config file settings\n\nPARAMETER TUNING:\n If you get too few results: decrease --similarity, --min-lines, or --min-shared-tokens\n If analysis is too slow: increase --min-lines, --min-shared-tokens, or decrease --max-candidates\n If you get too many false positives: increase --similarity or --min-lines\n\nEXAMPLES:\n aiready-patterns . # Basic analysis with smart defaults\n aiready-patterns . --similarity 0.3 --min-lines 3 # More sensitive detection\n aiready-patterns . --max-candidates 50 --no-approx # Slower but more thorough\n aiready-patterns . --output json > report.json # JSON export").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1). Lower = more results, higher = fewer but more accurate. Default: 0.4").option("-l, --min-lines <number>", "Minimum lines to consider. Lower = more results, higher = faster analysis. Default: 5").option("--batch-size <number>", "Batch size for comparisons. Higher = faster but more memory. Default: 100").option("--no-approx", "Disable approximate candidate selection. Slower but more thorough on small repos").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate. Higher = faster, fewer results. Default: 8").option("--max-candidates <number>", "Maximum candidates per block. Higher = more thorough but slower. Default: 100").option("--no-stream-results", "Disable incremental output (default: enabled)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option("--min-severity <level>", "Minimum severity to show: critical|major|minor|info. Default: minor").option("--exclude-test-fixtures", "Exclude test fixture duplication (beforeAll/afterAll)").option("--exclude-templates", "Exclude template file duplication").option("--include-tests", "Include test files in analysis (excluded by default)").option("--max-results <number>", "Maximum number of results to show in console output. Default: 10").option("--no-group-by-file-pair", "Disable grouping duplicates by file pair").option("--no-create-clusters", "Disable creating refactor clusters").option("--min-cluster-tokens <number>", "Minimum token cost for cluster reporting. Default: 1000").option("--min-cluster-files <number>", "Minimum files for cluster reporting. Default: 3").option("--show-raw-duplicates", "Show raw duplicates instead of grouped view").option(
19
+ program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").addHelpText(
20
+ "after",
21
+ "\nCONFIGURATION:\n Supports config files: aiready.json, aiready.config.json, .aiready.json, .aireadyrc.json, aiready.config.js, .aireadyrc.js\n CLI options override config file settings\n\nPARAMETER TUNING:\n If you get too few results: decrease --similarity, --min-lines, or --min-shared-tokens\n If analysis is too slow: increase --min-lines, --min-shared-tokens, or decrease --max-candidates\n If you get too many false positives: increase --similarity or --min-lines\n\nEXAMPLES:\n aiready-patterns . # Basic analysis with smart defaults\n aiready-patterns . --similarity 0.3 --min-lines 3 # More sensitive detection\n aiready-patterns . --max-candidates 50 --no-approx # Slower but more thorough\n aiready-patterns . --output json > report.json # JSON export"
22
+ ).argument("<directory>", "Directory to analyze").option(
23
+ "-s, --similarity <number>",
24
+ "Minimum similarity score (0-1). Lower = more results, higher = fewer but more accurate. Default: 0.4"
25
+ ).option(
26
+ "-l, --min-lines <number>",
27
+ "Minimum lines to consider. Lower = more results, higher = faster analysis. Default: 5"
28
+ ).option(
29
+ "--batch-size <number>",
30
+ "Batch size for comparisons. Higher = faster but more memory. Default: 100"
31
+ ).option(
32
+ "--no-approx",
33
+ "Disable approximate candidate selection. Slower but more thorough on small repos"
34
+ ).option(
35
+ "--min-shared-tokens <number>",
36
+ "Minimum shared tokens to consider a candidate. Higher = faster, fewer results. Default: 8"
37
+ ).option(
38
+ "--max-candidates <number>",
39
+ "Maximum candidates per block. Higher = more thorough but slower. Default: 100"
40
+ ).option(
41
+ "--no-stream-results",
42
+ "Disable incremental output (default: enabled)"
43
+ ).option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
44
+ "--min-severity <level>",
45
+ "Minimum severity to show: critical|major|minor|info. Default: minor"
46
+ ).option(
47
+ "--exclude-test-fixtures",
48
+ "Exclude test fixture duplication (beforeAll/afterAll)"
49
+ ).option("--exclude-templates", "Exclude template file duplication").option(
50
+ "--include-tests",
51
+ "Include test files in analysis (excluded by default)"
52
+ ).option(
53
+ "--max-results <number>",
54
+ "Maximum number of results to show in console output. Default: 10"
55
+ ).option("--no-group-by-file-pair", "Disable grouping duplicates by file pair").option("--no-create-clusters", "Disable creating refactor clusters").option(
56
+ "--min-cluster-tokens <number>",
57
+ "Minimum token cost for cluster reporting. Default: 1000"
58
+ ).option(
59
+ "--min-cluster-files <number>",
60
+ "Minimum files for cluster reporting. Default: 3"
61
+ ).option(
62
+ "--show-raw-duplicates",
63
+ "Show raw duplicates instead of grouped view"
64
+ ).option(
16
65
  "-o, --output <format>",
17
66
  "Output format: console, json, html",
18
67
  "console"
@@ -77,16 +126,29 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
77
126
  (pattern) => !testPatterns.includes(pattern)
78
127
  );
79
128
  }
80
- const { results, duplicates: rawDuplicates, files, groups, clusters } = await analyzePatterns(finalOptions);
129
+ const {
130
+ results,
131
+ duplicates: rawDuplicates,
132
+ files,
133
+ groups,
134
+ clusters
135
+ } = await analyzePatterns(finalOptions);
81
136
  let filteredDuplicates = rawDuplicates;
82
137
  if (finalOptions.minSeverity) {
83
- filteredDuplicates = filterBySeverity(filteredDuplicates, finalOptions.minSeverity);
138
+ filteredDuplicates = filterBySeverity(
139
+ filteredDuplicates,
140
+ finalOptions.minSeverity
141
+ );
84
142
  }
85
143
  if (finalOptions.excludeTestFixtures) {
86
- filteredDuplicates = filteredDuplicates.filter((d) => d.matchedRule !== "test-fixtures");
144
+ filteredDuplicates = filteredDuplicates.filter(
145
+ (d) => d.matchedRule !== "test-fixtures"
146
+ );
87
147
  }
88
148
  if (finalOptions.excludeTemplates) {
89
- filteredDuplicates = filteredDuplicates.filter((d) => d.matchedRule !== "templates");
149
+ filteredDuplicates = filteredDuplicates.filter(
150
+ (d) => d.matchedRule !== "templates"
151
+ );
90
152
  }
91
153
  const elapsedTime = ((Date.now() - startTime) / 1e3).toFixed(2);
92
154
  const summary = generateSummary(results);
@@ -140,7 +202,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
140
202
  chalk.white(`\u{1F4C1} Files analyzed: ${chalk.bold(results.length)}`)
141
203
  );
142
204
  console.log(
143
- chalk.yellow(`\u26A0 AI confusion patterns detected: ${chalk.bold(totalIssues)}`)
205
+ chalk.yellow(
206
+ `\u26A0 AI confusion patterns detected: ${chalk.bold(totalIssues)}`
207
+ )
144
208
  );
145
209
  console.log(
146
210
  chalk.red(
@@ -157,12 +221,16 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
157
221
  console.log(chalk.cyan(divider) + "\n");
158
222
  sortedTypes.forEach(([type, count]) => {
159
223
  const icon = getPatternIcon(type);
160
- console.log(`${icon} ${chalk.white(type.padEnd(15))} ${chalk.bold(count)}`);
224
+ console.log(
225
+ `${icon} ${chalk.white(type.padEnd(15))} ${chalk.bold(count)}`
226
+ );
161
227
  });
162
228
  }
163
229
  if (!finalOptions.showRawDuplicates && groups && groups.length > 0) {
164
230
  console.log(chalk.cyan("\n" + divider));
165
- console.log(chalk.bold.white(` \u{1F4E6} DUPLICATE GROUPS (${groups.length} file pairs)`));
231
+ console.log(
232
+ chalk.bold.white(` \u{1F4E6} DUPLICATE GROUPS (${groups.length} file pairs)`)
233
+ );
166
234
  console.log(chalk.cyan(divider) + "\n");
167
235
  const severityOrder = {
168
236
  critical: 4,
@@ -180,39 +248,63 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
180
248
  const [file1, file2] = group.filePair.split("::");
181
249
  const file1Name = file1.split("/").pop() || file1;
182
250
  const file2Name = file2.split("/").pop() || file2;
183
- console.log(`${idx + 1}. ${severityBadge} ${chalk.bold(file1Name)} \u2194 ${chalk.bold(file2Name)}`);
184
- console.log(` Occurrences: ${chalk.bold(group.occurrences)} | Total tokens: ${chalk.bold(group.totalTokenCost.toLocaleString())} | Avg similarity: ${chalk.bold(Math.round(group.averageSimilarity * 100) + "%")}`);
251
+ console.log(
252
+ `${idx + 1}. ${severityBadge} ${chalk.bold(file1Name)} \u2194 ${chalk.bold(file2Name)}`
253
+ );
254
+ console.log(
255
+ ` Occurrences: ${chalk.bold(group.occurrences)} | Total tokens: ${chalk.bold(group.totalTokenCost.toLocaleString())} | Avg similarity: ${chalk.bold(Math.round(group.averageSimilarity * 100) + "%")}`
256
+ );
185
257
  const displayRanges = group.lineRanges.slice(0, 3);
186
258
  displayRanges.forEach((range) => {
187
- console.log(` ${chalk.gray(file1)}:${chalk.cyan(`${range.file1.start}-${range.file1.end}`)} \u2194 ${chalk.gray(file2)}:${chalk.cyan(`${range.file2.start}-${range.file2.end}`)}`);
259
+ console.log(
260
+ ` ${chalk.gray(file1)}:${chalk.cyan(`${range.file1.start}-${range.file1.end}`)} \u2194 ${chalk.gray(file2)}:${chalk.cyan(`${range.file2.start}-${range.file2.end}`)}`
261
+ );
188
262
  });
189
263
  if (group.lineRanges.length > 3) {
190
- console.log(` ${chalk.gray(`... and ${group.lineRanges.length - 3} more ranges`)}`);
264
+ console.log(
265
+ ` ${chalk.gray(`... and ${group.lineRanges.length - 3} more ranges`)}`
266
+ );
191
267
  }
192
268
  console.log();
193
269
  });
194
270
  if (groups.length > topGroups.length) {
195
- console.log(chalk.gray(` ... and ${groups.length - topGroups.length} more file pairs`));
271
+ console.log(
272
+ chalk.gray(
273
+ ` ... and ${groups.length - topGroups.length} more file pairs`
274
+ )
275
+ );
196
276
  }
197
277
  }
198
278
  if (!finalOptions.showRawDuplicates && clusters && clusters.length > 0) {
199
279
  console.log(chalk.cyan("\n" + divider));
200
- console.log(chalk.bold.white(` \u{1F3AF} REFACTOR CLUSTERS (${clusters.length} patterns)`));
280
+ console.log(
281
+ chalk.bold.white(` \u{1F3AF} REFACTOR CLUSTERS (${clusters.length} patterns)`)
282
+ );
201
283
  console.log(chalk.cyan(divider) + "\n");
202
284
  clusters.sort((a, b) => b.totalTokenCost - a.totalTokenCost).forEach((cluster, idx) => {
203
285
  const severityBadge = getSeverityBadge(cluster.severity);
204
- console.log(`${idx + 1}. ${severityBadge} ${chalk.bold(cluster.name)}`);
205
- console.log(` Total tokens: ${chalk.bold(cluster.totalTokenCost.toLocaleString())} | Avg similarity: ${chalk.bold(Math.round(cluster.averageSimilarity * 100) + "%")} | Duplicates: ${chalk.bold(cluster.duplicateCount)}`);
286
+ console.log(
287
+ `${idx + 1}. ${severityBadge} ${chalk.bold(cluster.name)}`
288
+ );
289
+ console.log(
290
+ ` Total tokens: ${chalk.bold(cluster.totalTokenCost.toLocaleString())} | Avg similarity: ${chalk.bold(Math.round(cluster.averageSimilarity * 100) + "%")} | Duplicates: ${chalk.bold(cluster.duplicateCount)}`
291
+ );
206
292
  const displayFiles = cluster.files.slice(0, 5);
207
- console.log(` Files (${cluster.files.length}): ${displayFiles.map((f) => chalk.gray(f.split("/").pop() || f)).join(", ")}`);
293
+ console.log(
294
+ ` Files (${cluster.files.length}): ${displayFiles.map((f) => chalk.gray(f.split("/").pop() || f)).join(", ")}`
295
+ );
208
296
  if (cluster.files.length > 5) {
209
- console.log(` ${chalk.gray(`... and ${cluster.files.length - 5} more files`)}`);
297
+ console.log(
298
+ ` ${chalk.gray(`... and ${cluster.files.length - 5} more files`)}`
299
+ );
210
300
  }
211
301
  if (cluster.reason) {
212
302
  console.log(` ${chalk.italic.gray(cluster.reason)}`);
213
303
  }
214
304
  if (cluster.suggestion) {
215
- console.log(` ${chalk.cyan("\u2192")} ${chalk.italic(cluster.suggestion)}`);
305
+ console.log(
306
+ ` ${chalk.cyan("\u2192")} ${chalk.italic(cluster.suggestion)}`
307
+ );
216
308
  }
217
309
  console.log();
218
310
  });
@@ -236,10 +328,18 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
236
328
  const severityBadge = getSeverityBadge(dup.severity);
237
329
  const file1Name = dup.file1.split("/").pop() || dup.file1;
238
330
  const file2Name = dup.file2.split("/").pop() || dup.file2;
239
- console.log(`${severityBadge} ${chalk.bold(file1Name)} \u2194 ${chalk.bold(file2Name)}`);
240
- console.log(` Similarity: ${chalk.bold(Math.round(dup.similarity * 100) + "%")} | Pattern: ${dup.patternType} | Tokens: ${chalk.bold(dup.tokenCost.toLocaleString())}`);
241
- console.log(` ${chalk.gray(dup.file1)}:${chalk.cyan(dup.line1 + "-" + dup.endLine1)}`);
242
- console.log(` ${chalk.gray(dup.file2)}:${chalk.cyan(dup.line2 + "-" + dup.endLine2)}`);
331
+ console.log(
332
+ `${severityBadge} ${chalk.bold(file1Name)} \u2194 ${chalk.bold(file2Name)}`
333
+ );
334
+ console.log(
335
+ ` Similarity: ${chalk.bold(Math.round(dup.similarity * 100) + "%")} | Pattern: ${dup.patternType} | Tokens: ${chalk.bold(dup.tokenCost.toLocaleString())}`
336
+ );
337
+ console.log(
338
+ ` ${chalk.gray(dup.file1)}:${chalk.cyan(dup.line1 + "-" + dup.endLine1)}`
339
+ );
340
+ console.log(
341
+ ` ${chalk.gray(dup.file2)}:${chalk.cyan(dup.line2 + "-" + dup.endLine2)}`
342
+ );
243
343
  if (dup.reason) {
244
344
  console.log(` ${chalk.italic.gray(dup.reason)}`);
245
345
  }
@@ -249,7 +349,11 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
249
349
  console.log();
250
350
  });
251
351
  if (filteredDuplicates.length > topDuplicates.length) {
252
- console.log(chalk.gray(` ... and ${filteredDuplicates.length - topDuplicates.length} more duplicates`));
352
+ console.log(
353
+ chalk.gray(
354
+ ` ... and ${filteredDuplicates.length - topDuplicates.length} more duplicates`
355
+ )
356
+ );
253
357
  }
254
358
  }
255
359
  const allIssues = results.flatMap(
@@ -263,27 +367,45 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
263
367
  console.log(chalk.bold.white(" CRITICAL ISSUES (>95% similar)"));
264
368
  console.log(chalk.cyan(divider) + "\n");
265
369
  criticalIssues.slice(0, 5).forEach((issue) => {
266
- console.log(chalk.red("\u25CF ") + chalk.white(`${issue.file}:${issue.location.line}`));
370
+ console.log(
371
+ chalk.red("\u25CF ") + chalk.white(`${issue.file}:${issue.location.line}`)
372
+ );
267
373
  console.log(` ${chalk.dim(issue.message)}`);
268
- console.log(` ${chalk.green("\u2192")} ${chalk.italic(issue.suggestion)}
269
- `);
374
+ console.log(
375
+ ` ${chalk.green("\u2192")} ${chalk.italic(issue.suggestion)}
376
+ `
377
+ );
270
378
  });
271
379
  }
272
380
  if (totalIssues === 0) {
273
381
  console.log(chalk.green("\n\u2728 Great! No duplicate patterns detected.\n"));
274
- console.log(chalk.yellow("\u{1F4A1} If you expected to find duplicates, try adjusting parameters:"));
275
- console.log(chalk.dim(" \u2022 Lower similarity threshold: --similarity 0.3"));
382
+ console.log(
383
+ chalk.yellow(
384
+ "\u{1F4A1} If you expected to find duplicates, try adjusting parameters:"
385
+ )
386
+ );
387
+ console.log(
388
+ chalk.dim(" \u2022 Lower similarity threshold: --similarity 0.3")
389
+ );
276
390
  console.log(chalk.dim(" \u2022 Reduce minimum lines: --min-lines 3"));
277
391
  console.log(chalk.dim(" \u2022 Include test files: --include-tests"));
278
- console.log(chalk.dim(" \u2022 Lower shared tokens threshold: --min-shared-tokens 5"));
392
+ console.log(
393
+ chalk.dim(" \u2022 Lower shared tokens threshold: --min-shared-tokens 5")
394
+ );
279
395
  console.log("");
280
396
  }
281
397
  if (totalIssues > 0 && totalIssues < 5) {
282
- console.log(chalk.yellow("\n\u{1F4A1} Few results found. To find more duplicates, try:"));
283
- console.log(chalk.dim(" \u2022 Lower similarity threshold: --similarity 0.3"));
398
+ console.log(
399
+ chalk.yellow("\n\u{1F4A1} Few results found. To find more duplicates, try:")
400
+ );
401
+ console.log(
402
+ chalk.dim(" \u2022 Lower similarity threshold: --similarity 0.3")
403
+ );
284
404
  console.log(chalk.dim(" \u2022 Reduce minimum lines: --min-lines 3"));
285
405
  console.log(chalk.dim(" \u2022 Include test files: --include-tests"));
286
- console.log(chalk.dim(" \u2022 Lower shared tokens threshold: --min-shared-tokens 5"));
406
+ console.log(
407
+ chalk.dim(" \u2022 Lower shared tokens threshold: --min-shared-tokens 5")
408
+ );
287
409
  console.log("");
288
410
  }
289
411
  console.log(chalk.cyan(divider));
@@ -301,7 +423,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
301
423
  )
302
424
  );
303
425
  console.log(
304
- chalk.dim("\u{1F41B} Found a bug? Report it: https://github.com/caopengau/aiready-pattern-detect/issues\n")
426
+ chalk.dim(
427
+ "\u{1F41B} Found a bug? Report it: https://github.com/caopengau/aiready-pattern-detect/issues\n"
428
+ )
305
429
  );
306
430
  });
307
431
  function getPatternIcon(type) {
package/dist/index.d.mts CHANGED
@@ -57,6 +57,7 @@ interface DetectionOptions {
57
57
  maxCandidatesPerBlock?: number;
58
58
  maxComparisons?: number;
59
59
  streamResults?: boolean;
60
+ onProgress?: (processed: number, total: number, message: string) => void;
60
61
  }
61
62
  /**
62
63
  * Detect duplicate patterns across files with enhanced analysis
@@ -130,6 +131,7 @@ interface PatternDetectOptions extends ScanOptions {
130
131
  createClusters?: boolean;
131
132
  minClusterTokenCost?: number;
132
133
  minClusterFiles?: number;
134
+ onProgress?: (processed: number, total: number, message: string) => void;
133
135
  }
134
136
  interface PatternSummary {
135
137
  totalPatterns: number;
package/dist/index.d.ts CHANGED
@@ -57,6 +57,7 @@ interface DetectionOptions {
57
57
  maxCandidatesPerBlock?: number;
58
58
  maxComparisons?: number;
59
59
  streamResults?: boolean;
60
+ onProgress?: (processed: number, total: number, message: string) => void;
60
61
  }
61
62
  /**
62
63
  * Detect duplicate patterns across files with enhanced analysis
@@ -130,6 +131,7 @@ interface PatternDetectOptions extends ScanOptions {
130
131
  createClusters?: boolean;
131
132
  minClusterTokenCost?: number;
132
133
  minClusterFiles?: number;
134
+ onProgress?: (processed: number, total: number, message: string) => void;
133
135
  }
134
136
  interface PatternSummary {
135
137
  totalPatterns: number;
package/dist/index.js CHANGED
@@ -113,8 +113,14 @@ function calculatePythonSimilarity(pattern1, pattern2) {
113
113
  }
114
114
  function calculateNameSimilarity(name1, name2) {
115
115
  if (name1 === name2) return 1;
116
- const clean1 = name1.replace(/^(get|set|is|has|create|delete|update|fetch)_?/, "");
117
- const clean2 = name2.replace(/^(get|set|is|has|create|delete|update|fetch)_?/, "");
116
+ const clean1 = name1.replace(
117
+ /^(get|set|is|has|create|delete|update|fetch)_?/,
118
+ ""
119
+ );
120
+ const clean2 = name2.replace(
121
+ /^(get|set|is|has|create|delete|update|fetch)_?/,
122
+ ""
123
+ );
118
124
  if (clean1 === clean2) return 0.9;
119
125
  if (clean1.includes(clean2) || clean2.includes(clean1)) {
120
126
  return 0.7;
@@ -146,7 +152,10 @@ function detectPythonAntiPatterns(patterns) {
146
152
  const antiPatterns = [];
147
153
  const nameGroups = /* @__PURE__ */ new Map();
148
154
  for (const pattern of patterns) {
149
- const baseName = pattern.name.replace(/^(get|set|create|delete|update)_/, "");
155
+ const baseName = pattern.name.replace(
156
+ /^(get|set|create|delete|update)_/,
157
+ ""
158
+ );
150
159
  if (!nameGroups.has(baseName)) {
151
160
  nameGroups.set(baseName, []);
152
161
  }
@@ -437,11 +446,15 @@ async function detectDuplicatePatterns(files, options) {
437
446
  linesOfCode: block.linesOfCode
438
447
  }))
439
448
  );
440
- console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
449
+ if (!options.onProgress) {
450
+ console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
451
+ }
441
452
  const pythonFiles = files.filter((f) => f.file.toLowerCase().endsWith(".py"));
442
453
  if (pythonFiles.length > 0) {
443
454
  const { extractPythonPatterns: extractPythonPatterns2 } = await Promise.resolve().then(() => (init_python_extractor(), python_extractor_exports));
444
- const patterns = await extractPythonPatterns2(pythonFiles.map((f) => f.file));
455
+ const patterns = await extractPythonPatterns2(
456
+ pythonFiles.map((f) => f.file)
457
+ );
445
458
  const pythonBlocks = patterns.filter((p) => p.code && p.code.trim().length > 0).map((p) => ({
446
459
  content: p.code,
447
460
  startLine: p.startLine,
@@ -453,11 +466,17 @@ async function detectDuplicatePatterns(files, options) {
453
466
  linesOfCode: p.endLine - p.startLine + 1
454
467
  }));
455
468
  allBlocks.push(...pythonBlocks);
456
- console.log(`Added ${pythonBlocks.length} Python patterns`);
469
+ if (!options.onProgress) {
470
+ console.log(`Added ${pythonBlocks.length} Python patterns`);
471
+ }
457
472
  }
458
473
  if (!approx && allBlocks.length > 500) {
459
- console.log(`\u26A0\uFE0F Using --no-approx mode with ${allBlocks.length} blocks may be slow (O(B\xB2) complexity).`);
460
- console.log(` Consider using approximate mode (default) for better performance.`);
474
+ console.log(
475
+ `\u26A0\uFE0F Using --no-approx mode with ${allBlocks.length} blocks may be slow (O(B\xB2) complexity).`
476
+ );
477
+ console.log(
478
+ ` Consider using approximate mode (default) for better performance.`
479
+ );
461
480
  }
462
481
  const stopwords = /* @__PURE__ */ new Set([
463
482
  "return",
@@ -487,7 +506,11 @@ async function detectDuplicatePatterns(files, options) {
487
506
  "undefined",
488
507
  "this"
489
508
  ]);
490
- const tokenize = (norm) => norm.split(/[\s(){}\[\];,\.]+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
509
+ const tokenize = (norm) => {
510
+ const punctuation = "(){}[];.,";
511
+ const cleaned = norm.split("").map((ch) => punctuation.includes(ch) ? " " : ch).join("");
512
+ return cleaned.split(/\s+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
513
+ };
491
514
  const blockTokens = allBlocks.map((b) => tokenize(b.normalized));
492
515
  const invertedIndex = /* @__PURE__ */ new Map();
493
516
  if (approx) {
@@ -504,9 +527,13 @@ async function detectDuplicatePatterns(files, options) {
504
527
  }
505
528
  const totalComparisons = approx ? void 0 : allBlocks.length * (allBlocks.length - 1) / 2;
506
529
  if (totalComparisons !== void 0) {
507
- console.log(`Processing ${totalComparisons.toLocaleString()} comparisons in batches...`);
530
+ console.log(
531
+ `Processing ${totalComparisons.toLocaleString()} comparisons in batches...`
532
+ );
508
533
  } else {
509
- console.log(`Using approximate candidate selection to reduce comparisons...`);
534
+ console.log(
535
+ `Using approximate candidate selection to reduce comparisons...`
536
+ );
510
537
  }
511
538
  let comparisonsProcessed = 0;
512
539
  let comparisonsBudgetExhausted = false;
@@ -517,16 +544,24 @@ async function detectDuplicatePatterns(files, options) {
517
544
  break;
518
545
  }
519
546
  if (i % batchSize === 0 && i > 0) {
520
- const elapsed = ((Date.now() - startTime) / 1e3).toFixed(1);
521
- const duplicatesFound = duplicates.length;
522
- if (totalComparisons !== void 0) {
523
- const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
524
- const remaining = totalComparisons - comparisonsProcessed;
525
- const rate = comparisonsProcessed / parseFloat(elapsed);
526
- const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
527
- console.log(` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`);
547
+ if (options.onProgress) {
548
+ options.onProgress(i, allBlocks.length, `pattern-detect: analyzing blocks`);
528
549
  } else {
529
- console.log(` Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`);
550
+ const elapsed = ((Date.now() - startTime) / 1e3).toFixed(1);
551
+ const duplicatesFound = duplicates.length;
552
+ if (totalComparisons !== void 0) {
553
+ const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
554
+ const remaining = totalComparisons - comparisonsProcessed;
555
+ const rate = comparisonsProcessed / parseFloat(elapsed);
556
+ const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
557
+ console.log(
558
+ ` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`
559
+ );
560
+ } else {
561
+ console.log(
562
+ ` Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`
563
+ );
564
+ }
530
565
  }
531
566
  await new Promise((resolve) => setImmediate(resolve));
532
567
  }
@@ -560,8 +595,12 @@ async function detectDuplicatePatterns(files, options) {
560
595
  if (approx && candidates) {
561
596
  for (const { j } of candidates) {
562
597
  if (!approx && maxComparisons !== Infinity && comparisonsProcessed >= maxComparisons) {
563
- console.log(`\u26A0\uFE0F Comparison safety limit reached (${maxComparisons.toLocaleString()} comparisons in --no-approx mode).`);
564
- console.log(` This prevents excessive runtime on large repos. Consider using approximate mode (default) or --min-lines to reduce blocks.`);
598
+ console.log(
599
+ `\u26A0\uFE0F Comparison safety limit reached (${maxComparisons.toLocaleString()} comparisons in --no-approx mode).`
600
+ );
601
+ console.log(
602
+ ` This prevents excessive runtime on large repos. Consider using approximate mode (default) or --min-lines to reduce blocks.`
603
+ );
565
604
  break;
566
605
  }
567
606
  comparisonsProcessed++;
@@ -594,10 +633,16 @@ async function detectDuplicatePatterns(files, options) {
594
633
  };
595
634
  duplicates.push(duplicate);
596
635
  if (streamResults) {
597
- console.log(`
598
- \u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
599
- console.log(` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
600
- console.log(` Token cost: ${duplicate.tokenCost.toLocaleString()}`);
636
+ console.log(
637
+ `
638
+ \u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`
639
+ );
640
+ console.log(
641
+ ` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`
642
+ );
643
+ console.log(
644
+ ` Token cost: ${duplicate.tokenCost.toLocaleString()}`
645
+ );
601
646
  }
602
647
  }
603
648
  }
@@ -635,17 +680,25 @@ async function detectDuplicatePatterns(files, options) {
635
680
  };
636
681
  duplicates.push(duplicate);
637
682
  if (streamResults) {
638
- console.log(`
639
- \u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
640
- console.log(` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
641
- console.log(` Token cost: ${duplicate.tokenCost.toLocaleString()}`);
683
+ console.log(
684
+ `
685
+ \u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`
686
+ );
687
+ console.log(
688
+ ` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`
689
+ );
690
+ console.log(
691
+ ` Token cost: ${duplicate.tokenCost.toLocaleString()}`
692
+ );
642
693
  }
643
694
  }
644
695
  }
645
696
  }
646
697
  }
647
698
  if (comparisonsBudgetExhausted) {
648
- console.log(`\u26A0\uFE0F Comparison budget exhausted (${maxComparisons.toLocaleString()} comparisons). Use --max-comparisons to increase.`);
699
+ console.log(
700
+ `\u26A0\uFE0F Comparison budget exhausted (${maxComparisons.toLocaleString()} comparisons). Use --max-comparisons to increase.`
701
+ );
649
702
  }
650
703
  return duplicates.sort(
651
704
  (a, b) => b.similarity - a.similarity || b.tokenCost - a.tokenCost
@@ -671,7 +724,10 @@ function groupDuplicatesByFilePair(duplicates) {
671
724
  const result = [];
672
725
  for (const [filePair, groupDups] of groups.entries()) {
673
726
  const deduplicated = deduplicateOverlappingRanges(groupDups);
674
- const totalTokenCost = deduplicated.reduce((sum, d) => sum + d.tokenCost, 0);
727
+ const totalTokenCost = deduplicated.reduce(
728
+ (sum, d) => sum + d.tokenCost,
729
+ 0
730
+ );
675
731
  const averageSimilarity = deduplicated.reduce((sum, d) => sum + d.similarity, 0) / deduplicated.length;
676
732
  const maxSimilarity = Math.max(...deduplicated.map((d) => d.similarity));
677
733
  const severity = getHighestSeverity(deduplicated.map((d) => d.severity));
@@ -777,7 +833,9 @@ function identifyCluster(dup) {
777
833
  if ((file1.includes("/components/") || file1.startsWith("components/")) && (file2.includes("/components/") || file2.startsWith("components/")) && dup.patternType === "component") {
778
834
  const component1 = extractComponentName(dup.file1);
779
835
  const component2 = extractComponentName(dup.file2);
780
- console.log(`Component check: ${dup.file1} -> ${component1}, ${dup.file2} -> ${component2}`);
836
+ console.log(
837
+ `Component check: ${dup.file1} -> ${component1}, ${dup.file2} -> ${component2}`
838
+ );
781
839
  if (component1 && component2 && areSimilarComponents(component1, component2)) {
782
840
  const category = getComponentCategory(component1);
783
841
  console.log(`Creating cluster: component-${category}`);
@@ -876,7 +934,7 @@ function getClusterInfo(clusterId, patternType, fileCount) {
876
934
  suggestion: "Extract common middleware, error handling, and response formatting",
877
935
  reason: "API handler duplication leads to inconsistent error handling and response formats"
878
936
  },
879
- "validators": {
937
+ validators: {
880
938
  name: `Validator Patterns (${fileCount} files)`,
881
939
  suggestion: "Consolidate into shared schema validators (Zod/Yup) with reusable rules",
882
940
  reason: "Validator duplication causes inconsistent validation and harder maintenance"
@@ -929,7 +987,12 @@ function calculatePatternScore(duplicates, totalFilesAnalyzed, costConfig) {
929
987
  return {
930
988
  toolName: "pattern-detect",
931
989
  score: 100,
932
- rawMetrics: { totalDuplicates: 0, totalTokenCost: 0, highImpactDuplicates: 0, totalFilesAnalyzed: 0 },
990
+ rawMetrics: {
991
+ totalDuplicates: 0,
992
+ totalTokenCost: 0,
993
+ highImpactDuplicates: 0,
994
+ totalFilesAnalyzed: 0
995
+ },
933
996
  factors: [],
934
997
  recommendations: []
935
998
  };
@@ -1053,13 +1116,22 @@ async function getSmartDefaults(directory, userOptions) {
1053
1116
  const { scanFiles: scanFiles2 } = await import("@aiready/core");
1054
1117
  const files = await scanFiles2(scanOptions);
1055
1118
  const estimatedBlocks = files.length * 3;
1056
- const maxCandidatesPerBlock = Math.max(3, Math.min(10, Math.floor(3e4 / estimatedBlocks)));
1119
+ const maxCandidatesPerBlock = Math.max(
1120
+ 3,
1121
+ Math.min(10, Math.floor(3e4 / estimatedBlocks))
1122
+ );
1057
1123
  const minSimilarity = Math.min(0.75, 0.5 + estimatedBlocks / 1e4 * 0.25);
1058
- const minLines = Math.max(6, Math.min(12, 6 + Math.floor(estimatedBlocks / 2e3)));
1059
- const minSharedTokens = Math.max(10, Math.min(20, 10 + Math.floor(estimatedBlocks / 2e3)));
1124
+ const minLines = Math.max(
1125
+ 6,
1126
+ Math.min(12, 6 + Math.floor(estimatedBlocks / 2e3))
1127
+ );
1128
+ const minSharedTokens = Math.max(
1129
+ 10,
1130
+ Math.min(20, 10 + Math.floor(estimatedBlocks / 2e3))
1131
+ );
1060
1132
  const batchSize = estimatedBlocks > 1e3 ? 200 : 100;
1061
1133
  const severity = estimatedBlocks > 5e3 ? "high" : "all";
1062
- let defaults = {
1134
+ const defaults = {
1063
1135
  rootDir: directory,
1064
1136
  minSimilarity,
1065
1137
  minLines,
@@ -1129,7 +1201,8 @@ async function analyzePatterns(options) {
1129
1201
  approx,
1130
1202
  minSharedTokens,
1131
1203
  maxCandidatesPerBlock,
1132
- streamResults
1204
+ streamResults,
1205
+ onProgress: options.onProgress
1133
1206
  });
1134
1207
  for (const file of files) {
1135
1208
  const fileDuplicates = duplicates.filter(
@@ -1157,7 +1230,9 @@ async function analyzePatterns(options) {
1157
1230
  medium: ["critical", "major", "minor"]
1158
1231
  };
1159
1232
  const allowedSeverities = severityMap[severity] || ["critical", "major", "minor"];
1160
- filteredIssues = issues.filter((issue) => allowedSeverities.includes(issue.severity));
1233
+ filteredIssues = issues.filter(
1234
+ (issue) => allowedSeverities.includes(issue.severity)
1235
+ );
1161
1236
  }
1162
1237
  const totalTokenCost = fileDuplicates.reduce(
1163
1238
  (sum, dup) => sum + dup.tokenCost,
@@ -1179,7 +1254,11 @@ async function analyzePatterns(options) {
1179
1254
  }
1180
1255
  if (createClusters) {
1181
1256
  const allClusters = createRefactorClusters(duplicates);
1182
- clusters = filterClustersByImpact(allClusters, minClusterTokenCost, minClusterFiles);
1257
+ clusters = filterClustersByImpact(
1258
+ allClusters,
1259
+ minClusterTokenCost,
1260
+ minClusterFiles
1261
+ );
1183
1262
  }
1184
1263
  return { results, duplicates, files, groups, clusters };
1185
1264
  }
package/dist/index.mjs CHANGED
@@ -7,7 +7,7 @@ import {
7
7
  generateSummary,
8
8
  getSeverityLabel,
9
9
  getSmartDefaults
10
- } from "./chunk-WKBCNITM.mjs";
10
+ } from "./chunk-YSDOUNJJ.mjs";
11
11
  export {
12
12
  analyzePatterns,
13
13
  calculatePatternScore,