@aiready/pattern-detect 0.11.31 → 0.11.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +8 -1
- package/README.md +1 -1
- package/dist/chunk-SLDK5PQK.mjs +1129 -0
- package/dist/cli.js +244 -63
- package/dist/cli.mjs +160 -36
- package/dist/index.js +96 -30
- package/dist/index.mjs +1 -1
- package/dist/python-extractor-ELAKYK2W.mjs +140 -0
- package/package.json +2 -2
package/dist/cli.mjs
CHANGED
|
@@ -3,16 +3,65 @@ import {
|
|
|
3
3
|
analyzePatterns,
|
|
4
4
|
filterBySeverity,
|
|
5
5
|
generateSummary
|
|
6
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-SLDK5PQK.mjs";
|
|
7
7
|
|
|
8
8
|
// src/cli.ts
|
|
9
9
|
import { Command } from "commander";
|
|
10
10
|
import chalk from "chalk";
|
|
11
11
|
import { writeFileSync, mkdirSync, existsSync } from "fs";
|
|
12
12
|
import { dirname } from "path";
|
|
13
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
loadConfig,
|
|
15
|
+
mergeConfigWithDefaults,
|
|
16
|
+
resolveOutputPath
|
|
17
|
+
} from "@aiready/core";
|
|
14
18
|
var program = new Command();
|
|
15
|
-
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").addHelpText(
|
|
19
|
+
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").addHelpText(
|
|
20
|
+
"after",
|
|
21
|
+
"\nCONFIGURATION:\n Supports config files: aiready.json, aiready.config.json, .aiready.json, .aireadyrc.json, aiready.config.js, .aireadyrc.js\n CLI options override config file settings\n\nPARAMETER TUNING:\n If you get too few results: decrease --similarity, --min-lines, or --min-shared-tokens\n If analysis is too slow: increase --min-lines, --min-shared-tokens, or decrease --max-candidates\n If you get too many false positives: increase --similarity or --min-lines\n\nEXAMPLES:\n aiready-patterns . # Basic analysis with smart defaults\n aiready-patterns . --similarity 0.3 --min-lines 3 # More sensitive detection\n aiready-patterns . --max-candidates 50 --no-approx # Slower but more thorough\n aiready-patterns . --output json > report.json # JSON export"
|
|
22
|
+
).argument("<directory>", "Directory to analyze").option(
|
|
23
|
+
"-s, --similarity <number>",
|
|
24
|
+
"Minimum similarity score (0-1). Lower = more results, higher = fewer but more accurate. Default: 0.4"
|
|
25
|
+
).option(
|
|
26
|
+
"-l, --min-lines <number>",
|
|
27
|
+
"Minimum lines to consider. Lower = more results, higher = faster analysis. Default: 5"
|
|
28
|
+
).option(
|
|
29
|
+
"--batch-size <number>",
|
|
30
|
+
"Batch size for comparisons. Higher = faster but more memory. Default: 100"
|
|
31
|
+
).option(
|
|
32
|
+
"--no-approx",
|
|
33
|
+
"Disable approximate candidate selection. Slower but more thorough on small repos"
|
|
34
|
+
).option(
|
|
35
|
+
"--min-shared-tokens <number>",
|
|
36
|
+
"Minimum shared tokens to consider a candidate. Higher = faster, fewer results. Default: 8"
|
|
37
|
+
).option(
|
|
38
|
+
"--max-candidates <number>",
|
|
39
|
+
"Maximum candidates per block. Higher = more thorough but slower. Default: 100"
|
|
40
|
+
).option(
|
|
41
|
+
"--no-stream-results",
|
|
42
|
+
"Disable incremental output (default: enabled)"
|
|
43
|
+
).option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
|
|
44
|
+
"--min-severity <level>",
|
|
45
|
+
"Minimum severity to show: critical|major|minor|info. Default: minor"
|
|
46
|
+
).option(
|
|
47
|
+
"--exclude-test-fixtures",
|
|
48
|
+
"Exclude test fixture duplication (beforeAll/afterAll)"
|
|
49
|
+
).option("--exclude-templates", "Exclude template file duplication").option(
|
|
50
|
+
"--include-tests",
|
|
51
|
+
"Include test files in analysis (excluded by default)"
|
|
52
|
+
).option(
|
|
53
|
+
"--max-results <number>",
|
|
54
|
+
"Maximum number of results to show in console output. Default: 10"
|
|
55
|
+
).option("--no-group-by-file-pair", "Disable grouping duplicates by file pair").option("--no-create-clusters", "Disable creating refactor clusters").option(
|
|
56
|
+
"--min-cluster-tokens <number>",
|
|
57
|
+
"Minimum token cost for cluster reporting. Default: 1000"
|
|
58
|
+
).option(
|
|
59
|
+
"--min-cluster-files <number>",
|
|
60
|
+
"Minimum files for cluster reporting. Default: 3"
|
|
61
|
+
).option(
|
|
62
|
+
"--show-raw-duplicates",
|
|
63
|
+
"Show raw duplicates instead of grouped view"
|
|
64
|
+
).option(
|
|
16
65
|
"-o, --output <format>",
|
|
17
66
|
"Output format: console, json, html",
|
|
18
67
|
"console"
|
|
@@ -77,16 +126,29 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
77
126
|
(pattern) => !testPatterns.includes(pattern)
|
|
78
127
|
);
|
|
79
128
|
}
|
|
80
|
-
const {
|
|
129
|
+
const {
|
|
130
|
+
results,
|
|
131
|
+
duplicates: rawDuplicates,
|
|
132
|
+
files,
|
|
133
|
+
groups,
|
|
134
|
+
clusters
|
|
135
|
+
} = await analyzePatterns(finalOptions);
|
|
81
136
|
let filteredDuplicates = rawDuplicates;
|
|
82
137
|
if (finalOptions.minSeverity) {
|
|
83
|
-
filteredDuplicates = filterBySeverity(
|
|
138
|
+
filteredDuplicates = filterBySeverity(
|
|
139
|
+
filteredDuplicates,
|
|
140
|
+
finalOptions.minSeverity
|
|
141
|
+
);
|
|
84
142
|
}
|
|
85
143
|
if (finalOptions.excludeTestFixtures) {
|
|
86
|
-
filteredDuplicates = filteredDuplicates.filter(
|
|
144
|
+
filteredDuplicates = filteredDuplicates.filter(
|
|
145
|
+
(d) => d.matchedRule !== "test-fixtures"
|
|
146
|
+
);
|
|
87
147
|
}
|
|
88
148
|
if (finalOptions.excludeTemplates) {
|
|
89
|
-
filteredDuplicates = filteredDuplicates.filter(
|
|
149
|
+
filteredDuplicates = filteredDuplicates.filter(
|
|
150
|
+
(d) => d.matchedRule !== "templates"
|
|
151
|
+
);
|
|
90
152
|
}
|
|
91
153
|
const elapsedTime = ((Date.now() - startTime) / 1e3).toFixed(2);
|
|
92
154
|
const summary = generateSummary(results);
|
|
@@ -140,7 +202,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
140
202
|
chalk.white(`\u{1F4C1} Files analyzed: ${chalk.bold(results.length)}`)
|
|
141
203
|
);
|
|
142
204
|
console.log(
|
|
143
|
-
chalk.yellow(
|
|
205
|
+
chalk.yellow(
|
|
206
|
+
`\u26A0 AI confusion patterns detected: ${chalk.bold(totalIssues)}`
|
|
207
|
+
)
|
|
144
208
|
);
|
|
145
209
|
console.log(
|
|
146
210
|
chalk.red(
|
|
@@ -157,12 +221,16 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
157
221
|
console.log(chalk.cyan(divider) + "\n");
|
|
158
222
|
sortedTypes.forEach(([type, count]) => {
|
|
159
223
|
const icon = getPatternIcon(type);
|
|
160
|
-
console.log(
|
|
224
|
+
console.log(
|
|
225
|
+
`${icon} ${chalk.white(type.padEnd(15))} ${chalk.bold(count)}`
|
|
226
|
+
);
|
|
161
227
|
});
|
|
162
228
|
}
|
|
163
229
|
if (!finalOptions.showRawDuplicates && groups && groups.length > 0) {
|
|
164
230
|
console.log(chalk.cyan("\n" + divider));
|
|
165
|
-
console.log(
|
|
231
|
+
console.log(
|
|
232
|
+
chalk.bold.white(` \u{1F4E6} DUPLICATE GROUPS (${groups.length} file pairs)`)
|
|
233
|
+
);
|
|
166
234
|
console.log(chalk.cyan(divider) + "\n");
|
|
167
235
|
const severityOrder = {
|
|
168
236
|
critical: 4,
|
|
@@ -180,39 +248,63 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
180
248
|
const [file1, file2] = group.filePair.split("::");
|
|
181
249
|
const file1Name = file1.split("/").pop() || file1;
|
|
182
250
|
const file2Name = file2.split("/").pop() || file2;
|
|
183
|
-
console.log(
|
|
184
|
-
|
|
251
|
+
console.log(
|
|
252
|
+
`${idx + 1}. ${severityBadge} ${chalk.bold(file1Name)} \u2194 ${chalk.bold(file2Name)}`
|
|
253
|
+
);
|
|
254
|
+
console.log(
|
|
255
|
+
` Occurrences: ${chalk.bold(group.occurrences)} | Total tokens: ${chalk.bold(group.totalTokenCost.toLocaleString())} | Avg similarity: ${chalk.bold(Math.round(group.averageSimilarity * 100) + "%")}`
|
|
256
|
+
);
|
|
185
257
|
const displayRanges = group.lineRanges.slice(0, 3);
|
|
186
258
|
displayRanges.forEach((range) => {
|
|
187
|
-
console.log(
|
|
259
|
+
console.log(
|
|
260
|
+
` ${chalk.gray(file1)}:${chalk.cyan(`${range.file1.start}-${range.file1.end}`)} \u2194 ${chalk.gray(file2)}:${chalk.cyan(`${range.file2.start}-${range.file2.end}`)}`
|
|
261
|
+
);
|
|
188
262
|
});
|
|
189
263
|
if (group.lineRanges.length > 3) {
|
|
190
|
-
console.log(
|
|
264
|
+
console.log(
|
|
265
|
+
` ${chalk.gray(`... and ${group.lineRanges.length - 3} more ranges`)}`
|
|
266
|
+
);
|
|
191
267
|
}
|
|
192
268
|
console.log();
|
|
193
269
|
});
|
|
194
270
|
if (groups.length > topGroups.length) {
|
|
195
|
-
console.log(
|
|
271
|
+
console.log(
|
|
272
|
+
chalk.gray(
|
|
273
|
+
` ... and ${groups.length - topGroups.length} more file pairs`
|
|
274
|
+
)
|
|
275
|
+
);
|
|
196
276
|
}
|
|
197
277
|
}
|
|
198
278
|
if (!finalOptions.showRawDuplicates && clusters && clusters.length > 0) {
|
|
199
279
|
console.log(chalk.cyan("\n" + divider));
|
|
200
|
-
console.log(
|
|
280
|
+
console.log(
|
|
281
|
+
chalk.bold.white(` \u{1F3AF} REFACTOR CLUSTERS (${clusters.length} patterns)`)
|
|
282
|
+
);
|
|
201
283
|
console.log(chalk.cyan(divider) + "\n");
|
|
202
284
|
clusters.sort((a, b) => b.totalTokenCost - a.totalTokenCost).forEach((cluster, idx) => {
|
|
203
285
|
const severityBadge = getSeverityBadge(cluster.severity);
|
|
204
|
-
console.log(
|
|
205
|
-
|
|
286
|
+
console.log(
|
|
287
|
+
`${idx + 1}. ${severityBadge} ${chalk.bold(cluster.name)}`
|
|
288
|
+
);
|
|
289
|
+
console.log(
|
|
290
|
+
` Total tokens: ${chalk.bold(cluster.totalTokenCost.toLocaleString())} | Avg similarity: ${chalk.bold(Math.round(cluster.averageSimilarity * 100) + "%")} | Duplicates: ${chalk.bold(cluster.duplicateCount)}`
|
|
291
|
+
);
|
|
206
292
|
const displayFiles = cluster.files.slice(0, 5);
|
|
207
|
-
console.log(
|
|
293
|
+
console.log(
|
|
294
|
+
` Files (${cluster.files.length}): ${displayFiles.map((f) => chalk.gray(f.split("/").pop() || f)).join(", ")}`
|
|
295
|
+
);
|
|
208
296
|
if (cluster.files.length > 5) {
|
|
209
|
-
console.log(
|
|
297
|
+
console.log(
|
|
298
|
+
` ${chalk.gray(`... and ${cluster.files.length - 5} more files`)}`
|
|
299
|
+
);
|
|
210
300
|
}
|
|
211
301
|
if (cluster.reason) {
|
|
212
302
|
console.log(` ${chalk.italic.gray(cluster.reason)}`);
|
|
213
303
|
}
|
|
214
304
|
if (cluster.suggestion) {
|
|
215
|
-
console.log(
|
|
305
|
+
console.log(
|
|
306
|
+
` ${chalk.cyan("\u2192")} ${chalk.italic(cluster.suggestion)}`
|
|
307
|
+
);
|
|
216
308
|
}
|
|
217
309
|
console.log();
|
|
218
310
|
});
|
|
@@ -236,10 +328,18 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
236
328
|
const severityBadge = getSeverityBadge(dup.severity);
|
|
237
329
|
const file1Name = dup.file1.split("/").pop() || dup.file1;
|
|
238
330
|
const file2Name = dup.file2.split("/").pop() || dup.file2;
|
|
239
|
-
console.log(
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
console.log(
|
|
331
|
+
console.log(
|
|
332
|
+
`${severityBadge} ${chalk.bold(file1Name)} \u2194 ${chalk.bold(file2Name)}`
|
|
333
|
+
);
|
|
334
|
+
console.log(
|
|
335
|
+
` Similarity: ${chalk.bold(Math.round(dup.similarity * 100) + "%")} | Pattern: ${dup.patternType} | Tokens: ${chalk.bold(dup.tokenCost.toLocaleString())}`
|
|
336
|
+
);
|
|
337
|
+
console.log(
|
|
338
|
+
` ${chalk.gray(dup.file1)}:${chalk.cyan(dup.line1 + "-" + dup.endLine1)}`
|
|
339
|
+
);
|
|
340
|
+
console.log(
|
|
341
|
+
` ${chalk.gray(dup.file2)}:${chalk.cyan(dup.line2 + "-" + dup.endLine2)}`
|
|
342
|
+
);
|
|
243
343
|
if (dup.reason) {
|
|
244
344
|
console.log(` ${chalk.italic.gray(dup.reason)}`);
|
|
245
345
|
}
|
|
@@ -249,7 +349,11 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
249
349
|
console.log();
|
|
250
350
|
});
|
|
251
351
|
if (filteredDuplicates.length > topDuplicates.length) {
|
|
252
|
-
console.log(
|
|
352
|
+
console.log(
|
|
353
|
+
chalk.gray(
|
|
354
|
+
` ... and ${filteredDuplicates.length - topDuplicates.length} more duplicates`
|
|
355
|
+
)
|
|
356
|
+
);
|
|
253
357
|
}
|
|
254
358
|
}
|
|
255
359
|
const allIssues = results.flatMap(
|
|
@@ -263,27 +367,45 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
263
367
|
console.log(chalk.bold.white(" CRITICAL ISSUES (>95% similar)"));
|
|
264
368
|
console.log(chalk.cyan(divider) + "\n");
|
|
265
369
|
criticalIssues.slice(0, 5).forEach((issue) => {
|
|
266
|
-
console.log(
|
|
370
|
+
console.log(
|
|
371
|
+
chalk.red("\u25CF ") + chalk.white(`${issue.file}:${issue.location.line}`)
|
|
372
|
+
);
|
|
267
373
|
console.log(` ${chalk.dim(issue.message)}`);
|
|
268
|
-
console.log(
|
|
269
|
-
`)
|
|
374
|
+
console.log(
|
|
375
|
+
` ${chalk.green("\u2192")} ${chalk.italic(issue.suggestion)}
|
|
376
|
+
`
|
|
377
|
+
);
|
|
270
378
|
});
|
|
271
379
|
}
|
|
272
380
|
if (totalIssues === 0) {
|
|
273
381
|
console.log(chalk.green("\n\u2728 Great! No duplicate patterns detected.\n"));
|
|
274
|
-
console.log(
|
|
275
|
-
|
|
382
|
+
console.log(
|
|
383
|
+
chalk.yellow(
|
|
384
|
+
"\u{1F4A1} If you expected to find duplicates, try adjusting parameters:"
|
|
385
|
+
)
|
|
386
|
+
);
|
|
387
|
+
console.log(
|
|
388
|
+
chalk.dim(" \u2022 Lower similarity threshold: --similarity 0.3")
|
|
389
|
+
);
|
|
276
390
|
console.log(chalk.dim(" \u2022 Reduce minimum lines: --min-lines 3"));
|
|
277
391
|
console.log(chalk.dim(" \u2022 Include test files: --include-tests"));
|
|
278
|
-
console.log(
|
|
392
|
+
console.log(
|
|
393
|
+
chalk.dim(" \u2022 Lower shared tokens threshold: --min-shared-tokens 5")
|
|
394
|
+
);
|
|
279
395
|
console.log("");
|
|
280
396
|
}
|
|
281
397
|
if (totalIssues > 0 && totalIssues < 5) {
|
|
282
|
-
console.log(
|
|
283
|
-
|
|
398
|
+
console.log(
|
|
399
|
+
chalk.yellow("\n\u{1F4A1} Few results found. To find more duplicates, try:")
|
|
400
|
+
);
|
|
401
|
+
console.log(
|
|
402
|
+
chalk.dim(" \u2022 Lower similarity threshold: --similarity 0.3")
|
|
403
|
+
);
|
|
284
404
|
console.log(chalk.dim(" \u2022 Reduce minimum lines: --min-lines 3"));
|
|
285
405
|
console.log(chalk.dim(" \u2022 Include test files: --include-tests"));
|
|
286
|
-
console.log(
|
|
406
|
+
console.log(
|
|
407
|
+
chalk.dim(" \u2022 Lower shared tokens threshold: --min-shared-tokens 5")
|
|
408
|
+
);
|
|
287
409
|
console.log("");
|
|
288
410
|
}
|
|
289
411
|
console.log(chalk.cyan(divider));
|
|
@@ -301,7 +423,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
301
423
|
)
|
|
302
424
|
);
|
|
303
425
|
console.log(
|
|
304
|
-
chalk.dim(
|
|
426
|
+
chalk.dim(
|
|
427
|
+
"\u{1F41B} Found a bug? Report it: https://github.com/caopengau/aiready-pattern-detect/issues\n"
|
|
428
|
+
)
|
|
305
429
|
);
|
|
306
430
|
});
|
|
307
431
|
function getPatternIcon(type) {
|
package/dist/index.js
CHANGED
|
@@ -113,8 +113,14 @@ function calculatePythonSimilarity(pattern1, pattern2) {
|
|
|
113
113
|
}
|
|
114
114
|
function calculateNameSimilarity(name1, name2) {
|
|
115
115
|
if (name1 === name2) return 1;
|
|
116
|
-
const clean1 = name1.replace(
|
|
117
|
-
|
|
116
|
+
const clean1 = name1.replace(
|
|
117
|
+
/^(get|set|is|has|create|delete|update|fetch)_?/,
|
|
118
|
+
""
|
|
119
|
+
);
|
|
120
|
+
const clean2 = name2.replace(
|
|
121
|
+
/^(get|set|is|has|create|delete|update|fetch)_?/,
|
|
122
|
+
""
|
|
123
|
+
);
|
|
118
124
|
if (clean1 === clean2) return 0.9;
|
|
119
125
|
if (clean1.includes(clean2) || clean2.includes(clean1)) {
|
|
120
126
|
return 0.7;
|
|
@@ -146,7 +152,10 @@ function detectPythonAntiPatterns(patterns) {
|
|
|
146
152
|
const antiPatterns = [];
|
|
147
153
|
const nameGroups = /* @__PURE__ */ new Map();
|
|
148
154
|
for (const pattern of patterns) {
|
|
149
|
-
const baseName = pattern.name.replace(
|
|
155
|
+
const baseName = pattern.name.replace(
|
|
156
|
+
/^(get|set|create|delete|update)_/,
|
|
157
|
+
""
|
|
158
|
+
);
|
|
150
159
|
if (!nameGroups.has(baseName)) {
|
|
151
160
|
nameGroups.set(baseName, []);
|
|
152
161
|
}
|
|
@@ -441,7 +450,9 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
441
450
|
const pythonFiles = files.filter((f) => f.file.toLowerCase().endsWith(".py"));
|
|
442
451
|
if (pythonFiles.length > 0) {
|
|
443
452
|
const { extractPythonPatterns: extractPythonPatterns2 } = await Promise.resolve().then(() => (init_python_extractor(), python_extractor_exports));
|
|
444
|
-
const patterns = await extractPythonPatterns2(
|
|
453
|
+
const patterns = await extractPythonPatterns2(
|
|
454
|
+
pythonFiles.map((f) => f.file)
|
|
455
|
+
);
|
|
445
456
|
const pythonBlocks = patterns.filter((p) => p.code && p.code.trim().length > 0).map((p) => ({
|
|
446
457
|
content: p.code,
|
|
447
458
|
startLine: p.startLine,
|
|
@@ -456,8 +467,12 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
456
467
|
console.log(`Added ${pythonBlocks.length} Python patterns`);
|
|
457
468
|
}
|
|
458
469
|
if (!approx && allBlocks.length > 500) {
|
|
459
|
-
console.log(
|
|
460
|
-
|
|
470
|
+
console.log(
|
|
471
|
+
`\u26A0\uFE0F Using --no-approx mode with ${allBlocks.length} blocks may be slow (O(B\xB2) complexity).`
|
|
472
|
+
);
|
|
473
|
+
console.log(
|
|
474
|
+
` Consider using approximate mode (default) for better performance.`
|
|
475
|
+
);
|
|
461
476
|
}
|
|
462
477
|
const stopwords = /* @__PURE__ */ new Set([
|
|
463
478
|
"return",
|
|
@@ -504,9 +519,13 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
504
519
|
}
|
|
505
520
|
const totalComparisons = approx ? void 0 : allBlocks.length * (allBlocks.length - 1) / 2;
|
|
506
521
|
if (totalComparisons !== void 0) {
|
|
507
|
-
console.log(
|
|
522
|
+
console.log(
|
|
523
|
+
`Processing ${totalComparisons.toLocaleString()} comparisons in batches...`
|
|
524
|
+
);
|
|
508
525
|
} else {
|
|
509
|
-
console.log(
|
|
526
|
+
console.log(
|
|
527
|
+
`Using approximate candidate selection to reduce comparisons...`
|
|
528
|
+
);
|
|
510
529
|
}
|
|
511
530
|
let comparisonsProcessed = 0;
|
|
512
531
|
let comparisonsBudgetExhausted = false;
|
|
@@ -524,9 +543,13 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
524
543
|
const remaining = totalComparisons - comparisonsProcessed;
|
|
525
544
|
const rate = comparisonsProcessed / parseFloat(elapsed);
|
|
526
545
|
const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
|
|
527
|
-
console.log(
|
|
546
|
+
console.log(
|
|
547
|
+
` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`
|
|
548
|
+
);
|
|
528
549
|
} else {
|
|
529
|
-
console.log(
|
|
550
|
+
console.log(
|
|
551
|
+
` Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`
|
|
552
|
+
);
|
|
530
553
|
}
|
|
531
554
|
await new Promise((resolve) => setImmediate(resolve));
|
|
532
555
|
}
|
|
@@ -560,8 +583,12 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
560
583
|
if (approx && candidates) {
|
|
561
584
|
for (const { j } of candidates) {
|
|
562
585
|
if (!approx && maxComparisons !== Infinity && comparisonsProcessed >= maxComparisons) {
|
|
563
|
-
console.log(
|
|
564
|
-
|
|
586
|
+
console.log(
|
|
587
|
+
`\u26A0\uFE0F Comparison safety limit reached (${maxComparisons.toLocaleString()} comparisons in --no-approx mode).`
|
|
588
|
+
);
|
|
589
|
+
console.log(
|
|
590
|
+
` This prevents excessive runtime on large repos. Consider using approximate mode (default) or --min-lines to reduce blocks.`
|
|
591
|
+
);
|
|
565
592
|
break;
|
|
566
593
|
}
|
|
567
594
|
comparisonsProcessed++;
|
|
@@ -594,10 +621,16 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
594
621
|
};
|
|
595
622
|
duplicates.push(duplicate);
|
|
596
623
|
if (streamResults) {
|
|
597
|
-
console.log(
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
624
|
+
console.log(
|
|
625
|
+
`
|
|
626
|
+
\u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`
|
|
627
|
+
);
|
|
628
|
+
console.log(
|
|
629
|
+
` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`
|
|
630
|
+
);
|
|
631
|
+
console.log(
|
|
632
|
+
` Token cost: ${duplicate.tokenCost.toLocaleString()}`
|
|
633
|
+
);
|
|
601
634
|
}
|
|
602
635
|
}
|
|
603
636
|
}
|
|
@@ -635,17 +668,25 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
635
668
|
};
|
|
636
669
|
duplicates.push(duplicate);
|
|
637
670
|
if (streamResults) {
|
|
638
|
-
console.log(
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
671
|
+
console.log(
|
|
672
|
+
`
|
|
673
|
+
\u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`
|
|
674
|
+
);
|
|
675
|
+
console.log(
|
|
676
|
+
` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`
|
|
677
|
+
);
|
|
678
|
+
console.log(
|
|
679
|
+
` Token cost: ${duplicate.tokenCost.toLocaleString()}`
|
|
680
|
+
);
|
|
642
681
|
}
|
|
643
682
|
}
|
|
644
683
|
}
|
|
645
684
|
}
|
|
646
685
|
}
|
|
647
686
|
if (comparisonsBudgetExhausted) {
|
|
648
|
-
console.log(
|
|
687
|
+
console.log(
|
|
688
|
+
`\u26A0\uFE0F Comparison budget exhausted (${maxComparisons.toLocaleString()} comparisons). Use --max-comparisons to increase.`
|
|
689
|
+
);
|
|
649
690
|
}
|
|
650
691
|
return duplicates.sort(
|
|
651
692
|
(a, b) => b.similarity - a.similarity || b.tokenCost - a.tokenCost
|
|
@@ -671,7 +712,10 @@ function groupDuplicatesByFilePair(duplicates) {
|
|
|
671
712
|
const result = [];
|
|
672
713
|
for (const [filePair, groupDups] of groups.entries()) {
|
|
673
714
|
const deduplicated = deduplicateOverlappingRanges(groupDups);
|
|
674
|
-
const totalTokenCost = deduplicated.reduce(
|
|
715
|
+
const totalTokenCost = deduplicated.reduce(
|
|
716
|
+
(sum, d) => sum + d.tokenCost,
|
|
717
|
+
0
|
|
718
|
+
);
|
|
675
719
|
const averageSimilarity = deduplicated.reduce((sum, d) => sum + d.similarity, 0) / deduplicated.length;
|
|
676
720
|
const maxSimilarity = Math.max(...deduplicated.map((d) => d.similarity));
|
|
677
721
|
const severity = getHighestSeverity(deduplicated.map((d) => d.severity));
|
|
@@ -777,7 +821,9 @@ function identifyCluster(dup) {
|
|
|
777
821
|
if ((file1.includes("/components/") || file1.startsWith("components/")) && (file2.includes("/components/") || file2.startsWith("components/")) && dup.patternType === "component") {
|
|
778
822
|
const component1 = extractComponentName(dup.file1);
|
|
779
823
|
const component2 = extractComponentName(dup.file2);
|
|
780
|
-
console.log(
|
|
824
|
+
console.log(
|
|
825
|
+
`Component check: ${dup.file1} -> ${component1}, ${dup.file2} -> ${component2}`
|
|
826
|
+
);
|
|
781
827
|
if (component1 && component2 && areSimilarComponents(component1, component2)) {
|
|
782
828
|
const category = getComponentCategory(component1);
|
|
783
829
|
console.log(`Creating cluster: component-${category}`);
|
|
@@ -876,7 +922,7 @@ function getClusterInfo(clusterId, patternType, fileCount) {
|
|
|
876
922
|
suggestion: "Extract common middleware, error handling, and response formatting",
|
|
877
923
|
reason: "API handler duplication leads to inconsistent error handling and response formats"
|
|
878
924
|
},
|
|
879
|
-
|
|
925
|
+
validators: {
|
|
880
926
|
name: `Validator Patterns (${fileCount} files)`,
|
|
881
927
|
suggestion: "Consolidate into shared schema validators (Zod/Yup) with reusable rules",
|
|
882
928
|
reason: "Validator duplication causes inconsistent validation and harder maintenance"
|
|
@@ -929,7 +975,12 @@ function calculatePatternScore(duplicates, totalFilesAnalyzed, costConfig) {
|
|
|
929
975
|
return {
|
|
930
976
|
toolName: "pattern-detect",
|
|
931
977
|
score: 100,
|
|
932
|
-
rawMetrics: {
|
|
978
|
+
rawMetrics: {
|
|
979
|
+
totalDuplicates: 0,
|
|
980
|
+
totalTokenCost: 0,
|
|
981
|
+
highImpactDuplicates: 0,
|
|
982
|
+
totalFilesAnalyzed: 0
|
|
983
|
+
},
|
|
933
984
|
factors: [],
|
|
934
985
|
recommendations: []
|
|
935
986
|
};
|
|
@@ -1053,10 +1104,19 @@ async function getSmartDefaults(directory, userOptions) {
|
|
|
1053
1104
|
const { scanFiles: scanFiles2 } = await import("@aiready/core");
|
|
1054
1105
|
const files = await scanFiles2(scanOptions);
|
|
1055
1106
|
const estimatedBlocks = files.length * 3;
|
|
1056
|
-
const maxCandidatesPerBlock = Math.max(
|
|
1107
|
+
const maxCandidatesPerBlock = Math.max(
|
|
1108
|
+
3,
|
|
1109
|
+
Math.min(10, Math.floor(3e4 / estimatedBlocks))
|
|
1110
|
+
);
|
|
1057
1111
|
const minSimilarity = Math.min(0.75, 0.5 + estimatedBlocks / 1e4 * 0.25);
|
|
1058
|
-
const minLines = Math.max(
|
|
1059
|
-
|
|
1112
|
+
const minLines = Math.max(
|
|
1113
|
+
6,
|
|
1114
|
+
Math.min(12, 6 + Math.floor(estimatedBlocks / 2e3))
|
|
1115
|
+
);
|
|
1116
|
+
const minSharedTokens = Math.max(
|
|
1117
|
+
10,
|
|
1118
|
+
Math.min(20, 10 + Math.floor(estimatedBlocks / 2e3))
|
|
1119
|
+
);
|
|
1060
1120
|
const batchSize = estimatedBlocks > 1e3 ? 200 : 100;
|
|
1061
1121
|
const severity = estimatedBlocks > 5e3 ? "high" : "all";
|
|
1062
1122
|
let defaults = {
|
|
@@ -1157,7 +1217,9 @@ async function analyzePatterns(options) {
|
|
|
1157
1217
|
medium: ["critical", "major", "minor"]
|
|
1158
1218
|
};
|
|
1159
1219
|
const allowedSeverities = severityMap[severity] || ["critical", "major", "minor"];
|
|
1160
|
-
filteredIssues = issues.filter(
|
|
1220
|
+
filteredIssues = issues.filter(
|
|
1221
|
+
(issue) => allowedSeverities.includes(issue.severity)
|
|
1222
|
+
);
|
|
1161
1223
|
}
|
|
1162
1224
|
const totalTokenCost = fileDuplicates.reduce(
|
|
1163
1225
|
(sum, dup) => sum + dup.tokenCost,
|
|
@@ -1179,7 +1241,11 @@ async function analyzePatterns(options) {
|
|
|
1179
1241
|
}
|
|
1180
1242
|
if (createClusters) {
|
|
1181
1243
|
const allClusters = createRefactorClusters(duplicates);
|
|
1182
|
-
clusters = filterClustersByImpact(
|
|
1244
|
+
clusters = filterClustersByImpact(
|
|
1245
|
+
allClusters,
|
|
1246
|
+
minClusterTokenCost,
|
|
1247
|
+
minClusterFiles
|
|
1248
|
+
);
|
|
1183
1249
|
}
|
|
1184
1250
|
return { results, duplicates, files, groups, clusters };
|
|
1185
1251
|
}
|