@aiready/pattern-detect 0.11.31 → 0.11.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +8 -1
- package/README.md +1 -1
- package/dist/chunk-FWUKMJEQ.mjs +1133 -0
- package/dist/chunk-SLDK5PQK.mjs +1129 -0
- package/dist/chunk-YSDOUNJJ.mjs +1142 -0
- package/dist/cli.js +269 -75
- package/dist/cli.mjs +160 -36
- package/dist/index.d.mts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +121 -42
- package/dist/index.mjs +1 -1
- package/dist/python-extractor-ELAKYK2W.mjs +140 -0
- package/package.json +2 -2
package/dist/cli.mjs
CHANGED
|
@@ -3,16 +3,65 @@ import {
|
|
|
3
3
|
analyzePatterns,
|
|
4
4
|
filterBySeverity,
|
|
5
5
|
generateSummary
|
|
6
|
-
} from "./chunk-
|
|
6
|
+
} from "./chunk-YSDOUNJJ.mjs";
|
|
7
7
|
|
|
8
8
|
// src/cli.ts
|
|
9
9
|
import { Command } from "commander";
|
|
10
10
|
import chalk from "chalk";
|
|
11
11
|
import { writeFileSync, mkdirSync, existsSync } from "fs";
|
|
12
12
|
import { dirname } from "path";
|
|
13
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
loadConfig,
|
|
15
|
+
mergeConfigWithDefaults,
|
|
16
|
+
resolveOutputPath
|
|
17
|
+
} from "@aiready/core";
|
|
14
18
|
var program = new Command();
|
|
15
|
-
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").addHelpText(
|
|
19
|
+
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").addHelpText(
|
|
20
|
+
"after",
|
|
21
|
+
"\nCONFIGURATION:\n Supports config files: aiready.json, aiready.config.json, .aiready.json, .aireadyrc.json, aiready.config.js, .aireadyrc.js\n CLI options override config file settings\n\nPARAMETER TUNING:\n If you get too few results: decrease --similarity, --min-lines, or --min-shared-tokens\n If analysis is too slow: increase --min-lines, --min-shared-tokens, or decrease --max-candidates\n If you get too many false positives: increase --similarity or --min-lines\n\nEXAMPLES:\n aiready-patterns . # Basic analysis with smart defaults\n aiready-patterns . --similarity 0.3 --min-lines 3 # More sensitive detection\n aiready-patterns . --max-candidates 50 --no-approx # Slower but more thorough\n aiready-patterns . --output json > report.json # JSON export"
|
|
22
|
+
).argument("<directory>", "Directory to analyze").option(
|
|
23
|
+
"-s, --similarity <number>",
|
|
24
|
+
"Minimum similarity score (0-1). Lower = more results, higher = fewer but more accurate. Default: 0.4"
|
|
25
|
+
).option(
|
|
26
|
+
"-l, --min-lines <number>",
|
|
27
|
+
"Minimum lines to consider. Lower = more results, higher = faster analysis. Default: 5"
|
|
28
|
+
).option(
|
|
29
|
+
"--batch-size <number>",
|
|
30
|
+
"Batch size for comparisons. Higher = faster but more memory. Default: 100"
|
|
31
|
+
).option(
|
|
32
|
+
"--no-approx",
|
|
33
|
+
"Disable approximate candidate selection. Slower but more thorough on small repos"
|
|
34
|
+
).option(
|
|
35
|
+
"--min-shared-tokens <number>",
|
|
36
|
+
"Minimum shared tokens to consider a candidate. Higher = faster, fewer results. Default: 8"
|
|
37
|
+
).option(
|
|
38
|
+
"--max-candidates <number>",
|
|
39
|
+
"Maximum candidates per block. Higher = more thorough but slower. Default: 100"
|
|
40
|
+
).option(
|
|
41
|
+
"--no-stream-results",
|
|
42
|
+
"Disable incremental output (default: enabled)"
|
|
43
|
+
).option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
|
|
44
|
+
"--min-severity <level>",
|
|
45
|
+
"Minimum severity to show: critical|major|minor|info. Default: minor"
|
|
46
|
+
).option(
|
|
47
|
+
"--exclude-test-fixtures",
|
|
48
|
+
"Exclude test fixture duplication (beforeAll/afterAll)"
|
|
49
|
+
).option("--exclude-templates", "Exclude template file duplication").option(
|
|
50
|
+
"--include-tests",
|
|
51
|
+
"Include test files in analysis (excluded by default)"
|
|
52
|
+
).option(
|
|
53
|
+
"--max-results <number>",
|
|
54
|
+
"Maximum number of results to show in console output. Default: 10"
|
|
55
|
+
).option("--no-group-by-file-pair", "Disable grouping duplicates by file pair").option("--no-create-clusters", "Disable creating refactor clusters").option(
|
|
56
|
+
"--min-cluster-tokens <number>",
|
|
57
|
+
"Minimum token cost for cluster reporting. Default: 1000"
|
|
58
|
+
).option(
|
|
59
|
+
"--min-cluster-files <number>",
|
|
60
|
+
"Minimum files for cluster reporting. Default: 3"
|
|
61
|
+
).option(
|
|
62
|
+
"--show-raw-duplicates",
|
|
63
|
+
"Show raw duplicates instead of grouped view"
|
|
64
|
+
).option(
|
|
16
65
|
"-o, --output <format>",
|
|
17
66
|
"Output format: console, json, html",
|
|
18
67
|
"console"
|
|
@@ -77,16 +126,29 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
77
126
|
(pattern) => !testPatterns.includes(pattern)
|
|
78
127
|
);
|
|
79
128
|
}
|
|
80
|
-
const {
|
|
129
|
+
const {
|
|
130
|
+
results,
|
|
131
|
+
duplicates: rawDuplicates,
|
|
132
|
+
files,
|
|
133
|
+
groups,
|
|
134
|
+
clusters
|
|
135
|
+
} = await analyzePatterns(finalOptions);
|
|
81
136
|
let filteredDuplicates = rawDuplicates;
|
|
82
137
|
if (finalOptions.minSeverity) {
|
|
83
|
-
filteredDuplicates = filterBySeverity(
|
|
138
|
+
filteredDuplicates = filterBySeverity(
|
|
139
|
+
filteredDuplicates,
|
|
140
|
+
finalOptions.minSeverity
|
|
141
|
+
);
|
|
84
142
|
}
|
|
85
143
|
if (finalOptions.excludeTestFixtures) {
|
|
86
|
-
filteredDuplicates = filteredDuplicates.filter(
|
|
144
|
+
filteredDuplicates = filteredDuplicates.filter(
|
|
145
|
+
(d) => d.matchedRule !== "test-fixtures"
|
|
146
|
+
);
|
|
87
147
|
}
|
|
88
148
|
if (finalOptions.excludeTemplates) {
|
|
89
|
-
filteredDuplicates = filteredDuplicates.filter(
|
|
149
|
+
filteredDuplicates = filteredDuplicates.filter(
|
|
150
|
+
(d) => d.matchedRule !== "templates"
|
|
151
|
+
);
|
|
90
152
|
}
|
|
91
153
|
const elapsedTime = ((Date.now() - startTime) / 1e3).toFixed(2);
|
|
92
154
|
const summary = generateSummary(results);
|
|
@@ -140,7 +202,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
140
202
|
chalk.white(`\u{1F4C1} Files analyzed: ${chalk.bold(results.length)}`)
|
|
141
203
|
);
|
|
142
204
|
console.log(
|
|
143
|
-
chalk.yellow(
|
|
205
|
+
chalk.yellow(
|
|
206
|
+
`\u26A0 AI confusion patterns detected: ${chalk.bold(totalIssues)}`
|
|
207
|
+
)
|
|
144
208
|
);
|
|
145
209
|
console.log(
|
|
146
210
|
chalk.red(
|
|
@@ -157,12 +221,16 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
157
221
|
console.log(chalk.cyan(divider) + "\n");
|
|
158
222
|
sortedTypes.forEach(([type, count]) => {
|
|
159
223
|
const icon = getPatternIcon(type);
|
|
160
|
-
console.log(
|
|
224
|
+
console.log(
|
|
225
|
+
`${icon} ${chalk.white(type.padEnd(15))} ${chalk.bold(count)}`
|
|
226
|
+
);
|
|
161
227
|
});
|
|
162
228
|
}
|
|
163
229
|
if (!finalOptions.showRawDuplicates && groups && groups.length > 0) {
|
|
164
230
|
console.log(chalk.cyan("\n" + divider));
|
|
165
|
-
console.log(
|
|
231
|
+
console.log(
|
|
232
|
+
chalk.bold.white(` \u{1F4E6} DUPLICATE GROUPS (${groups.length} file pairs)`)
|
|
233
|
+
);
|
|
166
234
|
console.log(chalk.cyan(divider) + "\n");
|
|
167
235
|
const severityOrder = {
|
|
168
236
|
critical: 4,
|
|
@@ -180,39 +248,63 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
180
248
|
const [file1, file2] = group.filePair.split("::");
|
|
181
249
|
const file1Name = file1.split("/").pop() || file1;
|
|
182
250
|
const file2Name = file2.split("/").pop() || file2;
|
|
183
|
-
console.log(
|
|
184
|
-
|
|
251
|
+
console.log(
|
|
252
|
+
`${idx + 1}. ${severityBadge} ${chalk.bold(file1Name)} \u2194 ${chalk.bold(file2Name)}`
|
|
253
|
+
);
|
|
254
|
+
console.log(
|
|
255
|
+
` Occurrences: ${chalk.bold(group.occurrences)} | Total tokens: ${chalk.bold(group.totalTokenCost.toLocaleString())} | Avg similarity: ${chalk.bold(Math.round(group.averageSimilarity * 100) + "%")}`
|
|
256
|
+
);
|
|
185
257
|
const displayRanges = group.lineRanges.slice(0, 3);
|
|
186
258
|
displayRanges.forEach((range) => {
|
|
187
|
-
console.log(
|
|
259
|
+
console.log(
|
|
260
|
+
` ${chalk.gray(file1)}:${chalk.cyan(`${range.file1.start}-${range.file1.end}`)} \u2194 ${chalk.gray(file2)}:${chalk.cyan(`${range.file2.start}-${range.file2.end}`)}`
|
|
261
|
+
);
|
|
188
262
|
});
|
|
189
263
|
if (group.lineRanges.length > 3) {
|
|
190
|
-
console.log(
|
|
264
|
+
console.log(
|
|
265
|
+
` ${chalk.gray(`... and ${group.lineRanges.length - 3} more ranges`)}`
|
|
266
|
+
);
|
|
191
267
|
}
|
|
192
268
|
console.log();
|
|
193
269
|
});
|
|
194
270
|
if (groups.length > topGroups.length) {
|
|
195
|
-
console.log(
|
|
271
|
+
console.log(
|
|
272
|
+
chalk.gray(
|
|
273
|
+
` ... and ${groups.length - topGroups.length} more file pairs`
|
|
274
|
+
)
|
|
275
|
+
);
|
|
196
276
|
}
|
|
197
277
|
}
|
|
198
278
|
if (!finalOptions.showRawDuplicates && clusters && clusters.length > 0) {
|
|
199
279
|
console.log(chalk.cyan("\n" + divider));
|
|
200
|
-
console.log(
|
|
280
|
+
console.log(
|
|
281
|
+
chalk.bold.white(` \u{1F3AF} REFACTOR CLUSTERS (${clusters.length} patterns)`)
|
|
282
|
+
);
|
|
201
283
|
console.log(chalk.cyan(divider) + "\n");
|
|
202
284
|
clusters.sort((a, b) => b.totalTokenCost - a.totalTokenCost).forEach((cluster, idx) => {
|
|
203
285
|
const severityBadge = getSeverityBadge(cluster.severity);
|
|
204
|
-
console.log(
|
|
205
|
-
|
|
286
|
+
console.log(
|
|
287
|
+
`${idx + 1}. ${severityBadge} ${chalk.bold(cluster.name)}`
|
|
288
|
+
);
|
|
289
|
+
console.log(
|
|
290
|
+
` Total tokens: ${chalk.bold(cluster.totalTokenCost.toLocaleString())} | Avg similarity: ${chalk.bold(Math.round(cluster.averageSimilarity * 100) + "%")} | Duplicates: ${chalk.bold(cluster.duplicateCount)}`
|
|
291
|
+
);
|
|
206
292
|
const displayFiles = cluster.files.slice(0, 5);
|
|
207
|
-
console.log(
|
|
293
|
+
console.log(
|
|
294
|
+
` Files (${cluster.files.length}): ${displayFiles.map((f) => chalk.gray(f.split("/").pop() || f)).join(", ")}`
|
|
295
|
+
);
|
|
208
296
|
if (cluster.files.length > 5) {
|
|
209
|
-
console.log(
|
|
297
|
+
console.log(
|
|
298
|
+
` ${chalk.gray(`... and ${cluster.files.length - 5} more files`)}`
|
|
299
|
+
);
|
|
210
300
|
}
|
|
211
301
|
if (cluster.reason) {
|
|
212
302
|
console.log(` ${chalk.italic.gray(cluster.reason)}`);
|
|
213
303
|
}
|
|
214
304
|
if (cluster.suggestion) {
|
|
215
|
-
console.log(
|
|
305
|
+
console.log(
|
|
306
|
+
` ${chalk.cyan("\u2192")} ${chalk.italic(cluster.suggestion)}`
|
|
307
|
+
);
|
|
216
308
|
}
|
|
217
309
|
console.log();
|
|
218
310
|
});
|
|
@@ -236,10 +328,18 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
236
328
|
const severityBadge = getSeverityBadge(dup.severity);
|
|
237
329
|
const file1Name = dup.file1.split("/").pop() || dup.file1;
|
|
238
330
|
const file2Name = dup.file2.split("/").pop() || dup.file2;
|
|
239
|
-
console.log(
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
console.log(
|
|
331
|
+
console.log(
|
|
332
|
+
`${severityBadge} ${chalk.bold(file1Name)} \u2194 ${chalk.bold(file2Name)}`
|
|
333
|
+
);
|
|
334
|
+
console.log(
|
|
335
|
+
` Similarity: ${chalk.bold(Math.round(dup.similarity * 100) + "%")} | Pattern: ${dup.patternType} | Tokens: ${chalk.bold(dup.tokenCost.toLocaleString())}`
|
|
336
|
+
);
|
|
337
|
+
console.log(
|
|
338
|
+
` ${chalk.gray(dup.file1)}:${chalk.cyan(dup.line1 + "-" + dup.endLine1)}`
|
|
339
|
+
);
|
|
340
|
+
console.log(
|
|
341
|
+
` ${chalk.gray(dup.file2)}:${chalk.cyan(dup.line2 + "-" + dup.endLine2)}`
|
|
342
|
+
);
|
|
243
343
|
if (dup.reason) {
|
|
244
344
|
console.log(` ${chalk.italic.gray(dup.reason)}`);
|
|
245
345
|
}
|
|
@@ -249,7 +349,11 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
249
349
|
console.log();
|
|
250
350
|
});
|
|
251
351
|
if (filteredDuplicates.length > topDuplicates.length) {
|
|
252
|
-
console.log(
|
|
352
|
+
console.log(
|
|
353
|
+
chalk.gray(
|
|
354
|
+
` ... and ${filteredDuplicates.length - topDuplicates.length} more duplicates`
|
|
355
|
+
)
|
|
356
|
+
);
|
|
253
357
|
}
|
|
254
358
|
}
|
|
255
359
|
const allIssues = results.flatMap(
|
|
@@ -263,27 +367,45 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
263
367
|
console.log(chalk.bold.white(" CRITICAL ISSUES (>95% similar)"));
|
|
264
368
|
console.log(chalk.cyan(divider) + "\n");
|
|
265
369
|
criticalIssues.slice(0, 5).forEach((issue) => {
|
|
266
|
-
console.log(
|
|
370
|
+
console.log(
|
|
371
|
+
chalk.red("\u25CF ") + chalk.white(`${issue.file}:${issue.location.line}`)
|
|
372
|
+
);
|
|
267
373
|
console.log(` ${chalk.dim(issue.message)}`);
|
|
268
|
-
console.log(
|
|
269
|
-
`)
|
|
374
|
+
console.log(
|
|
375
|
+
` ${chalk.green("\u2192")} ${chalk.italic(issue.suggestion)}
|
|
376
|
+
`
|
|
377
|
+
);
|
|
270
378
|
});
|
|
271
379
|
}
|
|
272
380
|
if (totalIssues === 0) {
|
|
273
381
|
console.log(chalk.green("\n\u2728 Great! No duplicate patterns detected.\n"));
|
|
274
|
-
console.log(
|
|
275
|
-
|
|
382
|
+
console.log(
|
|
383
|
+
chalk.yellow(
|
|
384
|
+
"\u{1F4A1} If you expected to find duplicates, try adjusting parameters:"
|
|
385
|
+
)
|
|
386
|
+
);
|
|
387
|
+
console.log(
|
|
388
|
+
chalk.dim(" \u2022 Lower similarity threshold: --similarity 0.3")
|
|
389
|
+
);
|
|
276
390
|
console.log(chalk.dim(" \u2022 Reduce minimum lines: --min-lines 3"));
|
|
277
391
|
console.log(chalk.dim(" \u2022 Include test files: --include-tests"));
|
|
278
|
-
console.log(
|
|
392
|
+
console.log(
|
|
393
|
+
chalk.dim(" \u2022 Lower shared tokens threshold: --min-shared-tokens 5")
|
|
394
|
+
);
|
|
279
395
|
console.log("");
|
|
280
396
|
}
|
|
281
397
|
if (totalIssues > 0 && totalIssues < 5) {
|
|
282
|
-
console.log(
|
|
283
|
-
|
|
398
|
+
console.log(
|
|
399
|
+
chalk.yellow("\n\u{1F4A1} Few results found. To find more duplicates, try:")
|
|
400
|
+
);
|
|
401
|
+
console.log(
|
|
402
|
+
chalk.dim(" \u2022 Lower similarity threshold: --similarity 0.3")
|
|
403
|
+
);
|
|
284
404
|
console.log(chalk.dim(" \u2022 Reduce minimum lines: --min-lines 3"));
|
|
285
405
|
console.log(chalk.dim(" \u2022 Include test files: --include-tests"));
|
|
286
|
-
console.log(
|
|
406
|
+
console.log(
|
|
407
|
+
chalk.dim(" \u2022 Lower shared tokens threshold: --min-shared-tokens 5")
|
|
408
|
+
);
|
|
287
409
|
console.log("");
|
|
288
410
|
}
|
|
289
411
|
console.log(chalk.cyan(divider));
|
|
@@ -301,7 +423,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
301
423
|
)
|
|
302
424
|
);
|
|
303
425
|
console.log(
|
|
304
|
-
chalk.dim(
|
|
426
|
+
chalk.dim(
|
|
427
|
+
"\u{1F41B} Found a bug? Report it: https://github.com/caopengau/aiready-pattern-detect/issues\n"
|
|
428
|
+
)
|
|
305
429
|
);
|
|
306
430
|
});
|
|
307
431
|
function getPatternIcon(type) {
|
package/dist/index.d.mts
CHANGED
|
@@ -57,6 +57,7 @@ interface DetectionOptions {
|
|
|
57
57
|
maxCandidatesPerBlock?: number;
|
|
58
58
|
maxComparisons?: number;
|
|
59
59
|
streamResults?: boolean;
|
|
60
|
+
onProgress?: (processed: number, total: number, message: string) => void;
|
|
60
61
|
}
|
|
61
62
|
/**
|
|
62
63
|
* Detect duplicate patterns across files with enhanced analysis
|
|
@@ -130,6 +131,7 @@ interface PatternDetectOptions extends ScanOptions {
|
|
|
130
131
|
createClusters?: boolean;
|
|
131
132
|
minClusterTokenCost?: number;
|
|
132
133
|
minClusterFiles?: number;
|
|
134
|
+
onProgress?: (processed: number, total: number, message: string) => void;
|
|
133
135
|
}
|
|
134
136
|
interface PatternSummary {
|
|
135
137
|
totalPatterns: number;
|
package/dist/index.d.ts
CHANGED
|
@@ -57,6 +57,7 @@ interface DetectionOptions {
|
|
|
57
57
|
maxCandidatesPerBlock?: number;
|
|
58
58
|
maxComparisons?: number;
|
|
59
59
|
streamResults?: boolean;
|
|
60
|
+
onProgress?: (processed: number, total: number, message: string) => void;
|
|
60
61
|
}
|
|
61
62
|
/**
|
|
62
63
|
* Detect duplicate patterns across files with enhanced analysis
|
|
@@ -130,6 +131,7 @@ interface PatternDetectOptions extends ScanOptions {
|
|
|
130
131
|
createClusters?: boolean;
|
|
131
132
|
minClusterTokenCost?: number;
|
|
132
133
|
minClusterFiles?: number;
|
|
134
|
+
onProgress?: (processed: number, total: number, message: string) => void;
|
|
133
135
|
}
|
|
134
136
|
interface PatternSummary {
|
|
135
137
|
totalPatterns: number;
|
package/dist/index.js
CHANGED
|
@@ -113,8 +113,14 @@ function calculatePythonSimilarity(pattern1, pattern2) {
|
|
|
113
113
|
}
|
|
114
114
|
function calculateNameSimilarity(name1, name2) {
|
|
115
115
|
if (name1 === name2) return 1;
|
|
116
|
-
const clean1 = name1.replace(
|
|
117
|
-
|
|
116
|
+
const clean1 = name1.replace(
|
|
117
|
+
/^(get|set|is|has|create|delete|update|fetch)_?/,
|
|
118
|
+
""
|
|
119
|
+
);
|
|
120
|
+
const clean2 = name2.replace(
|
|
121
|
+
/^(get|set|is|has|create|delete|update|fetch)_?/,
|
|
122
|
+
""
|
|
123
|
+
);
|
|
118
124
|
if (clean1 === clean2) return 0.9;
|
|
119
125
|
if (clean1.includes(clean2) || clean2.includes(clean1)) {
|
|
120
126
|
return 0.7;
|
|
@@ -146,7 +152,10 @@ function detectPythonAntiPatterns(patterns) {
|
|
|
146
152
|
const antiPatterns = [];
|
|
147
153
|
const nameGroups = /* @__PURE__ */ new Map();
|
|
148
154
|
for (const pattern of patterns) {
|
|
149
|
-
const baseName = pattern.name.replace(
|
|
155
|
+
const baseName = pattern.name.replace(
|
|
156
|
+
/^(get|set|create|delete|update)_/,
|
|
157
|
+
""
|
|
158
|
+
);
|
|
150
159
|
if (!nameGroups.has(baseName)) {
|
|
151
160
|
nameGroups.set(baseName, []);
|
|
152
161
|
}
|
|
@@ -437,11 +446,15 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
437
446
|
linesOfCode: block.linesOfCode
|
|
438
447
|
}))
|
|
439
448
|
);
|
|
440
|
-
|
|
449
|
+
if (!options.onProgress) {
|
|
450
|
+
console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
|
|
451
|
+
}
|
|
441
452
|
const pythonFiles = files.filter((f) => f.file.toLowerCase().endsWith(".py"));
|
|
442
453
|
if (pythonFiles.length > 0) {
|
|
443
454
|
const { extractPythonPatterns: extractPythonPatterns2 } = await Promise.resolve().then(() => (init_python_extractor(), python_extractor_exports));
|
|
444
|
-
const patterns = await extractPythonPatterns2(
|
|
455
|
+
const patterns = await extractPythonPatterns2(
|
|
456
|
+
pythonFiles.map((f) => f.file)
|
|
457
|
+
);
|
|
445
458
|
const pythonBlocks = patterns.filter((p) => p.code && p.code.trim().length > 0).map((p) => ({
|
|
446
459
|
content: p.code,
|
|
447
460
|
startLine: p.startLine,
|
|
@@ -453,11 +466,17 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
453
466
|
linesOfCode: p.endLine - p.startLine + 1
|
|
454
467
|
}));
|
|
455
468
|
allBlocks.push(...pythonBlocks);
|
|
456
|
-
|
|
469
|
+
if (!options.onProgress) {
|
|
470
|
+
console.log(`Added ${pythonBlocks.length} Python patterns`);
|
|
471
|
+
}
|
|
457
472
|
}
|
|
458
473
|
if (!approx && allBlocks.length > 500) {
|
|
459
|
-
console.log(
|
|
460
|
-
|
|
474
|
+
console.log(
|
|
475
|
+
`\u26A0\uFE0F Using --no-approx mode with ${allBlocks.length} blocks may be slow (O(B\xB2) complexity).`
|
|
476
|
+
);
|
|
477
|
+
console.log(
|
|
478
|
+
` Consider using approximate mode (default) for better performance.`
|
|
479
|
+
);
|
|
461
480
|
}
|
|
462
481
|
const stopwords = /* @__PURE__ */ new Set([
|
|
463
482
|
"return",
|
|
@@ -487,7 +506,11 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
487
506
|
"undefined",
|
|
488
507
|
"this"
|
|
489
508
|
]);
|
|
490
|
-
const tokenize = (norm) =>
|
|
509
|
+
const tokenize = (norm) => {
|
|
510
|
+
const punctuation = "(){}[];.,";
|
|
511
|
+
const cleaned = norm.split("").map((ch) => punctuation.includes(ch) ? " " : ch).join("");
|
|
512
|
+
return cleaned.split(/\s+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
|
|
513
|
+
};
|
|
491
514
|
const blockTokens = allBlocks.map((b) => tokenize(b.normalized));
|
|
492
515
|
const invertedIndex = /* @__PURE__ */ new Map();
|
|
493
516
|
if (approx) {
|
|
@@ -504,9 +527,13 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
504
527
|
}
|
|
505
528
|
const totalComparisons = approx ? void 0 : allBlocks.length * (allBlocks.length - 1) / 2;
|
|
506
529
|
if (totalComparisons !== void 0) {
|
|
507
|
-
console.log(
|
|
530
|
+
console.log(
|
|
531
|
+
`Processing ${totalComparisons.toLocaleString()} comparisons in batches...`
|
|
532
|
+
);
|
|
508
533
|
} else {
|
|
509
|
-
console.log(
|
|
534
|
+
console.log(
|
|
535
|
+
`Using approximate candidate selection to reduce comparisons...`
|
|
536
|
+
);
|
|
510
537
|
}
|
|
511
538
|
let comparisonsProcessed = 0;
|
|
512
539
|
let comparisonsBudgetExhausted = false;
|
|
@@ -517,16 +544,24 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
517
544
|
break;
|
|
518
545
|
}
|
|
519
546
|
if (i % batchSize === 0 && i > 0) {
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
if (totalComparisons !== void 0) {
|
|
523
|
-
const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
|
|
524
|
-
const remaining = totalComparisons - comparisonsProcessed;
|
|
525
|
-
const rate = comparisonsProcessed / parseFloat(elapsed);
|
|
526
|
-
const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
|
|
527
|
-
console.log(` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`);
|
|
547
|
+
if (options.onProgress) {
|
|
548
|
+
options.onProgress(i, allBlocks.length, `pattern-detect: analyzing blocks`);
|
|
528
549
|
} else {
|
|
529
|
-
|
|
550
|
+
const elapsed = ((Date.now() - startTime) / 1e3).toFixed(1);
|
|
551
|
+
const duplicatesFound = duplicates.length;
|
|
552
|
+
if (totalComparisons !== void 0) {
|
|
553
|
+
const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
|
|
554
|
+
const remaining = totalComparisons - comparisonsProcessed;
|
|
555
|
+
const rate = comparisonsProcessed / parseFloat(elapsed);
|
|
556
|
+
const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
|
|
557
|
+
console.log(
|
|
558
|
+
` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`
|
|
559
|
+
);
|
|
560
|
+
} else {
|
|
561
|
+
console.log(
|
|
562
|
+
` Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`
|
|
563
|
+
);
|
|
564
|
+
}
|
|
530
565
|
}
|
|
531
566
|
await new Promise((resolve) => setImmediate(resolve));
|
|
532
567
|
}
|
|
@@ -560,8 +595,12 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
560
595
|
if (approx && candidates) {
|
|
561
596
|
for (const { j } of candidates) {
|
|
562
597
|
if (!approx && maxComparisons !== Infinity && comparisonsProcessed >= maxComparisons) {
|
|
563
|
-
console.log(
|
|
564
|
-
|
|
598
|
+
console.log(
|
|
599
|
+
`\u26A0\uFE0F Comparison safety limit reached (${maxComparisons.toLocaleString()} comparisons in --no-approx mode).`
|
|
600
|
+
);
|
|
601
|
+
console.log(
|
|
602
|
+
` This prevents excessive runtime on large repos. Consider using approximate mode (default) or --min-lines to reduce blocks.`
|
|
603
|
+
);
|
|
565
604
|
break;
|
|
566
605
|
}
|
|
567
606
|
comparisonsProcessed++;
|
|
@@ -594,10 +633,16 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
594
633
|
};
|
|
595
634
|
duplicates.push(duplicate);
|
|
596
635
|
if (streamResults) {
|
|
597
|
-
console.log(
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
636
|
+
console.log(
|
|
637
|
+
`
|
|
638
|
+
\u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`
|
|
639
|
+
);
|
|
640
|
+
console.log(
|
|
641
|
+
` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`
|
|
642
|
+
);
|
|
643
|
+
console.log(
|
|
644
|
+
` Token cost: ${duplicate.tokenCost.toLocaleString()}`
|
|
645
|
+
);
|
|
601
646
|
}
|
|
602
647
|
}
|
|
603
648
|
}
|
|
@@ -635,17 +680,25 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
635
680
|
};
|
|
636
681
|
duplicates.push(duplicate);
|
|
637
682
|
if (streamResults) {
|
|
638
|
-
console.log(
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
683
|
+
console.log(
|
|
684
|
+
`
|
|
685
|
+
\u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`
|
|
686
|
+
);
|
|
687
|
+
console.log(
|
|
688
|
+
` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`
|
|
689
|
+
);
|
|
690
|
+
console.log(
|
|
691
|
+
` Token cost: ${duplicate.tokenCost.toLocaleString()}`
|
|
692
|
+
);
|
|
642
693
|
}
|
|
643
694
|
}
|
|
644
695
|
}
|
|
645
696
|
}
|
|
646
697
|
}
|
|
647
698
|
if (comparisonsBudgetExhausted) {
|
|
648
|
-
console.log(
|
|
699
|
+
console.log(
|
|
700
|
+
`\u26A0\uFE0F Comparison budget exhausted (${maxComparisons.toLocaleString()} comparisons). Use --max-comparisons to increase.`
|
|
701
|
+
);
|
|
649
702
|
}
|
|
650
703
|
return duplicates.sort(
|
|
651
704
|
(a, b) => b.similarity - a.similarity || b.tokenCost - a.tokenCost
|
|
@@ -671,7 +724,10 @@ function groupDuplicatesByFilePair(duplicates) {
|
|
|
671
724
|
const result = [];
|
|
672
725
|
for (const [filePair, groupDups] of groups.entries()) {
|
|
673
726
|
const deduplicated = deduplicateOverlappingRanges(groupDups);
|
|
674
|
-
const totalTokenCost = deduplicated.reduce(
|
|
727
|
+
const totalTokenCost = deduplicated.reduce(
|
|
728
|
+
(sum, d) => sum + d.tokenCost,
|
|
729
|
+
0
|
|
730
|
+
);
|
|
675
731
|
const averageSimilarity = deduplicated.reduce((sum, d) => sum + d.similarity, 0) / deduplicated.length;
|
|
676
732
|
const maxSimilarity = Math.max(...deduplicated.map((d) => d.similarity));
|
|
677
733
|
const severity = getHighestSeverity(deduplicated.map((d) => d.severity));
|
|
@@ -777,7 +833,9 @@ function identifyCluster(dup) {
|
|
|
777
833
|
if ((file1.includes("/components/") || file1.startsWith("components/")) && (file2.includes("/components/") || file2.startsWith("components/")) && dup.patternType === "component") {
|
|
778
834
|
const component1 = extractComponentName(dup.file1);
|
|
779
835
|
const component2 = extractComponentName(dup.file2);
|
|
780
|
-
console.log(
|
|
836
|
+
console.log(
|
|
837
|
+
`Component check: ${dup.file1} -> ${component1}, ${dup.file2} -> ${component2}`
|
|
838
|
+
);
|
|
781
839
|
if (component1 && component2 && areSimilarComponents(component1, component2)) {
|
|
782
840
|
const category = getComponentCategory(component1);
|
|
783
841
|
console.log(`Creating cluster: component-${category}`);
|
|
@@ -876,7 +934,7 @@ function getClusterInfo(clusterId, patternType, fileCount) {
|
|
|
876
934
|
suggestion: "Extract common middleware, error handling, and response formatting",
|
|
877
935
|
reason: "API handler duplication leads to inconsistent error handling and response formats"
|
|
878
936
|
},
|
|
879
|
-
|
|
937
|
+
validators: {
|
|
880
938
|
name: `Validator Patterns (${fileCount} files)`,
|
|
881
939
|
suggestion: "Consolidate into shared schema validators (Zod/Yup) with reusable rules",
|
|
882
940
|
reason: "Validator duplication causes inconsistent validation and harder maintenance"
|
|
@@ -929,7 +987,12 @@ function calculatePatternScore(duplicates, totalFilesAnalyzed, costConfig) {
|
|
|
929
987
|
return {
|
|
930
988
|
toolName: "pattern-detect",
|
|
931
989
|
score: 100,
|
|
932
|
-
rawMetrics: {
|
|
990
|
+
rawMetrics: {
|
|
991
|
+
totalDuplicates: 0,
|
|
992
|
+
totalTokenCost: 0,
|
|
993
|
+
highImpactDuplicates: 0,
|
|
994
|
+
totalFilesAnalyzed: 0
|
|
995
|
+
},
|
|
933
996
|
factors: [],
|
|
934
997
|
recommendations: []
|
|
935
998
|
};
|
|
@@ -1053,13 +1116,22 @@ async function getSmartDefaults(directory, userOptions) {
|
|
|
1053
1116
|
const { scanFiles: scanFiles2 } = await import("@aiready/core");
|
|
1054
1117
|
const files = await scanFiles2(scanOptions);
|
|
1055
1118
|
const estimatedBlocks = files.length * 3;
|
|
1056
|
-
const maxCandidatesPerBlock = Math.max(
|
|
1119
|
+
const maxCandidatesPerBlock = Math.max(
|
|
1120
|
+
3,
|
|
1121
|
+
Math.min(10, Math.floor(3e4 / estimatedBlocks))
|
|
1122
|
+
);
|
|
1057
1123
|
const minSimilarity = Math.min(0.75, 0.5 + estimatedBlocks / 1e4 * 0.25);
|
|
1058
|
-
const minLines = Math.max(
|
|
1059
|
-
|
|
1124
|
+
const minLines = Math.max(
|
|
1125
|
+
6,
|
|
1126
|
+
Math.min(12, 6 + Math.floor(estimatedBlocks / 2e3))
|
|
1127
|
+
);
|
|
1128
|
+
const minSharedTokens = Math.max(
|
|
1129
|
+
10,
|
|
1130
|
+
Math.min(20, 10 + Math.floor(estimatedBlocks / 2e3))
|
|
1131
|
+
);
|
|
1060
1132
|
const batchSize = estimatedBlocks > 1e3 ? 200 : 100;
|
|
1061
1133
|
const severity = estimatedBlocks > 5e3 ? "high" : "all";
|
|
1062
|
-
|
|
1134
|
+
const defaults = {
|
|
1063
1135
|
rootDir: directory,
|
|
1064
1136
|
minSimilarity,
|
|
1065
1137
|
minLines,
|
|
@@ -1129,7 +1201,8 @@ async function analyzePatterns(options) {
|
|
|
1129
1201
|
approx,
|
|
1130
1202
|
minSharedTokens,
|
|
1131
1203
|
maxCandidatesPerBlock,
|
|
1132
|
-
streamResults
|
|
1204
|
+
streamResults,
|
|
1205
|
+
onProgress: options.onProgress
|
|
1133
1206
|
});
|
|
1134
1207
|
for (const file of files) {
|
|
1135
1208
|
const fileDuplicates = duplicates.filter(
|
|
@@ -1157,7 +1230,9 @@ async function analyzePatterns(options) {
|
|
|
1157
1230
|
medium: ["critical", "major", "minor"]
|
|
1158
1231
|
};
|
|
1159
1232
|
const allowedSeverities = severityMap[severity] || ["critical", "major", "minor"];
|
|
1160
|
-
filteredIssues = issues.filter(
|
|
1233
|
+
filteredIssues = issues.filter(
|
|
1234
|
+
(issue) => allowedSeverities.includes(issue.severity)
|
|
1235
|
+
);
|
|
1161
1236
|
}
|
|
1162
1237
|
const totalTokenCost = fileDuplicates.reduce(
|
|
1163
1238
|
(sum, dup) => sum + dup.tokenCost,
|
|
@@ -1179,7 +1254,11 @@ async function analyzePatterns(options) {
|
|
|
1179
1254
|
}
|
|
1180
1255
|
if (createClusters) {
|
|
1181
1256
|
const allClusters = createRefactorClusters(duplicates);
|
|
1182
|
-
clusters = filterClustersByImpact(
|
|
1257
|
+
clusters = filterClustersByImpact(
|
|
1258
|
+
allClusters,
|
|
1259
|
+
minClusterTokenCost,
|
|
1260
|
+
minClusterFiles
|
|
1261
|
+
);
|
|
1183
1262
|
}
|
|
1184
1263
|
return { results, duplicates, files, groups, clusters };
|
|
1185
1264
|
}
|