@aiready/pattern-detect 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +103 -3
- package/dist/chunk-4CZGZIDL.mjs +409 -0
- package/dist/chunk-57O7FEEM.mjs +400 -0
- package/dist/chunk-6VQTQRDW.mjs +245 -0
- package/dist/chunk-DNI7S33V.mjs +399 -0
- package/dist/chunk-JTJXOIO2.mjs +378 -0
- package/dist/chunk-N5DE7IYX.mjs +416 -0
- package/dist/chunk-YA3N6EC5.mjs +351 -0
- package/dist/chunk-ZNZ5O435.mjs +400 -0
- package/dist/cli.js +258 -48
- package/dist/cli.mjs +45 -24
- package/dist/index.d.mts +23 -1
- package/dist/index.d.ts +23 -1
- package/dist/index.js +214 -25
- package/dist/index.mjs +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -82,6 +82,7 @@ function extractCodeBlocks(content, minLines) {
|
|
|
82
82
|
blocks.push({
|
|
83
83
|
content: blockContent,
|
|
84
84
|
startLine: blockStart + 1,
|
|
85
|
+
endLine: i + 1,
|
|
85
86
|
patternType: categorizePattern(blockContent),
|
|
86
87
|
linesOfCode
|
|
87
88
|
});
|
|
@@ -97,6 +98,16 @@ function extractCodeBlocks(content, minLines) {
|
|
|
97
98
|
function normalizeCode(code) {
|
|
98
99
|
return code.replace(/\/\/.*$/gm, "").replace(/\/\*[\s\S]*?\*\//g, "").replace(/"[^"]*"/g, '"STR"').replace(/'[^']*'/g, "'STR'").replace(/`[^`]*`/g, "`STR`").replace(/\b\d+\b/g, "NUM").replace(/\s+/g, " ").trim();
|
|
99
100
|
}
|
|
101
|
+
function jaccardSimilarity(tokens1, tokens2) {
|
|
102
|
+
const set1 = new Set(tokens1);
|
|
103
|
+
const set2 = new Set(tokens2);
|
|
104
|
+
let intersection = 0;
|
|
105
|
+
for (const token of set1) {
|
|
106
|
+
if (set2.has(token)) intersection++;
|
|
107
|
+
}
|
|
108
|
+
const union = set1.size + set2.size - intersection;
|
|
109
|
+
return union === 0 ? 0 : intersection / union;
|
|
110
|
+
}
|
|
100
111
|
function calculateSimilarity(block1, block2) {
|
|
101
112
|
const norm1 = normalizeCode(block1);
|
|
102
113
|
const norm2 = normalizeCode(block2);
|
|
@@ -106,39 +117,189 @@ function calculateSimilarity(block1, block2) {
|
|
|
106
117
|
const tokenSimilarity = (0, import_core.similarityScore)(tokens1.join(" "), tokens2.join(" "));
|
|
107
118
|
return baseSimilarity * 0.4 + tokenSimilarity * 0.6;
|
|
108
119
|
}
|
|
109
|
-
function detectDuplicatePatterns(files, options) {
|
|
110
|
-
const {
|
|
120
|
+
async function detectDuplicatePatterns(files, options) {
|
|
121
|
+
const {
|
|
122
|
+
minSimilarity,
|
|
123
|
+
minLines,
|
|
124
|
+
maxBlocks = 500,
|
|
125
|
+
batchSize = 100,
|
|
126
|
+
approx = true,
|
|
127
|
+
minSharedTokens = 8,
|
|
128
|
+
maxCandidatesPerBlock = 100,
|
|
129
|
+
fastMode = true,
|
|
130
|
+
maxComparisons = 5e4,
|
|
131
|
+
// Cap at 50K comparisons by default
|
|
132
|
+
streamResults = false
|
|
133
|
+
} = options;
|
|
111
134
|
const duplicates = [];
|
|
112
|
-
|
|
135
|
+
let allBlocks = files.flatMap(
|
|
113
136
|
(file) => extractCodeBlocks(file.content, minLines).map((block) => ({
|
|
114
|
-
|
|
137
|
+
content: block.content,
|
|
138
|
+
startLine: block.startLine,
|
|
139
|
+
endLine: block.endLine,
|
|
115
140
|
file: file.file,
|
|
116
141
|
normalized: normalizeCode(block.content),
|
|
117
|
-
|
|
142
|
+
patternType: block.patternType,
|
|
143
|
+
tokenCost: (0, import_core.estimateTokens)(block.content),
|
|
144
|
+
linesOfCode: block.linesOfCode
|
|
118
145
|
}))
|
|
119
146
|
);
|
|
120
147
|
console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
|
|
148
|
+
if (allBlocks.length > maxBlocks) {
|
|
149
|
+
console.log(`\u26A0\uFE0F Limiting to ${maxBlocks} blocks (sorted by size) to prevent memory issues`);
|
|
150
|
+
console.log(` Use --max-blocks to increase limit or --min-lines to filter smaller blocks`);
|
|
151
|
+
allBlocks = allBlocks.sort((a, b) => b.linesOfCode - a.linesOfCode).slice(0, maxBlocks);
|
|
152
|
+
}
|
|
153
|
+
const stopwords = /* @__PURE__ */ new Set([
|
|
154
|
+
"return",
|
|
155
|
+
"const",
|
|
156
|
+
"let",
|
|
157
|
+
"var",
|
|
158
|
+
"function",
|
|
159
|
+
"class",
|
|
160
|
+
"new",
|
|
161
|
+
"if",
|
|
162
|
+
"else",
|
|
163
|
+
"for",
|
|
164
|
+
"while",
|
|
165
|
+
"async",
|
|
166
|
+
"await",
|
|
167
|
+
"try",
|
|
168
|
+
"catch",
|
|
169
|
+
"switch",
|
|
170
|
+
"case",
|
|
171
|
+
"default",
|
|
172
|
+
"import",
|
|
173
|
+
"export",
|
|
174
|
+
"from",
|
|
175
|
+
"true",
|
|
176
|
+
"false",
|
|
177
|
+
"null",
|
|
178
|
+
"undefined",
|
|
179
|
+
"this"
|
|
180
|
+
]);
|
|
181
|
+
const tokenize = (norm) => norm.split(/[\s(){}\[\];,\.]+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
|
|
182
|
+
const blockTokens = allBlocks.map((b) => tokenize(b.normalized));
|
|
183
|
+
const invertedIndex = /* @__PURE__ */ new Map();
|
|
184
|
+
if (approx) {
|
|
185
|
+
for (let i = 0; i < blockTokens.length; i++) {
|
|
186
|
+
for (const tok of blockTokens[i]) {
|
|
187
|
+
let arr = invertedIndex.get(tok);
|
|
188
|
+
if (!arr) {
|
|
189
|
+
arr = [];
|
|
190
|
+
invertedIndex.set(tok, arr);
|
|
191
|
+
}
|
|
192
|
+
arr.push(i);
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
const totalComparisons = approx ? void 0 : allBlocks.length * (allBlocks.length - 1) / 2;
|
|
197
|
+
if (totalComparisons !== void 0) {
|
|
198
|
+
console.log(`Processing ${totalComparisons.toLocaleString()} comparisons in batches...`);
|
|
199
|
+
} else {
|
|
200
|
+
console.log(`Using approximate candidate selection to reduce comparisons...`);
|
|
201
|
+
}
|
|
202
|
+
let comparisonsProcessed = 0;
|
|
203
|
+
let comparisonsBudgetExhausted = false;
|
|
204
|
+
const startTime = Date.now();
|
|
121
205
|
for (let i = 0; i < allBlocks.length; i++) {
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
206
|
+
if (maxComparisons && comparisonsProcessed >= maxComparisons) {
|
|
207
|
+
comparisonsBudgetExhausted = true;
|
|
208
|
+
break;
|
|
209
|
+
}
|
|
210
|
+
if (i % batchSize === 0 && i > 0) {
|
|
211
|
+
const elapsed = ((Date.now() - startTime) / 1e3).toFixed(1);
|
|
212
|
+
const duplicatesFound = duplicates.length;
|
|
213
|
+
if (totalComparisons !== void 0) {
|
|
214
|
+
const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
|
|
215
|
+
const remaining = totalComparisons - comparisonsProcessed;
|
|
216
|
+
const rate = comparisonsProcessed / parseFloat(elapsed);
|
|
217
|
+
const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
|
|
218
|
+
console.log(` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`);
|
|
219
|
+
} else {
|
|
220
|
+
console.log(` Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`);
|
|
221
|
+
}
|
|
222
|
+
await new Promise((resolve) => setImmediate(resolve));
|
|
223
|
+
}
|
|
224
|
+
const block1 = allBlocks[i];
|
|
225
|
+
let candidates = null;
|
|
226
|
+
if (approx) {
|
|
227
|
+
const counts = /* @__PURE__ */ new Map();
|
|
228
|
+
for (const tok of blockTokens[i]) {
|
|
229
|
+
const ids = invertedIndex.get(tok);
|
|
230
|
+
if (!ids) continue;
|
|
231
|
+
for (const j of ids) {
|
|
232
|
+
if (j <= i) continue;
|
|
233
|
+
if (allBlocks[j].file === block1.file) continue;
|
|
234
|
+
counts.set(j, (counts.get(j) || 0) + 1);
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
candidates = Array.from(counts.entries()).filter(([, shared]) => shared >= minSharedTokens).sort((a, b) => b[1] - a[1]).slice(0, maxCandidatesPerBlock).map(([j, shared]) => ({ j, shared }));
|
|
238
|
+
}
|
|
239
|
+
if (approx && candidates) {
|
|
240
|
+
for (const { j } of candidates) {
|
|
241
|
+
if (maxComparisons && comparisonsProcessed >= maxComparisons) break;
|
|
242
|
+
comparisonsProcessed++;
|
|
243
|
+
const block2 = allBlocks[j];
|
|
244
|
+
const similarity = fastMode ? jaccardSimilarity(blockTokens[i], blockTokens[j]) : calculateSimilarity(block1.content, block2.content);
|
|
245
|
+
if (similarity >= minSimilarity) {
|
|
246
|
+
const duplicate = {
|
|
247
|
+
file1: block1.file,
|
|
248
|
+
file2: block2.file,
|
|
249
|
+
line1: block1.startLine,
|
|
250
|
+
line2: block2.startLine,
|
|
251
|
+
endLine1: block1.endLine,
|
|
252
|
+
endLine2: block2.endLine,
|
|
253
|
+
similarity,
|
|
254
|
+
snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
|
|
255
|
+
patternType: block1.patternType,
|
|
256
|
+
tokenCost: block1.tokenCost + block2.tokenCost,
|
|
257
|
+
linesOfCode: block1.linesOfCode
|
|
258
|
+
};
|
|
259
|
+
duplicates.push(duplicate);
|
|
260
|
+
if (streamResults) {
|
|
261
|
+
console.log(`
|
|
262
|
+
\u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
|
|
263
|
+
console.log(` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
|
|
264
|
+
console.log(` Token cost: ${duplicate.tokenCost.toLocaleString()}`);
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
}
|
|
268
|
+
} else {
|
|
269
|
+
for (let j = i + 1; j < allBlocks.length; j++) {
|
|
270
|
+
if (maxComparisons && comparisonsProcessed >= maxComparisons) break;
|
|
271
|
+
comparisonsProcessed++;
|
|
272
|
+
const block2 = allBlocks[j];
|
|
273
|
+
if (block1.file === block2.file) continue;
|
|
274
|
+
const similarity = fastMode ? jaccardSimilarity(blockTokens[i], blockTokens[j]) : calculateSimilarity(block1.content, block2.content);
|
|
275
|
+
if (similarity >= minSimilarity) {
|
|
276
|
+
const duplicate = {
|
|
277
|
+
file1: block1.file,
|
|
278
|
+
file2: block2.file,
|
|
279
|
+
line1: block1.startLine,
|
|
280
|
+
line2: block2.startLine,
|
|
281
|
+
endLine1: block1.endLine,
|
|
282
|
+
endLine2: block2.endLine,
|
|
283
|
+
similarity,
|
|
284
|
+
snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
|
|
285
|
+
patternType: block1.patternType,
|
|
286
|
+
tokenCost: block1.tokenCost + block2.tokenCost,
|
|
287
|
+
linesOfCode: block1.linesOfCode
|
|
288
|
+
};
|
|
289
|
+
duplicates.push(duplicate);
|
|
290
|
+
if (streamResults) {
|
|
291
|
+
console.log(`
|
|
292
|
+
\u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
|
|
293
|
+
console.log(` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
|
|
294
|
+
console.log(` Token cost: ${duplicate.tokenCost.toLocaleString()}`);
|
|
295
|
+
}
|
|
296
|
+
}
|
|
139
297
|
}
|
|
140
298
|
}
|
|
141
299
|
}
|
|
300
|
+
if (comparisonsBudgetExhausted) {
|
|
301
|
+
console.log(`\u26A0\uFE0F Comparison budget exhausted (${maxComparisons.toLocaleString()} comparisons). Use --max-comparisons to increase.`);
|
|
302
|
+
}
|
|
142
303
|
return duplicates.sort(
|
|
143
304
|
(a, b) => b.similarity - a.similarity || b.tokenCost - a.tokenCost
|
|
144
305
|
);
|
|
@@ -159,7 +320,20 @@ function getRefactoringSuggestion(patternType, similarity) {
|
|
|
159
320
|
return baseMessages[patternType] + urgency;
|
|
160
321
|
}
|
|
161
322
|
async function analyzePatterns(options) {
|
|
162
|
-
const {
|
|
323
|
+
const {
|
|
324
|
+
minSimilarity = 0.65,
|
|
325
|
+
// Lower default for fast Jaccard mode (Levenshtein would be 0.85+)
|
|
326
|
+
minLines = 5,
|
|
327
|
+
maxBlocks = 500,
|
|
328
|
+
batchSize = 100,
|
|
329
|
+
approx = true,
|
|
330
|
+
minSharedTokens = 8,
|
|
331
|
+
maxCandidatesPerBlock = 100,
|
|
332
|
+
fastMode = true,
|
|
333
|
+
maxComparisons = 5e4,
|
|
334
|
+
streamResults = false,
|
|
335
|
+
...scanOptions
|
|
336
|
+
} = options;
|
|
163
337
|
const files = await (0, import_core2.scanFiles)(scanOptions);
|
|
164
338
|
const results = [];
|
|
165
339
|
const fileContents = await Promise.all(
|
|
@@ -168,9 +342,17 @@ async function analyzePatterns(options) {
|
|
|
168
342
|
content: await (0, import_core2.readFileContent)(file)
|
|
169
343
|
}))
|
|
170
344
|
);
|
|
171
|
-
const duplicates = detectDuplicatePatterns(fileContents, {
|
|
345
|
+
const duplicates = await detectDuplicatePatterns(fileContents, {
|
|
172
346
|
minSimilarity,
|
|
173
|
-
minLines
|
|
347
|
+
minLines,
|
|
348
|
+
maxBlocks,
|
|
349
|
+
batchSize,
|
|
350
|
+
approx,
|
|
351
|
+
minSharedTokens,
|
|
352
|
+
maxCandidatesPerBlock,
|
|
353
|
+
fastMode,
|
|
354
|
+
maxComparisons,
|
|
355
|
+
streamResults
|
|
174
356
|
});
|
|
175
357
|
for (const file of files) {
|
|
176
358
|
const fileDuplicates = duplicates.filter(
|
|
@@ -235,6 +417,13 @@ function generateSummary(results) {
|
|
|
235
417
|
return {
|
|
236
418
|
file1: issue.location.file,
|
|
237
419
|
file2: fileMatch?.[1] || "unknown",
|
|
420
|
+
line1: issue.location.line,
|
|
421
|
+
line2: 0,
|
|
422
|
+
// Not available from Issue
|
|
423
|
+
endLine1: 0,
|
|
424
|
+
// Not available from Issue
|
|
425
|
+
endLine2: 0,
|
|
426
|
+
// Not available from Issue
|
|
238
427
|
similarity: similarityMatch ? parseInt(similarityMatch[1]) / 100 : 0,
|
|
239
428
|
patternType: typeMatch?.[1] || "unknown",
|
|
240
429
|
tokenCost: tokenMatch ? parseInt(tokenMatch[1]) : 0
|
|
@@ -253,7 +442,7 @@ var import_chalk = __toESM(require("chalk"));
|
|
|
253
442
|
var import_fs = require("fs");
|
|
254
443
|
var import_path = require("path");
|
|
255
444
|
var program = new import_commander.Command();
|
|
256
|
-
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.
|
|
445
|
+
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--max-blocks <number>", "Maximum blocks to analyze (prevents OOM)", "500").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-fast-mode", "Use slower but more accurate Levenshtein distance (default: fast Jaccard)").option("--max-comparisons <number>", "Maximum total comparisons budget", "50000").option("--stream-results", "Output duplicates incrementally as found (useful for slow analysis)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
|
|
257
446
|
"-o, --output <format>",
|
|
258
447
|
"Output format: console, json, html",
|
|
259
448
|
"console"
|
|
@@ -263,6 +452,17 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
263
452
|
rootDir: directory,
|
|
264
453
|
minSimilarity: parseFloat(options.similarity),
|
|
265
454
|
minLines: parseInt(options.minLines),
|
|
455
|
+
maxBlocks: parseInt(options.maxBlocks),
|
|
456
|
+
batchSize: parseInt(options.batchSize),
|
|
457
|
+
approx: options.approx !== false,
|
|
458
|
+
// default true; --no-approx sets to false
|
|
459
|
+
minSharedTokens: parseInt(options.minSharedTokens),
|
|
460
|
+
maxCandidatesPerBlock: parseInt(options.maxCandidates),
|
|
461
|
+
fastMode: options.fastMode !== false,
|
|
462
|
+
// default true; --no-fast-mode sets to false
|
|
463
|
+
maxComparisons: parseInt(options.maxComparisons),
|
|
464
|
+
streamResults: options.streamResults === true,
|
|
465
|
+
// default false; --stream-results sets to true
|
|
266
466
|
include: options.include?.split(","),
|
|
267
467
|
exclude: options.exclude?.split(",")
|
|
268
468
|
});
|
|
@@ -291,9 +491,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
291
491
|
\u2713 HTML report saved to ${outputPath}`));
|
|
292
492
|
return;
|
|
293
493
|
}
|
|
294
|
-
|
|
494
|
+
const terminalWidth = process.stdout.columns || 80;
|
|
495
|
+
const dividerWidth = Math.min(60, terminalWidth - 2);
|
|
496
|
+
const divider = "\u2501".repeat(dividerWidth);
|
|
497
|
+
console.log(import_chalk.default.cyan(divider));
|
|
295
498
|
console.log(import_chalk.default.bold.white(" PATTERN ANALYSIS SUMMARY"));
|
|
296
|
-
console.log(import_chalk.default.cyan(
|
|
499
|
+
console.log(import_chalk.default.cyan(divider) + "\n");
|
|
297
500
|
console.log(
|
|
298
501
|
import_chalk.default.white(`\u{1F4C1} Files analyzed: ${import_chalk.default.bold(results.length)}`)
|
|
299
502
|
);
|
|
@@ -305,18 +508,20 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
305
508
|
`\u{1F4B0} Token cost (wasted): ${import_chalk.default.bold(summary.totalTokenCost.toLocaleString())}`
|
|
306
509
|
)
|
|
307
510
|
);
|
|
308
|
-
console.log(import_chalk.default.cyan("\n\u2501".repeat(60)));
|
|
309
|
-
console.log(import_chalk.default.bold.white(" PATTERNS BY TYPE"));
|
|
310
|
-
console.log(import_chalk.default.cyan("\u2501".repeat(60)) + "\n");
|
|
311
511
|
const sortedTypes = Object.entries(summary.patternsByType).filter(([, count]) => count > 0).sort(([, a], [, b]) => b - a);
|
|
312
|
-
sortedTypes.
|
|
313
|
-
|
|
314
|
-
console.log(
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
512
|
+
if (sortedTypes.length > 0) {
|
|
513
|
+
console.log(import_chalk.default.cyan("\n" + divider));
|
|
514
|
+
console.log(import_chalk.default.bold.white(" PATTERNS BY TYPE"));
|
|
515
|
+
console.log(import_chalk.default.cyan(divider) + "\n");
|
|
516
|
+
sortedTypes.forEach(([type, count]) => {
|
|
517
|
+
const icon = getPatternIcon(type);
|
|
518
|
+
console.log(`${icon} ${import_chalk.default.white(type.padEnd(15))} ${import_chalk.default.bold(count)}`);
|
|
519
|
+
});
|
|
520
|
+
}
|
|
521
|
+
if (summary.topDuplicates.length > 0 && totalIssues > 0) {
|
|
522
|
+
console.log(import_chalk.default.cyan("\n" + divider));
|
|
318
523
|
console.log(import_chalk.default.bold.white(" TOP DUPLICATE PATTERNS"));
|
|
319
|
-
console.log(import_chalk.default.cyan(
|
|
524
|
+
console.log(import_chalk.default.cyan(divider) + "\n");
|
|
320
525
|
summary.topDuplicates.slice(0, 10).forEach((dup, idx) => {
|
|
321
526
|
const severityColor = dup.similarity > 0.95 ? import_chalk.default.red : dup.similarity > 0.9 ? import_chalk.default.yellow : import_chalk.default.blue;
|
|
322
527
|
console.log(
|
|
@@ -325,10 +530,10 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
325
530
|
)} ${getPatternIcon(dup.patternType)} ${import_chalk.default.white(dup.patternType)}`
|
|
326
531
|
);
|
|
327
532
|
console.log(
|
|
328
|
-
` ${import_chalk.default.dim(dup.file1)}`
|
|
533
|
+
` ${import_chalk.default.dim(dup.file1)}:${import_chalk.default.cyan(dup.line1)}-${import_chalk.default.cyan(dup.endLine1)}`
|
|
329
534
|
);
|
|
330
535
|
console.log(
|
|
331
|
-
` ${import_chalk.default.dim("\u2194")} ${import_chalk.default.dim(dup.file2)}`
|
|
536
|
+
` ${import_chalk.default.dim("\u2194")} ${import_chalk.default.dim(dup.file2)}:${import_chalk.default.cyan(dup.line2)}-${import_chalk.default.cyan(dup.endLine2)}`
|
|
332
537
|
);
|
|
333
538
|
console.log(
|
|
334
539
|
` ${import_chalk.default.red(`${dup.tokenCost.toLocaleString()} tokens wasted`)}
|
|
@@ -343,9 +548,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
343
548
|
(issue) => issue.severity === "critical"
|
|
344
549
|
);
|
|
345
550
|
if (criticalIssues.length > 0) {
|
|
346
|
-
console.log(import_chalk.default.cyan(
|
|
551
|
+
console.log(import_chalk.default.cyan(divider));
|
|
347
552
|
console.log(import_chalk.default.bold.white(" CRITICAL ISSUES (>95% similar)"));
|
|
348
|
-
console.log(import_chalk.default.cyan(
|
|
553
|
+
console.log(import_chalk.default.cyan(divider) + "\n");
|
|
349
554
|
criticalIssues.slice(0, 5).forEach((issue) => {
|
|
350
555
|
console.log(import_chalk.default.red("\u25CF ") + import_chalk.default.white(`${issue.file}:${issue.location.line}`));
|
|
351
556
|
console.log(` ${import_chalk.default.dim(issue.message)}`);
|
|
@@ -353,14 +558,19 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
353
558
|
`);
|
|
354
559
|
});
|
|
355
560
|
}
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
561
|
+
if (totalIssues === 0) {
|
|
562
|
+
console.log(import_chalk.default.green("\n\u2728 Great! No duplicate patterns detected.\n"));
|
|
563
|
+
}
|
|
564
|
+
console.log(import_chalk.default.cyan(divider));
|
|
565
|
+
if (totalIssues > 0) {
|
|
566
|
+
console.log(
|
|
567
|
+
import_chalk.default.white(
|
|
568
|
+
`
|
|
360
569
|
\u{1F4A1} Run with ${import_chalk.default.bold("--output json")} or ${import_chalk.default.bold("--output html")} for detailed reports
|
|
361
570
|
`
|
|
362
|
-
|
|
363
|
-
|
|
571
|
+
)
|
|
572
|
+
);
|
|
573
|
+
}
|
|
364
574
|
});
|
|
365
575
|
function getPatternIcon(type) {
|
|
366
576
|
const icons = {
|
package/dist/cli.mjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import {
|
|
3
3
|
analyzePatterns,
|
|
4
4
|
generateSummary
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-N5DE7IYX.mjs";
|
|
6
6
|
|
|
7
7
|
// src/cli.ts
|
|
8
8
|
import { Command } from "commander";
|
|
@@ -10,7 +10,7 @@ import chalk from "chalk";
|
|
|
10
10
|
import { writeFileSync } from "fs";
|
|
11
11
|
import { join } from "path";
|
|
12
12
|
var program = new Command();
|
|
13
|
-
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.
|
|
13
|
+
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--max-blocks <number>", "Maximum blocks to analyze (prevents OOM)", "500").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-fast-mode", "Use slower but more accurate Levenshtein distance (default: fast Jaccard)").option("--max-comparisons <number>", "Maximum total comparisons budget", "50000").option("--stream-results", "Output duplicates incrementally as found (useful for slow analysis)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
|
|
14
14
|
"-o, --output <format>",
|
|
15
15
|
"Output format: console, json, html",
|
|
16
16
|
"console"
|
|
@@ -20,6 +20,17 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
20
20
|
rootDir: directory,
|
|
21
21
|
minSimilarity: parseFloat(options.similarity),
|
|
22
22
|
minLines: parseInt(options.minLines),
|
|
23
|
+
maxBlocks: parseInt(options.maxBlocks),
|
|
24
|
+
batchSize: parseInt(options.batchSize),
|
|
25
|
+
approx: options.approx !== false,
|
|
26
|
+
// default true; --no-approx sets to false
|
|
27
|
+
minSharedTokens: parseInt(options.minSharedTokens),
|
|
28
|
+
maxCandidatesPerBlock: parseInt(options.maxCandidates),
|
|
29
|
+
fastMode: options.fastMode !== false,
|
|
30
|
+
// default true; --no-fast-mode sets to false
|
|
31
|
+
maxComparisons: parseInt(options.maxComparisons),
|
|
32
|
+
streamResults: options.streamResults === true,
|
|
33
|
+
// default false; --stream-results sets to true
|
|
23
34
|
include: options.include?.split(","),
|
|
24
35
|
exclude: options.exclude?.split(",")
|
|
25
36
|
});
|
|
@@ -48,9 +59,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
48
59
|
\u2713 HTML report saved to ${outputPath}`));
|
|
49
60
|
return;
|
|
50
61
|
}
|
|
51
|
-
|
|
62
|
+
const terminalWidth = process.stdout.columns || 80;
|
|
63
|
+
const dividerWidth = Math.min(60, terminalWidth - 2);
|
|
64
|
+
const divider = "\u2501".repeat(dividerWidth);
|
|
65
|
+
console.log(chalk.cyan(divider));
|
|
52
66
|
console.log(chalk.bold.white(" PATTERN ANALYSIS SUMMARY"));
|
|
53
|
-
console.log(chalk.cyan(
|
|
67
|
+
console.log(chalk.cyan(divider) + "\n");
|
|
54
68
|
console.log(
|
|
55
69
|
chalk.white(`\u{1F4C1} Files analyzed: ${chalk.bold(results.length)}`)
|
|
56
70
|
);
|
|
@@ -62,18 +76,20 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
62
76
|
`\u{1F4B0} Token cost (wasted): ${chalk.bold(summary.totalTokenCost.toLocaleString())}`
|
|
63
77
|
)
|
|
64
78
|
);
|
|
65
|
-
console.log(chalk.cyan("\n\u2501".repeat(60)));
|
|
66
|
-
console.log(chalk.bold.white(" PATTERNS BY TYPE"));
|
|
67
|
-
console.log(chalk.cyan("\u2501".repeat(60)) + "\n");
|
|
68
79
|
const sortedTypes = Object.entries(summary.patternsByType).filter(([, count]) => count > 0).sort(([, a], [, b]) => b - a);
|
|
69
|
-
sortedTypes.
|
|
70
|
-
|
|
71
|
-
console.log(
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
80
|
+
if (sortedTypes.length > 0) {
|
|
81
|
+
console.log(chalk.cyan("\n" + divider));
|
|
82
|
+
console.log(chalk.bold.white(" PATTERNS BY TYPE"));
|
|
83
|
+
console.log(chalk.cyan(divider) + "\n");
|
|
84
|
+
sortedTypes.forEach(([type, count]) => {
|
|
85
|
+
const icon = getPatternIcon(type);
|
|
86
|
+
console.log(`${icon} ${chalk.white(type.padEnd(15))} ${chalk.bold(count)}`);
|
|
87
|
+
});
|
|
88
|
+
}
|
|
89
|
+
if (summary.topDuplicates.length > 0 && totalIssues > 0) {
|
|
90
|
+
console.log(chalk.cyan("\n" + divider));
|
|
75
91
|
console.log(chalk.bold.white(" TOP DUPLICATE PATTERNS"));
|
|
76
|
-
console.log(chalk.cyan(
|
|
92
|
+
console.log(chalk.cyan(divider) + "\n");
|
|
77
93
|
summary.topDuplicates.slice(0, 10).forEach((dup, idx) => {
|
|
78
94
|
const severityColor = dup.similarity > 0.95 ? chalk.red : dup.similarity > 0.9 ? chalk.yellow : chalk.blue;
|
|
79
95
|
console.log(
|
|
@@ -82,10 +98,10 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
82
98
|
)} ${getPatternIcon(dup.patternType)} ${chalk.white(dup.patternType)}`
|
|
83
99
|
);
|
|
84
100
|
console.log(
|
|
85
|
-
` ${chalk.dim(dup.file1)}`
|
|
101
|
+
` ${chalk.dim(dup.file1)}:${chalk.cyan(dup.line1)}-${chalk.cyan(dup.endLine1)}`
|
|
86
102
|
);
|
|
87
103
|
console.log(
|
|
88
|
-
` ${chalk.dim("\u2194")} ${chalk.dim(dup.file2)}`
|
|
104
|
+
` ${chalk.dim("\u2194")} ${chalk.dim(dup.file2)}:${chalk.cyan(dup.line2)}-${chalk.cyan(dup.endLine2)}`
|
|
89
105
|
);
|
|
90
106
|
console.log(
|
|
91
107
|
` ${chalk.red(`${dup.tokenCost.toLocaleString()} tokens wasted`)}
|
|
@@ -100,9 +116,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
100
116
|
(issue) => issue.severity === "critical"
|
|
101
117
|
);
|
|
102
118
|
if (criticalIssues.length > 0) {
|
|
103
|
-
console.log(chalk.cyan(
|
|
119
|
+
console.log(chalk.cyan(divider));
|
|
104
120
|
console.log(chalk.bold.white(" CRITICAL ISSUES (>95% similar)"));
|
|
105
|
-
console.log(chalk.cyan(
|
|
121
|
+
console.log(chalk.cyan(divider) + "\n");
|
|
106
122
|
criticalIssues.slice(0, 5).forEach((issue) => {
|
|
107
123
|
console.log(chalk.red("\u25CF ") + chalk.white(`${issue.file}:${issue.location.line}`));
|
|
108
124
|
console.log(` ${chalk.dim(issue.message)}`);
|
|
@@ -110,14 +126,19 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
110
126
|
`);
|
|
111
127
|
});
|
|
112
128
|
}
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
129
|
+
if (totalIssues === 0) {
|
|
130
|
+
console.log(chalk.green("\n\u2728 Great! No duplicate patterns detected.\n"));
|
|
131
|
+
}
|
|
132
|
+
console.log(chalk.cyan(divider));
|
|
133
|
+
if (totalIssues > 0) {
|
|
134
|
+
console.log(
|
|
135
|
+
chalk.white(
|
|
136
|
+
`
|
|
117
137
|
\u{1F4A1} Run with ${chalk.bold("--output json")} or ${chalk.bold("--output html")} for detailed reports
|
|
118
138
|
`
|
|
119
|
-
|
|
120
|
-
|
|
139
|
+
)
|
|
140
|
+
);
|
|
141
|
+
}
|
|
121
142
|
});
|
|
122
143
|
function getPatternIcon(type) {
|
|
123
144
|
const icons = {
|
package/dist/index.d.mts
CHANGED
|
@@ -5,6 +5,8 @@ interface DuplicatePattern {
|
|
|
5
5
|
file2: string;
|
|
6
6
|
line1: number;
|
|
7
7
|
line2: number;
|
|
8
|
+
endLine1: number;
|
|
9
|
+
endLine2: number;
|
|
8
10
|
similarity: number;
|
|
9
11
|
snippet: string;
|
|
10
12
|
patternType: PatternType;
|
|
@@ -19,15 +21,31 @@ interface FileContent {
|
|
|
19
21
|
interface DetectionOptions {
|
|
20
22
|
minSimilarity: number;
|
|
21
23
|
minLines: number;
|
|
24
|
+
maxBlocks?: number;
|
|
25
|
+
batchSize?: number;
|
|
26
|
+
approx?: boolean;
|
|
27
|
+
minSharedTokens?: number;
|
|
28
|
+
maxCandidatesPerBlock?: number;
|
|
29
|
+
fastMode?: boolean;
|
|
30
|
+
maxComparisons?: number;
|
|
31
|
+
streamResults?: boolean;
|
|
22
32
|
}
|
|
23
33
|
/**
|
|
24
34
|
* Detect duplicate patterns across files with enhanced analysis
|
|
25
35
|
*/
|
|
26
|
-
declare function detectDuplicatePatterns(files: FileContent[], options: DetectionOptions): DuplicatePattern[]
|
|
36
|
+
declare function detectDuplicatePatterns(files: FileContent[], options: DetectionOptions): Promise<DuplicatePattern[]>;
|
|
27
37
|
|
|
28
38
|
interface PatternDetectOptions extends ScanOptions {
|
|
29
39
|
minSimilarity?: number;
|
|
30
40
|
minLines?: number;
|
|
41
|
+
maxBlocks?: number;
|
|
42
|
+
batchSize?: number;
|
|
43
|
+
approx?: boolean;
|
|
44
|
+
minSharedTokens?: number;
|
|
45
|
+
maxCandidatesPerBlock?: number;
|
|
46
|
+
fastMode?: boolean;
|
|
47
|
+
maxComparisons?: number;
|
|
48
|
+
streamResults?: boolean;
|
|
31
49
|
}
|
|
32
50
|
interface PatternSummary {
|
|
33
51
|
totalPatterns: number;
|
|
@@ -36,6 +54,10 @@ interface PatternSummary {
|
|
|
36
54
|
topDuplicates: Array<{
|
|
37
55
|
file1: string;
|
|
38
56
|
file2: string;
|
|
57
|
+
line1: number;
|
|
58
|
+
line2: number;
|
|
59
|
+
endLine1: number;
|
|
60
|
+
endLine2: number;
|
|
39
61
|
similarity: number;
|
|
40
62
|
patternType: PatternType;
|
|
41
63
|
tokenCost: number;
|
package/dist/index.d.ts
CHANGED
|
@@ -5,6 +5,8 @@ interface DuplicatePattern {
|
|
|
5
5
|
file2: string;
|
|
6
6
|
line1: number;
|
|
7
7
|
line2: number;
|
|
8
|
+
endLine1: number;
|
|
9
|
+
endLine2: number;
|
|
8
10
|
similarity: number;
|
|
9
11
|
snippet: string;
|
|
10
12
|
patternType: PatternType;
|
|
@@ -19,15 +21,31 @@ interface FileContent {
|
|
|
19
21
|
interface DetectionOptions {
|
|
20
22
|
minSimilarity: number;
|
|
21
23
|
minLines: number;
|
|
24
|
+
maxBlocks?: number;
|
|
25
|
+
batchSize?: number;
|
|
26
|
+
approx?: boolean;
|
|
27
|
+
minSharedTokens?: number;
|
|
28
|
+
maxCandidatesPerBlock?: number;
|
|
29
|
+
fastMode?: boolean;
|
|
30
|
+
maxComparisons?: number;
|
|
31
|
+
streamResults?: boolean;
|
|
22
32
|
}
|
|
23
33
|
/**
|
|
24
34
|
* Detect duplicate patterns across files with enhanced analysis
|
|
25
35
|
*/
|
|
26
|
-
declare function detectDuplicatePatterns(files: FileContent[], options: DetectionOptions): DuplicatePattern[]
|
|
36
|
+
declare function detectDuplicatePatterns(files: FileContent[], options: DetectionOptions): Promise<DuplicatePattern[]>;
|
|
27
37
|
|
|
28
38
|
interface PatternDetectOptions extends ScanOptions {
|
|
29
39
|
minSimilarity?: number;
|
|
30
40
|
minLines?: number;
|
|
41
|
+
maxBlocks?: number;
|
|
42
|
+
batchSize?: number;
|
|
43
|
+
approx?: boolean;
|
|
44
|
+
minSharedTokens?: number;
|
|
45
|
+
maxCandidatesPerBlock?: number;
|
|
46
|
+
fastMode?: boolean;
|
|
47
|
+
maxComparisons?: number;
|
|
48
|
+
streamResults?: boolean;
|
|
31
49
|
}
|
|
32
50
|
interface PatternSummary {
|
|
33
51
|
totalPatterns: number;
|
|
@@ -36,6 +54,10 @@ interface PatternSummary {
|
|
|
36
54
|
topDuplicates: Array<{
|
|
37
55
|
file1: string;
|
|
38
56
|
file2: string;
|
|
57
|
+
line1: number;
|
|
58
|
+
line2: number;
|
|
59
|
+
endLine1: number;
|
|
60
|
+
endLine2: number;
|
|
39
61
|
similarity: number;
|
|
40
62
|
patternType: PatternType;
|
|
41
63
|
tokenCost: number;
|