@aiready/pattern-detect 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +214 -3
- package/dist/chunk-AXHGYYYZ.mjs +404 -0
- package/dist/chunk-JKVKOXYR.mjs +407 -0
- package/dist/chunk-OFGMDX66.mjs +402 -0
- package/dist/chunk-QE4E3F7C.mjs +410 -0
- package/dist/chunk-TXWPOVYU.mjs +402 -0
- package/dist/cli.js +265 -65
- package/dist/cli.mjs +52 -32
- package/dist/index.d.mts +20 -3
- package/dist/index.d.ts +20 -3
- package/dist/index.js +214 -34
- package/dist/index.mjs +1 -1
- package/package.json +11 -11
- package/dist/chunk-K5O2HVB5.mjs +0 -114
- package/dist/chunk-RLWJXASG.mjs +0 -227
package/dist/cli.js
CHANGED
|
@@ -82,6 +82,7 @@ function extractCodeBlocks(content, minLines) {
|
|
|
82
82
|
blocks.push({
|
|
83
83
|
content: blockContent,
|
|
84
84
|
startLine: blockStart + 1,
|
|
85
|
+
endLine: i + 1,
|
|
85
86
|
patternType: categorizePattern(blockContent),
|
|
86
87
|
linesOfCode
|
|
87
88
|
});
|
|
@@ -97,47 +98,198 @@ function extractCodeBlocks(content, minLines) {
|
|
|
97
98
|
function normalizeCode(code) {
|
|
98
99
|
return code.replace(/\/\/.*$/gm, "").replace(/\/\*[\s\S]*?\*\//g, "").replace(/"[^"]*"/g, '"STR"').replace(/'[^']*'/g, "'STR'").replace(/`[^`]*`/g, "`STR`").replace(/\b\d+\b/g, "NUM").replace(/\s+/g, " ").trim();
|
|
99
100
|
}
|
|
100
|
-
function
|
|
101
|
-
const
|
|
102
|
-
const
|
|
103
|
-
|
|
104
|
-
const
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
101
|
+
function jaccardSimilarity(tokens1, tokens2) {
|
|
102
|
+
const set1 = new Set(tokens1);
|
|
103
|
+
const set2 = new Set(tokens2);
|
|
104
|
+
let intersection = 0;
|
|
105
|
+
for (const token of set1) {
|
|
106
|
+
if (set2.has(token)) intersection++;
|
|
107
|
+
}
|
|
108
|
+
const union = set1.size + set2.size - intersection;
|
|
109
|
+
return union === 0 ? 0 : intersection / union;
|
|
108
110
|
}
|
|
109
|
-
function detectDuplicatePatterns(files, options) {
|
|
110
|
-
const {
|
|
111
|
+
async function detectDuplicatePatterns(files, options) {
|
|
112
|
+
const {
|
|
113
|
+
minSimilarity,
|
|
114
|
+
minLines,
|
|
115
|
+
batchSize = 100,
|
|
116
|
+
approx = true,
|
|
117
|
+
minSharedTokens = 8,
|
|
118
|
+
maxCandidatesPerBlock = 100,
|
|
119
|
+
streamResults = false
|
|
120
|
+
} = options;
|
|
111
121
|
const duplicates = [];
|
|
122
|
+
const maxComparisons = approx ? Infinity : 5e5;
|
|
112
123
|
const allBlocks = files.flatMap(
|
|
113
124
|
(file) => extractCodeBlocks(file.content, minLines).map((block) => ({
|
|
114
|
-
|
|
125
|
+
content: block.content,
|
|
126
|
+
startLine: block.startLine,
|
|
127
|
+
endLine: block.endLine,
|
|
115
128
|
file: file.file,
|
|
116
129
|
normalized: normalizeCode(block.content),
|
|
117
|
-
|
|
130
|
+
patternType: block.patternType,
|
|
131
|
+
tokenCost: (0, import_core.estimateTokens)(block.content),
|
|
132
|
+
linesOfCode: block.linesOfCode
|
|
118
133
|
}))
|
|
119
134
|
);
|
|
120
135
|
console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
|
|
136
|
+
if (!approx && allBlocks.length > 500) {
|
|
137
|
+
console.log(`\u26A0\uFE0F Using --no-approx mode with ${allBlocks.length} blocks may be slow (O(B\xB2) complexity).`);
|
|
138
|
+
console.log(` Consider using approximate mode (default) for better performance.`);
|
|
139
|
+
}
|
|
140
|
+
const stopwords = /* @__PURE__ */ new Set([
|
|
141
|
+
"return",
|
|
142
|
+
"const",
|
|
143
|
+
"let",
|
|
144
|
+
"var",
|
|
145
|
+
"function",
|
|
146
|
+
"class",
|
|
147
|
+
"new",
|
|
148
|
+
"if",
|
|
149
|
+
"else",
|
|
150
|
+
"for",
|
|
151
|
+
"while",
|
|
152
|
+
"async",
|
|
153
|
+
"await",
|
|
154
|
+
"try",
|
|
155
|
+
"catch",
|
|
156
|
+
"switch",
|
|
157
|
+
"case",
|
|
158
|
+
"default",
|
|
159
|
+
"import",
|
|
160
|
+
"export",
|
|
161
|
+
"from",
|
|
162
|
+
"true",
|
|
163
|
+
"false",
|
|
164
|
+
"null",
|
|
165
|
+
"undefined",
|
|
166
|
+
"this"
|
|
167
|
+
]);
|
|
168
|
+
const tokenize = (norm) => norm.split(/[\s(){}\[\];,\.]+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
|
|
169
|
+
const blockTokens = allBlocks.map((b) => tokenize(b.normalized));
|
|
170
|
+
const invertedIndex = /* @__PURE__ */ new Map();
|
|
171
|
+
if (approx) {
|
|
172
|
+
for (let i = 0; i < blockTokens.length; i++) {
|
|
173
|
+
for (const tok of blockTokens[i]) {
|
|
174
|
+
let arr = invertedIndex.get(tok);
|
|
175
|
+
if (!arr) {
|
|
176
|
+
arr = [];
|
|
177
|
+
invertedIndex.set(tok, arr);
|
|
178
|
+
}
|
|
179
|
+
arr.push(i);
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
const totalComparisons = approx ? void 0 : allBlocks.length * (allBlocks.length - 1) / 2;
|
|
184
|
+
if (totalComparisons !== void 0) {
|
|
185
|
+
console.log(`Processing ${totalComparisons.toLocaleString()} comparisons in batches...`);
|
|
186
|
+
} else {
|
|
187
|
+
console.log(`Using approximate candidate selection to reduce comparisons...`);
|
|
188
|
+
}
|
|
189
|
+
let comparisonsProcessed = 0;
|
|
190
|
+
let comparisonsBudgetExhausted = false;
|
|
191
|
+
const startTime = Date.now();
|
|
121
192
|
for (let i = 0; i < allBlocks.length; i++) {
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
linesOfCode: block1.linesOfCode
|
|
138
|
-
});
|
|
193
|
+
if (maxComparisons && comparisonsProcessed >= maxComparisons) {
|
|
194
|
+
comparisonsBudgetExhausted = true;
|
|
195
|
+
break;
|
|
196
|
+
}
|
|
197
|
+
if (i % batchSize === 0 && i > 0) {
|
|
198
|
+
const elapsed = ((Date.now() - startTime) / 1e3).toFixed(1);
|
|
199
|
+
const duplicatesFound = duplicates.length;
|
|
200
|
+
if (totalComparisons !== void 0) {
|
|
201
|
+
const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
|
|
202
|
+
const remaining = totalComparisons - comparisonsProcessed;
|
|
203
|
+
const rate = comparisonsProcessed / parseFloat(elapsed);
|
|
204
|
+
const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
|
|
205
|
+
console.log(` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`);
|
|
206
|
+
} else {
|
|
207
|
+
console.log(` Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`);
|
|
139
208
|
}
|
|
209
|
+
await new Promise((resolve) => setImmediate(resolve));
|
|
140
210
|
}
|
|
211
|
+
const block1 = allBlocks[i];
|
|
212
|
+
let candidates = null;
|
|
213
|
+
if (approx) {
|
|
214
|
+
const counts = /* @__PURE__ */ new Map();
|
|
215
|
+
for (const tok of blockTokens[i]) {
|
|
216
|
+
const ids = invertedIndex.get(tok);
|
|
217
|
+
if (!ids) continue;
|
|
218
|
+
for (const j of ids) {
|
|
219
|
+
if (j <= i) continue;
|
|
220
|
+
if (allBlocks[j].file === block1.file) continue;
|
|
221
|
+
counts.set(j, (counts.get(j) || 0) + 1);
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
candidates = Array.from(counts.entries()).filter(([, shared]) => shared >= minSharedTokens).sort((a, b) => b[1] - a[1]).slice(0, maxCandidatesPerBlock).map(([j, shared]) => ({ j, shared }));
|
|
225
|
+
}
|
|
226
|
+
if (approx && candidates) {
|
|
227
|
+
for (const { j } of candidates) {
|
|
228
|
+
if (!approx && maxComparisons !== Infinity && comparisonsProcessed >= maxComparisons) {
|
|
229
|
+
console.log(`\u26A0\uFE0F Comparison safety limit reached (${maxComparisons.toLocaleString()} comparisons in --no-approx mode).`);
|
|
230
|
+
console.log(` This prevents excessive runtime on large repos. Consider using approximate mode (default) or --min-lines to reduce blocks.`);
|
|
231
|
+
break;
|
|
232
|
+
}
|
|
233
|
+
comparisonsProcessed++;
|
|
234
|
+
const block2 = allBlocks[j];
|
|
235
|
+
const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
|
|
236
|
+
if (similarity >= minSimilarity) {
|
|
237
|
+
const duplicate = {
|
|
238
|
+
file1: block1.file,
|
|
239
|
+
file2: block2.file,
|
|
240
|
+
line1: block1.startLine,
|
|
241
|
+
line2: block2.startLine,
|
|
242
|
+
endLine1: block1.endLine,
|
|
243
|
+
endLine2: block2.endLine,
|
|
244
|
+
similarity,
|
|
245
|
+
snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
|
|
246
|
+
patternType: block1.patternType,
|
|
247
|
+
tokenCost: block1.tokenCost + block2.tokenCost,
|
|
248
|
+
linesOfCode: block1.linesOfCode
|
|
249
|
+
};
|
|
250
|
+
duplicates.push(duplicate);
|
|
251
|
+
if (streamResults) {
|
|
252
|
+
console.log(`
|
|
253
|
+
\u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
|
|
254
|
+
console.log(` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
|
|
255
|
+
console.log(` Token cost: ${duplicate.tokenCost.toLocaleString()}`);
|
|
256
|
+
}
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
} else {
|
|
260
|
+
for (let j = i + 1; j < allBlocks.length; j++) {
|
|
261
|
+
if (maxComparisons && comparisonsProcessed >= maxComparisons) break;
|
|
262
|
+
comparisonsProcessed++;
|
|
263
|
+
const block2 = allBlocks[j];
|
|
264
|
+
if (block1.file === block2.file) continue;
|
|
265
|
+
const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
|
|
266
|
+
if (similarity >= minSimilarity) {
|
|
267
|
+
const duplicate = {
|
|
268
|
+
file1: block1.file,
|
|
269
|
+
file2: block2.file,
|
|
270
|
+
line1: block1.startLine,
|
|
271
|
+
line2: block2.startLine,
|
|
272
|
+
endLine1: block1.endLine,
|
|
273
|
+
endLine2: block2.endLine,
|
|
274
|
+
similarity,
|
|
275
|
+
snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
|
|
276
|
+
patternType: block1.patternType,
|
|
277
|
+
tokenCost: block1.tokenCost + block2.tokenCost,
|
|
278
|
+
linesOfCode: block1.linesOfCode
|
|
279
|
+
};
|
|
280
|
+
duplicates.push(duplicate);
|
|
281
|
+
if (streamResults) {
|
|
282
|
+
console.log(`
|
|
283
|
+
\u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
|
|
284
|
+
console.log(` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
|
|
285
|
+
console.log(` Token cost: ${duplicate.tokenCost.toLocaleString()}`);
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
if (comparisonsBudgetExhausted) {
|
|
292
|
+
console.log(`\u26A0\uFE0F Comparison budget exhausted (${maxComparisons.toLocaleString()} comparisons). Use --max-comparisons to increase.`);
|
|
141
293
|
}
|
|
142
294
|
return duplicates.sort(
|
|
143
295
|
(a, b) => b.similarity - a.similarity || b.tokenCost - a.tokenCost
|
|
@@ -159,7 +311,17 @@ function getRefactoringSuggestion(patternType, similarity) {
|
|
|
159
311
|
return baseMessages[patternType] + urgency;
|
|
160
312
|
}
|
|
161
313
|
async function analyzePatterns(options) {
|
|
162
|
-
const {
|
|
314
|
+
const {
|
|
315
|
+
minSimilarity = 0.4,
|
|
316
|
+
// Jaccard similarity default (40% threshold)
|
|
317
|
+
minLines = 5,
|
|
318
|
+
batchSize = 100,
|
|
319
|
+
approx = true,
|
|
320
|
+
minSharedTokens = 8,
|
|
321
|
+
maxCandidatesPerBlock = 100,
|
|
322
|
+
streamResults = false,
|
|
323
|
+
...scanOptions
|
|
324
|
+
} = options;
|
|
163
325
|
const files = await (0, import_core2.scanFiles)(scanOptions);
|
|
164
326
|
const results = [];
|
|
165
327
|
const fileContents = await Promise.all(
|
|
@@ -168,9 +330,14 @@ async function analyzePatterns(options) {
|
|
|
168
330
|
content: await (0, import_core2.readFileContent)(file)
|
|
169
331
|
}))
|
|
170
332
|
);
|
|
171
|
-
const duplicates = detectDuplicatePatterns(fileContents, {
|
|
333
|
+
const duplicates = await detectDuplicatePatterns(fileContents, {
|
|
172
334
|
minSimilarity,
|
|
173
|
-
minLines
|
|
335
|
+
minLines,
|
|
336
|
+
batchSize,
|
|
337
|
+
approx,
|
|
338
|
+
minSharedTokens,
|
|
339
|
+
maxCandidatesPerBlock,
|
|
340
|
+
streamResults
|
|
174
341
|
});
|
|
175
342
|
for (const file of files) {
|
|
176
343
|
const fileDuplicates = duplicates.filter(
|
|
@@ -233,8 +400,21 @@ function generateSummary(results) {
|
|
|
233
400
|
const typeMatch = issue.message.match(/^(\S+(?:-\S+)*) pattern/);
|
|
234
401
|
const fileMatch = issue.message.match(/similar to (.+?) \(/);
|
|
235
402
|
return {
|
|
236
|
-
|
|
237
|
-
|
|
403
|
+
files: [
|
|
404
|
+
{
|
|
405
|
+
path: issue.location.file,
|
|
406
|
+
startLine: issue.location.line,
|
|
407
|
+
endLine: 0
|
|
408
|
+
// Not available from Issue
|
|
409
|
+
},
|
|
410
|
+
{
|
|
411
|
+
path: fileMatch?.[1] || "unknown",
|
|
412
|
+
startLine: 0,
|
|
413
|
+
// Not available from Issue
|
|
414
|
+
endLine: 0
|
|
415
|
+
// Not available from Issue
|
|
416
|
+
}
|
|
417
|
+
],
|
|
238
418
|
similarity: similarityMatch ? parseInt(similarityMatch[1]) / 100 : 0,
|
|
239
419
|
patternType: typeMatch?.[1] || "unknown",
|
|
240
420
|
tokenCost: tokenMatch ? parseInt(tokenMatch[1]) : 0
|
|
@@ -253,19 +433,28 @@ var import_chalk = __toESM(require("chalk"));
|
|
|
253
433
|
var import_fs = require("fs");
|
|
254
434
|
var import_path = require("path");
|
|
255
435
|
var program = new import_commander.Command();
|
|
256
|
-
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.
|
|
436
|
+
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-stream-results", "Disable incremental output (default: enabled)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
|
|
257
437
|
"-o, --output <format>",
|
|
258
438
|
"Output format: console, json, html",
|
|
259
439
|
"console"
|
|
260
440
|
).option("--output-file <path>", "Output file path (for json/html)").action(async (directory, options) => {
|
|
261
441
|
console.log(import_chalk.default.blue("\u{1F50D} Analyzing patterns...\n"));
|
|
442
|
+
const startTime = Date.now();
|
|
262
443
|
const results = await analyzePatterns({
|
|
263
444
|
rootDir: directory,
|
|
264
445
|
minSimilarity: parseFloat(options.similarity),
|
|
265
446
|
minLines: parseInt(options.minLines),
|
|
447
|
+
batchSize: parseInt(options.batchSize),
|
|
448
|
+
approx: options.approx !== false,
|
|
449
|
+
// default true; --no-approx sets to false
|
|
450
|
+
minSharedTokens: parseInt(options.minSharedTokens),
|
|
451
|
+
maxCandidatesPerBlock: parseInt(options.maxCandidates),
|
|
452
|
+
streamResults: options.streamResults !== false,
|
|
453
|
+
// default true; --no-stream-results sets to false
|
|
266
454
|
include: options.include?.split(","),
|
|
267
455
|
exclude: options.exclude?.split(",")
|
|
268
456
|
});
|
|
457
|
+
const elapsedTime = ((Date.now() - startTime) / 1e3).toFixed(2);
|
|
269
458
|
const summary = generateSummary(results);
|
|
270
459
|
const totalIssues = results.reduce((sum, r) => sum + r.issues.length, 0);
|
|
271
460
|
if (options.output === "json") {
|
|
@@ -291,9 +480,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
291
480
|
\u2713 HTML report saved to ${outputPath}`));
|
|
292
481
|
return;
|
|
293
482
|
}
|
|
294
|
-
|
|
483
|
+
const terminalWidth = process.stdout.columns || 80;
|
|
484
|
+
const dividerWidth = Math.min(60, terminalWidth - 2);
|
|
485
|
+
const divider = "\u2501".repeat(dividerWidth);
|
|
486
|
+
console.log(import_chalk.default.cyan(divider));
|
|
295
487
|
console.log(import_chalk.default.bold.white(" PATTERN ANALYSIS SUMMARY"));
|
|
296
|
-
console.log(import_chalk.default.cyan(
|
|
488
|
+
console.log(import_chalk.default.cyan(divider) + "\n");
|
|
297
489
|
console.log(
|
|
298
490
|
import_chalk.default.white(`\u{1F4C1} Files analyzed: ${import_chalk.default.bold(results.length)}`)
|
|
299
491
|
);
|
|
@@ -305,18 +497,23 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
305
497
|
`\u{1F4B0} Token cost (wasted): ${import_chalk.default.bold(summary.totalTokenCost.toLocaleString())}`
|
|
306
498
|
)
|
|
307
499
|
);
|
|
308
|
-
console.log(
|
|
309
|
-
|
|
310
|
-
|
|
500
|
+
console.log(
|
|
501
|
+
import_chalk.default.gray(`\u23F1 Analysis time: ${import_chalk.default.bold(elapsedTime + "s")}`)
|
|
502
|
+
);
|
|
311
503
|
const sortedTypes = Object.entries(summary.patternsByType).filter(([, count]) => count > 0).sort(([, a], [, b]) => b - a);
|
|
312
|
-
sortedTypes.
|
|
313
|
-
|
|
314
|
-
console.log(
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
504
|
+
if (sortedTypes.length > 0) {
|
|
505
|
+
console.log(import_chalk.default.cyan("\n" + divider));
|
|
506
|
+
console.log(import_chalk.default.bold.white(" PATTERNS BY TYPE"));
|
|
507
|
+
console.log(import_chalk.default.cyan(divider) + "\n");
|
|
508
|
+
sortedTypes.forEach(([type, count]) => {
|
|
509
|
+
const icon = getPatternIcon(type);
|
|
510
|
+
console.log(`${icon} ${import_chalk.default.white(type.padEnd(15))} ${import_chalk.default.bold(count)}`);
|
|
511
|
+
});
|
|
512
|
+
}
|
|
513
|
+
if (summary.topDuplicates.length > 0 && totalIssues > 0) {
|
|
514
|
+
console.log(import_chalk.default.cyan("\n" + divider));
|
|
318
515
|
console.log(import_chalk.default.bold.white(" TOP DUPLICATE PATTERNS"));
|
|
319
|
-
console.log(import_chalk.default.cyan(
|
|
516
|
+
console.log(import_chalk.default.cyan(divider) + "\n");
|
|
320
517
|
summary.topDuplicates.slice(0, 10).forEach((dup, idx) => {
|
|
321
518
|
const severityColor = dup.similarity > 0.95 ? import_chalk.default.red : dup.similarity > 0.9 ? import_chalk.default.yellow : import_chalk.default.blue;
|
|
322
519
|
console.log(
|
|
@@ -324,12 +521,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
324
521
|
`${Math.round(dup.similarity * 100)}%`
|
|
325
522
|
)} ${getPatternIcon(dup.patternType)} ${import_chalk.default.white(dup.patternType)}`
|
|
326
523
|
);
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
);
|
|
524
|
+
dup.files.forEach((file, fileIdx) => {
|
|
525
|
+
const prefix = fileIdx === 0 ? " " : " \u2194 ";
|
|
526
|
+
console.log(
|
|
527
|
+
`${import_chalk.default.dim(prefix)}${import_chalk.default.dim(file.path)}:${import_chalk.default.cyan(file.startLine)}-${import_chalk.default.cyan(file.endLine)}`
|
|
528
|
+
);
|
|
529
|
+
});
|
|
333
530
|
console.log(
|
|
334
531
|
` ${import_chalk.default.red(`${dup.tokenCost.toLocaleString()} tokens wasted`)}
|
|
335
532
|
`
|
|
@@ -343,9 +540,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
343
540
|
(issue) => issue.severity === "critical"
|
|
344
541
|
);
|
|
345
542
|
if (criticalIssues.length > 0) {
|
|
346
|
-
console.log(import_chalk.default.cyan(
|
|
543
|
+
console.log(import_chalk.default.cyan(divider));
|
|
347
544
|
console.log(import_chalk.default.bold.white(" CRITICAL ISSUES (>95% similar)"));
|
|
348
|
-
console.log(import_chalk.default.cyan(
|
|
545
|
+
console.log(import_chalk.default.cyan(divider) + "\n");
|
|
349
546
|
criticalIssues.slice(0, 5).forEach((issue) => {
|
|
350
547
|
console.log(import_chalk.default.red("\u25CF ") + import_chalk.default.white(`${issue.file}:${issue.location.line}`));
|
|
351
548
|
console.log(` ${import_chalk.default.dim(issue.message)}`);
|
|
@@ -353,14 +550,19 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
353
550
|
`);
|
|
354
551
|
});
|
|
355
552
|
}
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
553
|
+
if (totalIssues === 0) {
|
|
554
|
+
console.log(import_chalk.default.green("\n\u2728 Great! No duplicate patterns detected.\n"));
|
|
555
|
+
}
|
|
556
|
+
console.log(import_chalk.default.cyan(divider));
|
|
557
|
+
if (totalIssues > 0) {
|
|
558
|
+
console.log(
|
|
559
|
+
import_chalk.default.white(
|
|
560
|
+
`
|
|
360
561
|
\u{1F4A1} Run with ${import_chalk.default.bold("--output json")} or ${import_chalk.default.bold("--output html")} for detailed reports
|
|
361
562
|
`
|
|
362
|
-
|
|
363
|
-
|
|
563
|
+
)
|
|
564
|
+
);
|
|
565
|
+
}
|
|
364
566
|
});
|
|
365
567
|
function getPatternIcon(type) {
|
|
366
568
|
const icons = {
|
|
@@ -421,8 +623,7 @@ function generateHTMLReport(summary, results) {
|
|
|
421
623
|
<tr>
|
|
422
624
|
<th>Similarity</th>
|
|
423
625
|
<th>Type</th>
|
|
424
|
-
<th>
|
|
425
|
-
<th>File 2</th>
|
|
626
|
+
<th>Files</th>
|
|
426
627
|
<th>Token Cost</th>
|
|
427
628
|
</tr>
|
|
428
629
|
</thead>
|
|
@@ -432,8 +633,7 @@ function generateHTMLReport(summary, results) {
|
|
|
432
633
|
<tr>
|
|
433
634
|
<td class="${dup.similarity > 0.95 ? "critical" : dup.similarity > 0.9 ? "major" : "minor"}">${Math.round(dup.similarity * 100)}%</td>
|
|
434
635
|
<td>${dup.patternType}</td>
|
|
435
|
-
<td
|
|
436
|
-
<td><code>${dup.file2}</code></td>
|
|
636
|
+
<td>${dup.files.map((f) => `<code>${f.path}:${f.startLine}-${f.endLine}</code>`).join("<br>\u2194<br>")}</td>
|
|
437
637
|
<td>${dup.tokenCost.toLocaleString()}</td>
|
|
438
638
|
</tr>
|
|
439
639
|
`
|
package/dist/cli.mjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import {
|
|
3
3
|
analyzePatterns,
|
|
4
4
|
generateSummary
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-JKVKOXYR.mjs";
|
|
6
6
|
|
|
7
7
|
// src/cli.ts
|
|
8
8
|
import { Command } from "commander";
|
|
@@ -10,19 +10,28 @@ import chalk from "chalk";
|
|
|
10
10
|
import { writeFileSync } from "fs";
|
|
11
11
|
import { join } from "path";
|
|
12
12
|
var program = new Command();
|
|
13
|
-
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.
|
|
13
|
+
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-stream-results", "Disable incremental output (default: enabled)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
|
|
14
14
|
"-o, --output <format>",
|
|
15
15
|
"Output format: console, json, html",
|
|
16
16
|
"console"
|
|
17
17
|
).option("--output-file <path>", "Output file path (for json/html)").action(async (directory, options) => {
|
|
18
18
|
console.log(chalk.blue("\u{1F50D} Analyzing patterns...\n"));
|
|
19
|
+
const startTime = Date.now();
|
|
19
20
|
const results = await analyzePatterns({
|
|
20
21
|
rootDir: directory,
|
|
21
22
|
minSimilarity: parseFloat(options.similarity),
|
|
22
23
|
minLines: parseInt(options.minLines),
|
|
24
|
+
batchSize: parseInt(options.batchSize),
|
|
25
|
+
approx: options.approx !== false,
|
|
26
|
+
// default true; --no-approx sets to false
|
|
27
|
+
minSharedTokens: parseInt(options.minSharedTokens),
|
|
28
|
+
maxCandidatesPerBlock: parseInt(options.maxCandidates),
|
|
29
|
+
streamResults: options.streamResults !== false,
|
|
30
|
+
// default true; --no-stream-results sets to false
|
|
23
31
|
include: options.include?.split(","),
|
|
24
32
|
exclude: options.exclude?.split(",")
|
|
25
33
|
});
|
|
34
|
+
const elapsedTime = ((Date.now() - startTime) / 1e3).toFixed(2);
|
|
26
35
|
const summary = generateSummary(results);
|
|
27
36
|
const totalIssues = results.reduce((sum, r) => sum + r.issues.length, 0);
|
|
28
37
|
if (options.output === "json") {
|
|
@@ -48,9 +57,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
48
57
|
\u2713 HTML report saved to ${outputPath}`));
|
|
49
58
|
return;
|
|
50
59
|
}
|
|
51
|
-
|
|
60
|
+
const terminalWidth = process.stdout.columns || 80;
|
|
61
|
+
const dividerWidth = Math.min(60, terminalWidth - 2);
|
|
62
|
+
const divider = "\u2501".repeat(dividerWidth);
|
|
63
|
+
console.log(chalk.cyan(divider));
|
|
52
64
|
console.log(chalk.bold.white(" PATTERN ANALYSIS SUMMARY"));
|
|
53
|
-
console.log(chalk.cyan(
|
|
65
|
+
console.log(chalk.cyan(divider) + "\n");
|
|
54
66
|
console.log(
|
|
55
67
|
chalk.white(`\u{1F4C1} Files analyzed: ${chalk.bold(results.length)}`)
|
|
56
68
|
);
|
|
@@ -62,18 +74,23 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
62
74
|
`\u{1F4B0} Token cost (wasted): ${chalk.bold(summary.totalTokenCost.toLocaleString())}`
|
|
63
75
|
)
|
|
64
76
|
);
|
|
65
|
-
console.log(
|
|
66
|
-
|
|
67
|
-
|
|
77
|
+
console.log(
|
|
78
|
+
chalk.gray(`\u23F1 Analysis time: ${chalk.bold(elapsedTime + "s")}`)
|
|
79
|
+
);
|
|
68
80
|
const sortedTypes = Object.entries(summary.patternsByType).filter(([, count]) => count > 0).sort(([, a], [, b]) => b - a);
|
|
69
|
-
sortedTypes.
|
|
70
|
-
|
|
71
|
-
console.log(
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
81
|
+
if (sortedTypes.length > 0) {
|
|
82
|
+
console.log(chalk.cyan("\n" + divider));
|
|
83
|
+
console.log(chalk.bold.white(" PATTERNS BY TYPE"));
|
|
84
|
+
console.log(chalk.cyan(divider) + "\n");
|
|
85
|
+
sortedTypes.forEach(([type, count]) => {
|
|
86
|
+
const icon = getPatternIcon(type);
|
|
87
|
+
console.log(`${icon} ${chalk.white(type.padEnd(15))} ${chalk.bold(count)}`);
|
|
88
|
+
});
|
|
89
|
+
}
|
|
90
|
+
if (summary.topDuplicates.length > 0 && totalIssues > 0) {
|
|
91
|
+
console.log(chalk.cyan("\n" + divider));
|
|
75
92
|
console.log(chalk.bold.white(" TOP DUPLICATE PATTERNS"));
|
|
76
|
-
console.log(chalk.cyan(
|
|
93
|
+
console.log(chalk.cyan(divider) + "\n");
|
|
77
94
|
summary.topDuplicates.slice(0, 10).forEach((dup, idx) => {
|
|
78
95
|
const severityColor = dup.similarity > 0.95 ? chalk.red : dup.similarity > 0.9 ? chalk.yellow : chalk.blue;
|
|
79
96
|
console.log(
|
|
@@ -81,12 +98,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
81
98
|
`${Math.round(dup.similarity * 100)}%`
|
|
82
99
|
)} ${getPatternIcon(dup.patternType)} ${chalk.white(dup.patternType)}`
|
|
83
100
|
);
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
);
|
|
101
|
+
dup.files.forEach((file, fileIdx) => {
|
|
102
|
+
const prefix = fileIdx === 0 ? " " : " \u2194 ";
|
|
103
|
+
console.log(
|
|
104
|
+
`${chalk.dim(prefix)}${chalk.dim(file.path)}:${chalk.cyan(file.startLine)}-${chalk.cyan(file.endLine)}`
|
|
105
|
+
);
|
|
106
|
+
});
|
|
90
107
|
console.log(
|
|
91
108
|
` ${chalk.red(`${dup.tokenCost.toLocaleString()} tokens wasted`)}
|
|
92
109
|
`
|
|
@@ -100,9 +117,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
100
117
|
(issue) => issue.severity === "critical"
|
|
101
118
|
);
|
|
102
119
|
if (criticalIssues.length > 0) {
|
|
103
|
-
console.log(chalk.cyan(
|
|
120
|
+
console.log(chalk.cyan(divider));
|
|
104
121
|
console.log(chalk.bold.white(" CRITICAL ISSUES (>95% similar)"));
|
|
105
|
-
console.log(chalk.cyan(
|
|
122
|
+
console.log(chalk.cyan(divider) + "\n");
|
|
106
123
|
criticalIssues.slice(0, 5).forEach((issue) => {
|
|
107
124
|
console.log(chalk.red("\u25CF ") + chalk.white(`${issue.file}:${issue.location.line}`));
|
|
108
125
|
console.log(` ${chalk.dim(issue.message)}`);
|
|
@@ -110,14 +127,19 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
110
127
|
`);
|
|
111
128
|
});
|
|
112
129
|
}
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
130
|
+
if (totalIssues === 0) {
|
|
131
|
+
console.log(chalk.green("\n\u2728 Great! No duplicate patterns detected.\n"));
|
|
132
|
+
}
|
|
133
|
+
console.log(chalk.cyan(divider));
|
|
134
|
+
if (totalIssues > 0) {
|
|
135
|
+
console.log(
|
|
136
|
+
chalk.white(
|
|
137
|
+
`
|
|
117
138
|
\u{1F4A1} Run with ${chalk.bold("--output json")} or ${chalk.bold("--output html")} for detailed reports
|
|
118
139
|
`
|
|
119
|
-
|
|
120
|
-
|
|
140
|
+
)
|
|
141
|
+
);
|
|
142
|
+
}
|
|
121
143
|
});
|
|
122
144
|
function getPatternIcon(type) {
|
|
123
145
|
const icons = {
|
|
@@ -178,8 +200,7 @@ function generateHTMLReport(summary, results) {
|
|
|
178
200
|
<tr>
|
|
179
201
|
<th>Similarity</th>
|
|
180
202
|
<th>Type</th>
|
|
181
|
-
<th>
|
|
182
|
-
<th>File 2</th>
|
|
203
|
+
<th>Files</th>
|
|
183
204
|
<th>Token Cost</th>
|
|
184
205
|
</tr>
|
|
185
206
|
</thead>
|
|
@@ -189,8 +210,7 @@ function generateHTMLReport(summary, results) {
|
|
|
189
210
|
<tr>
|
|
190
211
|
<td class="${dup.similarity > 0.95 ? "critical" : dup.similarity > 0.9 ? "major" : "minor"}">${Math.round(dup.similarity * 100)}%</td>
|
|
191
212
|
<td>${dup.patternType}</td>
|
|
192
|
-
<td
|
|
193
|
-
<td><code>${dup.file2}</code></td>
|
|
213
|
+
<td>${dup.files.map((f) => `<code>${f.path}:${f.startLine}-${f.endLine}</code>`).join("<br>\u2194<br>")}</td>
|
|
194
214
|
<td>${dup.tokenCost.toLocaleString()}</td>
|
|
195
215
|
</tr>
|
|
196
216
|
`
|
package/dist/index.d.mts
CHANGED
|
@@ -5,6 +5,8 @@ interface DuplicatePattern {
|
|
|
5
5
|
file2: string;
|
|
6
6
|
line1: number;
|
|
7
7
|
line2: number;
|
|
8
|
+
endLine1: number;
|
|
9
|
+
endLine2: number;
|
|
8
10
|
similarity: number;
|
|
9
11
|
snippet: string;
|
|
10
12
|
patternType: PatternType;
|
|
@@ -19,23 +21,38 @@ interface FileContent {
|
|
|
19
21
|
interface DetectionOptions {
|
|
20
22
|
minSimilarity: number;
|
|
21
23
|
minLines: number;
|
|
24
|
+
maxBlocks?: number;
|
|
25
|
+
batchSize?: number;
|
|
26
|
+
approx?: boolean;
|
|
27
|
+
minSharedTokens?: number;
|
|
28
|
+
maxCandidatesPerBlock?: number;
|
|
29
|
+
maxComparisons?: number;
|
|
30
|
+
streamResults?: boolean;
|
|
22
31
|
}
|
|
23
32
|
/**
|
|
24
33
|
* Detect duplicate patterns across files with enhanced analysis
|
|
25
34
|
*/
|
|
26
|
-
declare function detectDuplicatePatterns(files: FileContent[], options: DetectionOptions): DuplicatePattern[]
|
|
35
|
+
declare function detectDuplicatePatterns(files: FileContent[], options: DetectionOptions): Promise<DuplicatePattern[]>;
|
|
27
36
|
|
|
28
37
|
interface PatternDetectOptions extends ScanOptions {
|
|
29
38
|
minSimilarity?: number;
|
|
30
39
|
minLines?: number;
|
|
40
|
+
batchSize?: number;
|
|
41
|
+
approx?: boolean;
|
|
42
|
+
minSharedTokens?: number;
|
|
43
|
+
maxCandidatesPerBlock?: number;
|
|
44
|
+
streamResults?: boolean;
|
|
31
45
|
}
|
|
32
46
|
interface PatternSummary {
|
|
33
47
|
totalPatterns: number;
|
|
34
48
|
totalTokenCost: number;
|
|
35
49
|
patternsByType: Record<PatternType, number>;
|
|
36
50
|
topDuplicates: Array<{
|
|
37
|
-
|
|
38
|
-
|
|
51
|
+
files: Array<{
|
|
52
|
+
path: string;
|
|
53
|
+
startLine: number;
|
|
54
|
+
endLine: number;
|
|
55
|
+
}>;
|
|
39
56
|
similarity: number;
|
|
40
57
|
patternType: PatternType;
|
|
41
58
|
tokenCost: number;
|