@aiready/pattern-detect 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +149 -38
- package/dist/{chunk-N5DE7IYX.mjs → chunk-AXHGYYYZ.mjs} +5 -17
- package/dist/{chunk-57O7FEEM.mjs → chunk-JKVKOXYR.mjs} +43 -36
- package/dist/{chunk-DNI7S33V.mjs → chunk-OFGMDX66.mjs} +34 -31
- package/dist/{chunk-4CZGZIDL.mjs → chunk-QE4E3F7C.mjs} +20 -19
- package/dist/{chunk-ZNZ5O435.mjs → chunk-TXWPOVYU.mjs} +37 -35
- package/dist/cli.js +45 -55
- package/dist/cli.mjs +17 -18
- package/dist/index.d.mts +5 -10
- package/dist/index.d.ts +5 -10
- package/dist/index.js +29 -38
- package/dist/index.mjs +1 -1
- package/package.json +11 -11
- package/dist/chunk-6VQTQRDW.mjs +0 -245
- package/dist/chunk-JTJXOIO2.mjs +0 -378
- package/dist/chunk-K5O2HVB5.mjs +0 -114
- package/dist/chunk-RLWJXASG.mjs +0 -227
- package/dist/chunk-YA3N6EC5.mjs +0 -351
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import { scanFiles, readFileContent } from "@aiready/core";
|
|
3
3
|
|
|
4
4
|
// src/detector.ts
|
|
5
|
-
import {
|
|
5
|
+
import { estimateTokens } from "@aiready/core";
|
|
6
6
|
function categorizePattern(code) {
|
|
7
7
|
const lower = code.toLowerCase();
|
|
8
8
|
if (lower.includes("request") && lower.includes("response") || lower.includes("router.") || lower.includes("app.get") || lower.includes("app.post") || lower.includes("express") || lower.includes("ctx.body")) {
|
|
@@ -80,15 +80,6 @@ function jaccardSimilarity(tokens1, tokens2) {
|
|
|
80
80
|
const union = set1.size + set2.size - intersection;
|
|
81
81
|
return union === 0 ? 0 : intersection / union;
|
|
82
82
|
}
|
|
83
|
-
function calculateSimilarity(block1, block2) {
|
|
84
|
-
const norm1 = normalizeCode(block1);
|
|
85
|
-
const norm2 = normalizeCode(block2);
|
|
86
|
-
const baseSimilarity = similarityScore(norm1, norm2);
|
|
87
|
-
const tokens1 = norm1.split(/[\s(){}[\];,]+/).filter(Boolean);
|
|
88
|
-
const tokens2 = norm2.split(/[\s(){}[\];,]+/).filter(Boolean);
|
|
89
|
-
const tokenSimilarity = similarityScore(tokens1.join(" "), tokens2.join(" "));
|
|
90
|
-
return baseSimilarity * 0.4 + tokenSimilarity * 0.6;
|
|
91
|
-
}
|
|
92
83
|
async function detectDuplicatePatterns(files, options) {
|
|
93
84
|
const {
|
|
94
85
|
minSimilarity,
|
|
@@ -98,7 +89,6 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
98
89
|
approx = true,
|
|
99
90
|
minSharedTokens = 8,
|
|
100
91
|
maxCandidatesPerBlock = 100,
|
|
101
|
-
fastMode = true,
|
|
102
92
|
maxComparisons = 5e4,
|
|
103
93
|
// Cap at 50K comparisons by default
|
|
104
94
|
streamResults = false
|
|
@@ -213,7 +203,7 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
213
203
|
if (maxComparisons && comparisonsProcessed >= maxComparisons) break;
|
|
214
204
|
comparisonsProcessed++;
|
|
215
205
|
const block2 = allBlocks[j];
|
|
216
|
-
const similarity =
|
|
206
|
+
const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
|
|
217
207
|
if (similarity >= minSimilarity) {
|
|
218
208
|
const duplicate = {
|
|
219
209
|
file1: block1.file,
|
|
@@ -243,7 +233,7 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
243
233
|
comparisonsProcessed++;
|
|
244
234
|
const block2 = allBlocks[j];
|
|
245
235
|
if (block1.file === block2.file) continue;
|
|
246
|
-
const similarity =
|
|
236
|
+
const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
|
|
247
237
|
if (similarity >= minSimilarity) {
|
|
248
238
|
const duplicate = {
|
|
249
239
|
file1: block1.file,
|
|
@@ -293,15 +283,14 @@ function getRefactoringSuggestion(patternType, similarity) {
|
|
|
293
283
|
}
|
|
294
284
|
async function analyzePatterns(options) {
|
|
295
285
|
const {
|
|
296
|
-
minSimilarity = 0.
|
|
297
|
-
//
|
|
286
|
+
minSimilarity = 0.4,
|
|
287
|
+
// Jaccard similarity default (40% threshold)
|
|
298
288
|
minLines = 5,
|
|
299
289
|
maxBlocks = 500,
|
|
300
290
|
batchSize = 100,
|
|
301
291
|
approx = true,
|
|
302
292
|
minSharedTokens = 8,
|
|
303
293
|
maxCandidatesPerBlock = 100,
|
|
304
|
-
fastMode = true,
|
|
305
294
|
maxComparisons = 5e4,
|
|
306
295
|
streamResults = false,
|
|
307
296
|
...scanOptions
|
|
@@ -322,7 +311,6 @@ async function analyzePatterns(options) {
|
|
|
322
311
|
approx,
|
|
323
312
|
minSharedTokens,
|
|
324
313
|
maxCandidatesPerBlock,
|
|
325
|
-
fastMode,
|
|
326
314
|
maxComparisons,
|
|
327
315
|
streamResults
|
|
328
316
|
});
|
|
@@ -387,8 +375,21 @@ function generateSummary(results) {
|
|
|
387
375
|
const typeMatch = issue.message.match(/^(\S+(?:-\S+)*) pattern/);
|
|
388
376
|
const fileMatch = issue.message.match(/similar to (.+?) \(/);
|
|
389
377
|
return {
|
|
390
|
-
|
|
391
|
-
|
|
378
|
+
files: [
|
|
379
|
+
{
|
|
380
|
+
path: issue.location.file,
|
|
381
|
+
startLine: issue.location.line,
|
|
382
|
+
endLine: 0
|
|
383
|
+
// Not available from Issue
|
|
384
|
+
},
|
|
385
|
+
{
|
|
386
|
+
path: fileMatch?.[1] || "unknown",
|
|
387
|
+
startLine: 0,
|
|
388
|
+
// Not available from Issue
|
|
389
|
+
endLine: 0
|
|
390
|
+
// Not available from Issue
|
|
391
|
+
}
|
|
392
|
+
],
|
|
392
393
|
similarity: similarityMatch ? parseInt(similarityMatch[1]) / 100 : 0,
|
|
393
394
|
patternType: typeMatch?.[1] || "unknown",
|
|
394
395
|
tokenCost: tokenMatch ? parseInt(tokenMatch[1]) : 0
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import { scanFiles, readFileContent } from "@aiready/core";
|
|
3
3
|
|
|
4
4
|
// src/detector.ts
|
|
5
|
-
import {
|
|
5
|
+
import { estimateTokens } from "@aiready/core";
|
|
6
6
|
function categorizePattern(code) {
|
|
7
7
|
const lower = code.toLowerCase();
|
|
8
8
|
if (lower.includes("request") && lower.includes("response") || lower.includes("router.") || lower.includes("app.get") || lower.includes("app.post") || lower.includes("express") || lower.includes("ctx.body")) {
|
|
@@ -54,6 +54,7 @@ function extractCodeBlocks(content, minLines) {
|
|
|
54
54
|
blocks.push({
|
|
55
55
|
content: blockContent,
|
|
56
56
|
startLine: blockStart + 1,
|
|
57
|
+
endLine: i + 1,
|
|
57
58
|
patternType: categorizePattern(blockContent),
|
|
58
59
|
linesOfCode
|
|
59
60
|
});
|
|
@@ -79,44 +80,32 @@ function jaccardSimilarity(tokens1, tokens2) {
|
|
|
79
80
|
const union = set1.size + set2.size - intersection;
|
|
80
81
|
return union === 0 ? 0 : intersection / union;
|
|
81
82
|
}
|
|
82
|
-
function calculateSimilarity(block1, block2) {
|
|
83
|
-
const norm1 = normalizeCode(block1);
|
|
84
|
-
const norm2 = normalizeCode(block2);
|
|
85
|
-
const baseSimilarity = similarityScore(norm1, norm2);
|
|
86
|
-
const tokens1 = norm1.split(/[\s(){}[\];,]+/).filter(Boolean);
|
|
87
|
-
const tokens2 = norm2.split(/[\s(){}[\];,]+/).filter(Boolean);
|
|
88
|
-
const tokenSimilarity = similarityScore(tokens1.join(" "), tokens2.join(" "));
|
|
89
|
-
return baseSimilarity * 0.4 + tokenSimilarity * 0.6;
|
|
90
|
-
}
|
|
91
83
|
async function detectDuplicatePatterns(files, options) {
|
|
92
84
|
const {
|
|
93
85
|
minSimilarity,
|
|
94
86
|
minLines,
|
|
95
|
-
maxBlocks = 500,
|
|
96
87
|
batchSize = 100,
|
|
97
88
|
approx = true,
|
|
98
89
|
minSharedTokens = 8,
|
|
99
90
|
maxCandidatesPerBlock = 100,
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
// Cap at 50K comparisons by default
|
|
91
|
+
maxComparisons = 1e5,
|
|
92
|
+
// Cap at 100K comparisons by default
|
|
103
93
|
streamResults = false
|
|
104
94
|
} = options;
|
|
105
95
|
const duplicates = [];
|
|
106
|
-
|
|
96
|
+
const allBlocks = files.flatMap(
|
|
107
97
|
(file) => extractCodeBlocks(file.content, minLines).map((block) => ({
|
|
108
|
-
|
|
98
|
+
content: block.content,
|
|
99
|
+
startLine: block.startLine,
|
|
100
|
+
endLine: block.endLine,
|
|
109
101
|
file: file.file,
|
|
110
102
|
normalized: normalizeCode(block.content),
|
|
111
|
-
|
|
103
|
+
patternType: block.patternType,
|
|
104
|
+
tokenCost: estimateTokens(block.content),
|
|
105
|
+
linesOfCode: block.linesOfCode
|
|
112
106
|
}))
|
|
113
107
|
);
|
|
114
108
|
console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
|
|
115
|
-
if (allBlocks.length > maxBlocks) {
|
|
116
|
-
console.log(`\u26A0\uFE0F Limiting to ${maxBlocks} blocks (sorted by size) to prevent memory issues`);
|
|
117
|
-
console.log(` Use --max-blocks to increase limit or --min-lines to filter smaller blocks`);
|
|
118
|
-
allBlocks = allBlocks.sort((a, b) => b.linesOfCode - a.linesOfCode).slice(0, maxBlocks);
|
|
119
|
-
}
|
|
120
109
|
const stopwords = /* @__PURE__ */ new Set([
|
|
121
110
|
"return",
|
|
122
111
|
"const",
|
|
@@ -208,13 +197,15 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
208
197
|
if (maxComparisons && comparisonsProcessed >= maxComparisons) break;
|
|
209
198
|
comparisonsProcessed++;
|
|
210
199
|
const block2 = allBlocks[j];
|
|
211
|
-
const similarity =
|
|
200
|
+
const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
|
|
212
201
|
if (similarity >= minSimilarity) {
|
|
213
202
|
const duplicate = {
|
|
214
203
|
file1: block1.file,
|
|
215
204
|
file2: block2.file,
|
|
216
205
|
line1: block1.startLine,
|
|
217
206
|
line2: block2.startLine,
|
|
207
|
+
endLine1: block1.endLine,
|
|
208
|
+
endLine2: block2.endLine,
|
|
218
209
|
similarity,
|
|
219
210
|
snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
|
|
220
211
|
patternType: block1.patternType,
|
|
@@ -225,7 +216,7 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
225
216
|
if (streamResults) {
|
|
226
217
|
console.log(`
|
|
227
218
|
\u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
|
|
228
|
-
console.log(` ${duplicate.file1}:${duplicate.line1} \u21D4 ${duplicate.file2}:${duplicate.line2}`);
|
|
219
|
+
console.log(` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
|
|
229
220
|
console.log(` Token cost: ${duplicate.tokenCost.toLocaleString()}`);
|
|
230
221
|
}
|
|
231
222
|
}
|
|
@@ -236,13 +227,15 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
236
227
|
comparisonsProcessed++;
|
|
237
228
|
const block2 = allBlocks[j];
|
|
238
229
|
if (block1.file === block2.file) continue;
|
|
239
|
-
const similarity =
|
|
230
|
+
const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
|
|
240
231
|
if (similarity >= minSimilarity) {
|
|
241
232
|
const duplicate = {
|
|
242
233
|
file1: block1.file,
|
|
243
234
|
file2: block2.file,
|
|
244
235
|
line1: block1.startLine,
|
|
245
236
|
line2: block2.startLine,
|
|
237
|
+
endLine1: block1.endLine,
|
|
238
|
+
endLine2: block2.endLine,
|
|
246
239
|
similarity,
|
|
247
240
|
snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
|
|
248
241
|
patternType: block1.patternType,
|
|
@@ -253,7 +246,7 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
253
246
|
if (streamResults) {
|
|
254
247
|
console.log(`
|
|
255
248
|
\u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
|
|
256
|
-
console.log(` ${duplicate.file1}:${duplicate.line1} \u21D4 ${duplicate.file2}:${duplicate.line2}`);
|
|
249
|
+
console.log(` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
|
|
257
250
|
console.log(` Token cost: ${duplicate.tokenCost.toLocaleString()}`);
|
|
258
251
|
}
|
|
259
252
|
}
|
|
@@ -284,16 +277,14 @@ function getRefactoringSuggestion(patternType, similarity) {
|
|
|
284
277
|
}
|
|
285
278
|
async function analyzePatterns(options) {
|
|
286
279
|
const {
|
|
287
|
-
minSimilarity = 0.
|
|
288
|
-
//
|
|
280
|
+
minSimilarity = 0.4,
|
|
281
|
+
// Jaccard similarity default (40% threshold)
|
|
289
282
|
minLines = 5,
|
|
290
|
-
maxBlocks = 500,
|
|
291
283
|
batchSize = 100,
|
|
292
284
|
approx = true,
|
|
293
285
|
minSharedTokens = 8,
|
|
294
286
|
maxCandidatesPerBlock = 100,
|
|
295
|
-
|
|
296
|
-
maxComparisons = 5e4,
|
|
287
|
+
maxComparisons = 1e5,
|
|
297
288
|
streamResults = false,
|
|
298
289
|
...scanOptions
|
|
299
290
|
} = options;
|
|
@@ -308,12 +299,10 @@ async function analyzePatterns(options) {
|
|
|
308
299
|
const duplicates = await detectDuplicatePatterns(fileContents, {
|
|
309
300
|
minSimilarity,
|
|
310
301
|
minLines,
|
|
311
|
-
maxBlocks,
|
|
312
302
|
batchSize,
|
|
313
303
|
approx,
|
|
314
304
|
minSharedTokens,
|
|
315
305
|
maxCandidatesPerBlock,
|
|
316
|
-
fastMode,
|
|
317
306
|
maxComparisons,
|
|
318
307
|
streamResults
|
|
319
308
|
});
|
|
@@ -378,8 +367,21 @@ function generateSummary(results) {
|
|
|
378
367
|
const typeMatch = issue.message.match(/^(\S+(?:-\S+)*) pattern/);
|
|
379
368
|
const fileMatch = issue.message.match(/similar to (.+?) \(/);
|
|
380
369
|
return {
|
|
381
|
-
|
|
382
|
-
|
|
370
|
+
files: [
|
|
371
|
+
{
|
|
372
|
+
path: issue.location.file,
|
|
373
|
+
startLine: issue.location.line,
|
|
374
|
+
endLine: 0
|
|
375
|
+
// Not available from Issue
|
|
376
|
+
},
|
|
377
|
+
{
|
|
378
|
+
path: fileMatch?.[1] || "unknown",
|
|
379
|
+
startLine: 0,
|
|
380
|
+
// Not available from Issue
|
|
381
|
+
endLine: 0
|
|
382
|
+
// Not available from Issue
|
|
383
|
+
}
|
|
384
|
+
],
|
|
383
385
|
similarity: similarityMatch ? parseInt(similarityMatch[1]) / 100 : 0,
|
|
384
386
|
patternType: typeMatch?.[1] || "unknown",
|
|
385
387
|
tokenCost: tokenMatch ? parseInt(tokenMatch[1]) : 0
|
package/dist/cli.js
CHANGED
|
@@ -108,31 +108,19 @@ function jaccardSimilarity(tokens1, tokens2) {
|
|
|
108
108
|
const union = set1.size + set2.size - intersection;
|
|
109
109
|
return union === 0 ? 0 : intersection / union;
|
|
110
110
|
}
|
|
111
|
-
function calculateSimilarity(block1, block2) {
|
|
112
|
-
const norm1 = normalizeCode(block1);
|
|
113
|
-
const norm2 = normalizeCode(block2);
|
|
114
|
-
const baseSimilarity = (0, import_core.similarityScore)(norm1, norm2);
|
|
115
|
-
const tokens1 = norm1.split(/[\s(){}[\];,]+/).filter(Boolean);
|
|
116
|
-
const tokens2 = norm2.split(/[\s(){}[\];,]+/).filter(Boolean);
|
|
117
|
-
const tokenSimilarity = (0, import_core.similarityScore)(tokens1.join(" "), tokens2.join(" "));
|
|
118
|
-
return baseSimilarity * 0.4 + tokenSimilarity * 0.6;
|
|
119
|
-
}
|
|
120
111
|
async function detectDuplicatePatterns(files, options) {
|
|
121
112
|
const {
|
|
122
113
|
minSimilarity,
|
|
123
114
|
minLines,
|
|
124
|
-
maxBlocks = 500,
|
|
125
115
|
batchSize = 100,
|
|
126
116
|
approx = true,
|
|
127
117
|
minSharedTokens = 8,
|
|
128
118
|
maxCandidatesPerBlock = 100,
|
|
129
|
-
fastMode = true,
|
|
130
|
-
maxComparisons = 5e4,
|
|
131
|
-
// Cap at 50K comparisons by default
|
|
132
119
|
streamResults = false
|
|
133
120
|
} = options;
|
|
134
121
|
const duplicates = [];
|
|
135
|
-
|
|
122
|
+
const maxComparisons = approx ? Infinity : 5e5;
|
|
123
|
+
const allBlocks = files.flatMap(
|
|
136
124
|
(file) => extractCodeBlocks(file.content, minLines).map((block) => ({
|
|
137
125
|
content: block.content,
|
|
138
126
|
startLine: block.startLine,
|
|
@@ -145,10 +133,9 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
145
133
|
}))
|
|
146
134
|
);
|
|
147
135
|
console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
|
|
148
|
-
if (allBlocks.length >
|
|
149
|
-
console.log(`\u26A0\uFE0F
|
|
150
|
-
console.log(`
|
|
151
|
-
allBlocks = allBlocks.sort((a, b) => b.linesOfCode - a.linesOfCode).slice(0, maxBlocks);
|
|
136
|
+
if (!approx && allBlocks.length > 500) {
|
|
137
|
+
console.log(`\u26A0\uFE0F Using --no-approx mode with ${allBlocks.length} blocks may be slow (O(B\xB2) complexity).`);
|
|
138
|
+
console.log(` Consider using approximate mode (default) for better performance.`);
|
|
152
139
|
}
|
|
153
140
|
const stopwords = /* @__PURE__ */ new Set([
|
|
154
141
|
"return",
|
|
@@ -238,10 +225,14 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
238
225
|
}
|
|
239
226
|
if (approx && candidates) {
|
|
240
227
|
for (const { j } of candidates) {
|
|
241
|
-
if (maxComparisons && comparisonsProcessed >= maxComparisons)
|
|
228
|
+
if (!approx && maxComparisons !== Infinity && comparisonsProcessed >= maxComparisons) {
|
|
229
|
+
console.log(`\u26A0\uFE0F Comparison safety limit reached (${maxComparisons.toLocaleString()} comparisons in --no-approx mode).`);
|
|
230
|
+
console.log(` This prevents excessive runtime on large repos. Consider using approximate mode (default) or --min-lines to reduce blocks.`);
|
|
231
|
+
break;
|
|
232
|
+
}
|
|
242
233
|
comparisonsProcessed++;
|
|
243
234
|
const block2 = allBlocks[j];
|
|
244
|
-
const similarity =
|
|
235
|
+
const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
|
|
245
236
|
if (similarity >= minSimilarity) {
|
|
246
237
|
const duplicate = {
|
|
247
238
|
file1: block1.file,
|
|
@@ -271,7 +262,7 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
271
262
|
comparisonsProcessed++;
|
|
272
263
|
const block2 = allBlocks[j];
|
|
273
264
|
if (block1.file === block2.file) continue;
|
|
274
|
-
const similarity =
|
|
265
|
+
const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
|
|
275
266
|
if (similarity >= minSimilarity) {
|
|
276
267
|
const duplicate = {
|
|
277
268
|
file1: block1.file,
|
|
@@ -321,16 +312,13 @@ function getRefactoringSuggestion(patternType, similarity) {
|
|
|
321
312
|
}
|
|
322
313
|
async function analyzePatterns(options) {
|
|
323
314
|
const {
|
|
324
|
-
minSimilarity = 0.
|
|
325
|
-
//
|
|
315
|
+
minSimilarity = 0.4,
|
|
316
|
+
// Jaccard similarity default (40% threshold)
|
|
326
317
|
minLines = 5,
|
|
327
|
-
maxBlocks = 500,
|
|
328
318
|
batchSize = 100,
|
|
329
319
|
approx = true,
|
|
330
320
|
minSharedTokens = 8,
|
|
331
321
|
maxCandidatesPerBlock = 100,
|
|
332
|
-
fastMode = true,
|
|
333
|
-
maxComparisons = 5e4,
|
|
334
322
|
streamResults = false,
|
|
335
323
|
...scanOptions
|
|
336
324
|
} = options;
|
|
@@ -345,13 +333,10 @@ async function analyzePatterns(options) {
|
|
|
345
333
|
const duplicates = await detectDuplicatePatterns(fileContents, {
|
|
346
334
|
minSimilarity,
|
|
347
335
|
minLines,
|
|
348
|
-
maxBlocks,
|
|
349
336
|
batchSize,
|
|
350
337
|
approx,
|
|
351
338
|
minSharedTokens,
|
|
352
339
|
maxCandidatesPerBlock,
|
|
353
|
-
fastMode,
|
|
354
|
-
maxComparisons,
|
|
355
340
|
streamResults
|
|
356
341
|
});
|
|
357
342
|
for (const file of files) {
|
|
@@ -415,15 +400,21 @@ function generateSummary(results) {
|
|
|
415
400
|
const typeMatch = issue.message.match(/^(\S+(?:-\S+)*) pattern/);
|
|
416
401
|
const fileMatch = issue.message.match(/similar to (.+?) \(/);
|
|
417
402
|
return {
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
403
|
+
files: [
|
|
404
|
+
{
|
|
405
|
+
path: issue.location.file,
|
|
406
|
+
startLine: issue.location.line,
|
|
407
|
+
endLine: 0
|
|
408
|
+
// Not available from Issue
|
|
409
|
+
},
|
|
410
|
+
{
|
|
411
|
+
path: fileMatch?.[1] || "unknown",
|
|
412
|
+
startLine: 0,
|
|
413
|
+
// Not available from Issue
|
|
414
|
+
endLine: 0
|
|
415
|
+
// Not available from Issue
|
|
416
|
+
}
|
|
417
|
+
],
|
|
427
418
|
similarity: similarityMatch ? parseInt(similarityMatch[1]) / 100 : 0,
|
|
428
419
|
patternType: typeMatch?.[1] || "unknown",
|
|
429
420
|
tokenCost: tokenMatch ? parseInt(tokenMatch[1]) : 0
|
|
@@ -442,30 +433,28 @@ var import_chalk = __toESM(require("chalk"));
|
|
|
442
433
|
var import_fs = require("fs");
|
|
443
434
|
var import_path = require("path");
|
|
444
435
|
var program = new import_commander.Command();
|
|
445
|
-
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--
|
|
436
|
+
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-stream-results", "Disable incremental output (default: enabled)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
|
|
446
437
|
"-o, --output <format>",
|
|
447
438
|
"Output format: console, json, html",
|
|
448
439
|
"console"
|
|
449
440
|
).option("--output-file <path>", "Output file path (for json/html)").action(async (directory, options) => {
|
|
450
441
|
console.log(import_chalk.default.blue("\u{1F50D} Analyzing patterns...\n"));
|
|
442
|
+
const startTime = Date.now();
|
|
451
443
|
const results = await analyzePatterns({
|
|
452
444
|
rootDir: directory,
|
|
453
445
|
minSimilarity: parseFloat(options.similarity),
|
|
454
446
|
minLines: parseInt(options.minLines),
|
|
455
|
-
maxBlocks: parseInt(options.maxBlocks),
|
|
456
447
|
batchSize: parseInt(options.batchSize),
|
|
457
448
|
approx: options.approx !== false,
|
|
458
449
|
// default true; --no-approx sets to false
|
|
459
450
|
minSharedTokens: parseInt(options.minSharedTokens),
|
|
460
451
|
maxCandidatesPerBlock: parseInt(options.maxCandidates),
|
|
461
|
-
|
|
462
|
-
// default true; --no-
|
|
463
|
-
maxComparisons: parseInt(options.maxComparisons),
|
|
464
|
-
streamResults: options.streamResults === true,
|
|
465
|
-
// default false; --stream-results sets to true
|
|
452
|
+
streamResults: options.streamResults !== false,
|
|
453
|
+
// default true; --no-stream-results sets to false
|
|
466
454
|
include: options.include?.split(","),
|
|
467
455
|
exclude: options.exclude?.split(",")
|
|
468
456
|
});
|
|
457
|
+
const elapsedTime = ((Date.now() - startTime) / 1e3).toFixed(2);
|
|
469
458
|
const summary = generateSummary(results);
|
|
470
459
|
const totalIssues = results.reduce((sum, r) => sum + r.issues.length, 0);
|
|
471
460
|
if (options.output === "json") {
|
|
@@ -508,6 +497,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
508
497
|
`\u{1F4B0} Token cost (wasted): ${import_chalk.default.bold(summary.totalTokenCost.toLocaleString())}`
|
|
509
498
|
)
|
|
510
499
|
);
|
|
500
|
+
console.log(
|
|
501
|
+
import_chalk.default.gray(`\u23F1 Analysis time: ${import_chalk.default.bold(elapsedTime + "s")}`)
|
|
502
|
+
);
|
|
511
503
|
const sortedTypes = Object.entries(summary.patternsByType).filter(([, count]) => count > 0).sort(([, a], [, b]) => b - a);
|
|
512
504
|
if (sortedTypes.length > 0) {
|
|
513
505
|
console.log(import_chalk.default.cyan("\n" + divider));
|
|
@@ -529,12 +521,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
529
521
|
`${Math.round(dup.similarity * 100)}%`
|
|
530
522
|
)} ${getPatternIcon(dup.patternType)} ${import_chalk.default.white(dup.patternType)}`
|
|
531
523
|
);
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
);
|
|
524
|
+
dup.files.forEach((file, fileIdx) => {
|
|
525
|
+
const prefix = fileIdx === 0 ? " " : " \u2194 ";
|
|
526
|
+
console.log(
|
|
527
|
+
`${import_chalk.default.dim(prefix)}${import_chalk.default.dim(file.path)}:${import_chalk.default.cyan(file.startLine)}-${import_chalk.default.cyan(file.endLine)}`
|
|
528
|
+
);
|
|
529
|
+
});
|
|
538
530
|
console.log(
|
|
539
531
|
` ${import_chalk.default.red(`${dup.tokenCost.toLocaleString()} tokens wasted`)}
|
|
540
532
|
`
|
|
@@ -631,8 +623,7 @@ function generateHTMLReport(summary, results) {
|
|
|
631
623
|
<tr>
|
|
632
624
|
<th>Similarity</th>
|
|
633
625
|
<th>Type</th>
|
|
634
|
-
<th>
|
|
635
|
-
<th>File 2</th>
|
|
626
|
+
<th>Files</th>
|
|
636
627
|
<th>Token Cost</th>
|
|
637
628
|
</tr>
|
|
638
629
|
</thead>
|
|
@@ -642,8 +633,7 @@ function generateHTMLReport(summary, results) {
|
|
|
642
633
|
<tr>
|
|
643
634
|
<td class="${dup.similarity > 0.95 ? "critical" : dup.similarity > 0.9 ? "major" : "minor"}">${Math.round(dup.similarity * 100)}%</td>
|
|
644
635
|
<td>${dup.patternType}</td>
|
|
645
|
-
<td
|
|
646
|
-
<td><code>${dup.file2}</code></td>
|
|
636
|
+
<td>${dup.files.map((f) => `<code>${f.path}:${f.startLine}-${f.endLine}</code>`).join("<br>\u2194<br>")}</td>
|
|
647
637
|
<td>${dup.tokenCost.toLocaleString()}</td>
|
|
648
638
|
</tr>
|
|
649
639
|
`
|
package/dist/cli.mjs
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import {
|
|
3
3
|
analyzePatterns,
|
|
4
4
|
generateSummary
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-JKVKOXYR.mjs";
|
|
6
6
|
|
|
7
7
|
// src/cli.ts
|
|
8
8
|
import { Command } from "commander";
|
|
@@ -10,30 +10,28 @@ import chalk from "chalk";
|
|
|
10
10
|
import { writeFileSync } from "fs";
|
|
11
11
|
import { join } from "path";
|
|
12
12
|
var program = new Command();
|
|
13
|
-
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--
|
|
13
|
+
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-stream-results", "Disable incremental output (default: enabled)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
|
|
14
14
|
"-o, --output <format>",
|
|
15
15
|
"Output format: console, json, html",
|
|
16
16
|
"console"
|
|
17
17
|
).option("--output-file <path>", "Output file path (for json/html)").action(async (directory, options) => {
|
|
18
18
|
console.log(chalk.blue("\u{1F50D} Analyzing patterns...\n"));
|
|
19
|
+
const startTime = Date.now();
|
|
19
20
|
const results = await analyzePatterns({
|
|
20
21
|
rootDir: directory,
|
|
21
22
|
minSimilarity: parseFloat(options.similarity),
|
|
22
23
|
minLines: parseInt(options.minLines),
|
|
23
|
-
maxBlocks: parseInt(options.maxBlocks),
|
|
24
24
|
batchSize: parseInt(options.batchSize),
|
|
25
25
|
approx: options.approx !== false,
|
|
26
26
|
// default true; --no-approx sets to false
|
|
27
27
|
minSharedTokens: parseInt(options.minSharedTokens),
|
|
28
28
|
maxCandidatesPerBlock: parseInt(options.maxCandidates),
|
|
29
|
-
|
|
30
|
-
// default true; --no-
|
|
31
|
-
maxComparisons: parseInt(options.maxComparisons),
|
|
32
|
-
streamResults: options.streamResults === true,
|
|
33
|
-
// default false; --stream-results sets to true
|
|
29
|
+
streamResults: options.streamResults !== false,
|
|
30
|
+
// default true; --no-stream-results sets to false
|
|
34
31
|
include: options.include?.split(","),
|
|
35
32
|
exclude: options.exclude?.split(",")
|
|
36
33
|
});
|
|
34
|
+
const elapsedTime = ((Date.now() - startTime) / 1e3).toFixed(2);
|
|
37
35
|
const summary = generateSummary(results);
|
|
38
36
|
const totalIssues = results.reduce((sum, r) => sum + r.issues.length, 0);
|
|
39
37
|
if (options.output === "json") {
|
|
@@ -76,6 +74,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
76
74
|
`\u{1F4B0} Token cost (wasted): ${chalk.bold(summary.totalTokenCost.toLocaleString())}`
|
|
77
75
|
)
|
|
78
76
|
);
|
|
77
|
+
console.log(
|
|
78
|
+
chalk.gray(`\u23F1 Analysis time: ${chalk.bold(elapsedTime + "s")}`)
|
|
79
|
+
);
|
|
79
80
|
const sortedTypes = Object.entries(summary.patternsByType).filter(([, count]) => count > 0).sort(([, a], [, b]) => b - a);
|
|
80
81
|
if (sortedTypes.length > 0) {
|
|
81
82
|
console.log(chalk.cyan("\n" + divider));
|
|
@@ -97,12 +98,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
97
98
|
`${Math.round(dup.similarity * 100)}%`
|
|
98
99
|
)} ${getPatternIcon(dup.patternType)} ${chalk.white(dup.patternType)}`
|
|
99
100
|
);
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
);
|
|
101
|
+
dup.files.forEach((file, fileIdx) => {
|
|
102
|
+
const prefix = fileIdx === 0 ? " " : " \u2194 ";
|
|
103
|
+
console.log(
|
|
104
|
+
`${chalk.dim(prefix)}${chalk.dim(file.path)}:${chalk.cyan(file.startLine)}-${chalk.cyan(file.endLine)}`
|
|
105
|
+
);
|
|
106
|
+
});
|
|
106
107
|
console.log(
|
|
107
108
|
` ${chalk.red(`${dup.tokenCost.toLocaleString()} tokens wasted`)}
|
|
108
109
|
`
|
|
@@ -199,8 +200,7 @@ function generateHTMLReport(summary, results) {
|
|
|
199
200
|
<tr>
|
|
200
201
|
<th>Similarity</th>
|
|
201
202
|
<th>Type</th>
|
|
202
|
-
<th>
|
|
203
|
-
<th>File 2</th>
|
|
203
|
+
<th>Files</th>
|
|
204
204
|
<th>Token Cost</th>
|
|
205
205
|
</tr>
|
|
206
206
|
</thead>
|
|
@@ -210,8 +210,7 @@ function generateHTMLReport(summary, results) {
|
|
|
210
210
|
<tr>
|
|
211
211
|
<td class="${dup.similarity > 0.95 ? "critical" : dup.similarity > 0.9 ? "major" : "minor"}">${Math.round(dup.similarity * 100)}%</td>
|
|
212
212
|
<td>${dup.patternType}</td>
|
|
213
|
-
<td
|
|
214
|
-
<td><code>${dup.file2}</code></td>
|
|
213
|
+
<td>${dup.files.map((f) => `<code>${f.path}:${f.startLine}-${f.endLine}</code>`).join("<br>\u2194<br>")}</td>
|
|
215
214
|
<td>${dup.tokenCost.toLocaleString()}</td>
|
|
216
215
|
</tr>
|
|
217
216
|
`
|
package/dist/index.d.mts
CHANGED
|
@@ -26,7 +26,6 @@ interface DetectionOptions {
|
|
|
26
26
|
approx?: boolean;
|
|
27
27
|
minSharedTokens?: number;
|
|
28
28
|
maxCandidatesPerBlock?: number;
|
|
29
|
-
fastMode?: boolean;
|
|
30
29
|
maxComparisons?: number;
|
|
31
30
|
streamResults?: boolean;
|
|
32
31
|
}
|
|
@@ -38,13 +37,10 @@ declare function detectDuplicatePatterns(files: FileContent[], options: Detectio
|
|
|
38
37
|
interface PatternDetectOptions extends ScanOptions {
|
|
39
38
|
minSimilarity?: number;
|
|
40
39
|
minLines?: number;
|
|
41
|
-
maxBlocks?: number;
|
|
42
40
|
batchSize?: number;
|
|
43
41
|
approx?: boolean;
|
|
44
42
|
minSharedTokens?: number;
|
|
45
43
|
maxCandidatesPerBlock?: number;
|
|
46
|
-
fastMode?: boolean;
|
|
47
|
-
maxComparisons?: number;
|
|
48
44
|
streamResults?: boolean;
|
|
49
45
|
}
|
|
50
46
|
interface PatternSummary {
|
|
@@ -52,12 +48,11 @@ interface PatternSummary {
|
|
|
52
48
|
totalTokenCost: number;
|
|
53
49
|
patternsByType: Record<PatternType, number>;
|
|
54
50
|
topDuplicates: Array<{
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
endLine2: number;
|
|
51
|
+
files: Array<{
|
|
52
|
+
path: string;
|
|
53
|
+
startLine: number;
|
|
54
|
+
endLine: number;
|
|
55
|
+
}>;
|
|
61
56
|
similarity: number;
|
|
62
57
|
patternType: PatternType;
|
|
63
58
|
tokenCost: number;
|