@aiready/pattern-detect 0.1.3 → 0.2.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  import { scanFiles, readFileContent } from "@aiready/core";
3
3
 
4
4
  // src/detector.ts
5
- import { similarityScore, estimateTokens } from "@aiready/core";
5
+ import { estimateTokens } from "@aiready/core";
6
6
  function categorizePattern(code) {
7
7
  const lower = code.toLowerCase();
8
8
  if (lower.includes("request") && lower.includes("response") || lower.includes("router.") || lower.includes("app.get") || lower.includes("app.post") || lower.includes("express") || lower.includes("ctx.body")) {
@@ -80,15 +80,6 @@ function jaccardSimilarity(tokens1, tokens2) {
80
80
  const union = set1.size + set2.size - intersection;
81
81
  return union === 0 ? 0 : intersection / union;
82
82
  }
83
- function calculateSimilarity(block1, block2) {
84
- const norm1 = normalizeCode(block1);
85
- const norm2 = normalizeCode(block2);
86
- const baseSimilarity = similarityScore(norm1, norm2);
87
- const tokens1 = norm1.split(/[\s(){}[\];,]+/).filter(Boolean);
88
- const tokens2 = norm2.split(/[\s(){}[\];,]+/).filter(Boolean);
89
- const tokenSimilarity = similarityScore(tokens1.join(" "), tokens2.join(" "));
90
- return baseSimilarity * 0.4 + tokenSimilarity * 0.6;
91
- }
92
83
  async function detectDuplicatePatterns(files, options) {
93
84
  const {
94
85
  minSimilarity,
@@ -98,7 +89,6 @@ async function detectDuplicatePatterns(files, options) {
98
89
  approx = true,
99
90
  minSharedTokens = 8,
100
91
  maxCandidatesPerBlock = 100,
101
- fastMode = true,
102
92
  maxComparisons = 5e4,
103
93
  // Cap at 50K comparisons by default
104
94
  streamResults = false
@@ -213,7 +203,7 @@ async function detectDuplicatePatterns(files, options) {
213
203
  if (maxComparisons && comparisonsProcessed >= maxComparisons) break;
214
204
  comparisonsProcessed++;
215
205
  const block2 = allBlocks[j];
216
- const similarity = fastMode ? jaccardSimilarity(blockTokens[i], blockTokens[j]) : calculateSimilarity(block1.content, block2.content);
206
+ const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
217
207
  if (similarity >= minSimilarity) {
218
208
  const duplicate = {
219
209
  file1: block1.file,
@@ -243,7 +233,7 @@ async function detectDuplicatePatterns(files, options) {
243
233
  comparisonsProcessed++;
244
234
  const block2 = allBlocks[j];
245
235
  if (block1.file === block2.file) continue;
246
- const similarity = fastMode ? jaccardSimilarity(blockTokens[i], blockTokens[j]) : calculateSimilarity(block1.content, block2.content);
236
+ const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
247
237
  if (similarity >= minSimilarity) {
248
238
  const duplicate = {
249
239
  file1: block1.file,
@@ -293,15 +283,14 @@ function getRefactoringSuggestion(patternType, similarity) {
293
283
  }
294
284
  async function analyzePatterns(options) {
295
285
  const {
296
- minSimilarity = 0.65,
297
- // Lower default for fast Jaccard mode (Levenshtein would be 0.85+)
286
+ minSimilarity = 0.4,
287
+ // Jaccard similarity default (40% threshold)
298
288
  minLines = 5,
299
289
  maxBlocks = 500,
300
290
  batchSize = 100,
301
291
  approx = true,
302
292
  minSharedTokens = 8,
303
293
  maxCandidatesPerBlock = 100,
304
- fastMode = true,
305
294
  maxComparisons = 5e4,
306
295
  streamResults = false,
307
296
  ...scanOptions
@@ -322,7 +311,6 @@ async function analyzePatterns(options) {
322
311
  approx,
323
312
  minSharedTokens,
324
313
  maxCandidatesPerBlock,
325
- fastMode,
326
314
  maxComparisons,
327
315
  streamResults
328
316
  });
@@ -387,8 +375,21 @@ function generateSummary(results) {
387
375
  const typeMatch = issue.message.match(/^(\S+(?:-\S+)*) pattern/);
388
376
  const fileMatch = issue.message.match(/similar to (.+?) \(/);
389
377
  return {
390
- file1: issue.location.file,
391
- file2: fileMatch?.[1] || "unknown",
378
+ files: [
379
+ {
380
+ path: issue.location.file,
381
+ startLine: issue.location.line,
382
+ endLine: 0
383
+ // Not available from Issue
384
+ },
385
+ {
386
+ path: fileMatch?.[1] || "unknown",
387
+ startLine: 0,
388
+ // Not available from Issue
389
+ endLine: 0
390
+ // Not available from Issue
391
+ }
392
+ ],
392
393
  similarity: similarityMatch ? parseInt(similarityMatch[1]) / 100 : 0,
393
394
  patternType: typeMatch?.[1] || "unknown",
394
395
  tokenCost: tokenMatch ? parseInt(tokenMatch[1]) : 0
@@ -2,7 +2,7 @@
2
2
  import { scanFiles, readFileContent } from "@aiready/core";
3
3
 
4
4
  // src/detector.ts
5
- import { similarityScore, estimateTokens } from "@aiready/core";
5
+ import { estimateTokens } from "@aiready/core";
6
6
  function categorizePattern(code) {
7
7
  const lower = code.toLowerCase();
8
8
  if (lower.includes("request") && lower.includes("response") || lower.includes("router.") || lower.includes("app.get") || lower.includes("app.post") || lower.includes("express") || lower.includes("ctx.body")) {
@@ -54,6 +54,7 @@ function extractCodeBlocks(content, minLines) {
54
54
  blocks.push({
55
55
  content: blockContent,
56
56
  startLine: blockStart + 1,
57
+ endLine: i + 1,
57
58
  patternType: categorizePattern(blockContent),
58
59
  linesOfCode
59
60
  });
@@ -79,44 +80,32 @@ function jaccardSimilarity(tokens1, tokens2) {
79
80
  const union = set1.size + set2.size - intersection;
80
81
  return union === 0 ? 0 : intersection / union;
81
82
  }
82
- function calculateSimilarity(block1, block2) {
83
- const norm1 = normalizeCode(block1);
84
- const norm2 = normalizeCode(block2);
85
- const baseSimilarity = similarityScore(norm1, norm2);
86
- const tokens1 = norm1.split(/[\s(){}[\];,]+/).filter(Boolean);
87
- const tokens2 = norm2.split(/[\s(){}[\];,]+/).filter(Boolean);
88
- const tokenSimilarity = similarityScore(tokens1.join(" "), tokens2.join(" "));
89
- return baseSimilarity * 0.4 + tokenSimilarity * 0.6;
90
- }
91
83
  async function detectDuplicatePatterns(files, options) {
92
84
  const {
93
85
  minSimilarity,
94
86
  minLines,
95
- maxBlocks = 500,
96
87
  batchSize = 100,
97
88
  approx = true,
98
89
  minSharedTokens = 8,
99
90
  maxCandidatesPerBlock = 100,
100
- fastMode = true,
101
- maxComparisons = 5e4,
102
- // Cap at 50K comparisons by default
91
+ maxComparisons = 1e5,
92
+ // Cap at 100K comparisons by default
103
93
  streamResults = false
104
94
  } = options;
105
95
  const duplicates = [];
106
- let allBlocks = files.flatMap(
96
+ const allBlocks = files.flatMap(
107
97
  (file) => extractCodeBlocks(file.content, minLines).map((block) => ({
108
- ...block,
98
+ content: block.content,
99
+ startLine: block.startLine,
100
+ endLine: block.endLine,
109
101
  file: file.file,
110
102
  normalized: normalizeCode(block.content),
111
- tokenCost: estimateTokens(block.content)
103
+ patternType: block.patternType,
104
+ tokenCost: estimateTokens(block.content),
105
+ linesOfCode: block.linesOfCode
112
106
  }))
113
107
  );
114
108
  console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
115
- if (allBlocks.length > maxBlocks) {
116
- console.log(`\u26A0\uFE0F Limiting to ${maxBlocks} blocks (sorted by size) to prevent memory issues`);
117
- console.log(` Use --max-blocks to increase limit or --min-lines to filter smaller blocks`);
118
- allBlocks = allBlocks.sort((a, b) => b.linesOfCode - a.linesOfCode).slice(0, maxBlocks);
119
- }
120
109
  const stopwords = /* @__PURE__ */ new Set([
121
110
  "return",
122
111
  "const",
@@ -208,13 +197,15 @@ async function detectDuplicatePatterns(files, options) {
208
197
  if (maxComparisons && comparisonsProcessed >= maxComparisons) break;
209
198
  comparisonsProcessed++;
210
199
  const block2 = allBlocks[j];
211
- const similarity = fastMode ? jaccardSimilarity(blockTokens[i], blockTokens[j]) : calculateSimilarity(block1.content, block2.content);
200
+ const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
212
201
  if (similarity >= minSimilarity) {
213
202
  const duplicate = {
214
203
  file1: block1.file,
215
204
  file2: block2.file,
216
205
  line1: block1.startLine,
217
206
  line2: block2.startLine,
207
+ endLine1: block1.endLine,
208
+ endLine2: block2.endLine,
218
209
  similarity,
219
210
  snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
220
211
  patternType: block1.patternType,
@@ -225,7 +216,7 @@ async function detectDuplicatePatterns(files, options) {
225
216
  if (streamResults) {
226
217
  console.log(`
227
218
  \u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
228
- console.log(` ${duplicate.file1}:${duplicate.line1} \u21D4 ${duplicate.file2}:${duplicate.line2}`);
219
+ console.log(` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
229
220
  console.log(` Token cost: ${duplicate.tokenCost.toLocaleString()}`);
230
221
  }
231
222
  }
@@ -236,13 +227,15 @@ async function detectDuplicatePatterns(files, options) {
236
227
  comparisonsProcessed++;
237
228
  const block2 = allBlocks[j];
238
229
  if (block1.file === block2.file) continue;
239
- const similarity = fastMode ? jaccardSimilarity(blockTokens[i], blockTokens[j]) : calculateSimilarity(block1.content, block2.content);
230
+ const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
240
231
  if (similarity >= minSimilarity) {
241
232
  const duplicate = {
242
233
  file1: block1.file,
243
234
  file2: block2.file,
244
235
  line1: block1.startLine,
245
236
  line2: block2.startLine,
237
+ endLine1: block1.endLine,
238
+ endLine2: block2.endLine,
246
239
  similarity,
247
240
  snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
248
241
  patternType: block1.patternType,
@@ -253,7 +246,7 @@ async function detectDuplicatePatterns(files, options) {
253
246
  if (streamResults) {
254
247
  console.log(`
255
248
  \u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
256
- console.log(` ${duplicate.file1}:${duplicate.line1} \u21D4 ${duplicate.file2}:${duplicate.line2}`);
249
+ console.log(` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
257
250
  console.log(` Token cost: ${duplicate.tokenCost.toLocaleString()}`);
258
251
  }
259
252
  }
@@ -284,16 +277,14 @@ function getRefactoringSuggestion(patternType, similarity) {
284
277
  }
285
278
  async function analyzePatterns(options) {
286
279
  const {
287
- minSimilarity = 0.7,
288
- // Lower default for fast Jaccard mode (was 0.85 for Levenshtein)
280
+ minSimilarity = 0.4,
281
+ // Jaccard similarity default (40% threshold)
289
282
  minLines = 5,
290
- maxBlocks = 500,
291
283
  batchSize = 100,
292
284
  approx = true,
293
285
  minSharedTokens = 8,
294
286
  maxCandidatesPerBlock = 100,
295
- fastMode = true,
296
- maxComparisons = 5e4,
287
+ maxComparisons = 1e5,
297
288
  streamResults = false,
298
289
  ...scanOptions
299
290
  } = options;
@@ -308,12 +299,10 @@ async function analyzePatterns(options) {
308
299
  const duplicates = await detectDuplicatePatterns(fileContents, {
309
300
  minSimilarity,
310
301
  minLines,
311
- maxBlocks,
312
302
  batchSize,
313
303
  approx,
314
304
  minSharedTokens,
315
305
  maxCandidatesPerBlock,
316
- fastMode,
317
306
  maxComparisons,
318
307
  streamResults
319
308
  });
@@ -378,8 +367,21 @@ function generateSummary(results) {
378
367
  const typeMatch = issue.message.match(/^(\S+(?:-\S+)*) pattern/);
379
368
  const fileMatch = issue.message.match(/similar to (.+?) \(/);
380
369
  return {
381
- file1: issue.location.file,
382
- file2: fileMatch?.[1] || "unknown",
370
+ files: [
371
+ {
372
+ path: issue.location.file,
373
+ startLine: issue.location.line,
374
+ endLine: 0
375
+ // Not available from Issue
376
+ },
377
+ {
378
+ path: fileMatch?.[1] || "unknown",
379
+ startLine: 0,
380
+ // Not available from Issue
381
+ endLine: 0
382
+ // Not available from Issue
383
+ }
384
+ ],
383
385
  similarity: similarityMatch ? parseInt(similarityMatch[1]) / 100 : 0,
384
386
  patternType: typeMatch?.[1] || "unknown",
385
387
  tokenCost: tokenMatch ? parseInt(tokenMatch[1]) : 0
package/dist/cli.js CHANGED
@@ -108,31 +108,19 @@ function jaccardSimilarity(tokens1, tokens2) {
108
108
  const union = set1.size + set2.size - intersection;
109
109
  return union === 0 ? 0 : intersection / union;
110
110
  }
111
- function calculateSimilarity(block1, block2) {
112
- const norm1 = normalizeCode(block1);
113
- const norm2 = normalizeCode(block2);
114
- const baseSimilarity = (0, import_core.similarityScore)(norm1, norm2);
115
- const tokens1 = norm1.split(/[\s(){}[\];,]+/).filter(Boolean);
116
- const tokens2 = norm2.split(/[\s(){}[\];,]+/).filter(Boolean);
117
- const tokenSimilarity = (0, import_core.similarityScore)(tokens1.join(" "), tokens2.join(" "));
118
- return baseSimilarity * 0.4 + tokenSimilarity * 0.6;
119
- }
120
111
  async function detectDuplicatePatterns(files, options) {
121
112
  const {
122
113
  minSimilarity,
123
114
  minLines,
124
- maxBlocks = 500,
125
115
  batchSize = 100,
126
116
  approx = true,
127
117
  minSharedTokens = 8,
128
118
  maxCandidatesPerBlock = 100,
129
- fastMode = true,
130
- maxComparisons = 5e4,
131
- // Cap at 50K comparisons by default
132
119
  streamResults = false
133
120
  } = options;
134
121
  const duplicates = [];
135
- let allBlocks = files.flatMap(
122
+ const maxComparisons = approx ? Infinity : 5e5;
123
+ const allBlocks = files.flatMap(
136
124
  (file) => extractCodeBlocks(file.content, minLines).map((block) => ({
137
125
  content: block.content,
138
126
  startLine: block.startLine,
@@ -145,10 +133,9 @@ async function detectDuplicatePatterns(files, options) {
145
133
  }))
146
134
  );
147
135
  console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
148
- if (allBlocks.length > maxBlocks) {
149
- console.log(`\u26A0\uFE0F Limiting to ${maxBlocks} blocks (sorted by size) to prevent memory issues`);
150
- console.log(` Use --max-blocks to increase limit or --min-lines to filter smaller blocks`);
151
- allBlocks = allBlocks.sort((a, b) => b.linesOfCode - a.linesOfCode).slice(0, maxBlocks);
136
+ if (!approx && allBlocks.length > 500) {
137
+ console.log(`\u26A0\uFE0F Using --no-approx mode with ${allBlocks.length} blocks may be slow (O(B\xB2) complexity).`);
138
+ console.log(` Consider using approximate mode (default) for better performance.`);
152
139
  }
153
140
  const stopwords = /* @__PURE__ */ new Set([
154
141
  "return",
@@ -238,10 +225,14 @@ async function detectDuplicatePatterns(files, options) {
238
225
  }
239
226
  if (approx && candidates) {
240
227
  for (const { j } of candidates) {
241
- if (maxComparisons && comparisonsProcessed >= maxComparisons) break;
228
+ if (!approx && maxComparisons !== Infinity && comparisonsProcessed >= maxComparisons) {
229
+ console.log(`\u26A0\uFE0F Comparison safety limit reached (${maxComparisons.toLocaleString()} comparisons in --no-approx mode).`);
230
+ console.log(` This prevents excessive runtime on large repos. Consider using approximate mode (default) or --min-lines to reduce blocks.`);
231
+ break;
232
+ }
242
233
  comparisonsProcessed++;
243
234
  const block2 = allBlocks[j];
244
- const similarity = fastMode ? jaccardSimilarity(blockTokens[i], blockTokens[j]) : calculateSimilarity(block1.content, block2.content);
235
+ const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
245
236
  if (similarity >= minSimilarity) {
246
237
  const duplicate = {
247
238
  file1: block1.file,
@@ -271,7 +262,7 @@ async function detectDuplicatePatterns(files, options) {
271
262
  comparisonsProcessed++;
272
263
  const block2 = allBlocks[j];
273
264
  if (block1.file === block2.file) continue;
274
- const similarity = fastMode ? jaccardSimilarity(blockTokens[i], blockTokens[j]) : calculateSimilarity(block1.content, block2.content);
265
+ const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
275
266
  if (similarity >= minSimilarity) {
276
267
  const duplicate = {
277
268
  file1: block1.file,
@@ -321,16 +312,13 @@ function getRefactoringSuggestion(patternType, similarity) {
321
312
  }
322
313
  async function analyzePatterns(options) {
323
314
  const {
324
- minSimilarity = 0.65,
325
- // Lower default for fast Jaccard mode (Levenshtein would be 0.85+)
315
+ minSimilarity = 0.4,
316
+ // Jaccard similarity default (40% threshold)
326
317
  minLines = 5,
327
- maxBlocks = 500,
328
318
  batchSize = 100,
329
319
  approx = true,
330
320
  minSharedTokens = 8,
331
321
  maxCandidatesPerBlock = 100,
332
- fastMode = true,
333
- maxComparisons = 5e4,
334
322
  streamResults = false,
335
323
  ...scanOptions
336
324
  } = options;
@@ -345,13 +333,10 @@ async function analyzePatterns(options) {
345
333
  const duplicates = await detectDuplicatePatterns(fileContents, {
346
334
  minSimilarity,
347
335
  minLines,
348
- maxBlocks,
349
336
  batchSize,
350
337
  approx,
351
338
  minSharedTokens,
352
339
  maxCandidatesPerBlock,
353
- fastMode,
354
- maxComparisons,
355
340
  streamResults
356
341
  });
357
342
  for (const file of files) {
@@ -415,15 +400,21 @@ function generateSummary(results) {
415
400
  const typeMatch = issue.message.match(/^(\S+(?:-\S+)*) pattern/);
416
401
  const fileMatch = issue.message.match(/similar to (.+?) \(/);
417
402
  return {
418
- file1: issue.location.file,
419
- file2: fileMatch?.[1] || "unknown",
420
- line1: issue.location.line,
421
- line2: 0,
422
- // Not available from Issue
423
- endLine1: 0,
424
- // Not available from Issue
425
- endLine2: 0,
426
- // Not available from Issue
403
+ files: [
404
+ {
405
+ path: issue.location.file,
406
+ startLine: issue.location.line,
407
+ endLine: 0
408
+ // Not available from Issue
409
+ },
410
+ {
411
+ path: fileMatch?.[1] || "unknown",
412
+ startLine: 0,
413
+ // Not available from Issue
414
+ endLine: 0
415
+ // Not available from Issue
416
+ }
417
+ ],
427
418
  similarity: similarityMatch ? parseInt(similarityMatch[1]) / 100 : 0,
428
419
  patternType: typeMatch?.[1] || "unknown",
429
420
  tokenCost: tokenMatch ? parseInt(tokenMatch[1]) : 0
@@ -442,30 +433,28 @@ var import_chalk = __toESM(require("chalk"));
442
433
  var import_fs = require("fs");
443
434
  var import_path = require("path");
444
435
  var program = new import_commander.Command();
445
- program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--max-blocks <number>", "Maximum blocks to analyze (prevents OOM)", "500").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-fast-mode", "Use slower but more accurate Levenshtein distance (default: fast Jaccard)").option("--max-comparisons <number>", "Maximum total comparisons budget", "50000").option("--stream-results", "Output duplicates incrementally as found (useful for slow analysis)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
436
+ program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-stream-results", "Disable incremental output (default: enabled)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
446
437
  "-o, --output <format>",
447
438
  "Output format: console, json, html",
448
439
  "console"
449
440
  ).option("--output-file <path>", "Output file path (for json/html)").action(async (directory, options) => {
450
441
  console.log(import_chalk.default.blue("\u{1F50D} Analyzing patterns...\n"));
442
+ const startTime = Date.now();
451
443
  const results = await analyzePatterns({
452
444
  rootDir: directory,
453
445
  minSimilarity: parseFloat(options.similarity),
454
446
  minLines: parseInt(options.minLines),
455
- maxBlocks: parseInt(options.maxBlocks),
456
447
  batchSize: parseInt(options.batchSize),
457
448
  approx: options.approx !== false,
458
449
  // default true; --no-approx sets to false
459
450
  minSharedTokens: parseInt(options.minSharedTokens),
460
451
  maxCandidatesPerBlock: parseInt(options.maxCandidates),
461
- fastMode: options.fastMode !== false,
462
- // default true; --no-fast-mode sets to false
463
- maxComparisons: parseInt(options.maxComparisons),
464
- streamResults: options.streamResults === true,
465
- // default false; --stream-results sets to true
452
+ streamResults: options.streamResults !== false,
453
+ // default true; --no-stream-results sets to false
466
454
  include: options.include?.split(","),
467
455
  exclude: options.exclude?.split(",")
468
456
  });
457
+ const elapsedTime = ((Date.now() - startTime) / 1e3).toFixed(2);
469
458
  const summary = generateSummary(results);
470
459
  const totalIssues = results.reduce((sum, r) => sum + r.issues.length, 0);
471
460
  if (options.output === "json") {
@@ -508,6 +497,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
508
497
  `\u{1F4B0} Token cost (wasted): ${import_chalk.default.bold(summary.totalTokenCost.toLocaleString())}`
509
498
  )
510
499
  );
500
+ console.log(
501
+ import_chalk.default.gray(`\u23F1 Analysis time: ${import_chalk.default.bold(elapsedTime + "s")}`)
502
+ );
511
503
  const sortedTypes = Object.entries(summary.patternsByType).filter(([, count]) => count > 0).sort(([, a], [, b]) => b - a);
512
504
  if (sortedTypes.length > 0) {
513
505
  console.log(import_chalk.default.cyan("\n" + divider));
@@ -529,12 +521,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
529
521
  `${Math.round(dup.similarity * 100)}%`
530
522
  )} ${getPatternIcon(dup.patternType)} ${import_chalk.default.white(dup.patternType)}`
531
523
  );
532
- console.log(
533
- ` ${import_chalk.default.dim(dup.file1)}:${import_chalk.default.cyan(dup.line1)}-${import_chalk.default.cyan(dup.endLine1)}`
534
- );
535
- console.log(
536
- ` ${import_chalk.default.dim("\u2194")} ${import_chalk.default.dim(dup.file2)}:${import_chalk.default.cyan(dup.line2)}-${import_chalk.default.cyan(dup.endLine2)}`
537
- );
524
+ dup.files.forEach((file, fileIdx) => {
525
+ const prefix = fileIdx === 0 ? " " : " \u2194 ";
526
+ console.log(
527
+ `${import_chalk.default.dim(prefix)}${import_chalk.default.dim(file.path)}:${import_chalk.default.cyan(file.startLine)}-${import_chalk.default.cyan(file.endLine)}`
528
+ );
529
+ });
538
530
  console.log(
539
531
  ` ${import_chalk.default.red(`${dup.tokenCost.toLocaleString()} tokens wasted`)}
540
532
  `
@@ -631,8 +623,7 @@ function generateHTMLReport(summary, results) {
631
623
  <tr>
632
624
  <th>Similarity</th>
633
625
  <th>Type</th>
634
- <th>File 1</th>
635
- <th>File 2</th>
626
+ <th>Files</th>
636
627
  <th>Token Cost</th>
637
628
  </tr>
638
629
  </thead>
@@ -642,8 +633,7 @@ function generateHTMLReport(summary, results) {
642
633
  <tr>
643
634
  <td class="${dup.similarity > 0.95 ? "critical" : dup.similarity > 0.9 ? "major" : "minor"}">${Math.round(dup.similarity * 100)}%</td>
644
635
  <td>${dup.patternType}</td>
645
- <td><code>${dup.file1}</code></td>
646
- <td><code>${dup.file2}</code></td>
636
+ <td>${dup.files.map((f) => `<code>${f.path}:${f.startLine}-${f.endLine}</code>`).join("<br>\u2194<br>")}</td>
647
637
  <td>${dup.tokenCost.toLocaleString()}</td>
648
638
  </tr>
649
639
  `
package/dist/cli.mjs CHANGED
@@ -2,7 +2,7 @@
2
2
  import {
3
3
  analyzePatterns,
4
4
  generateSummary
5
- } from "./chunk-N5DE7IYX.mjs";
5
+ } from "./chunk-JKVKOXYR.mjs";
6
6
 
7
7
  // src/cli.ts
8
8
  import { Command } from "commander";
@@ -10,30 +10,28 @@ import chalk from "chalk";
10
10
  import { writeFileSync } from "fs";
11
11
  import { join } from "path";
12
12
  var program = new Command();
13
- program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--max-blocks <number>", "Maximum blocks to analyze (prevents OOM)", "500").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-fast-mode", "Use slower but more accurate Levenshtein distance (default: fast Jaccard)").option("--max-comparisons <number>", "Maximum total comparisons budget", "50000").option("--stream-results", "Output duplicates incrementally as found (useful for slow analysis)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
13
+ program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-stream-results", "Disable incremental output (default: enabled)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
14
14
  "-o, --output <format>",
15
15
  "Output format: console, json, html",
16
16
  "console"
17
17
  ).option("--output-file <path>", "Output file path (for json/html)").action(async (directory, options) => {
18
18
  console.log(chalk.blue("\u{1F50D} Analyzing patterns...\n"));
19
+ const startTime = Date.now();
19
20
  const results = await analyzePatterns({
20
21
  rootDir: directory,
21
22
  minSimilarity: parseFloat(options.similarity),
22
23
  minLines: parseInt(options.minLines),
23
- maxBlocks: parseInt(options.maxBlocks),
24
24
  batchSize: parseInt(options.batchSize),
25
25
  approx: options.approx !== false,
26
26
  // default true; --no-approx sets to false
27
27
  minSharedTokens: parseInt(options.minSharedTokens),
28
28
  maxCandidatesPerBlock: parseInt(options.maxCandidates),
29
- fastMode: options.fastMode !== false,
30
- // default true; --no-fast-mode sets to false
31
- maxComparisons: parseInt(options.maxComparisons),
32
- streamResults: options.streamResults === true,
33
- // default false; --stream-results sets to true
29
+ streamResults: options.streamResults !== false,
30
+ // default true; --no-stream-results sets to false
34
31
  include: options.include?.split(","),
35
32
  exclude: options.exclude?.split(",")
36
33
  });
34
+ const elapsedTime = ((Date.now() - startTime) / 1e3).toFixed(2);
37
35
  const summary = generateSummary(results);
38
36
  const totalIssues = results.reduce((sum, r) => sum + r.issues.length, 0);
39
37
  if (options.output === "json") {
@@ -76,6 +74,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
76
74
  `\u{1F4B0} Token cost (wasted): ${chalk.bold(summary.totalTokenCost.toLocaleString())}`
77
75
  )
78
76
  );
77
+ console.log(
78
+ chalk.gray(`\u23F1 Analysis time: ${chalk.bold(elapsedTime + "s")}`)
79
+ );
79
80
  const sortedTypes = Object.entries(summary.patternsByType).filter(([, count]) => count > 0).sort(([, a], [, b]) => b - a);
80
81
  if (sortedTypes.length > 0) {
81
82
  console.log(chalk.cyan("\n" + divider));
@@ -97,12 +98,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
97
98
  `${Math.round(dup.similarity * 100)}%`
98
99
  )} ${getPatternIcon(dup.patternType)} ${chalk.white(dup.patternType)}`
99
100
  );
100
- console.log(
101
- ` ${chalk.dim(dup.file1)}:${chalk.cyan(dup.line1)}-${chalk.cyan(dup.endLine1)}`
102
- );
103
- console.log(
104
- ` ${chalk.dim("\u2194")} ${chalk.dim(dup.file2)}:${chalk.cyan(dup.line2)}-${chalk.cyan(dup.endLine2)}`
105
- );
101
+ dup.files.forEach((file, fileIdx) => {
102
+ const prefix = fileIdx === 0 ? " " : " \u2194 ";
103
+ console.log(
104
+ `${chalk.dim(prefix)}${chalk.dim(file.path)}:${chalk.cyan(file.startLine)}-${chalk.cyan(file.endLine)}`
105
+ );
106
+ });
106
107
  console.log(
107
108
  ` ${chalk.red(`${dup.tokenCost.toLocaleString()} tokens wasted`)}
108
109
  `
@@ -199,8 +200,7 @@ function generateHTMLReport(summary, results) {
199
200
  <tr>
200
201
  <th>Similarity</th>
201
202
  <th>Type</th>
202
- <th>File 1</th>
203
- <th>File 2</th>
203
+ <th>Files</th>
204
204
  <th>Token Cost</th>
205
205
  </tr>
206
206
  </thead>
@@ -210,8 +210,7 @@ function generateHTMLReport(summary, results) {
210
210
  <tr>
211
211
  <td class="${dup.similarity > 0.95 ? "critical" : dup.similarity > 0.9 ? "major" : "minor"}">${Math.round(dup.similarity * 100)}%</td>
212
212
  <td>${dup.patternType}</td>
213
- <td><code>${dup.file1}</code></td>
214
- <td><code>${dup.file2}</code></td>
213
+ <td>${dup.files.map((f) => `<code>${f.path}:${f.startLine}-${f.endLine}</code>`).join("<br>\u2194<br>")}</td>
215
214
  <td>${dup.tokenCost.toLocaleString()}</td>
216
215
  </tr>
217
216
  `
package/dist/index.d.mts CHANGED
@@ -26,7 +26,6 @@ interface DetectionOptions {
26
26
  approx?: boolean;
27
27
  minSharedTokens?: number;
28
28
  maxCandidatesPerBlock?: number;
29
- fastMode?: boolean;
30
29
  maxComparisons?: number;
31
30
  streamResults?: boolean;
32
31
  }
@@ -38,13 +37,10 @@ declare function detectDuplicatePatterns(files: FileContent[], options: Detectio
38
37
  interface PatternDetectOptions extends ScanOptions {
39
38
  minSimilarity?: number;
40
39
  minLines?: number;
41
- maxBlocks?: number;
42
40
  batchSize?: number;
43
41
  approx?: boolean;
44
42
  minSharedTokens?: number;
45
43
  maxCandidatesPerBlock?: number;
46
- fastMode?: boolean;
47
- maxComparisons?: number;
48
44
  streamResults?: boolean;
49
45
  }
50
46
  interface PatternSummary {
@@ -52,12 +48,11 @@ interface PatternSummary {
52
48
  totalTokenCost: number;
53
49
  patternsByType: Record<PatternType, number>;
54
50
  topDuplicates: Array<{
55
- file1: string;
56
- file2: string;
57
- line1: number;
58
- line2: number;
59
- endLine1: number;
60
- endLine2: number;
51
+ files: Array<{
52
+ path: string;
53
+ startLine: number;
54
+ endLine: number;
55
+ }>;
61
56
  similarity: number;
62
57
  patternType: PatternType;
63
58
  tokenCost: number;