@aiready/pattern-detect 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -82,6 +82,7 @@ function extractCodeBlocks(content, minLines) {
82
82
  blocks.push({
83
83
  content: blockContent,
84
84
  startLine: blockStart + 1,
85
+ endLine: i + 1,
85
86
  patternType: categorizePattern(blockContent),
86
87
  linesOfCode
87
88
  });
@@ -97,47 +98,198 @@ function extractCodeBlocks(content, minLines) {
97
98
  function normalizeCode(code) {
98
99
  return code.replace(/\/\/.*$/gm, "").replace(/\/\*[\s\S]*?\*\//g, "").replace(/"[^"]*"/g, '"STR"').replace(/'[^']*'/g, "'STR'").replace(/`[^`]*`/g, "`STR`").replace(/\b\d+\b/g, "NUM").replace(/\s+/g, " ").trim();
99
100
  }
100
- function calculateSimilarity(block1, block2) {
101
- const norm1 = normalizeCode(block1);
102
- const norm2 = normalizeCode(block2);
103
- const baseSimilarity = (0, import_core.similarityScore)(norm1, norm2);
104
- const tokens1 = norm1.split(/[\s(){}[\];,]+/).filter(Boolean);
105
- const tokens2 = norm2.split(/[\s(){}[\];,]+/).filter(Boolean);
106
- const tokenSimilarity = (0, import_core.similarityScore)(tokens1.join(" "), tokens2.join(" "));
107
- return baseSimilarity * 0.4 + tokenSimilarity * 0.6;
101
+ function jaccardSimilarity(tokens1, tokens2) {
102
+ const set1 = new Set(tokens1);
103
+ const set2 = new Set(tokens2);
104
+ let intersection = 0;
105
+ for (const token of set1) {
106
+ if (set2.has(token)) intersection++;
107
+ }
108
+ const union = set1.size + set2.size - intersection;
109
+ return union === 0 ? 0 : intersection / union;
108
110
  }
109
- function detectDuplicatePatterns(files, options) {
110
- const { minSimilarity, minLines } = options;
111
+ async function detectDuplicatePatterns(files, options) {
112
+ const {
113
+ minSimilarity,
114
+ minLines,
115
+ batchSize = 100,
116
+ approx = true,
117
+ minSharedTokens = 8,
118
+ maxCandidatesPerBlock = 100,
119
+ streamResults = false
120
+ } = options;
111
121
  const duplicates = [];
122
+ const maxComparisons = approx ? Infinity : 5e5;
112
123
  const allBlocks = files.flatMap(
113
124
  (file) => extractCodeBlocks(file.content, minLines).map((block) => ({
114
- ...block,
125
+ content: block.content,
126
+ startLine: block.startLine,
127
+ endLine: block.endLine,
115
128
  file: file.file,
116
129
  normalized: normalizeCode(block.content),
117
- tokenCost: (0, import_core.estimateTokens)(block.content)
130
+ patternType: block.patternType,
131
+ tokenCost: (0, import_core.estimateTokens)(block.content),
132
+ linesOfCode: block.linesOfCode
118
133
  }))
119
134
  );
120
135
  console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
136
+ if (!approx && allBlocks.length > 500) {
137
+ console.log(`\u26A0\uFE0F Using --no-approx mode with ${allBlocks.length} blocks may be slow (O(B\xB2) complexity).`);
138
+ console.log(` Consider using approximate mode (default) for better performance.`);
139
+ }
140
+ const stopwords = /* @__PURE__ */ new Set([
141
+ "return",
142
+ "const",
143
+ "let",
144
+ "var",
145
+ "function",
146
+ "class",
147
+ "new",
148
+ "if",
149
+ "else",
150
+ "for",
151
+ "while",
152
+ "async",
153
+ "await",
154
+ "try",
155
+ "catch",
156
+ "switch",
157
+ "case",
158
+ "default",
159
+ "import",
160
+ "export",
161
+ "from",
162
+ "true",
163
+ "false",
164
+ "null",
165
+ "undefined",
166
+ "this"
167
+ ]);
168
+ const tokenize = (norm) => norm.split(/[\s(){}\[\];,\.]+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
169
+ const blockTokens = allBlocks.map((b) => tokenize(b.normalized));
170
+ const invertedIndex = /* @__PURE__ */ new Map();
171
+ if (approx) {
172
+ for (let i = 0; i < blockTokens.length; i++) {
173
+ for (const tok of blockTokens[i]) {
174
+ let arr = invertedIndex.get(tok);
175
+ if (!arr) {
176
+ arr = [];
177
+ invertedIndex.set(tok, arr);
178
+ }
179
+ arr.push(i);
180
+ }
181
+ }
182
+ }
183
+ const totalComparisons = approx ? void 0 : allBlocks.length * (allBlocks.length - 1) / 2;
184
+ if (totalComparisons !== void 0) {
185
+ console.log(`Processing ${totalComparisons.toLocaleString()} comparisons in batches...`);
186
+ } else {
187
+ console.log(`Using approximate candidate selection to reduce comparisons...`);
188
+ }
189
+ let comparisonsProcessed = 0;
190
+ let comparisonsBudgetExhausted = false;
191
+ const startTime = Date.now();
121
192
  for (let i = 0; i < allBlocks.length; i++) {
122
- for (let j = i + 1; j < allBlocks.length; j++) {
123
- const block1 = allBlocks[i];
124
- const block2 = allBlocks[j];
125
- if (block1.file === block2.file) continue;
126
- const similarity = calculateSimilarity(block1.content, block2.content);
127
- if (similarity >= minSimilarity) {
128
- duplicates.push({
129
- file1: block1.file,
130
- file2: block2.file,
131
- line1: block1.startLine,
132
- line2: block2.startLine,
133
- similarity,
134
- snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
135
- patternType: block1.patternType,
136
- tokenCost: block1.tokenCost + block2.tokenCost,
137
- linesOfCode: block1.linesOfCode
138
- });
193
+ if (maxComparisons && comparisonsProcessed >= maxComparisons) {
194
+ comparisonsBudgetExhausted = true;
195
+ break;
196
+ }
197
+ if (i % batchSize === 0 && i > 0) {
198
+ const elapsed = ((Date.now() - startTime) / 1e3).toFixed(1);
199
+ const duplicatesFound = duplicates.length;
200
+ if (totalComparisons !== void 0) {
201
+ const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
202
+ const remaining = totalComparisons - comparisonsProcessed;
203
+ const rate = comparisonsProcessed / parseFloat(elapsed);
204
+ const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
205
+ console.log(` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`);
206
+ } else {
207
+ console.log(` Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`);
139
208
  }
209
+ await new Promise((resolve) => setImmediate(resolve));
140
210
  }
211
+ const block1 = allBlocks[i];
212
+ let candidates = null;
213
+ if (approx) {
214
+ const counts = /* @__PURE__ */ new Map();
215
+ for (const tok of blockTokens[i]) {
216
+ const ids = invertedIndex.get(tok);
217
+ if (!ids) continue;
218
+ for (const j of ids) {
219
+ if (j <= i) continue;
220
+ if (allBlocks[j].file === block1.file) continue;
221
+ counts.set(j, (counts.get(j) || 0) + 1);
222
+ }
223
+ }
224
+ candidates = Array.from(counts.entries()).filter(([, shared]) => shared >= minSharedTokens).sort((a, b) => b[1] - a[1]).slice(0, maxCandidatesPerBlock).map(([j, shared]) => ({ j, shared }));
225
+ }
226
+ if (approx && candidates) {
227
+ for (const { j } of candidates) {
228
+ if (!approx && maxComparisons !== Infinity && comparisonsProcessed >= maxComparisons) {
229
+ console.log(`\u26A0\uFE0F Comparison safety limit reached (${maxComparisons.toLocaleString()} comparisons in --no-approx mode).`);
230
+ console.log(` This prevents excessive runtime on large repos. Consider using approximate mode (default) or --min-lines to reduce blocks.`);
231
+ break;
232
+ }
233
+ comparisonsProcessed++;
234
+ const block2 = allBlocks[j];
235
+ const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
236
+ if (similarity >= minSimilarity) {
237
+ const duplicate = {
238
+ file1: block1.file,
239
+ file2: block2.file,
240
+ line1: block1.startLine,
241
+ line2: block2.startLine,
242
+ endLine1: block1.endLine,
243
+ endLine2: block2.endLine,
244
+ similarity,
245
+ snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
246
+ patternType: block1.patternType,
247
+ tokenCost: block1.tokenCost + block2.tokenCost,
248
+ linesOfCode: block1.linesOfCode
249
+ };
250
+ duplicates.push(duplicate);
251
+ if (streamResults) {
252
+ console.log(`
253
+ \u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
254
+ console.log(` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
255
+ console.log(` Token cost: ${duplicate.tokenCost.toLocaleString()}`);
256
+ }
257
+ }
258
+ }
259
+ } else {
260
+ for (let j = i + 1; j < allBlocks.length; j++) {
261
+ if (maxComparisons && comparisonsProcessed >= maxComparisons) break;
262
+ comparisonsProcessed++;
263
+ const block2 = allBlocks[j];
264
+ if (block1.file === block2.file) continue;
265
+ const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
266
+ if (similarity >= minSimilarity) {
267
+ const duplicate = {
268
+ file1: block1.file,
269
+ file2: block2.file,
270
+ line1: block1.startLine,
271
+ line2: block2.startLine,
272
+ endLine1: block1.endLine,
273
+ endLine2: block2.endLine,
274
+ similarity,
275
+ snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
276
+ patternType: block1.patternType,
277
+ tokenCost: block1.tokenCost + block2.tokenCost,
278
+ linesOfCode: block1.linesOfCode
279
+ };
280
+ duplicates.push(duplicate);
281
+ if (streamResults) {
282
+ console.log(`
283
+ \u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
284
+ console.log(` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
285
+ console.log(` Token cost: ${duplicate.tokenCost.toLocaleString()}`);
286
+ }
287
+ }
288
+ }
289
+ }
290
+ }
291
+ if (comparisonsBudgetExhausted) {
292
+ console.log(`\u26A0\uFE0F Comparison budget exhausted (${maxComparisons.toLocaleString()} comparisons). Use --max-comparisons to increase.`);
141
293
  }
142
294
  return duplicates.sort(
143
295
  (a, b) => b.similarity - a.similarity || b.tokenCost - a.tokenCost
@@ -159,7 +311,17 @@ function getRefactoringSuggestion(patternType, similarity) {
159
311
  return baseMessages[patternType] + urgency;
160
312
  }
161
313
  async function analyzePatterns(options) {
162
- const { minSimilarity = 0.85, minLines = 5, ...scanOptions } = options;
314
+ const {
315
+ minSimilarity = 0.4,
316
+ // Jaccard similarity default (40% threshold)
317
+ minLines = 5,
318
+ batchSize = 100,
319
+ approx = true,
320
+ minSharedTokens = 8,
321
+ maxCandidatesPerBlock = 100,
322
+ streamResults = false,
323
+ ...scanOptions
324
+ } = options;
163
325
  const files = await (0, import_core2.scanFiles)(scanOptions);
164
326
  const results = [];
165
327
  const fileContents = await Promise.all(
@@ -168,9 +330,14 @@ async function analyzePatterns(options) {
168
330
  content: await (0, import_core2.readFileContent)(file)
169
331
  }))
170
332
  );
171
- const duplicates = detectDuplicatePatterns(fileContents, {
333
+ const duplicates = await detectDuplicatePatterns(fileContents, {
172
334
  minSimilarity,
173
- minLines
335
+ minLines,
336
+ batchSize,
337
+ approx,
338
+ minSharedTokens,
339
+ maxCandidatesPerBlock,
340
+ streamResults
174
341
  });
175
342
  for (const file of files) {
176
343
  const fileDuplicates = duplicates.filter(
@@ -233,8 +400,21 @@ function generateSummary(results) {
233
400
  const typeMatch = issue.message.match(/^(\S+(?:-\S+)*) pattern/);
234
401
  const fileMatch = issue.message.match(/similar to (.+?) \(/);
235
402
  return {
236
- file1: issue.location.file,
237
- file2: fileMatch?.[1] || "unknown",
403
+ files: [
404
+ {
405
+ path: issue.location.file,
406
+ startLine: issue.location.line,
407
+ endLine: 0
408
+ // Not available from Issue
409
+ },
410
+ {
411
+ path: fileMatch?.[1] || "unknown",
412
+ startLine: 0,
413
+ // Not available from Issue
414
+ endLine: 0
415
+ // Not available from Issue
416
+ }
417
+ ],
238
418
  similarity: similarityMatch ? parseInt(similarityMatch[1]) / 100 : 0,
239
419
  patternType: typeMatch?.[1] || "unknown",
240
420
  tokenCost: tokenMatch ? parseInt(tokenMatch[1]) : 0
@@ -253,19 +433,28 @@ var import_chalk = __toESM(require("chalk"));
253
433
  var import_fs = require("fs");
254
434
  var import_path = require("path");
255
435
  var program = new import_commander.Command();
256
- program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.85").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
436
+ program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-stream-results", "Disable incremental output (default: enabled)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
257
437
  "-o, --output <format>",
258
438
  "Output format: console, json, html",
259
439
  "console"
260
440
  ).option("--output-file <path>", "Output file path (for json/html)").action(async (directory, options) => {
261
441
  console.log(import_chalk.default.blue("\u{1F50D} Analyzing patterns...\n"));
442
+ const startTime = Date.now();
262
443
  const results = await analyzePatterns({
263
444
  rootDir: directory,
264
445
  minSimilarity: parseFloat(options.similarity),
265
446
  minLines: parseInt(options.minLines),
447
+ batchSize: parseInt(options.batchSize),
448
+ approx: options.approx !== false,
449
+ // default true; --no-approx sets to false
450
+ minSharedTokens: parseInt(options.minSharedTokens),
451
+ maxCandidatesPerBlock: parseInt(options.maxCandidates),
452
+ streamResults: options.streamResults !== false,
453
+ // default true; --no-stream-results sets to false
266
454
  include: options.include?.split(","),
267
455
  exclude: options.exclude?.split(",")
268
456
  });
457
+ const elapsedTime = ((Date.now() - startTime) / 1e3).toFixed(2);
269
458
  const summary = generateSummary(results);
270
459
  const totalIssues = results.reduce((sum, r) => sum + r.issues.length, 0);
271
460
  if (options.output === "json") {
@@ -291,9 +480,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
291
480
  \u2713 HTML report saved to ${outputPath}`));
292
481
  return;
293
482
  }
294
- console.log(import_chalk.default.cyan("\u2501".repeat(60)));
483
+ const terminalWidth = process.stdout.columns || 80;
484
+ const dividerWidth = Math.min(60, terminalWidth - 2);
485
+ const divider = "\u2501".repeat(dividerWidth);
486
+ console.log(import_chalk.default.cyan(divider));
295
487
  console.log(import_chalk.default.bold.white(" PATTERN ANALYSIS SUMMARY"));
296
- console.log(import_chalk.default.cyan("\u2501".repeat(60)) + "\n");
488
+ console.log(import_chalk.default.cyan(divider) + "\n");
297
489
  console.log(
298
490
  import_chalk.default.white(`\u{1F4C1} Files analyzed: ${import_chalk.default.bold(results.length)}`)
299
491
  );
@@ -305,18 +497,23 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
305
497
  `\u{1F4B0} Token cost (wasted): ${import_chalk.default.bold(summary.totalTokenCost.toLocaleString())}`
306
498
  )
307
499
  );
308
- console.log(import_chalk.default.cyan("\n\u2501".repeat(60)));
309
- console.log(import_chalk.default.bold.white(" PATTERNS BY TYPE"));
310
- console.log(import_chalk.default.cyan("\u2501".repeat(60)) + "\n");
500
+ console.log(
501
+ import_chalk.default.gray(`\u23F1 Analysis time: ${import_chalk.default.bold(elapsedTime + "s")}`)
502
+ );
311
503
  const sortedTypes = Object.entries(summary.patternsByType).filter(([, count]) => count > 0).sort(([, a], [, b]) => b - a);
312
- sortedTypes.forEach(([type, count]) => {
313
- const icon = getPatternIcon(type);
314
- console.log(`${icon} ${import_chalk.default.white(type.padEnd(15))} ${import_chalk.default.bold(count)}`);
315
- });
316
- if (summary.topDuplicates.length > 0) {
317
- console.log(import_chalk.default.cyan("\n\u2501".repeat(60)));
504
+ if (sortedTypes.length > 0) {
505
+ console.log(import_chalk.default.cyan("\n" + divider));
506
+ console.log(import_chalk.default.bold.white(" PATTERNS BY TYPE"));
507
+ console.log(import_chalk.default.cyan(divider) + "\n");
508
+ sortedTypes.forEach(([type, count]) => {
509
+ const icon = getPatternIcon(type);
510
+ console.log(`${icon} ${import_chalk.default.white(type.padEnd(15))} ${import_chalk.default.bold(count)}`);
511
+ });
512
+ }
513
+ if (summary.topDuplicates.length > 0 && totalIssues > 0) {
514
+ console.log(import_chalk.default.cyan("\n" + divider));
318
515
  console.log(import_chalk.default.bold.white(" TOP DUPLICATE PATTERNS"));
319
- console.log(import_chalk.default.cyan("\u2501".repeat(60)) + "\n");
516
+ console.log(import_chalk.default.cyan(divider) + "\n");
320
517
  summary.topDuplicates.slice(0, 10).forEach((dup, idx) => {
321
518
  const severityColor = dup.similarity > 0.95 ? import_chalk.default.red : dup.similarity > 0.9 ? import_chalk.default.yellow : import_chalk.default.blue;
322
519
  console.log(
@@ -324,12 +521,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
324
521
  `${Math.round(dup.similarity * 100)}%`
325
522
  )} ${getPatternIcon(dup.patternType)} ${import_chalk.default.white(dup.patternType)}`
326
523
  );
327
- console.log(
328
- ` ${import_chalk.default.dim(dup.file1)}`
329
- );
330
- console.log(
331
- ` ${import_chalk.default.dim("\u2194")} ${import_chalk.default.dim(dup.file2)}`
332
- );
524
+ dup.files.forEach((file, fileIdx) => {
525
+ const prefix = fileIdx === 0 ? " " : " \u2194 ";
526
+ console.log(
527
+ `${import_chalk.default.dim(prefix)}${import_chalk.default.dim(file.path)}:${import_chalk.default.cyan(file.startLine)}-${import_chalk.default.cyan(file.endLine)}`
528
+ );
529
+ });
333
530
  console.log(
334
531
  ` ${import_chalk.default.red(`${dup.tokenCost.toLocaleString()} tokens wasted`)}
335
532
  `
@@ -343,9 +540,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
343
540
  (issue) => issue.severity === "critical"
344
541
  );
345
542
  if (criticalIssues.length > 0) {
346
- console.log(import_chalk.default.cyan("\u2501".repeat(60)));
543
+ console.log(import_chalk.default.cyan(divider));
347
544
  console.log(import_chalk.default.bold.white(" CRITICAL ISSUES (>95% similar)"));
348
- console.log(import_chalk.default.cyan("\u2501".repeat(60)) + "\n");
545
+ console.log(import_chalk.default.cyan(divider) + "\n");
349
546
  criticalIssues.slice(0, 5).forEach((issue) => {
350
547
  console.log(import_chalk.default.red("\u25CF ") + import_chalk.default.white(`${issue.file}:${issue.location.line}`));
351
548
  console.log(` ${import_chalk.default.dim(issue.message)}`);
@@ -353,14 +550,19 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
353
550
  `);
354
551
  });
355
552
  }
356
- console.log(import_chalk.default.cyan("\u2501".repeat(60)));
357
- console.log(
358
- import_chalk.default.white(
359
- `
553
+ if (totalIssues === 0) {
554
+ console.log(import_chalk.default.green("\n\u2728 Great! No duplicate patterns detected.\n"));
555
+ }
556
+ console.log(import_chalk.default.cyan(divider));
557
+ if (totalIssues > 0) {
558
+ console.log(
559
+ import_chalk.default.white(
560
+ `
360
561
  \u{1F4A1} Run with ${import_chalk.default.bold("--output json")} or ${import_chalk.default.bold("--output html")} for detailed reports
361
562
  `
362
- )
363
- );
563
+ )
564
+ );
565
+ }
364
566
  });
365
567
  function getPatternIcon(type) {
366
568
  const icons = {
@@ -421,8 +623,7 @@ function generateHTMLReport(summary, results) {
421
623
  <tr>
422
624
  <th>Similarity</th>
423
625
  <th>Type</th>
424
- <th>File 1</th>
425
- <th>File 2</th>
626
+ <th>Files</th>
426
627
  <th>Token Cost</th>
427
628
  </tr>
428
629
  </thead>
@@ -432,8 +633,7 @@ function generateHTMLReport(summary, results) {
432
633
  <tr>
433
634
  <td class="${dup.similarity > 0.95 ? "critical" : dup.similarity > 0.9 ? "major" : "minor"}">${Math.round(dup.similarity * 100)}%</td>
434
635
  <td>${dup.patternType}</td>
435
- <td><code>${dup.file1}</code></td>
436
- <td><code>${dup.file2}</code></td>
636
+ <td>${dup.files.map((f) => `<code>${f.path}:${f.startLine}-${f.endLine}</code>`).join("<br>\u2194<br>")}</td>
437
637
  <td>${dup.tokenCost.toLocaleString()}</td>
438
638
  </tr>
439
639
  `
package/dist/cli.mjs CHANGED
@@ -2,7 +2,7 @@
2
2
  import {
3
3
  analyzePatterns,
4
4
  generateSummary
5
- } from "./chunk-RLWJXASG.mjs";
5
+ } from "./chunk-JKVKOXYR.mjs";
6
6
 
7
7
  // src/cli.ts
8
8
  import { Command } from "commander";
@@ -10,19 +10,28 @@ import chalk from "chalk";
10
10
  import { writeFileSync } from "fs";
11
11
  import { join } from "path";
12
12
  var program = new Command();
13
- program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.85").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
13
+ program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-stream-results", "Disable incremental output (default: enabled)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
14
14
  "-o, --output <format>",
15
15
  "Output format: console, json, html",
16
16
  "console"
17
17
  ).option("--output-file <path>", "Output file path (for json/html)").action(async (directory, options) => {
18
18
  console.log(chalk.blue("\u{1F50D} Analyzing patterns...\n"));
19
+ const startTime = Date.now();
19
20
  const results = await analyzePatterns({
20
21
  rootDir: directory,
21
22
  minSimilarity: parseFloat(options.similarity),
22
23
  minLines: parseInt(options.minLines),
24
+ batchSize: parseInt(options.batchSize),
25
+ approx: options.approx !== false,
26
+ // default true; --no-approx sets to false
27
+ minSharedTokens: parseInt(options.minSharedTokens),
28
+ maxCandidatesPerBlock: parseInt(options.maxCandidates),
29
+ streamResults: options.streamResults !== false,
30
+ // default true; --no-stream-results sets to false
23
31
  include: options.include?.split(","),
24
32
  exclude: options.exclude?.split(",")
25
33
  });
34
+ const elapsedTime = ((Date.now() - startTime) / 1e3).toFixed(2);
26
35
  const summary = generateSummary(results);
27
36
  const totalIssues = results.reduce((sum, r) => sum + r.issues.length, 0);
28
37
  if (options.output === "json") {
@@ -48,9 +57,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
48
57
  \u2713 HTML report saved to ${outputPath}`));
49
58
  return;
50
59
  }
51
- console.log(chalk.cyan("\u2501".repeat(60)));
60
+ const terminalWidth = process.stdout.columns || 80;
61
+ const dividerWidth = Math.min(60, terminalWidth - 2);
62
+ const divider = "\u2501".repeat(dividerWidth);
63
+ console.log(chalk.cyan(divider));
52
64
  console.log(chalk.bold.white(" PATTERN ANALYSIS SUMMARY"));
53
- console.log(chalk.cyan("\u2501".repeat(60)) + "\n");
65
+ console.log(chalk.cyan(divider) + "\n");
54
66
  console.log(
55
67
  chalk.white(`\u{1F4C1} Files analyzed: ${chalk.bold(results.length)}`)
56
68
  );
@@ -62,18 +74,23 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
62
74
  `\u{1F4B0} Token cost (wasted): ${chalk.bold(summary.totalTokenCost.toLocaleString())}`
63
75
  )
64
76
  );
65
- console.log(chalk.cyan("\n\u2501".repeat(60)));
66
- console.log(chalk.bold.white(" PATTERNS BY TYPE"));
67
- console.log(chalk.cyan("\u2501".repeat(60)) + "\n");
77
+ console.log(
78
+ chalk.gray(`\u23F1 Analysis time: ${chalk.bold(elapsedTime + "s")}`)
79
+ );
68
80
  const sortedTypes = Object.entries(summary.patternsByType).filter(([, count]) => count > 0).sort(([, a], [, b]) => b - a);
69
- sortedTypes.forEach(([type, count]) => {
70
- const icon = getPatternIcon(type);
71
- console.log(`${icon} ${chalk.white(type.padEnd(15))} ${chalk.bold(count)}`);
72
- });
73
- if (summary.topDuplicates.length > 0) {
74
- console.log(chalk.cyan("\n\u2501".repeat(60)));
81
+ if (sortedTypes.length > 0) {
82
+ console.log(chalk.cyan("\n" + divider));
83
+ console.log(chalk.bold.white(" PATTERNS BY TYPE"));
84
+ console.log(chalk.cyan(divider) + "\n");
85
+ sortedTypes.forEach(([type, count]) => {
86
+ const icon = getPatternIcon(type);
87
+ console.log(`${icon} ${chalk.white(type.padEnd(15))} ${chalk.bold(count)}`);
88
+ });
89
+ }
90
+ if (summary.topDuplicates.length > 0 && totalIssues > 0) {
91
+ console.log(chalk.cyan("\n" + divider));
75
92
  console.log(chalk.bold.white(" TOP DUPLICATE PATTERNS"));
76
- console.log(chalk.cyan("\u2501".repeat(60)) + "\n");
93
+ console.log(chalk.cyan(divider) + "\n");
77
94
  summary.topDuplicates.slice(0, 10).forEach((dup, idx) => {
78
95
  const severityColor = dup.similarity > 0.95 ? chalk.red : dup.similarity > 0.9 ? chalk.yellow : chalk.blue;
79
96
  console.log(
@@ -81,12 +98,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
81
98
  `${Math.round(dup.similarity * 100)}%`
82
99
  )} ${getPatternIcon(dup.patternType)} ${chalk.white(dup.patternType)}`
83
100
  );
84
- console.log(
85
- ` ${chalk.dim(dup.file1)}`
86
- );
87
- console.log(
88
- ` ${chalk.dim("\u2194")} ${chalk.dim(dup.file2)}`
89
- );
101
+ dup.files.forEach((file, fileIdx) => {
102
+ const prefix = fileIdx === 0 ? " " : " \u2194 ";
103
+ console.log(
104
+ `${chalk.dim(prefix)}${chalk.dim(file.path)}:${chalk.cyan(file.startLine)}-${chalk.cyan(file.endLine)}`
105
+ );
106
+ });
90
107
  console.log(
91
108
  ` ${chalk.red(`${dup.tokenCost.toLocaleString()} tokens wasted`)}
92
109
  `
@@ -100,9 +117,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
100
117
  (issue) => issue.severity === "critical"
101
118
  );
102
119
  if (criticalIssues.length > 0) {
103
- console.log(chalk.cyan("\u2501".repeat(60)));
120
+ console.log(chalk.cyan(divider));
104
121
  console.log(chalk.bold.white(" CRITICAL ISSUES (>95% similar)"));
105
- console.log(chalk.cyan("\u2501".repeat(60)) + "\n");
122
+ console.log(chalk.cyan(divider) + "\n");
106
123
  criticalIssues.slice(0, 5).forEach((issue) => {
107
124
  console.log(chalk.red("\u25CF ") + chalk.white(`${issue.file}:${issue.location.line}`));
108
125
  console.log(` ${chalk.dim(issue.message)}`);
@@ -110,14 +127,19 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
110
127
  `);
111
128
  });
112
129
  }
113
- console.log(chalk.cyan("\u2501".repeat(60)));
114
- console.log(
115
- chalk.white(
116
- `
130
+ if (totalIssues === 0) {
131
+ console.log(chalk.green("\n\u2728 Great! No duplicate patterns detected.\n"));
132
+ }
133
+ console.log(chalk.cyan(divider));
134
+ if (totalIssues > 0) {
135
+ console.log(
136
+ chalk.white(
137
+ `
117
138
  \u{1F4A1} Run with ${chalk.bold("--output json")} or ${chalk.bold("--output html")} for detailed reports
118
139
  `
119
- )
120
- );
140
+ )
141
+ );
142
+ }
121
143
  });
122
144
  function getPatternIcon(type) {
123
145
  const icons = {
@@ -178,8 +200,7 @@ function generateHTMLReport(summary, results) {
178
200
  <tr>
179
201
  <th>Similarity</th>
180
202
  <th>Type</th>
181
- <th>File 1</th>
182
- <th>File 2</th>
203
+ <th>Files</th>
183
204
  <th>Token Cost</th>
184
205
  </tr>
185
206
  </thead>
@@ -189,8 +210,7 @@ function generateHTMLReport(summary, results) {
189
210
  <tr>
190
211
  <td class="${dup.similarity > 0.95 ? "critical" : dup.similarity > 0.9 ? "major" : "minor"}">${Math.round(dup.similarity * 100)}%</td>
191
212
  <td>${dup.patternType}</td>
192
- <td><code>${dup.file1}</code></td>
193
- <td><code>${dup.file2}</code></td>
213
+ <td>${dup.files.map((f) => `<code>${f.path}:${f.startLine}-${f.endLine}</code>`).join("<br>\u2194<br>")}</td>
194
214
  <td>${dup.tokenCost.toLocaleString()}</td>
195
215
  </tr>
196
216
  `
package/dist/index.d.mts CHANGED
@@ -5,6 +5,8 @@ interface DuplicatePattern {
5
5
  file2: string;
6
6
  line1: number;
7
7
  line2: number;
8
+ endLine1: number;
9
+ endLine2: number;
8
10
  similarity: number;
9
11
  snippet: string;
10
12
  patternType: PatternType;
@@ -19,23 +21,38 @@ interface FileContent {
19
21
  interface DetectionOptions {
20
22
  minSimilarity: number;
21
23
  minLines: number;
24
+ maxBlocks?: number;
25
+ batchSize?: number;
26
+ approx?: boolean;
27
+ minSharedTokens?: number;
28
+ maxCandidatesPerBlock?: number;
29
+ maxComparisons?: number;
30
+ streamResults?: boolean;
22
31
  }
23
32
  /**
24
33
  * Detect duplicate patterns across files with enhanced analysis
25
34
  */
26
- declare function detectDuplicatePatterns(files: FileContent[], options: DetectionOptions): DuplicatePattern[];
35
+ declare function detectDuplicatePatterns(files: FileContent[], options: DetectionOptions): Promise<DuplicatePattern[]>;
27
36
 
28
37
  interface PatternDetectOptions extends ScanOptions {
29
38
  minSimilarity?: number;
30
39
  minLines?: number;
40
+ batchSize?: number;
41
+ approx?: boolean;
42
+ minSharedTokens?: number;
43
+ maxCandidatesPerBlock?: number;
44
+ streamResults?: boolean;
31
45
  }
32
46
  interface PatternSummary {
33
47
  totalPatterns: number;
34
48
  totalTokenCost: number;
35
49
  patternsByType: Record<PatternType, number>;
36
50
  topDuplicates: Array<{
37
- file1: string;
38
- file2: string;
51
+ files: Array<{
52
+ path: string;
53
+ startLine: number;
54
+ endLine: number;
55
+ }>;
39
56
  similarity: number;
40
57
  patternType: PatternType;
41
58
  tokenCost: number;