@aiready/pattern-detect 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -82,6 +82,7 @@ function extractCodeBlocks(content, minLines) {
82
82
  blocks.push({
83
83
  content: blockContent,
84
84
  startLine: blockStart + 1,
85
+ endLine: i + 1,
85
86
  patternType: categorizePattern(blockContent),
86
87
  linesOfCode
87
88
  });
@@ -97,6 +98,16 @@ function extractCodeBlocks(content, minLines) {
97
98
  function normalizeCode(code) {
98
99
  return code.replace(/\/\/.*$/gm, "").replace(/\/\*[\s\S]*?\*\//g, "").replace(/"[^"]*"/g, '"STR"').replace(/'[^']*'/g, "'STR'").replace(/`[^`]*`/g, "`STR`").replace(/\b\d+\b/g, "NUM").replace(/\s+/g, " ").trim();
99
100
  }
101
+ function jaccardSimilarity(tokens1, tokens2) {
102
+ const set1 = new Set(tokens1);
103
+ const set2 = new Set(tokens2);
104
+ let intersection = 0;
105
+ for (const token of set1) {
106
+ if (set2.has(token)) intersection++;
107
+ }
108
+ const union = set1.size + set2.size - intersection;
109
+ return union === 0 ? 0 : intersection / union;
110
+ }
100
111
  function calculateSimilarity(block1, block2) {
101
112
  const norm1 = normalizeCode(block1);
102
113
  const norm2 = normalizeCode(block2);
@@ -106,39 +117,189 @@ function calculateSimilarity(block1, block2) {
106
117
  const tokenSimilarity = (0, import_core.similarityScore)(tokens1.join(" "), tokens2.join(" "));
107
118
  return baseSimilarity * 0.4 + tokenSimilarity * 0.6;
108
119
  }
109
- function detectDuplicatePatterns(files, options) {
110
- const { minSimilarity, minLines } = options;
120
+ async function detectDuplicatePatterns(files, options) {
121
+ const {
122
+ minSimilarity,
123
+ minLines,
124
+ maxBlocks = 500,
125
+ batchSize = 100,
126
+ approx = true,
127
+ minSharedTokens = 8,
128
+ maxCandidatesPerBlock = 100,
129
+ fastMode = true,
130
+ maxComparisons = 5e4,
131
+ // Cap at 50K comparisons by default
132
+ streamResults = false
133
+ } = options;
111
134
  const duplicates = [];
112
- const allBlocks = files.flatMap(
135
+ let allBlocks = files.flatMap(
113
136
  (file) => extractCodeBlocks(file.content, minLines).map((block) => ({
114
- ...block,
137
+ content: block.content,
138
+ startLine: block.startLine,
139
+ endLine: block.endLine,
115
140
  file: file.file,
116
141
  normalized: normalizeCode(block.content),
117
- tokenCost: (0, import_core.estimateTokens)(block.content)
142
+ patternType: block.patternType,
143
+ tokenCost: (0, import_core.estimateTokens)(block.content),
144
+ linesOfCode: block.linesOfCode
118
145
  }))
119
146
  );
120
147
  console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
148
+ if (allBlocks.length > maxBlocks) {
149
+ console.log(`\u26A0\uFE0F Limiting to ${maxBlocks} blocks (sorted by size) to prevent memory issues`);
150
+ console.log(` Use --max-blocks to increase limit or --min-lines to filter smaller blocks`);
151
+ allBlocks = allBlocks.sort((a, b) => b.linesOfCode - a.linesOfCode).slice(0, maxBlocks);
152
+ }
153
+ const stopwords = /* @__PURE__ */ new Set([
154
+ "return",
155
+ "const",
156
+ "let",
157
+ "var",
158
+ "function",
159
+ "class",
160
+ "new",
161
+ "if",
162
+ "else",
163
+ "for",
164
+ "while",
165
+ "async",
166
+ "await",
167
+ "try",
168
+ "catch",
169
+ "switch",
170
+ "case",
171
+ "default",
172
+ "import",
173
+ "export",
174
+ "from",
175
+ "true",
176
+ "false",
177
+ "null",
178
+ "undefined",
179
+ "this"
180
+ ]);
181
+ const tokenize = (norm) => norm.split(/[\s(){}\[\];,\.]+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
182
+ const blockTokens = allBlocks.map((b) => tokenize(b.normalized));
183
+ const invertedIndex = /* @__PURE__ */ new Map();
184
+ if (approx) {
185
+ for (let i = 0; i < blockTokens.length; i++) {
186
+ for (const tok of blockTokens[i]) {
187
+ let arr = invertedIndex.get(tok);
188
+ if (!arr) {
189
+ arr = [];
190
+ invertedIndex.set(tok, arr);
191
+ }
192
+ arr.push(i);
193
+ }
194
+ }
195
+ }
196
+ const totalComparisons = approx ? void 0 : allBlocks.length * (allBlocks.length - 1) / 2;
197
+ if (totalComparisons !== void 0) {
198
+ console.log(`Processing ${totalComparisons.toLocaleString()} comparisons in batches...`);
199
+ } else {
200
+ console.log(`Using approximate candidate selection to reduce comparisons...`);
201
+ }
202
+ let comparisonsProcessed = 0;
203
+ let comparisonsBudgetExhausted = false;
204
+ const startTime = Date.now();
121
205
  for (let i = 0; i < allBlocks.length; i++) {
122
- for (let j = i + 1; j < allBlocks.length; j++) {
123
- const block1 = allBlocks[i];
124
- const block2 = allBlocks[j];
125
- if (block1.file === block2.file) continue;
126
- const similarity = calculateSimilarity(block1.content, block2.content);
127
- if (similarity >= minSimilarity) {
128
- duplicates.push({
129
- file1: block1.file,
130
- file2: block2.file,
131
- line1: block1.startLine,
132
- line2: block2.startLine,
133
- similarity,
134
- snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
135
- patternType: block1.patternType,
136
- tokenCost: block1.tokenCost + block2.tokenCost,
137
- linesOfCode: block1.linesOfCode
138
- });
206
+ if (maxComparisons && comparisonsProcessed >= maxComparisons) {
207
+ comparisonsBudgetExhausted = true;
208
+ break;
209
+ }
210
+ if (i % batchSize === 0 && i > 0) {
211
+ const elapsed = ((Date.now() - startTime) / 1e3).toFixed(1);
212
+ const duplicatesFound = duplicates.length;
213
+ if (totalComparisons !== void 0) {
214
+ const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
215
+ const remaining = totalComparisons - comparisonsProcessed;
216
+ const rate = comparisonsProcessed / parseFloat(elapsed);
217
+ const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
218
+ console.log(` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`);
219
+ } else {
220
+ console.log(` Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`);
221
+ }
222
+ await new Promise((resolve) => setImmediate(resolve));
223
+ }
224
+ const block1 = allBlocks[i];
225
+ let candidates = null;
226
+ if (approx) {
227
+ const counts = /* @__PURE__ */ new Map();
228
+ for (const tok of blockTokens[i]) {
229
+ const ids = invertedIndex.get(tok);
230
+ if (!ids) continue;
231
+ for (const j of ids) {
232
+ if (j <= i) continue;
233
+ if (allBlocks[j].file === block1.file) continue;
234
+ counts.set(j, (counts.get(j) || 0) + 1);
235
+ }
236
+ }
237
+ candidates = Array.from(counts.entries()).filter(([, shared]) => shared >= minSharedTokens).sort((a, b) => b[1] - a[1]).slice(0, maxCandidatesPerBlock).map(([j, shared]) => ({ j, shared }));
238
+ }
239
+ if (approx && candidates) {
240
+ for (const { j } of candidates) {
241
+ if (maxComparisons && comparisonsProcessed >= maxComparisons) break;
242
+ comparisonsProcessed++;
243
+ const block2 = allBlocks[j];
244
+ const similarity = fastMode ? jaccardSimilarity(blockTokens[i], blockTokens[j]) : calculateSimilarity(block1.content, block2.content);
245
+ if (similarity >= minSimilarity) {
246
+ const duplicate = {
247
+ file1: block1.file,
248
+ file2: block2.file,
249
+ line1: block1.startLine,
250
+ line2: block2.startLine,
251
+ endLine1: block1.endLine,
252
+ endLine2: block2.endLine,
253
+ similarity,
254
+ snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
255
+ patternType: block1.patternType,
256
+ tokenCost: block1.tokenCost + block2.tokenCost,
257
+ linesOfCode: block1.linesOfCode
258
+ };
259
+ duplicates.push(duplicate);
260
+ if (streamResults) {
261
+ console.log(`
262
+ \u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
263
+ console.log(` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
264
+ console.log(` Token cost: ${duplicate.tokenCost.toLocaleString()}`);
265
+ }
266
+ }
267
+ }
268
+ } else {
269
+ for (let j = i + 1; j < allBlocks.length; j++) {
270
+ if (maxComparisons && comparisonsProcessed >= maxComparisons) break;
271
+ comparisonsProcessed++;
272
+ const block2 = allBlocks[j];
273
+ if (block1.file === block2.file) continue;
274
+ const similarity = fastMode ? jaccardSimilarity(blockTokens[i], blockTokens[j]) : calculateSimilarity(block1.content, block2.content);
275
+ if (similarity >= minSimilarity) {
276
+ const duplicate = {
277
+ file1: block1.file,
278
+ file2: block2.file,
279
+ line1: block1.startLine,
280
+ line2: block2.startLine,
281
+ endLine1: block1.endLine,
282
+ endLine2: block2.endLine,
283
+ similarity,
284
+ snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
285
+ patternType: block1.patternType,
286
+ tokenCost: block1.tokenCost + block2.tokenCost,
287
+ linesOfCode: block1.linesOfCode
288
+ };
289
+ duplicates.push(duplicate);
290
+ if (streamResults) {
291
+ console.log(`
292
+ \u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
293
+ console.log(` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
294
+ console.log(` Token cost: ${duplicate.tokenCost.toLocaleString()}`);
295
+ }
296
+ }
139
297
  }
140
298
  }
141
299
  }
300
+ if (comparisonsBudgetExhausted) {
301
+ console.log(`\u26A0\uFE0F Comparison budget exhausted (${maxComparisons.toLocaleString()} comparisons). Use --max-comparisons to increase.`);
302
+ }
142
303
  return duplicates.sort(
143
304
  (a, b) => b.similarity - a.similarity || b.tokenCost - a.tokenCost
144
305
  );
@@ -159,7 +320,20 @@ function getRefactoringSuggestion(patternType, similarity) {
159
320
  return baseMessages[patternType] + urgency;
160
321
  }
161
322
  async function analyzePatterns(options) {
162
- const { minSimilarity = 0.85, minLines = 5, ...scanOptions } = options;
323
+ const {
324
+ minSimilarity = 0.65,
325
+ // Lower default for fast Jaccard mode (Levenshtein would be 0.85+)
326
+ minLines = 5,
327
+ maxBlocks = 500,
328
+ batchSize = 100,
329
+ approx = true,
330
+ minSharedTokens = 8,
331
+ maxCandidatesPerBlock = 100,
332
+ fastMode = true,
333
+ maxComparisons = 5e4,
334
+ streamResults = false,
335
+ ...scanOptions
336
+ } = options;
163
337
  const files = await (0, import_core2.scanFiles)(scanOptions);
164
338
  const results = [];
165
339
  const fileContents = await Promise.all(
@@ -168,9 +342,17 @@ async function analyzePatterns(options) {
168
342
  content: await (0, import_core2.readFileContent)(file)
169
343
  }))
170
344
  );
171
- const duplicates = detectDuplicatePatterns(fileContents, {
345
+ const duplicates = await detectDuplicatePatterns(fileContents, {
172
346
  minSimilarity,
173
- minLines
347
+ minLines,
348
+ maxBlocks,
349
+ batchSize,
350
+ approx,
351
+ minSharedTokens,
352
+ maxCandidatesPerBlock,
353
+ fastMode,
354
+ maxComparisons,
355
+ streamResults
174
356
  });
175
357
  for (const file of files) {
176
358
  const fileDuplicates = duplicates.filter(
@@ -235,6 +417,13 @@ function generateSummary(results) {
235
417
  return {
236
418
  file1: issue.location.file,
237
419
  file2: fileMatch?.[1] || "unknown",
420
+ line1: issue.location.line,
421
+ line2: 0,
422
+ // Not available from Issue
423
+ endLine1: 0,
424
+ // Not available from Issue
425
+ endLine2: 0,
426
+ // Not available from Issue
238
427
  similarity: similarityMatch ? parseInt(similarityMatch[1]) / 100 : 0,
239
428
  patternType: typeMatch?.[1] || "unknown",
240
429
  tokenCost: tokenMatch ? parseInt(tokenMatch[1]) : 0
@@ -253,7 +442,7 @@ var import_chalk = __toESM(require("chalk"));
253
442
  var import_fs = require("fs");
254
443
  var import_path = require("path");
255
444
  var program = new import_commander.Command();
256
- program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.85").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
445
+ program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--max-blocks <number>", "Maximum blocks to analyze (prevents OOM)", "500").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-fast-mode", "Use slower but more accurate Levenshtein distance (default: fast Jaccard)").option("--max-comparisons <number>", "Maximum total comparisons budget", "50000").option("--stream-results", "Output duplicates incrementally as found (useful for slow analysis)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
257
446
  "-o, --output <format>",
258
447
  "Output format: console, json, html",
259
448
  "console"
@@ -263,6 +452,17 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
263
452
  rootDir: directory,
264
453
  minSimilarity: parseFloat(options.similarity),
265
454
  minLines: parseInt(options.minLines),
455
+ maxBlocks: parseInt(options.maxBlocks),
456
+ batchSize: parseInt(options.batchSize),
457
+ approx: options.approx !== false,
458
+ // default true; --no-approx sets to false
459
+ minSharedTokens: parseInt(options.minSharedTokens),
460
+ maxCandidatesPerBlock: parseInt(options.maxCandidates),
461
+ fastMode: options.fastMode !== false,
462
+ // default true; --no-fast-mode sets to false
463
+ maxComparisons: parseInt(options.maxComparisons),
464
+ streamResults: options.streamResults === true,
465
+ // default false; --stream-results sets to true
266
466
  include: options.include?.split(","),
267
467
  exclude: options.exclude?.split(",")
268
468
  });
@@ -291,9 +491,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
291
491
  \u2713 HTML report saved to ${outputPath}`));
292
492
  return;
293
493
  }
294
- console.log(import_chalk.default.cyan("\u2501".repeat(60)));
494
+ const terminalWidth = process.stdout.columns || 80;
495
+ const dividerWidth = Math.min(60, terminalWidth - 2);
496
+ const divider = "\u2501".repeat(dividerWidth);
497
+ console.log(import_chalk.default.cyan(divider));
295
498
  console.log(import_chalk.default.bold.white(" PATTERN ANALYSIS SUMMARY"));
296
- console.log(import_chalk.default.cyan("\u2501".repeat(60)) + "\n");
499
+ console.log(import_chalk.default.cyan(divider) + "\n");
297
500
  console.log(
298
501
  import_chalk.default.white(`\u{1F4C1} Files analyzed: ${import_chalk.default.bold(results.length)}`)
299
502
  );
@@ -305,18 +508,20 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
305
508
  `\u{1F4B0} Token cost (wasted): ${import_chalk.default.bold(summary.totalTokenCost.toLocaleString())}`
306
509
  )
307
510
  );
308
- console.log(import_chalk.default.cyan("\n\u2501".repeat(60)));
309
- console.log(import_chalk.default.bold.white(" PATTERNS BY TYPE"));
310
- console.log(import_chalk.default.cyan("\u2501".repeat(60)) + "\n");
311
511
  const sortedTypes = Object.entries(summary.patternsByType).filter(([, count]) => count > 0).sort(([, a], [, b]) => b - a);
312
- sortedTypes.forEach(([type, count]) => {
313
- const icon = getPatternIcon(type);
314
- console.log(`${icon} ${import_chalk.default.white(type.padEnd(15))} ${import_chalk.default.bold(count)}`);
315
- });
316
- if (summary.topDuplicates.length > 0) {
317
- console.log(import_chalk.default.cyan("\n\u2501".repeat(60)));
512
+ if (sortedTypes.length > 0) {
513
+ console.log(import_chalk.default.cyan("\n" + divider));
514
+ console.log(import_chalk.default.bold.white(" PATTERNS BY TYPE"));
515
+ console.log(import_chalk.default.cyan(divider) + "\n");
516
+ sortedTypes.forEach(([type, count]) => {
517
+ const icon = getPatternIcon(type);
518
+ console.log(`${icon} ${import_chalk.default.white(type.padEnd(15))} ${import_chalk.default.bold(count)}`);
519
+ });
520
+ }
521
+ if (summary.topDuplicates.length > 0 && totalIssues > 0) {
522
+ console.log(import_chalk.default.cyan("\n" + divider));
318
523
  console.log(import_chalk.default.bold.white(" TOP DUPLICATE PATTERNS"));
319
- console.log(import_chalk.default.cyan("\u2501".repeat(60)) + "\n");
524
+ console.log(import_chalk.default.cyan(divider) + "\n");
320
525
  summary.topDuplicates.slice(0, 10).forEach((dup, idx) => {
321
526
  const severityColor = dup.similarity > 0.95 ? import_chalk.default.red : dup.similarity > 0.9 ? import_chalk.default.yellow : import_chalk.default.blue;
322
527
  console.log(
@@ -325,10 +530,10 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
325
530
  )} ${getPatternIcon(dup.patternType)} ${import_chalk.default.white(dup.patternType)}`
326
531
  );
327
532
  console.log(
328
- ` ${import_chalk.default.dim(dup.file1)}`
533
+ ` ${import_chalk.default.dim(dup.file1)}:${import_chalk.default.cyan(dup.line1)}-${import_chalk.default.cyan(dup.endLine1)}`
329
534
  );
330
535
  console.log(
331
- ` ${import_chalk.default.dim("\u2194")} ${import_chalk.default.dim(dup.file2)}`
536
+ ` ${import_chalk.default.dim("\u2194")} ${import_chalk.default.dim(dup.file2)}:${import_chalk.default.cyan(dup.line2)}-${import_chalk.default.cyan(dup.endLine2)}`
332
537
  );
333
538
  console.log(
334
539
  ` ${import_chalk.default.red(`${dup.tokenCost.toLocaleString()} tokens wasted`)}
@@ -343,9 +548,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
343
548
  (issue) => issue.severity === "critical"
344
549
  );
345
550
  if (criticalIssues.length > 0) {
346
- console.log(import_chalk.default.cyan("\u2501".repeat(60)));
551
+ console.log(import_chalk.default.cyan(divider));
347
552
  console.log(import_chalk.default.bold.white(" CRITICAL ISSUES (>95% similar)"));
348
- console.log(import_chalk.default.cyan("\u2501".repeat(60)) + "\n");
553
+ console.log(import_chalk.default.cyan(divider) + "\n");
349
554
  criticalIssues.slice(0, 5).forEach((issue) => {
350
555
  console.log(import_chalk.default.red("\u25CF ") + import_chalk.default.white(`${issue.file}:${issue.location.line}`));
351
556
  console.log(` ${import_chalk.default.dim(issue.message)}`);
@@ -353,14 +558,19 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
353
558
  `);
354
559
  });
355
560
  }
356
- console.log(import_chalk.default.cyan("\u2501".repeat(60)));
357
- console.log(
358
- import_chalk.default.white(
359
- `
561
+ if (totalIssues === 0) {
562
+ console.log(import_chalk.default.green("\n\u2728 Great! No duplicate patterns detected.\n"));
563
+ }
564
+ console.log(import_chalk.default.cyan(divider));
565
+ if (totalIssues > 0) {
566
+ console.log(
567
+ import_chalk.default.white(
568
+ `
360
569
  \u{1F4A1} Run with ${import_chalk.default.bold("--output json")} or ${import_chalk.default.bold("--output html")} for detailed reports
361
570
  `
362
- )
363
- );
571
+ )
572
+ );
573
+ }
364
574
  });
365
575
  function getPatternIcon(type) {
366
576
  const icons = {
package/dist/cli.mjs CHANGED
@@ -2,7 +2,7 @@
2
2
  import {
3
3
  analyzePatterns,
4
4
  generateSummary
5
- } from "./chunk-RLWJXASG.mjs";
5
+ } from "./chunk-N5DE7IYX.mjs";
6
6
 
7
7
  // src/cli.ts
8
8
  import { Command } from "commander";
@@ -10,7 +10,7 @@ import chalk from "chalk";
10
10
  import { writeFileSync } from "fs";
11
11
  import { join } from "path";
12
12
  var program = new Command();
13
- program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.85").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
13
+ program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--max-blocks <number>", "Maximum blocks to analyze (prevents OOM)", "500").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-fast-mode", "Use slower but more accurate Levenshtein distance (default: fast Jaccard)").option("--max-comparisons <number>", "Maximum total comparisons budget", "50000").option("--stream-results", "Output duplicates incrementally as found (useful for slow analysis)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
14
14
  "-o, --output <format>",
15
15
  "Output format: console, json, html",
16
16
  "console"
@@ -20,6 +20,17 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
20
20
  rootDir: directory,
21
21
  minSimilarity: parseFloat(options.similarity),
22
22
  minLines: parseInt(options.minLines),
23
+ maxBlocks: parseInt(options.maxBlocks),
24
+ batchSize: parseInt(options.batchSize),
25
+ approx: options.approx !== false,
26
+ // default true; --no-approx sets to false
27
+ minSharedTokens: parseInt(options.minSharedTokens),
28
+ maxCandidatesPerBlock: parseInt(options.maxCandidates),
29
+ fastMode: options.fastMode !== false,
30
+ // default true; --no-fast-mode sets to false
31
+ maxComparisons: parseInt(options.maxComparisons),
32
+ streamResults: options.streamResults === true,
33
+ // default false; --stream-results sets to true
23
34
  include: options.include?.split(","),
24
35
  exclude: options.exclude?.split(",")
25
36
  });
@@ -48,9 +59,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
48
59
  \u2713 HTML report saved to ${outputPath}`));
49
60
  return;
50
61
  }
51
- console.log(chalk.cyan("\u2501".repeat(60)));
62
+ const terminalWidth = process.stdout.columns || 80;
63
+ const dividerWidth = Math.min(60, terminalWidth - 2);
64
+ const divider = "\u2501".repeat(dividerWidth);
65
+ console.log(chalk.cyan(divider));
52
66
  console.log(chalk.bold.white(" PATTERN ANALYSIS SUMMARY"));
53
- console.log(chalk.cyan("\u2501".repeat(60)) + "\n");
67
+ console.log(chalk.cyan(divider) + "\n");
54
68
  console.log(
55
69
  chalk.white(`\u{1F4C1} Files analyzed: ${chalk.bold(results.length)}`)
56
70
  );
@@ -62,18 +76,20 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
62
76
  `\u{1F4B0} Token cost (wasted): ${chalk.bold(summary.totalTokenCost.toLocaleString())}`
63
77
  )
64
78
  );
65
- console.log(chalk.cyan("\n\u2501".repeat(60)));
66
- console.log(chalk.bold.white(" PATTERNS BY TYPE"));
67
- console.log(chalk.cyan("\u2501".repeat(60)) + "\n");
68
79
  const sortedTypes = Object.entries(summary.patternsByType).filter(([, count]) => count > 0).sort(([, a], [, b]) => b - a);
69
- sortedTypes.forEach(([type, count]) => {
70
- const icon = getPatternIcon(type);
71
- console.log(`${icon} ${chalk.white(type.padEnd(15))} ${chalk.bold(count)}`);
72
- });
73
- if (summary.topDuplicates.length > 0) {
74
- console.log(chalk.cyan("\n\u2501".repeat(60)));
80
+ if (sortedTypes.length > 0) {
81
+ console.log(chalk.cyan("\n" + divider));
82
+ console.log(chalk.bold.white(" PATTERNS BY TYPE"));
83
+ console.log(chalk.cyan(divider) + "\n");
84
+ sortedTypes.forEach(([type, count]) => {
85
+ const icon = getPatternIcon(type);
86
+ console.log(`${icon} ${chalk.white(type.padEnd(15))} ${chalk.bold(count)}`);
87
+ });
88
+ }
89
+ if (summary.topDuplicates.length > 0 && totalIssues > 0) {
90
+ console.log(chalk.cyan("\n" + divider));
75
91
  console.log(chalk.bold.white(" TOP DUPLICATE PATTERNS"));
76
- console.log(chalk.cyan("\u2501".repeat(60)) + "\n");
92
+ console.log(chalk.cyan(divider) + "\n");
77
93
  summary.topDuplicates.slice(0, 10).forEach((dup, idx) => {
78
94
  const severityColor = dup.similarity > 0.95 ? chalk.red : dup.similarity > 0.9 ? chalk.yellow : chalk.blue;
79
95
  console.log(
@@ -82,10 +98,10 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
82
98
  )} ${getPatternIcon(dup.patternType)} ${chalk.white(dup.patternType)}`
83
99
  );
84
100
  console.log(
85
- ` ${chalk.dim(dup.file1)}`
101
+ ` ${chalk.dim(dup.file1)}:${chalk.cyan(dup.line1)}-${chalk.cyan(dup.endLine1)}`
86
102
  );
87
103
  console.log(
88
- ` ${chalk.dim("\u2194")} ${chalk.dim(dup.file2)}`
104
+ ` ${chalk.dim("\u2194")} ${chalk.dim(dup.file2)}:${chalk.cyan(dup.line2)}-${chalk.cyan(dup.endLine2)}`
89
105
  );
90
106
  console.log(
91
107
  ` ${chalk.red(`${dup.tokenCost.toLocaleString()} tokens wasted`)}
@@ -100,9 +116,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
100
116
  (issue) => issue.severity === "critical"
101
117
  );
102
118
  if (criticalIssues.length > 0) {
103
- console.log(chalk.cyan("\u2501".repeat(60)));
119
+ console.log(chalk.cyan(divider));
104
120
  console.log(chalk.bold.white(" CRITICAL ISSUES (>95% similar)"));
105
- console.log(chalk.cyan("\u2501".repeat(60)) + "\n");
121
+ console.log(chalk.cyan(divider) + "\n");
106
122
  criticalIssues.slice(0, 5).forEach((issue) => {
107
123
  console.log(chalk.red("\u25CF ") + chalk.white(`${issue.file}:${issue.location.line}`));
108
124
  console.log(` ${chalk.dim(issue.message)}`);
@@ -110,14 +126,19 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
110
126
  `);
111
127
  });
112
128
  }
113
- console.log(chalk.cyan("\u2501".repeat(60)));
114
- console.log(
115
- chalk.white(
116
- `
129
+ if (totalIssues === 0) {
130
+ console.log(chalk.green("\n\u2728 Great! No duplicate patterns detected.\n"));
131
+ }
132
+ console.log(chalk.cyan(divider));
133
+ if (totalIssues > 0) {
134
+ console.log(
135
+ chalk.white(
136
+ `
117
137
  \u{1F4A1} Run with ${chalk.bold("--output json")} or ${chalk.bold("--output html")} for detailed reports
118
138
  `
119
- )
120
- );
139
+ )
140
+ );
141
+ }
121
142
  });
122
143
  function getPatternIcon(type) {
123
144
  const icons = {
package/dist/index.d.mts CHANGED
@@ -5,6 +5,8 @@ interface DuplicatePattern {
5
5
  file2: string;
6
6
  line1: number;
7
7
  line2: number;
8
+ endLine1: number;
9
+ endLine2: number;
8
10
  similarity: number;
9
11
  snippet: string;
10
12
  patternType: PatternType;
@@ -19,15 +21,31 @@ interface FileContent {
19
21
  interface DetectionOptions {
20
22
  minSimilarity: number;
21
23
  minLines: number;
24
+ maxBlocks?: number;
25
+ batchSize?: number;
26
+ approx?: boolean;
27
+ minSharedTokens?: number;
28
+ maxCandidatesPerBlock?: number;
29
+ fastMode?: boolean;
30
+ maxComparisons?: number;
31
+ streamResults?: boolean;
22
32
  }
23
33
  /**
24
34
  * Detect duplicate patterns across files with enhanced analysis
25
35
  */
26
- declare function detectDuplicatePatterns(files: FileContent[], options: DetectionOptions): DuplicatePattern[];
36
+ declare function detectDuplicatePatterns(files: FileContent[], options: DetectionOptions): Promise<DuplicatePattern[]>;
27
37
 
28
38
  interface PatternDetectOptions extends ScanOptions {
29
39
  minSimilarity?: number;
30
40
  minLines?: number;
41
+ maxBlocks?: number;
42
+ batchSize?: number;
43
+ approx?: boolean;
44
+ minSharedTokens?: number;
45
+ maxCandidatesPerBlock?: number;
46
+ fastMode?: boolean;
47
+ maxComparisons?: number;
48
+ streamResults?: boolean;
31
49
  }
32
50
  interface PatternSummary {
33
51
  totalPatterns: number;
@@ -36,6 +54,10 @@ interface PatternSummary {
36
54
  topDuplicates: Array<{
37
55
  file1: string;
38
56
  file2: string;
57
+ line1: number;
58
+ line2: number;
59
+ endLine1: number;
60
+ endLine2: number;
39
61
  similarity: number;
40
62
  patternType: PatternType;
41
63
  tokenCost: number;
package/dist/index.d.ts CHANGED
@@ -5,6 +5,8 @@ interface DuplicatePattern {
5
5
  file2: string;
6
6
  line1: number;
7
7
  line2: number;
8
+ endLine1: number;
9
+ endLine2: number;
8
10
  similarity: number;
9
11
  snippet: string;
10
12
  patternType: PatternType;
@@ -19,15 +21,31 @@ interface FileContent {
19
21
  interface DetectionOptions {
20
22
  minSimilarity: number;
21
23
  minLines: number;
24
+ maxBlocks?: number;
25
+ batchSize?: number;
26
+ approx?: boolean;
27
+ minSharedTokens?: number;
28
+ maxCandidatesPerBlock?: number;
29
+ fastMode?: boolean;
30
+ maxComparisons?: number;
31
+ streamResults?: boolean;
22
32
  }
23
33
  /**
24
34
  * Detect duplicate patterns across files with enhanced analysis
25
35
  */
26
- declare function detectDuplicatePatterns(files: FileContent[], options: DetectionOptions): DuplicatePattern[];
36
+ declare function detectDuplicatePatterns(files: FileContent[], options: DetectionOptions): Promise<DuplicatePattern[]>;
27
37
 
28
38
  interface PatternDetectOptions extends ScanOptions {
29
39
  minSimilarity?: number;
30
40
  minLines?: number;
41
+ maxBlocks?: number;
42
+ batchSize?: number;
43
+ approx?: boolean;
44
+ minSharedTokens?: number;
45
+ maxCandidatesPerBlock?: number;
46
+ fastMode?: boolean;
47
+ maxComparisons?: number;
48
+ streamResults?: boolean;
31
49
  }
32
50
  interface PatternSummary {
33
51
  totalPatterns: number;
@@ -36,6 +54,10 @@ interface PatternSummary {
36
54
  topDuplicates: Array<{
37
55
  file1: string;
38
56
  file2: string;
57
+ line1: number;
58
+ line2: number;
59
+ endLine1: number;
60
+ endLine2: number;
39
61
  similarity: number;
40
62
  patternType: PatternType;
41
63
  tokenCost: number;