@aiready/pattern-detect 0.5.4 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -60,6 +60,14 @@ aiready-patterns ./src --similarity 0.9
60
60
  # Only look at larger patterns
61
61
  aiready-patterns ./src --min-lines 10
62
62
 
63
+ # Filter by severity (focus on critical issues first)
64
+ aiready-patterns ./src --severity critical # Only >95% similar
65
+ aiready-patterns ./src --severity high # Only >90% similar
66
+ aiready-patterns ./src --severity medium # Critical, high, and medium matches (everything above the similarity threshold)
67
+
68
+ # Include test files (excluded by default)
69
+ aiready-patterns ./src --include-tests
70
+
63
71
  # Memory optimization for large codebases
64
72
  aiready-patterns ./src --max-blocks 1000 --batch-size 200
65
73
 
@@ -112,7 +120,9 @@ Create an `aiready.json` or `aiready.config.json` file in your project root to p
112
120
  "minSimilarity": 0.5,
113
121
  "minLines": 8,
114
122
  "approx": false,
115
- "batchSize": 200
123
+ "batchSize": 200,
124
+ "severity": "high",
125
+ "includeTests": false
116
126
  }
117
127
  }
118
128
  }
@@ -0,0 +1,419 @@
1
+ // src/index.ts
2
+ import { scanFiles, readFileContent } from "@aiready/core";
3
+
4
+ // src/detector.ts
5
+ import { estimateTokens } from "@aiready/core";
6
/**
 * Guess what kind of code a block contains, using case-insensitive keyword
 * heuristics. Rules are evaluated in order and the first match wins.
 *
 * @param {string} code - Raw source text of the block.
 * @returns {"api-handler"|"validator"|"component"|"class-method"|"utility"|"function"|"unknown"}
 *   The first category whose rule matches, or "unknown" when none does.
 */
function categorizePattern(code) {
  const lower = code.toLowerCase();
  const has = (needle) => lower.includes(needle);
  const hasAny = (needles) => needles.some((needle) => lower.includes(needle));

  if (
    (has("request") && has("response")) ||
    hasAny(["router.", "app.get", "app.post", "express", "ctx.body"])
  ) {
    return "api-handler";
  }

  // `if` and `throw` are matched as whole words: a plain substring test for
  // "if" also fires on identifiers such as "notify" or "diff", which would
  // label any throwing code that merely mentions them as a validator.
  const guardsWithThrow = /\bif\b/.test(lower) && /\bthrow\b/.test(lower);
  if (hasAny(["validate", "schema", "zod", "yup"]) || guardsWithThrow) {
    return "validator";
  }

  if (hasAny(["return (", "jsx", "component", "props"])) {
    return "component";
  }

  if (hasAny(["class ", "this."])) {
    return "class-method";
  }

  // A plain value-returning helper: returns something, touches no instance
  // state, and constructs nothing.
  if (has("return ") && !has("this") && !has("new ")) {
    return "utility";
  }

  if (hasAny(["function", "=>"])) {
    return "function";
  }

  return "unknown";
}
28
/**
 * Split source text into function-like blocks using a line-based brace counter.
 *
 * A block opens on the first line that looks like a function start (contains
 * "function ", "=>" or "async ", or matches a `const name = (` declaration)
 * and closes on the first subsequent line after which the running brace depth
 * is zero. Blocks shorter than `minLines` are discarded.
 *
 * Braces are counted on every line and character, including those inside
 * strings, comments and regex literals, so the result is a heuristic and not
 * a parse.
 *
 * @param {string} content - Full text of one source file.
 * @param {number} minLines - Minimum number of physical lines a block must span.
 * @returns {Array<{content: string, startLine: number, endLine: number, patternType: string, linesOfCode: number}>}
 *   Extracted blocks with 1-based inclusive line numbers. `linesOfCode` counts
 *   lines that are neither blank nor start with `//`.
 */
function extractCodeBlocks(content, minLines) {
  const lines = content.split("\n");
  const blocks = [];
  let currentBlock = [];
  let blockStart = 0;
  let braceDepth = 0;
  let inFunction = false;

  const looksLikeFunctionStart = (trimmed) =>
    trimmed.includes("function ") ||
    trimmed.includes("=>") ||
    trimmed.includes("async ") ||
    /^(export\s+)?(async\s+)?function\s+/.test(trimmed) ||
    /^(export\s+)?const\s+\w+\s*=\s*(async\s*)?\(/.test(trimmed);

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    if (!inFunction && looksLikeFunctionStart(line.trim())) {
      inFunction = true;
      blockStart = i;
    }

    for (const char of line) {
      if (char === "{") braceDepth++;
      if (char === "}") braceDepth--;
    }
    // An unmatched "}" (e.g. inside a string or regex literal) must not leave
    // the depth negative: from then on depth 0 would only ever be reached on a
    // function's opening line, so every later block would be dropped.
    if (braceDepth < 0) braceDepth = 0;

    if (!inFunction) continue;
    currentBlock.push(line);
    if (braceDepth !== 0) continue;

    // Depth is back to zero: the block ends here. Keep it only if long enough.
    if (currentBlock.length >= minLines) {
      const blockContent = currentBlock.join("\n");
      const linesOfCode = currentBlock.filter((l) => {
        const text = l.trim();
        return text && !text.startsWith("//");
      }).length;
      blocks.push({
        content: blockContent,
        startLine: blockStart + 1,
        endLine: i + 1,
        patternType: categorizePattern(blockContent),
        linesOfCode
      });
    }
    currentBlock = [];
    inFunction = false;
  }

  return blocks;
}
70
/**
 * Reduce a code block to a canonical form so that blocks differing only in
 * comments, literal values or whitespace compare as similar.
 *
 * Comments are removed, string and template literals become `"STR"`, `'STR'`
 * or `` `STR` `` (keeping their quote character), standalone integers become
 * `NUM`, and every whitespace run collapses to a single space.
 *
 * Comments and strings are recognised in ONE left-to-right pass. Stripping
 * `//` comments first would cut `"http://host"` in half, and replacing
 * strings first would mis-pair an apostrophe inside a comment. Escaped quotes
 * inside a literal are honoured.
 *
 * @param {string} code - Raw source text.
 * @returns {string} The normalised, trimmed text.
 */
function normalizeCode(code) {
  const commentOrString =
    /\/\/.*$|\/\*[\s\S]*?\*\/|"(?:\\[\s\S]|[^"\\])*"|'(?:\\[\s\S]|[^'\\])*'|`(?:\\[\s\S]|[^`\\])*`/gm;

  return code
    .replace(commentOrString, (match) => {
      const opener = match[0];
      // A match starting with "/" is a comment; anything else is a literal
      // whose opening character is its quote.
      return opener === "/" ? "" : `${opener}STR${opener}`;
    })
    .replace(/\b\d+\b/g, "NUM")
    .replace(/\s+/g, " ")
    .trim();
}
73
/**
 * Jaccard index of two token lists, treating each as a set:
 * |A ∩ B| / |A ∪ B|.
 *
 * @param {Iterable<string>} tokens1 - First token collection (duplicates ignored).
 * @param {Iterable<string>} tokens2 - Second token collection (duplicates ignored).
 * @returns {number} A value in [0, 1]; 0 when both collections are empty.
 */
function jaccardSimilarity(tokens1, tokens2) {
  const left = new Set(tokens1);
  const right = new Set(tokens2);

  const sharedCount = [...left].filter((token) => right.has(token)).length;
  const unionCount = left.size + right.size - sharedCount;

  if (unionCount === 0) {
    return 0;
  }
  return sharedCount / unionCount;
}
83
/**
 * Find pairs of similar code blocks that live in different files.
 *
 * Every file is split into blocks, each block is normalised and tokenised,
 * and block pairs are scored with Jaccard similarity over their token sets.
 * Pairs from the same file are never reported.
 *
 * Two comparison strategies exist:
 * - approximate (`approx: true`, default): an inverted token index proposes,
 *   for each block, up to `maxCandidatesPerBlock` later blocks whose shared
 *   token occurrence count is at least `minSharedTokens`; only those are scored.
 * - exact (`approx: false`): every later block is compared, capped at
 *   500,000 comparisons in total.
 *
 * Progress is written to the console, and the loop yields to the event loop
 * every `batchSize` blocks.
 *
 * @param {Array<{file: string, content: string}>} files - Files to analyse.
 * @param {object} options
 * @param {number} options.minSimilarity - Minimum Jaccard score (0-1) to report.
 * @param {number} options.minLines - Minimum block length in lines.
 * @param {number} [options.batchSize=100] - Blocks between progress reports.
 * @param {boolean} [options.approx=true] - Use candidate pre-selection.
 * @param {number} [options.minSharedTokens=8] - Candidate threshold (approx mode).
 * @param {number} [options.maxCandidatesPerBlock=100] - Candidate cap (approx mode).
 * @param {boolean} [options.streamResults=false] - Log each match as it is found.
 * @returns {Promise<Array<object>>} Duplicate records sorted by similarity, then
 *   combined token cost, both descending.
 */
async function detectDuplicatePatterns(files, options) {
  const {
    minSimilarity,
    minLines,
    batchSize = 100,
    approx = true,
    minSharedTokens = 8,
    maxCandidatesPerBlock = 100,
    streamResults = false
  } = options;
  const duplicates = [];
  // Only the exhaustive mode is quadratic, so only it gets a hard budget.
  const maxComparisons = approx ? Infinity : 5e5;

  const allBlocks = files.flatMap(
    (file) => extractCodeBlocks(file.content, minLines).map((block) => ({
      content: block.content,
      startLine: block.startLine,
      endLine: block.endLine,
      file: file.file,
      normalized: normalizeCode(block.content),
      patternType: block.patternType,
      tokenCost: estimateTokens(block.content),
      linesOfCode: block.linesOfCode
    }))
  );
  console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
  if (!approx && allBlocks.length > 500) {
    console.log(`\u26A0\uFE0F Using --no-approx mode with ${allBlocks.length} blocks may be slow (O(B\xB2) complexity).`);
    console.log(` Consider using approximate mode (default) for better performance.`);
  }

  // Language keywords carry no signal about what a block does.
  const stopwords = new Set([
    "return", "const", "let", "var", "function", "class", "new", "if", "else",
    "for", "while", "async", "await", "try", "catch", "switch", "case",
    "default", "import", "export", "from", "true", "false", "null",
    "undefined", "this"
  ]);
  const tokenize = (norm) => norm
    .split(/[\s(){}\[\];,\.]+/)
    .filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
  const blockTokens = allBlocks.map((b) => tokenize(b.normalized));

  // token -> indices of the blocks containing it (one entry per occurrence).
  const invertedIndex = new Map();
  if (approx) {
    blockTokens.forEach((tokens, blockIndex) => {
      for (const tok of tokens) {
        let postings = invertedIndex.get(tok);
        if (!postings) {
          postings = [];
          invertedIndex.set(tok, postings);
        }
        postings.push(blockIndex);
      }
    });
  }

  const totalComparisons = approx ? void 0 : allBlocks.length * (allBlocks.length - 1) / 2;
  if (totalComparisons !== void 0) {
    console.log(`Processing ${totalComparisons.toLocaleString()} comparisons in batches...`);
  } else {
    console.log(`Using approximate candidate selection to reduce comparisons...`);
  }

  // Score blocks i and j and, when similar enough, record (and optionally
  // stream) the pair. Shared by both comparison strategies.
  const recordIfSimilar = (i, j) => {
    const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
    if (!(similarity >= minSimilarity)) return;
    const block1 = allBlocks[i];
    const block2 = allBlocks[j];
    const duplicate = {
      file1: block1.file,
      file2: block2.file,
      line1: block1.startLine,
      line2: block2.startLine,
      endLine1: block1.endLine,
      endLine2: block2.endLine,
      similarity,
      snippet: block1.content.split("\n").slice(0, 5).join("\n") + "\n...",
      patternType: block1.patternType,
      tokenCost: block1.tokenCost + block2.tokenCost,
      linesOfCode: block1.linesOfCode
    };
    duplicates.push(duplicate);
    if (streamResults) {
      console.log(`\n\u2705 Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
      console.log(` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} \u21D4 ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
      console.log(` Token cost: ${duplicate.tokenCost.toLocaleString()}`);
    }
  };

  // Later blocks (index > i, different file) ranked by shared token count.
  const selectCandidates = (i) => {
    const sharedCounts = new Map();
    for (const tok of blockTokens[i]) {
      const postings = invertedIndex.get(tok);
      if (!postings) continue;
      for (const j of postings) {
        if (j <= i) continue;
        if (allBlocks[j].file === allBlocks[i].file) continue;
        sharedCounts.set(j, (sharedCounts.get(j) || 0) + 1);
      }
    }
    return Array.from(sharedCounts.entries())
      .filter(([, shared]) => shared >= minSharedTokens)
      .sort((a, b) => b[1] - a[1])
      .slice(0, maxCandidatesPerBlock)
      .map(([j]) => j);
  };

  let comparisonsProcessed = 0;
  let comparisonsBudgetExhausted = false;
  const startTime = Date.now();

  for (let i = 0; i < allBlocks.length; i++) {
    if (comparisonsProcessed >= maxComparisons) {
      comparisonsBudgetExhausted = true;
      break;
    }

    if (i % batchSize === 0 && i > 0) {
      const elapsed = ((Date.now() - startTime) / 1e3).toFixed(1);
      const duplicatesFound = duplicates.length;
      if (totalComparisons !== void 0) {
        const progress = (comparisonsProcessed / totalComparisons * 100).toFixed(1);
        const remaining = totalComparisons - comparisonsProcessed;
        const rate = comparisonsProcessed / parseFloat(elapsed);
        // The rate is 0 or NaN when no comparisons (or no measurable time)
        // have accumulated yet; show "?" instead of "NaN"/"Infinity".
        const etaSeconds = remaining > 0 ? remaining / rate : 0;
        const eta = Number.isFinite(etaSeconds) ? etaSeconds.toFixed(0) : "?";
        console.log(` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`);
      } else {
        console.log(` Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`);
      }
      // Yield so a long scan does not starve the event loop.
      await new Promise((resolve) => setImmediate(resolve));
    }

    if (approx) {
      for (const j of selectCandidates(i)) {
        comparisonsProcessed++;
        recordIfSimilar(i, j);
      }
    } else {
      for (let j = i + 1; j < allBlocks.length; j++) {
        if (comparisonsProcessed >= maxComparisons) break;
        // Same-file pairs still count: totalComparisons covers every pair.
        comparisonsProcessed++;
        if (allBlocks[i].file === allBlocks[j].file) continue;
        recordIfSimilar(i, j);
      }
    }
  }

  if (comparisonsBudgetExhausted) {
    console.log(`\u26A0\uFE0F Comparison budget exhausted (${maxComparisons.toLocaleString()} comparisons). Use --max-comparisons to increase.`);
  }
  return duplicates.sort(
    (a, b) => b.similarity - a.similarity || b.tokenCost - a.tokenCost
  );
}
270
+
271
+ // src/index.ts
272
/**
 * Build a human-readable refactoring hint for a duplicate pattern.
 *
 * @param {string} patternType - Category of the duplicated block. Unrecognised
 *   values fall back to the generic "unknown" advice.
 * @param {number} similarity - Similarity score in [0, 1].
 * @returns {string} Advice for the pattern type, suffixed with an urgency note
 *   when similarity exceeds 0.9 (HIGH) or 0.95 (CRITICAL).
 */
function getRefactoringSuggestion(patternType, similarity) {
  const baseMessages = {
    "api-handler": "Extract common middleware or create a base handler class",
    validator: "Consolidate validation logic into shared schema validators (Zod/Yup)",
    utility: "Move to a shared utilities file and reuse across modules",
    "class-method": "Consider inheritance or composition to share behavior",
    component: "Extract shared logic into a custom hook or HOC",
    function: "Extract into a shared helper function",
    unknown: "Extract common logic into a reusable module"
  };

  // Own-property check: an unexpected type must not yield "undefined..." and
  // inherited keys such as "constructor" must not be looked up.
  const isKnownType = Object.prototype.hasOwnProperty.call(baseMessages, patternType);
  const advice = isKnownType ? baseMessages[patternType] : baseMessages.unknown;

  let urgency = "";
  if (similarity > 0.95) {
    urgency = " (CRITICAL: Nearly identical code)";
  } else if (similarity > 0.9) {
    urgency = " (HIGH: Very similar, refactor soon)";
  }
  return advice + urgency;
}
285
/**
 * Scan a project for duplicated code patterns and report them per file.
 *
 * Files are discovered with `scanFiles`, read with `readFileContent`, and
 * passed to `detectDuplicatePatterns`. Every duplicate pair becomes one issue
 * on each of the two files involved.
 *
 * Severity labels on issues: "critical" (> 0.95), "major" (> 0.9), otherwise
 * "minor". The `severity` option filters the reported issues only; the
 * per-file metrics are always computed from all duplicates of that file.
 *
 * @param {object} options - Detection options; any remaining keys are
 *   forwarded to `scanFiles`.
 * @param {number} [options.minSimilarity=0.4] - Jaccard threshold (0-1).
 * @param {number} [options.minLines=5] - Minimum block length in lines.
 * @param {number} [options.batchSize=100]
 * @param {boolean} [options.approx=true]
 * @param {number} [options.minSharedTokens=8]
 * @param {number} [options.maxCandidatesPerBlock=100]
 * @param {boolean} [options.streamResults=false]
 * @param {"all"|"critical"|"high"|"medium"} [options.severity="all"] -
 *   "critical" keeps critical issues, "high" keeps critical and major,
 *   "medium" (and any unrecognised value) keeps everything.
 * @param {boolean} [options.includeTests=false] - Accepted but not acted on
 *   in this function.
 * @returns {Promise<Array<{fileName: string, issues: object[], metrics: {tokenCost: number, consistencyScore: number}}>>}
 *   One entry per scanned file, in scan order.
 */
async function analyzePatterns(options) {
  const {
    minSimilarity = 0.4,
    // Jaccard similarity default (40% threshold)
    minLines = 5,
    batchSize = 100,
    approx = true,
    minSharedTokens = 8,
    maxCandidatesPerBlock = 100,
    streamResults = false,
    severity = "all",
    // Pulled out so it is not forwarded to scanFiles; it is not otherwise
    // used in this function.
    includeTests = false,
    ...scanOptions
  } = options;

  const files = await scanFiles(scanOptions);
  const fileContents = await Promise.all(
    files.map(async (file) => ({
      file,
      content: await readFileContent(file)
    }))
  );
  const duplicates = await detectDuplicatePatterns(fileContents, {
    minSimilarity,
    minLines,
    batchSize,
    approx,
    minSharedTokens,
    maxCandidatesPerBlock,
    streamResults
  });

  // Index duplicates by participating file once, instead of re-filtering the
  // whole list for every file. Insertion order matches the detector's order.
  const duplicatesByFile = new Map();
  const attach = (file, dup) => {
    const bucket = duplicatesByFile.get(file);
    if (bucket) {
      bucket.push(dup);
    } else {
      duplicatesByFile.set(file, [dup]);
    }
  };
  for (const dup of duplicates) {
    attach(dup.file1, dup);
    if (dup.file2 !== dup.file1) attach(dup.file2, dup);
  }

  // null means "no filtering".
  const everySeverity = ["critical", "major", "minor"];
  const severityMap = {
    critical: ["critical"],
    high: ["critical", "major"],
    medium: everySeverity
  };
  const allowedSeverities = severity === "all" ? null : severityMap[severity] || everySeverity;

  const labelFor = (similarity) => {
    if (similarity > 0.95) return "critical";
    if (similarity > 0.9) return "major";
    return "minor";
  };

  return files.map((file) => {
    const fileDuplicates = duplicatesByFile.get(file) || [];

    const issues = fileDuplicates.map((dup) => {
      const isFirst = dup.file1 === file;
      const otherFile = isFirst ? dup.file2 : dup.file1;
      return {
        type: "duplicate-pattern",
        severity: labelFor(dup.similarity),
        message: `${dup.patternType} pattern ${Math.round(dup.similarity * 100)}% similar to ${otherFile} (${dup.tokenCost} tokens wasted)`,
        location: {
          file,
          line: isFirst ? dup.line1 : dup.line2
        },
        suggestion: getRefactoringSuggestion(dup.patternType, dup.similarity)
      };
    });

    const filteredIssues = allowedSeverities
      ? issues.filter((issue) => allowedSeverities.includes(issue.severity))
      : issues;

    const totalTokenCost = fileDuplicates.reduce((sum, dup) => sum + dup.tokenCost, 0);

    return {
      fileName: file,
      issues: filteredIssues,
      metrics: {
        tokenCost: totalTokenCost,
        // Each duplicate costs 10 points, floored at zero.
        consistencyScore: Math.max(0, 1 - fileDuplicates.length * 0.1)
      }
    };
  });
}
359
/**
 * Recover the structured fields of a duplicate-pattern issue from its message,
 * which has the form
 * `<type> pattern <pct>% similar to <file> (<n> tokens wasted)`.
 * Each field is matched independently, so a partly malformed message still
 * yields whatever can be read; the rest fall back to defaults.
 */
function parseDuplicateIssueMessage(message) {
  const typeMatch = message.match(/^(\S+) pattern/);
  const similarityMatch = message.match(/(\d+)% similar/);
  const tokenMatch = message.match(/\((\d+) tokens/);
  // Greedy and anchored on the trailing "(<n> tokens" so that a path that
  // itself contains " (" (e.g. "src/app (copy)/x.ts") is not cut short.
  const fileMatch = message.match(/similar to (.+) \(\d+ tokens/);
  return {
    type: typeMatch ? typeMatch[1] : null,
    similarity: similarityMatch ? Number.parseInt(similarityMatch[1], 10) / 100 : 0,
    tokenCost: tokenMatch ? Number.parseInt(tokenMatch[1], 10) : 0,
    otherFile: fileMatch ? fileMatch[1] : null
  };
}

/**
 * Aggregate per-file analysis results into a project-level summary.
 *
 * @param {Array<{issues: Array<{message: string, location: {file: string, line: number}}>, metrics: {tokenCost?: number}}>} results
 *   Per-file results in the shape this module's `analyzePatterns` produces.
 * @returns {{totalPatterns: number, totalTokenCost: number, patternsByType: Object<string, number>, topDuplicates: object[]}}
 *   `totalPatterns` is the number of issues across all files;
 *   `totalTokenCost` sums each file's `metrics.tokenCost`;
 *   `patternsByType` counts issues per pattern type;
 *   `topDuplicates` holds up to ten issues ordered by similarity, then token
 *   cost, both descending. Line ranges that cannot be recovered from an issue
 *   are reported as 0.
 */
function generateSummary(results) {
  const allIssues = results.flatMap((r) => r.issues);
  const totalTokenCost = results.reduce(
    (sum, r) => sum + (r.metrics.tokenCost || 0),
    0
  );

  const patternsByType = {
    "api-handler": 0,
    validator: 0,
    utility: 0,
    "class-method": 0,
    component: 0,
    function: 0,
    unknown: 0
  };

  const parsedIssues = allIssues.map((issue) => ({
    issue,
    fields: parseDuplicateIssueMessage(issue.message)
  }));

  for (const { fields } of parsedIssues) {
    if (fields.type) {
      patternsByType[fields.type] = (patternsByType[fields.type] || 0) + 1;
    }
  }

  // Rank before truncating so the list really holds the strongest matches,
  // not whichever issues belong to the first files scanned. The sort is stable,
  // so ties keep their original relative order.
  const topDuplicates = [...parsedIssues]
    .sort(
      (a, b) => b.fields.similarity - a.fields.similarity || b.fields.tokenCost - a.fields.tokenCost
    )
    .slice(0, 10)
    .map(({ issue, fields }) => ({
      files: [
        {
          path: issue.location.file,
          startLine: issue.location.line,
          endLine: 0
          // Not available from Issue
        },
        {
          path: fields.otherFile || "unknown",
          startLine: 0,
          // Not available from Issue
          endLine: 0
          // Not available from Issue
        }
      ],
      similarity: fields.similarity,
      patternType: fields.type || "unknown",
      tokenCost: fields.tokenCost
    }));

  return {
    totalPatterns: allIssues.length,
    totalTokenCost,
    patternsByType,
    topDuplicates
  };
}
414
+
415
+ export {
416
+ detectDuplicatePatterns,
417
+ analyzePatterns,
418
+ generateSummary
419
+ };
package/dist/cli.js CHANGED
@@ -320,6 +320,8 @@ async function analyzePatterns(options) {
320
320
  minSharedTokens = 8,
321
321
  maxCandidatesPerBlock = 100,
322
322
  streamResults = false,
323
+ severity = "all",
324
+ includeTests = false,
323
325
  ...scanOptions
324
326
  } = options;
325
327
  const files = await (0, import_core2.scanFiles)(scanOptions);
@@ -345,10 +347,10 @@ async function analyzePatterns(options) {
345
347
  );
346
348
  const issues = fileDuplicates.map((dup) => {
347
349
  const otherFile = dup.file1 === file ? dup.file2 : dup.file1;
348
- const severity = dup.similarity > 0.95 ? "critical" : dup.similarity > 0.9 ? "major" : "minor";
350
+ const severity2 = dup.similarity > 0.95 ? "critical" : dup.similarity > 0.9 ? "major" : "minor";
349
351
  return {
350
352
  type: "duplicate-pattern",
351
- severity,
353
+ severity: severity2,
352
354
  message: `${dup.patternType} pattern ${Math.round(dup.similarity * 100)}% similar to ${otherFile} (${dup.tokenCost} tokens wasted)`,
353
355
  location: {
354
356
  file,
@@ -357,13 +359,23 @@ async function analyzePatterns(options) {
357
359
  suggestion: getRefactoringSuggestion(dup.patternType, dup.similarity)
358
360
  };
359
361
  });
362
+ let filteredIssues = issues;
363
+ if (severity !== "all") {
364
+ const severityMap = {
365
+ critical: ["critical"],
366
+ high: ["critical", "major"],
367
+ medium: ["critical", "major", "minor"]
368
+ };
369
+ const allowedSeverities = severityMap[severity] || ["critical", "major", "minor"];
370
+ filteredIssues = issues.filter((issue) => allowedSeverities.includes(issue.severity));
371
+ }
360
372
  const totalTokenCost = fileDuplicates.reduce(
361
373
  (sum, dup) => sum + dup.tokenCost,
362
374
  0
363
375
  );
364
376
  results.push({
365
377
  fileName: file,
366
- issues,
378
+ issues: filteredIssues,
367
379
  metrics: {
368
380
  tokenCost: totalTokenCost,
369
381
  consistencyScore: Math.max(0, 1 - fileDuplicates.length * 0.1)
@@ -434,7 +446,7 @@ var import_fs = require("fs");
434
446
  var import_path = require("path");
435
447
  var import_core3 = require("@aiready/core");
436
448
  var program = new import_commander.Command();
437
- program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-stream-results", "Disable incremental output (default: enabled)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
449
+ program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").addHelpText("after", "\nCONFIGURATION:\n Supports config files: aiready.json, aiready.config.json, .aiready.json, .aireadyrc.json, aiready.config.js, .aireadyrc.js\n CLI options override config file settings").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-stream-results", "Disable incremental output (default: enabled)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option("--severity <level>", "Filter by severity: critical|high|medium|all", "all").option("--include-tests", "Include test files in analysis (excluded by default)").option(
438
450
  "-o, --output <format>",
439
451
  "Output format: console, json, html",
440
452
  "console"
@@ -451,7 +463,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
451
463
  maxCandidatesPerBlock: 100,
452
464
  streamResults: true,
453
465
  include: void 0,
454
- exclude: void 0
466
+ exclude: void 0,
467
+ severity: "all",
468
+ includeTests: false
455
469
  };
456
470
  const mergedConfig = (0, import_core3.mergeConfigWithDefaults)(config, defaults);
457
471
  const finalOptions = {
@@ -465,9 +479,39 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
465
479
  maxCandidatesPerBlock: options.maxCandidates ? parseInt(options.maxCandidates) : mergedConfig.maxCandidatesPerBlock,
466
480
  streamResults: options.streamResults !== false && mergedConfig.streamResults,
467
481
  include: options.include?.split(",") || mergedConfig.include,
468
- exclude: options.exclude?.split(",") || mergedConfig.exclude
482
+ exclude: options.exclude?.split(",") || mergedConfig.exclude,
483
+ severity: options.severity || mergedConfig.severity,
484
+ includeTests: options.includeTests || mergedConfig.includeTests
469
485
  };
486
+ if (!finalOptions.includeTests) {
487
+ const testPatterns = [
488
+ "**/*.test.*",
489
+ "**/*.spec.*",
490
+ "**/__tests__/**",
491
+ "**/test/**",
492
+ "**/*.test",
493
+ "**/*.spec"
494
+ ];
495
+ finalOptions.exclude = finalOptions.exclude ? [...finalOptions.exclude, ...testPatterns] : testPatterns;
496
+ }
470
497
  const results = await analyzePatterns(finalOptions);
498
+ const { scanFiles: scanFiles2, readFileContent: readFileContent2 } = await import("@aiready/core");
499
+ const files = await scanFiles2(finalOptions);
500
+ const fileContents = await Promise.all(
501
+ files.map(async (file) => ({
502
+ file,
503
+ content: await readFileContent2(file)
504
+ }))
505
+ );
506
+ const rawDuplicates = await detectDuplicatePatterns(fileContents, {
507
+ minSimilarity: finalOptions.minSimilarity,
508
+ minLines: finalOptions.minLines,
509
+ batchSize: finalOptions.batchSize,
510
+ approx: finalOptions.approx,
511
+ minSharedTokens: finalOptions.minSharedTokens,
512
+ maxCandidatesPerBlock: finalOptions.maxCandidatesPerBlock,
513
+ streamResults: finalOptions.streamResults
514
+ });
471
515
  const elapsedTime = ((Date.now() - startTime) / 1e3).toFixed(2);
472
516
  const summary = generateSummary(results);
473
517
  const totalIssues = results.reduce((sum, r) => sum + r.issues.length, 0);
@@ -524,27 +568,30 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
524
568
  console.log(`${icon} ${import_chalk.default.white(type.padEnd(15))} ${import_chalk.default.bold(count)}`);
525
569
  });
526
570
  }
527
- if (summary.topDuplicates.length > 0 && totalIssues > 0) {
571
+ if (totalIssues > 0) {
528
572
  console.log(import_chalk.default.cyan("\n" + divider));
529
573
  console.log(import_chalk.default.bold.white(" TOP DUPLICATE PATTERNS"));
530
574
  console.log(import_chalk.default.cyan(divider) + "\n");
531
- summary.topDuplicates.slice(0, 10).forEach((dup, idx) => {
532
- const severityColor = dup.similarity > 0.95 ? import_chalk.default.red : dup.similarity > 0.9 ? import_chalk.default.yellow : import_chalk.default.blue;
533
- console.log(
534
- `${import_chalk.default.dim(`${idx + 1}.`)} ${severityColor(
535
- `${Math.round(dup.similarity * 100)}%`
536
- )} ${getPatternIcon(dup.patternType)} ${import_chalk.default.white(dup.patternType)}`
537
- );
538
- dup.files.forEach((file, fileIdx) => {
539
- const prefix = fileIdx === 0 ? " " : " \u2194 ";
540
- console.log(
541
- `${import_chalk.default.dim(prefix)}${import_chalk.default.dim(file.path)}:${import_chalk.default.cyan(file.startLine)}-${import_chalk.default.cyan(file.endLine)}`
542
- );
543
- });
544
- console.log(
545
- ` ${import_chalk.default.red(`${dup.tokenCost.toLocaleString()} tokens wasted`)}
546
- `
547
- );
575
+ let filteredDuplicates = rawDuplicates;
576
+ if (finalOptions.severity !== "all") {
577
+ const severityThresholds = {
578
+ critical: 0.95,
579
+ high: 0.9,
580
+ medium: 0.4
581
+ };
582
+ const threshold = severityThresholds[finalOptions.severity] || 0.4;
583
+ filteredDuplicates = rawDuplicates.filter((dup) => dup.similarity >= threshold);
584
+ }
585
+ const topDuplicates = filteredDuplicates.sort((a, b) => b.similarity - a.similarity).slice(0, 10);
586
+ topDuplicates.forEach((dup, idx) => {
587
+ const severity = dup.similarity > 0.95 ? "CRITICAL" : dup.similarity > 0.9 ? "HIGH" : "MEDIUM";
588
+ const severityIcon = dup.similarity > 0.95 ? "\u{1F534}" : dup.similarity > 0.9 ? "\u{1F7E1}" : "\u{1F535}";
589
+ const file1Name = dup.file1.split("/").pop() || dup.file1;
590
+ const file2Name = dup.file2.split("/").pop() || dup.file2;
591
+ console.log(`${severityIcon} ${severity}: ${import_chalk.default.bold(file1Name)} \u2194 ${import_chalk.default.bold(file2Name)}`);
592
+ console.log(` Similarity: ${import_chalk.default.bold(Math.round(dup.similarity * 100) + "%")} | Wasted: ${import_chalk.default.bold(dup.tokenCost.toLocaleString())} tokens each`);
593
+ console.log(` Location: lines ${import_chalk.default.cyan(dup.line1 + "-" + dup.endLine1)} \u2194 lines ${import_chalk.default.cyan(dup.line2 + "-" + dup.endLine2)}
594
+ `);
548
595
  });
549
596
  }
550
597
  const allIssues = results.flatMap(
package/dist/cli.mjs CHANGED
@@ -1,8 +1,9 @@
1
1
  #!/usr/bin/env node
2
2
  import {
3
3
  analyzePatterns,
4
+ detectDuplicatePatterns,
4
5
  generateSummary
5
- } from "./chunk-JKVKOXYR.mjs";
6
+ } from "./chunk-CHFK6EBT.mjs";
6
7
 
7
8
  // src/cli.ts
8
9
  import { Command } from "commander";
@@ -11,7 +12,7 @@ import { writeFileSync } from "fs";
11
12
  import { join } from "path";
12
13
  import { loadConfig, mergeConfigWithDefaults } from "@aiready/core";
13
14
  var program = new Command();
14
- program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-stream-results", "Disable incremental output (default: enabled)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option(
15
+ program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").addHelpText("after", "\nCONFIGURATION:\n Supports config files: aiready.json, aiready.config.json, .aiready.json, .aireadyrc.json, aiready.config.js, .aireadyrc.js\n CLI options override config file settings").argument("<directory>", "Directory to analyze").option("-s, --similarity <number>", "Minimum similarity score (0-1)", "0.40").option("-l, --min-lines <number>", "Minimum lines to consider", "5").option("--batch-size <number>", "Batch size for comparisons", "100").option("--no-approx", "Disable approximate candidate selection (faster on small repos, slower on large)").option("--min-shared-tokens <number>", "Minimum shared tokens to consider a candidate", "8").option("--max-candidates <number>", "Maximum candidates per block", "100").option("--no-stream-results", "Disable incremental output (default: enabled)").option("--include <patterns>", "File patterns to include (comma-separated)").option("--exclude <patterns>", "File patterns to exclude (comma-separated)").option("--severity <level>", "Filter by severity: critical|high|medium|all", "all").option("--include-tests", "Include test files in analysis (excluded by default)").option(
15
16
  "-o, --output <format>",
16
17
  "Output format: console, json, html",
17
18
  "console"
@@ -28,7 +29,9 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
28
29
  maxCandidatesPerBlock: 100,
29
30
  streamResults: true,
30
31
  include: void 0,
31
- exclude: void 0
32
+ exclude: void 0,
33
+ severity: "all",
34
+ includeTests: false
32
35
  };
33
36
  const mergedConfig = mergeConfigWithDefaults(config, defaults);
34
37
  const finalOptions = {
@@ -42,9 +45,39 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
42
45
  maxCandidatesPerBlock: options.maxCandidates ? parseInt(options.maxCandidates) : mergedConfig.maxCandidatesPerBlock,
43
46
  streamResults: options.streamResults !== false && mergedConfig.streamResults,
44
47
  include: options.include?.split(",") || mergedConfig.include,
45
- exclude: options.exclude?.split(",") || mergedConfig.exclude
48
+ exclude: options.exclude?.split(",") || mergedConfig.exclude,
49
+ severity: options.severity || mergedConfig.severity,
50
+ includeTests: options.includeTests || mergedConfig.includeTests
46
51
  };
52
+ if (!finalOptions.includeTests) {
53
+ const testPatterns = [
54
+ "**/*.test.*",
55
+ "**/*.spec.*",
56
+ "**/__tests__/**",
57
+ "**/test/**",
58
+ "**/*.test",
59
+ "**/*.spec"
60
+ ];
61
+ finalOptions.exclude = finalOptions.exclude ? [...finalOptions.exclude, ...testPatterns] : testPatterns;
62
+ }
47
63
  const results = await analyzePatterns(finalOptions);
64
+ const { scanFiles, readFileContent } = await import("@aiready/core");
65
+ const files = await scanFiles(finalOptions);
66
+ const fileContents = await Promise.all(
67
+ files.map(async (file) => ({
68
+ file,
69
+ content: await readFileContent(file)
70
+ }))
71
+ );
72
+ const rawDuplicates = await detectDuplicatePatterns(fileContents, {
73
+ minSimilarity: finalOptions.minSimilarity,
74
+ minLines: finalOptions.minLines,
75
+ batchSize: finalOptions.batchSize,
76
+ approx: finalOptions.approx,
77
+ minSharedTokens: finalOptions.minSharedTokens,
78
+ maxCandidatesPerBlock: finalOptions.maxCandidatesPerBlock,
79
+ streamResults: finalOptions.streamResults
80
+ });
48
81
  const elapsedTime = ((Date.now() - startTime) / 1e3).toFixed(2);
49
82
  const summary = generateSummary(results);
50
83
  const totalIssues = results.reduce((sum, r) => sum + r.issues.length, 0);
@@ -101,27 +134,30 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
101
134
  console.log(`${icon} ${chalk.white(type.padEnd(15))} ${chalk.bold(count)}`);
102
135
  });
103
136
  }
104
- if (summary.topDuplicates.length > 0 && totalIssues > 0) {
137
+ if (totalIssues > 0) {
105
138
  console.log(chalk.cyan("\n" + divider));
106
139
  console.log(chalk.bold.white(" TOP DUPLICATE PATTERNS"));
107
140
  console.log(chalk.cyan(divider) + "\n");
108
- summary.topDuplicates.slice(0, 10).forEach((dup, idx) => {
109
- const severityColor = dup.similarity > 0.95 ? chalk.red : dup.similarity > 0.9 ? chalk.yellow : chalk.blue;
110
- console.log(
111
- `${chalk.dim(`${idx + 1}.`)} ${severityColor(
112
- `${Math.round(dup.similarity * 100)}%`
113
- )} ${getPatternIcon(dup.patternType)} ${chalk.white(dup.patternType)}`
114
- );
115
- dup.files.forEach((file, fileIdx) => {
116
- const prefix = fileIdx === 0 ? " " : " \u2194 ";
117
- console.log(
118
- `${chalk.dim(prefix)}${chalk.dim(file.path)}:${chalk.cyan(file.startLine)}-${chalk.cyan(file.endLine)}`
119
- );
120
- });
121
- console.log(
122
- ` ${chalk.red(`${dup.tokenCost.toLocaleString()} tokens wasted`)}
123
- `
124
- );
141
+ let filteredDuplicates = rawDuplicates;
142
+ if (finalOptions.severity !== "all") {
143
+ const severityThresholds = {
144
+ critical: 0.95,
145
+ high: 0.9,
146
+ medium: 0.4
147
+ };
148
+ const threshold = severityThresholds[finalOptions.severity] || 0.4;
149
+ filteredDuplicates = rawDuplicates.filter((dup) => dup.similarity >= threshold);
150
+ }
151
+ const topDuplicates = filteredDuplicates.sort((a, b) => b.similarity - a.similarity).slice(0, 10);
152
+ topDuplicates.forEach((dup, idx) => {
153
+ const severity = dup.similarity > 0.95 ? "CRITICAL" : dup.similarity > 0.9 ? "HIGH" : "MEDIUM";
154
+ const severityIcon = dup.similarity > 0.95 ? "\u{1F534}" : dup.similarity > 0.9 ? "\u{1F7E1}" : "\u{1F535}";
155
+ const file1Name = dup.file1.split("/").pop() || dup.file1;
156
+ const file2Name = dup.file2.split("/").pop() || dup.file2;
157
+ console.log(`${severityIcon} ${severity}: ${chalk.bold(file1Name)} \u2194 ${chalk.bold(file2Name)}`);
158
+ console.log(` Similarity: ${chalk.bold(Math.round(dup.similarity * 100) + "%")} | Wasted: ${chalk.bold(dup.tokenCost.toLocaleString())} tokens each`);
159
+ console.log(` Location: lines ${chalk.cyan(dup.line1 + "-" + dup.endLine1)} \u2194 lines ${chalk.cyan(dup.line2 + "-" + dup.endLine2)}
160
+ `);
125
161
  });
126
162
  }
127
163
  const allIssues = results.flatMap(
package/dist/index.d.mts CHANGED
@@ -42,6 +42,8 @@ interface PatternDetectOptions extends ScanOptions {
42
42
  minSharedTokens?: number;
43
43
  maxCandidatesPerBlock?: number;
44
44
  streamResults?: boolean;
45
+ severity?: string;
46
+ includeTests?: boolean;
45
47
  }
46
48
  interface PatternSummary {
47
49
  totalPatterns: number;
package/dist/index.d.ts CHANGED
@@ -42,6 +42,8 @@ interface PatternDetectOptions extends ScanOptions {
42
42
  minSharedTokens?: number;
43
43
  maxCandidatesPerBlock?: number;
44
44
  streamResults?: boolean;
45
+ severity?: string;
46
+ includeTests?: boolean;
45
47
  }
46
48
  interface PatternSummary {
47
49
  totalPatterns: number;
package/dist/index.js CHANGED
@@ -318,6 +318,8 @@ async function analyzePatterns(options) {
318
318
  minSharedTokens = 8,
319
319
  maxCandidatesPerBlock = 100,
320
320
  streamResults = false,
321
+ severity = "all",
322
+ includeTests = false,
321
323
  ...scanOptions
322
324
  } = options;
323
325
  const files = await (0, import_core2.scanFiles)(scanOptions);
@@ -343,10 +345,10 @@ async function analyzePatterns(options) {
343
345
  );
344
346
  const issues = fileDuplicates.map((dup) => {
345
347
  const otherFile = dup.file1 === file ? dup.file2 : dup.file1;
346
- const severity = dup.similarity > 0.95 ? "critical" : dup.similarity > 0.9 ? "major" : "minor";
348
+ const severity2 = dup.similarity > 0.95 ? "critical" : dup.similarity > 0.9 ? "major" : "minor";
347
349
  return {
348
350
  type: "duplicate-pattern",
349
- severity,
351
+ severity: severity2,
350
352
  message: `${dup.patternType} pattern ${Math.round(dup.similarity * 100)}% similar to ${otherFile} (${dup.tokenCost} tokens wasted)`,
351
353
  location: {
352
354
  file,
@@ -355,13 +357,23 @@ async function analyzePatterns(options) {
355
357
  suggestion: getRefactoringSuggestion(dup.patternType, dup.similarity)
356
358
  };
357
359
  });
360
+ let filteredIssues = issues;
361
+ if (severity !== "all") {
362
+ const severityMap = {
363
+ critical: ["critical"],
364
+ high: ["critical", "major"],
365
+ medium: ["critical", "major", "minor"]
366
+ };
367
+ const allowedSeverities = severityMap[severity] || ["critical", "major", "minor"];
368
+ filteredIssues = issues.filter((issue) => allowedSeverities.includes(issue.severity));
369
+ }
358
370
  const totalTokenCost = fileDuplicates.reduce(
359
371
  (sum, dup) => sum + dup.tokenCost,
360
372
  0
361
373
  );
362
374
  results.push({
363
375
  fileName: file,
364
- issues,
376
+ issues: filteredIssues,
365
377
  metrics: {
366
378
  tokenCost: totalTokenCost,
367
379
  consistencyScore: Math.max(0, 1 - fileDuplicates.length * 0.1)
package/dist/index.mjs CHANGED
@@ -2,7 +2,7 @@ import {
2
2
  analyzePatterns,
3
3
  detectDuplicatePatterns,
4
4
  generateSummary
5
- } from "./chunk-JKVKOXYR.mjs";
5
+ } from "./chunk-CHFK6EBT.mjs";
6
6
  export {
7
7
  analyzePatterns,
8
8
  detectDuplicatePatterns,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aiready/pattern-detect",
3
- "version": "0.5.4",
3
+ "version": "0.6.1",
4
4
  "description": "Semantic duplicate pattern detection for AI-generated code - finds similar implementations that waste AI context tokens",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",
@@ -45,7 +45,7 @@
45
45
  "dependencies": {
46
46
  "commander": "^14.0.0",
47
47
  "chalk": "^5.3.0",
48
- "@aiready/core": "0.2.4"
48
+ "@aiready/core": "0.2.5"
49
49
  },
50
50
  "devDependencies": {
51
51
  "tsup": "^8.3.5",