@aiready/pattern-detect 0.17.13 → 0.17.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -166,19 +166,91 @@ var INFRA_RULES = [
166
166
  {
167
167
  name: "cli-command-definitions",
168
168
  detect: (file, code) => {
169
- const isCliFile = file.includes("/commands/") || file.includes("/cli/") || file.endsWith(".command.ts");
169
+ const basename = file.split("/").pop() || "";
170
+ const isCliFile = file.includes("/commands/") || file.includes("/cli/") || file.endsWith(".command.ts") || basename === "cli.ts" || basename === "cli.js" || basename === "cli.tsx" || basename === "cli-action.ts";
170
171
  const hasCommandPattern = (code.includes(".command(") || code.includes("defineCommand")) && (code.includes(".description(") || code.includes(".option(")) && (code.includes(".action(") || code.includes("async"));
171
172
  return isCliFile && hasCommandPattern;
172
173
  },
173
174
  severity: import_core3.Severity.Info,
174
175
  reason: "CLI command definitions follow standard Commander.js patterns and are intentionally similar",
175
176
  suggestion: "Command boilerplate duplication is acceptable for CLI interfaces"
177
+ },
178
+ // DynamoDB Single-Table Design - Standard single-table patterns with prefixed keys
179
+ {
180
+ name: "dynamodb-single-table",
181
+ detect: (file, code) => {
182
+ const hasDynamoDBPattern = code.includes("docClient") || code.includes("dynamodb") || code.includes("DynamoDB") || code.includes("queryItems") || code.includes("putItem") || code.includes("getItem") || code.includes("updateItem") || code.includes("deleteItem");
183
+ const hasKeyPrefix = code.includes("userId:") && code.includes("#") || code.includes("pk:") && code.includes("#") || code.includes("Key:") && code.includes("#") || /[A-Z]+#/.test(code);
184
+ const hasSingleTablePattern = code.includes("KeyConditionExpression") || code.includes("pk =") || code.includes("sk =") || code.includes("userId") && code.includes("timestamp");
185
+ return hasDynamoDBPattern && (hasKeyPrefix || hasSingleTablePattern);
186
+ },
187
+ severity: import_core3.Severity.Info,
188
+ reason: "DynamoDB single-table design with prefixed keys is a standard pattern for efficient data access",
189
+ suggestion: "Single-table query patterns are intentionally similar and should not be refactored"
190
+ },
191
+ // CLI Main Function Boilerplate - Standard argument parsing patterns
192
+ {
193
+ name: "cli-main-boilerplate",
194
+ detect: (file, code) => {
195
+ const basename = file.split("/").pop() || "";
196
+ const isCliFile = file.includes("/cli/") || file.includes("/commands/") || basename.startsWith("cli") || basename.endsWith(".cli.ts") || basename.endsWith(".cli.js");
197
+ const hasMainFunction = code.includes("function main()") || code.includes("async function main()") || code.includes("const main =") || code.includes("main()");
198
+ const hasArgParsing = code.includes("process.argv") || code.includes("yargs") || code.includes("commander") || code.includes("minimist") || code.includes(".parse(") || code.includes("args") && code.includes("._");
199
+ return isCliFile && hasMainFunction && hasArgParsing;
200
+ },
201
+ severity: import_core3.Severity.Info,
202
+ reason: "CLI main functions with argument parsing follow standard boilerplate patterns",
203
+ suggestion: "CLI argument parsing boilerplate is acceptable and should not be flagged as duplication"
176
204
  }
177
205
  ];
178
206
 
179
207
  // src/rules/categories/logic-rules.ts
180
208
  var import_core4 = require("@aiready/core");
181
209
  var LOGIC_RULES = [
210
+ // Enum Semantic Difference - Different enum names indicate different semantic meanings
211
+ {
212
+ name: "enum-semantic-difference",
213
+ detect: (file, code) => {
214
+ const enumRegex = /(?:export\s+)?(?:const\s+)?enum\s+([A-Z][a-zA-Z0-9]*)/g;
215
+ const enums = [];
216
+ let match;
217
+ while ((match = enumRegex.exec(code)) !== null) {
218
+ enums.push(match[1]);
219
+ }
220
+ return enums.length > 0;
221
+ },
222
+ severity: import_core4.Severity.Info,
223
+ reason: "Enums with different names represent different semantic domain concepts, even if they share similar values",
224
+ suggestion: "Different enums (e.g., EscalationPriority vs HealthSeverity) serve different purposes and should not be merged"
225
+ },
226
+ // Enum Value Similarity - Common enum values like LOW, MEDIUM, HIGH are standard
227
+ {
228
+ name: "enum-value-similarity",
229
+ detect: (file, code) => {
230
+ const hasCommonEnumValues = (code.includes("LOW = 'low'") || code.includes("LOW = 0") || code.includes("LOW = 'LOW'")) && (code.includes("HIGH = 'high'") || code.includes("HIGH = 2") || code.includes("HIGH = 'HIGH'")) && (code.includes("MEDIUM = 'medium'") || code.includes("MEDIUM = 1") || code.includes("MEDIUM = 'MEDIUM'"));
231
+ const isEnumDefinition = /(?:export\s+)?(?:const\s+)?enum\s+/.test(code) || code.includes("enum ") && code.includes("{") && code.includes("}");
232
+ return hasCommonEnumValues && isEnumDefinition;
233
+ },
234
+ severity: import_core4.Severity.Info,
235
+ reason: "Common enum values (LOW, MEDIUM, HIGH, CRITICAL) are standard patterns used across different domain enums",
236
+ suggestion: "Enum value similarity is expected for severity/priority enums and should not be flagged as duplication"
237
+ },
238
+ // Re-export / Barrel files - Intentional API surface consolidation
239
+ {
240
+ name: "re-export-files",
241
+ detect: (file, code) => {
242
+ const isIndexFile = file.endsWith("/index.ts") || file.endsWith("/index.js") || file.endsWith("/index.tsx") || file.endsWith("/index.jsx");
243
+ const lines = code.split("\n").filter((l) => l.trim());
244
+ if (lines.length === 0) return false;
245
+ const reExportLines = lines.filter(
246
+ (l) => /^export\s+(\{[^}]+\}|\*)\s+from\s+/.test(l.trim()) || /^export\s+\*\s+as\s+\w+\s+from\s+/.test(l.trim())
247
+ ).length;
248
+ return isIndexFile && reExportLines > 0 && reExportLines / lines.length > 0.5;
249
+ },
250
+ severity: import_core4.Severity.Info,
251
+ reason: "Barrel/index files intentionally re-export for API surface consolidation",
252
+ suggestion: "Re-exports in barrel files are expected and not true duplication"
253
+ },
182
254
  // Type Definitions - Duplication for type safety and module independence
183
255
  {
184
256
  name: "type-definitions",
@@ -192,6 +264,20 @@ var LOGIC_RULES = [
192
264
  reason: "Type/interface definitions are intentionally duplicated for module independence",
193
265
  suggestion: "Extract to shared types package only if causing maintenance burden"
194
266
  },
267
+ // Cross-Package Type Definitions - Different packages may have similar types
268
+ {
269
+ name: "cross-package-types",
270
+ detect: (file, code) => {
271
+ const hasTypeDefinition = code.includes("interface ") || code.includes("type ") || code.includes("enum ");
272
+ const isPackageOrApp = file.includes("/packages/") || file.includes("/apps/") || file.includes("/core/");
273
+ const packageMatch = file.match(/\/(packages|apps|core)\/([^/]+)\//);
274
+ const hasPackageStructure = packageMatch !== null;
275
+ return hasTypeDefinition && isPackageOrApp && hasPackageStructure;
276
+ },
277
+ severity: import_core4.Severity.Info,
278
+ reason: "Types in different packages/modules are often intentionally similar for module independence",
279
+ suggestion: "Cross-package type duplication is acceptable for decoupled module architecture"
280
+ },
195
281
  // Utility Functions - Small helpers in dedicated utility files
196
282
  {
197
283
  name: "utility-functions",
@@ -274,6 +360,22 @@ var LOGIC_RULES = [
274
360
  severity: import_core4.Severity.Info,
275
361
  reason: "Validation functions are inherently similar and often intentionally duplicated for domain clarity",
276
362
  suggestion: "Consider extracting to shared validators only if validation logic becomes complex"
363
+ },
364
+ // Singleton Getter Pattern - Standard singleton initialization pattern
365
+ {
366
+ name: "singleton-getter",
367
+ detect: (file, code) => {
368
+ const hasSingletonGetter = /(?:export\s+)?(?:async\s+)?function\s+get[A-Z][a-zA-Z0-9]*\s*\(/.test(
369
+ code
370
+ ) || /(?:export\s+)?const\s+get[A-Z][a-zA-Z0-9]*\s*=\s*(?:async\s+)?\(\)\s*=>/.test(
371
+ code
372
+ );
373
+ const hasSingletonPattern = code.includes("if (!") && code.includes("instance") && code.includes(" = ") || code.includes("if (!_") && code.includes(" = new ") || code.includes("if (") && code.includes(" === null") && code.includes(" = new ");
374
+ return hasSingletonGetter && hasSingletonPattern;
375
+ },
376
+ severity: import_core4.Severity.Info,
377
+ reason: "Singleton getter functions follow standard initialization pattern and are intentionally similar",
378
+ suggestion: "Singleton getters are boilerplate and acceptable duplication for lazy initialization"
277
379
  }
278
380
  ];
279
381
 
@@ -742,6 +844,22 @@ function getRefactoringSuggestion(patternType, similarity) {
742
844
  return baseMessages[patternType] + urgency;
743
845
  }
744
846
  function generateSummary(results) {
847
+ if (!Array.isArray(results)) {
848
+ return {
849
+ totalPatterns: 0,
850
+ totalTokenCost: 0,
851
+ patternsByType: {
852
+ "api-handler": 0,
853
+ validator: 0,
854
+ utility: 0,
855
+ "class-method": 0,
856
+ component: 0,
857
+ function: 0,
858
+ unknown: 0
859
+ },
860
+ topDuplicates: []
861
+ };
862
+ }
745
863
  const allIssues = results.flatMap((r) => r.issues || []);
746
864
  const totalTokenCost = results.reduce(
747
865
  (sum, r) => sum + (r.metrics?.tokenCost || 0),
@@ -2,10 +2,10 @@ import {
2
2
  analyzePatterns,
3
3
  generateSummary,
4
4
  getSmartDefaults
5
- } from "../chunk-P3BOCGVV.mjs";
6
- import "../chunk-IPBGVPUX.mjs";
7
- import "../chunk-2P7BQHGR.mjs";
8
- import "../chunk-PHJE6A3J.mjs";
5
+ } from "../chunk-WQX7IHAN.mjs";
6
+ import "../chunk-JWP5TCDM.mjs";
7
+ import "../chunk-KDXWIT6W.mjs";
8
+ import "../chunk-G3GZFYRI.mjs";
9
9
  export {
10
10
  analyzePatterns,
11
11
  generateSummary,
@@ -0,0 +1,499 @@
1
+ import {
2
+ detectDuplicatePatterns
3
+ } from "./chunk-RH5JPWEC.mjs";
4
+ import {
5
+ calculateSeverity
6
+ } from "./chunk-UKQFCUQA.mjs";
7
+
8
+ // src/grouping.ts
9
+ import { Severity, getSeverityLevel } from "@aiready/core";
10
+ import path from "path";
11
/**
 * Aggregates duplicate records by unordered file pair.
 *
 * The pair key is built from the two file paths in sorted order, so
 * (a, b) and (b, a) land in the same group. Line ranges are stored in that
 * same sorted order: `lineRanges[i].file1` always refers to the first path in
 * `filePair`, regardless of which side the duplicate was reported on.
 *
 * @param {Array<object>} duplicates - Records exposing file1, file2, severity,
 *   tokenCost, similarity, patternType, line1, endLine1, line2 and endLine2.
 * @returns {Array<object>} One entry per file pair with occurrences,
 *   totalTokenCost, averageSimilarity, patternTypes (a Set), lineRanges and
 *   the highest severity seen (ranked via getSeverityLevel).
 */
function groupDuplicatesByFilePair(duplicates) {
  const groups = /* @__PURE__ */ new Map();
  for (const dup of duplicates) {
    const sortedFiles = [dup.file1, dup.file2].sort();
    const key = sortedFiles.join("::");
    // True when the duplicate lists the pair in the opposite order to the key.
    const isSwapped = sortedFiles[0] !== dup.file1;

    let group = groups.get(key);
    if (!group) {
      group = {
        filePair: key,
        severity: dup.severity,
        occurrences: 0,
        totalTokenCost: 0,
        // Holds the running sum until the final pass divides it.
        averageSimilarity: 0,
        patternTypes: /* @__PURE__ */ new Set(),
        lineRanges: []
      };
      groups.set(key, group);
    }

    group.occurrences++;
    group.totalTokenCost += dup.tokenCost;
    group.averageSimilarity += dup.similarity;
    group.patternTypes.add(dup.patternType);

    // Keep the ranges aligned with the sorted pair used for the key.
    const rangeA = { start: dup.line1, end: dup.endLine1 };
    const rangeB = { start: dup.line2, end: dup.endLine2 };
    group.lineRanges.push(
      isSwapped ? { file1: rangeB, file2: rangeA } : { file1: rangeA, file2: rangeB }
    );

    if (getSeverityLevel(dup.severity) > getSeverityLevel(group.severity)) {
      group.severity = dup.severity;
    }
  }
  return Array.from(groups.values()).map((g) => ({
    ...g,
    averageSimilarity: g.averageSimilarity / g.occurrences
  }));
}
46
/**
 * Builds refactoring clusters from the connected components of the
 * "file A duplicates file B" graph.
 *
 * Components with fewer than two files (e.g. a file that only duplicates
 * itself) are skipped. Severity, reason and suggestion come from
 * calculateSeverity, called with the first two files of the component, an
 * empty code string, the average similarity and a fixed line count of 30.
 *
 * @param {Array<object>} duplicates - Records exposing file1, file2,
 *   tokenCost and similarity.
 * @returns {Array<object>} Clusters with id, name, files, severity,
 *   duplicateCount, totalTokenCost, averageSimilarity, reason and suggestion.
 */
function createRefactorClusters(duplicates) {
  // Undirected adjacency list keyed by file path.
  const adjacency = /* @__PURE__ */ new Map();
  const link = (from, to) => {
    let neighbours = adjacency.get(from);
    if (!neighbours) {
      neighbours = /* @__PURE__ */ new Set();
      adjacency.set(from, neighbours);
    }
    neighbours.add(to);
  };
  for (const dup of duplicates) {
    link(dup.file1, dup.file2);
    link(dup.file2, dup.file1);
  }

  // Breadth-first traversal. The component array doubles as the queue, which
  // keeps visiting order without repeated O(n) shift() calls.
  const componentIndexOf = /* @__PURE__ */ new Map();
  const components = [];
  for (const start of adjacency.keys()) {
    if (componentIndexOf.has(start)) continue;
    const index = components.length;
    const component = [start];
    componentIndexOf.set(start, index);
    for (let cursor = 0; cursor < component.length; cursor++) {
      for (const neighbour of adjacency.get(component[cursor]) || []) {
        if (!componentIndexOf.has(neighbour)) {
          componentIndexOf.set(neighbour, index);
          component.push(neighbour);
        }
      }
    }
    components.push(component);
  }

  // Both ends of a duplicate are linked, so they share a component; one pass
  // buckets every duplicate instead of rescanning the list per component.
  const duplicatesByComponent = components.map(() => []);
  for (const dup of duplicates) {
    duplicatesByComponent[componentIndexOf.get(dup.file1)].push(dup);
  }

  const clusters = [];
  components.forEach((component, index) => {
    if (component.length < 2) return;
    const componentDups = duplicatesByComponent[index];
    const totalTokenCost = componentDups.reduce((sum, d) => sum + d.tokenCost, 0);
    const avgSimilarity =
      componentDups.reduce((sum, d) => sum + d.similarity, 0) / Math.max(1, componentDups.length);
    const name = determineClusterName(component);
    const { severity, reason, suggestion } = calculateSeverity(
      component[0],
      component[1],
      "",
      // Code not available here
      avgSimilarity,
      30
      // Assume substantial if clustered
    );
    clusters.push({
      id: `cluster-${clusters.length}`,
      name,
      files: component,
      severity,
      duplicateCount: componentDups.length,
      totalTokenCost,
      averageSimilarity: avgSimilarity,
      reason,
      suggestion
    });
  });
  return clusters;
}
108
/**
 * Picks a display name for a cluster of files.
 *
 * A few substring matches on any file path win first; otherwise the name is
 * derived from the parent directory of the first file.
 *
 * @param {string[]} files - File paths belonging to the cluster.
 * @returns {string} Human-readable cluster name.
 */
function determineClusterName(files) {
  if (files.length === 0) return "Unknown Cluster";

  const anyPathIncludes = (fragment) => files.some((f) => f.includes(fragment));
  if (anyPathIncludes("blog")) return "Blog SEO Boilerplate";
  if (anyPathIncludes("buttons")) return "Button Component Variants";
  if (anyPathIncludes("cards")) return "Card Component Variants";
  if (anyPathIncludes("login.test")) return "E2E Test Patterns";

  // path.basename understands every separator the platform accepts, whereas
  // splitting on path.sep alone mis-parses "/"-separated paths where the
  // native separator is "\".
  const dirName = path.basename(path.dirname(files[0]));
  if (dirName && dirName !== "." && dirName !== "..") {
    return `${dirName.charAt(0).toUpperCase() + dirName.slice(1)} Domain Group`;
  }
  return "Shared Pattern Group";
}
122
/**
 * Keeps only clusters that meet both thresholds.
 *
 * @param {Array<object>} clusters - Clusters exposing totalTokenCost and files.
 * @param {number} [minTokenCost=1e3] - Minimum total token cost (inclusive).
 * @param {number} [minFiles=3] - Minimum number of files (inclusive).
 * @returns {Array<object>} New array with the qualifying clusters.
 */
function filterClustersByImpact(clusters, minTokenCost = 1e3, minFiles = 3) {
  const isImpactful = (cluster) => {
    const costlyEnough = cluster.totalTokenCost >= minTokenCost;
    return costlyEnough && cluster.files.length >= minFiles;
  };
  return clusters.filter(isImpactful);
}
127
/**
 * Heuristically decides whether a snippet is a bare type declaration.
 *
 * The trimmed snippet must start with an (optionally exported) `interface`,
 * `type` or `enum` keyword, contain none of the implementation markers, and
 * be at most 200 characters long.
 *
 * @param {string} code - Source snippet.
 * @returns {boolean} True when the snippet looks like a pure declaration.
 */
function isPureInterfaceDefinition(code) {
  const snippet = code.trim();

  const declarationPrefixes = [
    "interface ",
    "type ",
    "export interface ",
    "export type ",
    "enum ",
    "export enum "
  ];
  const startsAsDeclaration = declarationPrefixes.some((prefix) => snippet.startsWith(prefix));
  if (!startsAsDeclaration) return false;

  // Any of these substrings suggests executable code rather than a type.
  const implementationMarkers = ["={", "=> {", "function ", "() {", " implements "];
  const hasImplementation = implementationMarkers.some((marker) => snippet.includes(marker));
  if (hasImplementation) return false;

  return !(snippet.length > 200);
}
138
// Substrings (compared in lowercase) whose presence in a snippet is treated
// as a sign of brand/theme-specific styling.
const BRAND_INDICATORS = [
  "cyberpunk",
  "cyber-blue",
  "cyber-purple",
  "slate-900",
  "slate-400",
  "zinc-",
  "indigo-",
  "neon-",
  "glassmorphism",
  "backdrop-blur"
];

/**
 * Reports whether a path contains one of the branding-related terms
 * (case-insensitive substring match).
 *
 * @param {string} filePath - File path to inspect.
 * @returns {boolean} True when any term occurs in the lowercased path.
 */
function isBrandSpecificComponent(filePath) {
  const normalized = filePath.toLowerCase();
  return ["landing", "clawmore", "platform", "apps/"].some((term) => normalized.includes(term));
}

/**
 * Reports whether two snippets are brand-themed variants of each other:
 * both paths are brand-specific, the paths differ, and both snippets contain
 * at least one brand indicator.
 *
 * @param {string} file1 - Path of the first snippet.
 * @param {string} file2 - Path of the second snippet.
 * @param {string} code1 - Source of the first snippet.
 * @param {string} code2 - Source of the second snippet.
 * @returns {boolean} True only when all conditions hold.
 */
function areBrandSpecificVariants(file1, file2, code1, code2) {
  const firstIsBrand = isBrandSpecificComponent(file1);
  const secondIsBrand = isBrandSpecificComponent(file2);
  if (!(firstIsBrand && secondIsBrand && file1 !== file2)) {
    return false;
  }

  const mentionsBrand = (code) => {
    const haystack = code.toLowerCase();
    return BRAND_INDICATORS.some((indicator) => haystack.includes(indicator));
  };
  const firstMentionsBrand = mentionsBrand(code1);
  const secondMentionsBrand = mentionsBrand(code2);
  return firstMentionsBrand && secondMentionsBrand;
}
174
/**
 * Downgrades intentional duplicates to informational severity.
 *
 * Despite the name, nothing is removed: every duplicate is kept and a new
 * array with the same elements is returned. Cross-file duplicates are
 * mutated in place when they are brand-specific variants or when both sides
 * are pure interface/type definitions; if both apply, the interface
 * suggestion is the one that remains. Same-file duplicates are left untouched.
 *
 * @param {Array<object>} duplicates - Records exposing file1, file2, code1
 *   and code2; `severity` and `suggestion` may be overwritten.
 * @returns {Array<object>} Shallow copy of the input array.
 */
function filterBrandSpecificVariants(duplicates) {
  const annotate = (dup) => {
    if (dup.file1 === dup.file2) return;

    if (areBrandSpecificVariants(dup.file1, dup.file2, dup.code1, dup.code2)) {
      dup.severity = Severity.Info;
      dup.suggestion = "Brand-specific themed component variant (intentional)";
    }

    const bothPureTypes =
      isPureInterfaceDefinition(dup.code1) && isPureInterfaceDefinition(dup.code2);
    if (bothPureTypes) {
      dup.severity = Severity.Info;
      dup.suggestion = "Pure interface/type definition - intentional for module independence";
    }
  };

  return duplicates.filter((dup) => {
    annotate(dup);
    return true;
  });
}
195
+
196
+ // src/config.ts
197
+ import { scanFiles } from "@aiready/core";
198
/**
 * Computes analysis defaults, optionally scaled to the repository size.
 *
 * With `useSmartDefaults === false` a fixed set of defaults is returned and
 * the directory is not scanned. Otherwise the files are counted via
 * scanFiles, thresholds are derived from an estimate of five code blocks per
 * file, and any key of the derived defaults that the caller supplied with a
 * defined value overrides the derived one.
 *
 * @param {string} directory - Root directory to analyse.
 * @param {object} [userOptions={}] - Caller options; `include`/`exclude`
 *   are forwarded to the scan. May be omitted.
 * @returns {Promise<object>} Resolved option set (rootDir, minSimilarity,
 *   minLines, batchSize, approx, minSharedTokens, maxCandidatesPerBlock,
 *   streamResults, severity, includeTests).
 */
async function getSmartDefaults(directory, userOptions = {}) {
  if (userOptions.useSmartDefaults === false) {
    return {
      rootDir: directory,
      minSimilarity: 0.6,
      minLines: 8,
      batchSize: 100,
      approx: true,
      minSharedTokens: 12,
      maxCandidatesPerBlock: 5,
      streamResults: false,
      severity: "all",
      includeTests: false
    };
  }

  const scanOptions = {
    rootDir: directory,
    include: userOptions.include || ["**/*.{ts,tsx,js,jsx,py,java}"],
    exclude: userOptions.exclude
  };
  const files = await scanFiles(scanOptions);
  const estimatedBlocks = files.length * 5;

  // Larger repositories get stricter thresholds and bigger batches.
  const minLines = Math.max(
    6,
    Math.min(20, 6 + Math.floor(estimatedBlocks / 1e3) * 2)
  );
  const minSimilarity = Math.min(0.75, 0.45 + estimatedBlocks / 1e4 * 0.3);
  const batchSize = estimatedBlocks > 1e3 ? 200 : 100;
  const severity = estimatedBlocks > 3e3 ? "high" : "all";
  // For an empty scan the division yields Infinity, which the clamp turns
  // into the upper bound of 100.
  const maxCandidatesPerBlock = Math.max(
    5,
    Math.min(100, Math.floor(1e6 / estimatedBlocks))
  );

  const defaults = {
    rootDir: directory,
    minSimilarity,
    minLines,
    batchSize,
    approx: true,
    minSharedTokens: 10,
    maxCandidatesPerBlock,
    streamResults: false,
    severity,
    includeTests: false
  };

  // Only keys that exist in the defaults can be overridden, and only by
  // values that are not undefined.
  const result = { ...defaults };
  for (const key of Object.keys(defaults)) {
    if (key in userOptions && userOptions[key] !== void 0) {
      result[key] = userOptions[key];
    }
  }
  return result;
}
252
/**
 * Prints the effective configuration to the console, one setting per line,
 * followed by an empty line. Does nothing when `config.suppressToolConfig`
 * is truthy. The three optional settings are printed only when present and
 * non-empty / greater than zero.
 *
 * @param {object} config - Effective analysis options.
 * @param {number} estimatedBlocks - Estimated number of code blocks.
 * @returns {void}
 */
function logConfiguration(config, estimatedBlocks) {
  if (config.suppressToolConfig) return;

  const approxLabel = config.approx ? "enabled" : "disabled";
  const report = [
    "\u{1F4CB} Configuration:",
    ` Repository size: ~${estimatedBlocks} code blocks`,
    ` Similarity threshold: ${config.minSimilarity}`,
    ` Minimum lines: ${config.minLines}`,
    ` Approximate mode: ${approxLabel}`,
    ` Max candidates per block: ${config.maxCandidatesPerBlock}`,
    ` Min shared tokens: ${config.minSharedTokens}`,
    ` Severity filter: ${config.severity}`,
    ` Include tests: ${config.includeTests}`
  ];

  const { excludePatterns, confidenceThreshold, ignoreWhitelist } = config;
  if (excludePatterns && excludePatterns.length > 0) {
    report.push(` Exclude patterns: ${excludePatterns.length} active`);
  }
  if (confidenceThreshold && confidenceThreshold > 0) {
    report.push(` Confidence threshold: ${confidenceThreshold}`);
  }
  if (ignoreWhitelist && ignoreWhitelist.length > 0) {
    report.push(` Ignore whitelist: ${ignoreWhitelist.length} entries`);
  }
  report.push("");

  for (const line of report) {
    console.log(line);
  }
}
276
+
277
+ // src/summary.ts
278
+ import { Severity as Severity2 } from "@aiready/core";
279
/**
 * Builds the refactoring hint for a duplicate pattern.
 *
 * @param {string} patternType - Pattern category; unrecognised values use
 *   the generic "unknown" message.
 * @param {number} similarity - Similarity ratio; above 0.95 a CRITICAL
 *   suffix is appended, above 0.9 a HIGH suffix.
 * @returns {string} Suggestion text, with an urgency suffix when applicable.
 */
function getRefactoringSuggestion(patternType, similarity) {
  const baseMessages = {
    "api-handler": "Extract common middleware or create a base handler class",
    validator: "Consolidate validation logic into shared schema validators (Zod/Yup)",
    utility: "Move to a shared utilities file and reuse across modules",
    "class-method": "Consider inheritance or composition to share behavior",
    component: "Extract shared logic into a custom hook or HOC",
    function: "Extract into a shared helper function",
    unknown: "Extract common logic into a reusable module"
  };

  // Own-property check so unknown categories (and inherited names such as
  // "toString") get the generic message instead of the text "undefined".
  const isKnownType = Object.prototype.hasOwnProperty.call(baseMessages, patternType);
  const baseMessage = isKnownType ? baseMessages[patternType] : baseMessages.unknown;

  let urgency = "";
  if (similarity > 0.95) {
    urgency = " (CRITICAL: Nearly identical code)";
  } else if (similarity > 0.9) {
    urgency = " (HIGH: Very similar, refactor soon)";
  }
  return baseMessage + urgency;
}
292
/**
 * Summarises per-file analysis results.
 *
 * Pattern type, similarity, token cost and the other file's path are parsed
 * back out of each issue's message text. `topDuplicates` holds the first ten
 * issues in input order (no ranking is applied).
 *
 * @param {Array<object>} results - Per-file results exposing `issues` and
 *   `metrics.tokenCost`. A non-array yields an empty summary.
 * @returns {{totalPatterns: number, totalTokenCost: number,
 *   patternsByType: object, topDuplicates: Array<object>}} Summary object.
 */
function generateSummary(results) {
  // Single source for the zeroed per-type counters.
  const emptyCounts = () => ({
    "api-handler": 0,
    validator: 0,
    utility: 0,
    "class-method": 0,
    component: 0,
    function: 0,
    unknown: 0
  });

  if (!Array.isArray(results)) {
    return {
      totalPatterns: 0,
      totalTokenCost: 0,
      patternsByType: emptyCounts(),
      topDuplicates: []
    };
  }

  // Issues without a string message are treated as having no parsable fields.
  const messageOf = (issue) => (typeof issue.message === "string" ? issue.message : "");
  const patternTypeOf = (message) => message.match(/^(\S+(?:-\S+)*) pattern/)?.[1];

  const allIssues = results.flatMap((r) => r.issues || []);
  const totalTokenCost = results.reduce(
    (sum, r) => sum + (r.metrics?.tokenCost || 0),
    0
  );

  const patternsByType = emptyCounts();
  for (const issue of allIssues) {
    const type = patternTypeOf(messageOf(issue));
    if (type) {
      patternsByType[type] = (patternsByType[type] || 0) + 1;
    }
  }

  const topDuplicates = allIssues.slice(0, 10).map((issue) => {
    const message = messageOf(issue);
    const similarityMatch = message.match(/(\d+)% similar/);
    const tokenMatch = message.match(/\((\d+) tokens/);
    const fileMatch = message.match(/similar to (.+?) \(/);
    return {
      files: [
        {
          path: issue.location.file,
          startLine: issue.location.line,
          endLine: 0
        },
        {
          path: fileMatch?.[1] || "unknown",
          startLine: 0,
          endLine: 0
        }
      ],
      similarity: similarityMatch ? Number.parseInt(similarityMatch[1], 10) / 100 : 0,
      patternType: patternTypeOf(message) || "unknown",
      tokenCost: tokenMatch ? Number.parseInt(tokenMatch[1], 10) : 0
    };
  });

  return {
    totalPatterns: allIssues.length,
    totalTokenCost,
    patternsByType,
    topDuplicates
  };
}
360
/**
 * Filters issues by a named severity threshold.
 *
 * "all" returns the input array itself. "critical", "high" and "medium"
 * admit progressively more severities; any other name admits the same
 * severities as "medium".
 *
 * @param {Array<object>} issues - Issues exposing a `severity` value.
 * @param {string} severity - Threshold name.
 * @returns {Array<object>} The input array for "all", otherwise a new array.
 */
function filterBySeverity(issues, severity) {
  if (severity === "all") return issues;

  const { Critical, Major, Minor } = Severity2;
  const upToMinor = [Critical, Major, Minor];
  const allowedByThreshold = {
    critical: [Critical],
    high: [Critical, Major],
    medium: upToMinor
  };
  const permitted = allowedByThreshold[severity] || upToMinor;

  return issues.filter(({ severity: level }) => permitted.includes(level));
}
374
/**
 * Maps a similarity ratio to a severity: above 0.95 is Critical, above 0.9
 * is Major, everything else is Minor.
 *
 * @param {number} similarity - Similarity ratio.
 * @returns {*} The matching member of Severity2.
 */
function calculateSeverity2(similarity) {
  return similarity > 0.95
    ? Severity2.Critical
    : similarity > 0.9
      ? Severity2.Major
      : Severity2.Minor;
}
379
+
380
+ // src/analyzer.ts
381
+ import { scanFiles as scanFiles2, readFileContent, Severity as Severity3, IssueType } from "@aiready/core";
382
/**
 * Runs the duplicate-pattern analysis.
 *
 * Steps: resolve options (smart defaults overlaid with the caller's
 * options), scan and read the files, detect duplicates, downgrade
 * intentional duplicates, build one result entry per scanned file, and
 * optionally group duplicates by file pair and into refactoring clusters.
 *
 * @param {object} options - Analysis options. Keys not consumed here are
 *   forwarded to the file scan; `onProgress` is forwarded to detection.
 * @returns {Promise<{results: Array<object>, duplicates: Array<object>,
 *   files: Array<string>, groups: (Array<object>|undefined),
 *   clusters: (Array<object>|undefined), config: object}>} `groups` and
 *   `clusters` are undefined when the corresponding option is disabled.
 */
async function analyzePatterns(options) {
  const smartDefaults = await getSmartDefaults(options.rootDir || ".", options);
  const finalOptions = { ...smartDefaults, ...options };
  const {
    minSimilarity = 0.4,
    minLines = 5,
    batchSize = 100,
    approx = true,
    minSharedTokens = 8,
    maxCandidatesPerBlock = 100,
    streamResults = false,
    severity = "all",
    groupByFilePair = true,
    createClusters = true,
    minClusterTokenCost = 1e3,
    minClusterFiles = 3,
    excludePatterns = [],
    confidenceThreshold = 0,
    ignoreWhitelist = [],
    ...scanOptions
  } = finalOptions;

  const files = await scanFiles2(scanOptions);
  // Estimate used only for the configuration printout.
  const estimatedBlocks = files.length * 3;
  logConfiguration(finalOptions, estimatedBlocks);

  // Read files in fixed-size batches so only a bounded number of reads are
  // in flight at once.
  const READ_BATCH_SIZE = 50;
  const fileContents = [];
  for (let i = 0; i < files.length; i += READ_BATCH_SIZE) {
    const batch = files.slice(i, i + READ_BATCH_SIZE);
    const batchContents = await Promise.all(
      batch.map(async (file) => ({
        file,
        content: await readFileContent(file)
      }))
    );
    fileContents.push(...batchContents);
  }

  const duplicates = await detectDuplicatePatterns(fileContents, {
    minSimilarity,
    minLines,
    batchSize,
    approx,
    minSharedTokens,
    maxCandidatesPerBlock,
    streamResults,
    excludePatterns,
    confidenceThreshold,
    ignoreWhitelist,
    onProgress: options.onProgress
  });
  // Called for its side effect: it rewrites severity/suggestion in place.
  filterBrandSpecificVariants(duplicates);

  // Index duplicates by participating file once, instead of rescanning the
  // whole list for every file. A same-file duplicate is recorded once.
  const duplicatesByFile = /* @__PURE__ */ new Map();
  const indexDuplicate = (file, dup) => {
    const bucket = duplicatesByFile.get(file);
    if (bucket) {
      bucket.push(dup);
    } else {
      duplicatesByFile.set(file, [dup]);
    }
  };
  for (const dup of duplicates) {
    indexDuplicate(dup.file1, dup);
    if (dup.file2 !== dup.file1) indexDuplicate(dup.file2, dup);
  }

  const results = [];
  for (const file of files) {
    const fileDuplicates = duplicatesByFile.get(file) ?? [];
    const issues = fileDuplicates.map((dup) => {
      const otherFile = dup.file1 === file ? dup.file2 : dup.file1;
      // Duplicates already marked informational keep that level; all others
      // are graded by similarity.
      let severityLevel;
      if (dup.severity === "info" || dup.severity === "Info") {
        severityLevel = Severity3.Info;
      } else {
        severityLevel = calculateSeverity2(dup.similarity);
      }
      return {
        type: IssueType.DuplicatePattern,
        severity: severityLevel,
        message: `${dup.patternType} pattern ${Math.round(dup.similarity * 100)}% similar to ${otherFile} (${dup.tokenCost} tokens wasted)`,
        location: {
          file,
          line: dup.file1 === file ? dup.line1 : dup.line2
        },
        suggestion: getRefactoringSuggestion(dup.patternType, dup.similarity)
      };
    });
    const filteredIssues = filterBySeverity(issues, severity || "all");
    // Metrics are computed from every duplicate of the file, not only from
    // the issues that survive the severity filter.
    const totalTokenCost = fileDuplicates.reduce(
      (sum, dup) => sum + dup.tokenCost,
      0
    );
    results.push({
      fileName: file,
      issues: filteredIssues,
      metrics: {
        tokenCost: totalTokenCost,
        consistencyScore: Math.max(0, 1 - fileDuplicates.length * 0.1)
      }
    });
  }

  let groups;
  let clusters;
  if (groupByFilePair) {
    groups = groupDuplicatesByFilePair(duplicates);
  }
  if (createClusters) {
    const allClusters = createRefactorClusters(duplicates);
    clusters = filterClustersByImpact(
      allClusters,
      minClusterTokenCost,
      minClusterFiles
    );
  }
  return { results, duplicates, files, groups, clusters, config: finalOptions };
}
485
+
486
+ export {
487
+ groupDuplicatesByFilePair,
488
+ createRefactorClusters,
489
+ filterClustersByImpact,
490
+ areBrandSpecificVariants,
491
+ filterBrandSpecificVariants,
492
+ getSmartDefaults,
493
+ logConfiguration,
494
+ generateSummary,
495
+ filterBySeverity,
496
+ calculateSeverity2 as calculateSeverity,
497
+ Severity3 as Severity,
498
+ analyzePatterns
499
+ };