@aiready/pattern-detect 0.17.14 → 0.17.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -174,12 +174,67 @@ var INFRA_RULES = [
174
174
  severity: import_core3.Severity.Info,
175
175
  reason: "CLI command definitions follow standard Commander.js patterns and are intentionally similar",
176
176
  suggestion: "Command boilerplate duplication is acceptable for CLI interfaces"
177
+ },
178
+ // DynamoDB Single-Table Design - Standard single-table patterns with prefixed keys
179
+ {
180
+ name: "dynamodb-single-table",
181
+ detect: (file, code) => {
182
+ const hasDynamoDBPattern = code.includes("docClient") || code.includes("dynamodb") || code.includes("DynamoDB") || code.includes("queryItems") || code.includes("putItem") || code.includes("getItem") || code.includes("updateItem") || code.includes("deleteItem");
183
+ const hasKeyPrefix = code.includes("userId:") && code.includes("#") || code.includes("pk:") && code.includes("#") || code.includes("Key:") && code.includes("#") || /[A-Z]+#/.test(code);
184
+ const hasSingleTablePattern = code.includes("KeyConditionExpression") || code.includes("pk =") || code.includes("sk =") || code.includes("userId") && code.includes("timestamp");
185
+ return hasDynamoDBPattern && (hasKeyPrefix || hasSingleTablePattern);
186
+ },
187
+ severity: import_core3.Severity.Info,
188
+ reason: "DynamoDB single-table design with prefixed keys is a standard pattern for efficient data access",
189
+ suggestion: "Single-table query patterns are intentionally similar and should not be refactored"
190
+ },
191
+ // CLI Main Function Boilerplate - Standard argument parsing patterns
192
+ {
193
+ name: "cli-main-boilerplate",
194
+ detect: (file, code) => {
195
+ const basename = file.split("/").pop() || "";
196
+ const isCliFile = file.includes("/cli/") || file.includes("/commands/") || basename.startsWith("cli") || basename.endsWith(".cli.ts") || basename.endsWith(".cli.js");
197
+ const hasMainFunction = code.includes("function main()") || code.includes("async function main()") || code.includes("const main =") || code.includes("main()");
198
+ const hasArgParsing = code.includes("process.argv") || code.includes("yargs") || code.includes("commander") || code.includes("minimist") || code.includes(".parse(") || code.includes("args") && code.includes("._");
199
+ return isCliFile && hasMainFunction && hasArgParsing;
200
+ },
201
+ severity: import_core3.Severity.Info,
202
+ reason: "CLI main functions with argument parsing follow standard boilerplate patterns",
203
+ suggestion: "CLI argument parsing boilerplate is acceptable and should not be flagged as duplication"
177
204
  }
178
205
  ];
179
206
 
180
207
  // src/rules/categories/logic-rules.ts
181
208
  var import_core4 = require("@aiready/core");
182
209
  var LOGIC_RULES = [
210
+ // Enum Semantic Difference - Different enum names indicate different semantic meanings
211
+ {
212
+ name: "enum-semantic-difference",
213
+ detect: (file, code) => {
214
+ const enumRegex = /(?:export\s+)?(?:const\s+)?enum\s+([A-Z][a-zA-Z0-9]*)/g;
215
+ const enums = [];
216
+ let match;
217
+ while ((match = enumRegex.exec(code)) !== null) {
218
+ enums.push(match[1]);
219
+ }
220
+ return enums.length > 0;
221
+ },
222
+ severity: import_core4.Severity.Info,
223
+ reason: "Enums with different names represent different semantic domain concepts, even if they share similar values",
224
+ suggestion: "Different enums (e.g., EscalationPriority vs HealthSeverity) serve different purposes and should not be merged"
225
+ },
226
+ // Enum Value Similarity - Common enum values like LOW, MEDIUM, HIGH are standard
227
+ {
228
+ name: "enum-value-similarity",
229
+ detect: (file, code) => {
230
+ const hasCommonEnumValues = (code.includes("LOW = 'low'") || code.includes("LOW = 0") || code.includes("LOW = 'LOW'")) && (code.includes("HIGH = 'high'") || code.includes("HIGH = 2") || code.includes("HIGH = 'HIGH'")) && (code.includes("MEDIUM = 'medium'") || code.includes("MEDIUM = 1") || code.includes("MEDIUM = 'MEDIUM'"));
231
+ const isEnumDefinition = /(?:export\s+)?(?:const\s+)?enum\s+/.test(code) || code.includes("enum ") && code.includes("{") && code.includes("}");
232
+ return hasCommonEnumValues && isEnumDefinition;
233
+ },
234
+ severity: import_core4.Severity.Info,
235
+ reason: "Common enum values (LOW, MEDIUM, HIGH, CRITICAL) are standard patterns used across different domain enums",
236
+ suggestion: "Enum value similarity is expected for severity/priority enums and should not be flagged as duplication"
237
+ },
183
238
  // Re-export / Barrel files - Intentional API surface consolidation
184
239
  {
185
240
  name: "re-export-files",
@@ -209,6 +264,20 @@ var LOGIC_RULES = [
209
264
  reason: "Type/interface definitions are intentionally duplicated for module independence",
210
265
  suggestion: "Extract to shared types package only if causing maintenance burden"
211
266
  },
267
+ // Cross-Package Type Definitions - Different packages may have similar types
268
+ {
269
+ name: "cross-package-types",
270
+ detect: (file, code) => {
271
+ const hasTypeDefinition = code.includes("interface ") || code.includes("type ") || code.includes("enum ");
272
+ const isPackageOrApp = file.includes("/packages/") || file.includes("/apps/") || file.includes("/core/");
273
+ const packageMatch = file.match(/\/(packages|apps|core)\/([^/]+)\//);
274
+ const hasPackageStructure = packageMatch !== null;
275
+ return hasTypeDefinition && isPackageOrApp && hasPackageStructure;
276
+ },
277
+ severity: import_core4.Severity.Info,
278
+ reason: "Types in different packages/modules are often intentionally similar for module independence",
279
+ suggestion: "Cross-package type duplication is acceptable for decoupled module architecture"
280
+ },
212
281
  // Utility Functions - Small helpers in dedicated utility files
213
282
  {
214
283
  name: "utility-functions",
@@ -291,6 +360,22 @@ var LOGIC_RULES = [
291
360
  severity: import_core4.Severity.Info,
292
361
  reason: "Validation functions are inherently similar and often intentionally duplicated for domain clarity",
293
362
  suggestion: "Consider extracting to shared validators only if validation logic becomes complex"
363
+ },
364
+ // Singleton Getter Pattern - Standard singleton initialization pattern
365
+ {
366
+ name: "singleton-getter",
367
+ detect: (file, code) => {
368
+ const hasSingletonGetter = /(?:export\s+)?(?:async\s+)?function\s+get[A-Z][a-zA-Z0-9]*\s*\(/.test(
369
+ code
370
+ ) || /(?:export\s+)?const\s+get[A-Z][a-zA-Z0-9]*\s*=\s*(?:async\s+)?\(\)\s*=>/.test(
371
+ code
372
+ );
373
+ const hasSingletonPattern = code.includes("if (!") && code.includes("instance") && code.includes(" = ") || code.includes("if (!_") && code.includes(" = new ") || code.includes("if (") && code.includes(" === null") && code.includes(" = new ");
374
+ return hasSingletonGetter && hasSingletonPattern;
375
+ },
376
+ severity: import_core4.Severity.Info,
377
+ reason: "Singleton getter functions follow standard initialization pattern and are intentionally similar",
378
+ suggestion: "Singleton getters are boilerplate and acceptable duplication for lazy initialization"
294
379
  }
295
380
  ];
296
381
 
@@ -2,10 +2,10 @@ import {
2
2
  analyzePatterns,
3
3
  generateSummary,
4
4
  getSmartDefaults
5
- } from "../chunk-C4ZGC4KA.mjs";
6
- import "../chunk-RH5JPWEC.mjs";
5
+ } from "../chunk-WQX7IHAN.mjs";
6
+ import "../chunk-JWP5TCDM.mjs";
7
+ import "../chunk-KDXWIT6W.mjs";
7
8
  import "../chunk-G3GZFYRI.mjs";
8
- import "../chunk-UKQFCUQA.mjs";
9
9
  export {
10
10
  analyzePatterns,
11
11
  generateSummary,
@@ -0,0 +1,514 @@
1
+ import {
2
+ detectDuplicatePatterns
3
+ } from "./chunk-KZQXBBR3.mjs";
4
+ import {
5
+ calculateSeverity
6
+ } from "./chunk-ATXO4JL7.mjs";
7
+
8
+ // src/grouping.ts
9
+ import { Severity, getSeverityLevel } from "@aiready/core";
10
+ import path from "path";
11
/**
 * Aggregate raw duplicate findings so each unordered pair of files
 * appears exactly once, with occurrence counts, summed token cost,
 * mean similarity, the union of pattern types, and all line ranges.
 * The group's severity is the most severe finding in the pair.
 * @param {Array} duplicates - findings with file1/file2, similarity, tokenCost, etc.
 * @returns {Array} one summary record per file pair.
 */
function groupDuplicatesByFilePair(duplicates) {
  const byPair = new Map();
  for (const dup of duplicates) {
    // Order-independent key: the same pair always maps to one group.
    const key = [dup.file1, dup.file2].sort().join("::");
    let group = byPair.get(key);
    if (group === undefined) {
      group = {
        filePair: key,
        severity: dup.severity,
        occurrences: 0,
        totalTokenCost: 0,
        averageSimilarity: 0,
        patternTypes: new Set(),
        lineRanges: []
      };
      byPair.set(key, group);
    }
    group.occurrences += 1;
    group.totalTokenCost += dup.tokenCost;
    // Accumulate similarity here; divided by occurrences at the end.
    group.averageSimilarity += dup.similarity;
    group.patternTypes.add(dup.patternType);
    group.lineRanges.push({
      file1: { start: dup.line1, end: dup.endLine1 },
      file2: { start: dup.line2, end: dup.endLine2 }
    });
    // Promote the group's severity when a more severe finding arrives.
    if (getSeverityLevel(dup.severity) > getSeverityLevel(group.severity)) {
      group.severity = dup.severity;
    }
  }
  return [...byPair.values()].map((group) => ({
    ...group,
    averageSimilarity: group.averageSimilarity / group.occurrences
  }));
}
46
/**
 * Build connected components over the duplicate graph (files are nodes,
 * duplicate findings are edges) and summarize each multi-file component
 * as a refactor cluster with aggregate cost, similarity, and a severity
 * assessment from `calculateSeverity`.
 * @param {Array} duplicates - findings with file1/file2/similarity/tokenCost.
 * @returns {Array} clusters; components with fewer than 2 files are dropped.
 */
function createRefactorClusters(duplicates) {
  // Undirected adjacency list: file -> set of files it duplicates with.
  const adjacency = new Map();
  for (const dup of duplicates) {
    if (!adjacency.has(dup.file1)) adjacency.set(dup.file1, new Set());
    if (!adjacency.has(dup.file2)) adjacency.set(dup.file2, new Set());
    adjacency.get(dup.file1).add(dup.file2);
    adjacency.get(dup.file2).add(dup.file1);
  }
  // BFS to collect connected components.
  const visited = new Set();
  const components = [];
  for (const file of adjacency.keys()) {
    if (visited.has(file)) continue;
    const component = [];
    const queue = [file];
    visited.add(file);
    while (queue.length > 0) {
      const curr = queue.shift();
      component.push(curr);
      for (const neighbor of adjacency.get(curr) || []) {
        if (!visited.has(neighbor)) {
          visited.add(neighbor);
          queue.push(neighbor);
        }
      }
    }
    components.push(component);
  }
  const clusters = [];
  for (const component of components) {
    if (component.length < 2) continue;
    // Set membership turns the per-duplicate check into O(1); the original
    // Array.includes scan made this loop O(duplicates * componentSize).
    const members = new Set(component);
    const componentDups = duplicates.filter(
      (d) => members.has(d.file1) && members.has(d.file2)
    );
    const totalTokenCost = componentDups.reduce(
      (sum, d) => sum + d.tokenCost,
      0
    );
    const avgSimilarity =
      componentDups.reduce((sum, d) => sum + d.similarity, 0) /
      Math.max(1, componentDups.length);
    const name = determineClusterName(component);
    const { severity, reason, suggestion } = calculateSeverity(
      component[0],
      component[1],
      "",
      // Code not available here
      avgSimilarity,
      30
      // Assume substantial if clustered
    );
    clusters.push({
      id: `cluster-${clusters.length}`,
      name,
      files: component,
      severity,
      duplicateCount: componentDups.length,
      totalTokenCost,
      averageSimilarity: avgSimilarity,
      reason,
      suggestion
    });
  }
  return clusters;
}
108
/**
 * Derive a human-readable name for a cluster from its file paths.
 * Well-known groupings (blog, buttons, cards, e2e login tests) get
 * fixed labels; otherwise the first file's parent directory names the
 * cluster, falling back to a generic label.
 * @param {string[]} files - file paths in the cluster.
 * @returns {string} display name.
 */
function determineClusterName(files) {
  if (files.length === 0) return "Unknown Cluster";
  const anyPathContains = (needle) => files.some((f) => f.includes(needle));
  if (anyPathContains("blog")) return "Blog SEO Boilerplate";
  if (anyPathContains("buttons")) return "Button Component Variants";
  if (anyPathContains("cards")) return "Card Component Variants";
  if (anyPathContains("login.test")) return "E2E Test Patterns";
  // Name the cluster after the first file's immediate parent directory.
  const dirName = path.dirname(files[0]).split(path.sep).pop();
  if (dirName && dirName !== "." && dirName !== "..") {
    return `${dirName.charAt(0).toUpperCase()}${dirName.slice(1)} Domain Group`;
  }
  return "Shared Pattern Group";
}
122
/**
 * Keep only clusters worth surfacing: at least `minFiles` files involved
 * and at least `minTokenCost` total wasted tokens.
 * @param {Array} clusters - output of createRefactorClusters.
 * @param {number} [minTokenCost=1000] - token-cost floor.
 * @param {number} [minFiles=3] - file-count floor.
 * @returns {Array} the impactful subset, original order preserved.
 */
function filterClustersByImpact(clusters, minTokenCost = 1000, minFiles = 3) {
  const isImpactful = (cluster) =>
    cluster.totalTokenCost >= minTokenCost && cluster.files.length >= minFiles;
  return clusters.filter(isImpactful);
}
127
/**
 * Heuristic: does this snippet consist solely of a TypeScript
 * interface/type/enum declaration with no executable code?
 * Used to downgrade duplicate findings that are intentional type copies.
 * @param {string} code - candidate snippet.
 * @returns {boolean} true for short, declaration-only snippets.
 */
function isPureInterfaceDefinition(code) {
  const trimmed = code.trim();
  const declarationPrefixes = [
    "interface ",
    "type ",
    "export interface ",
    "export type ",
    "enum ",
    "export enum "
  ];
  if (!declarationPrefixes.some((prefix) => trimmed.startsWith(prefix))) {
    return false;
  }
  // Anything that smells like executable code disqualifies the snippet.
  const codeMarkers = ["={", "=> {", "function ", "() {", " implements "];
  if (codeMarkers.some((marker) => trimmed.includes(marker))) {
    return false;
  }
  // Long snippets are unlikely to be a lone declaration.
  return trimmed.length <= 200;
}
138
// Styling tokens that mark code as belonging to a specific visual
// brand/theme (Tailwind palette names, effect utilities, etc.).
var BRAND_INDICATORS = [
  "cyberpunk",
  "cyber-blue",
  "cyber-purple",
  "slate-900",
  "slate-400",
  "zinc-",
  "indigo-",
  "neon-",
  "glassmorphism",
  "backdrop-blur"
];
/**
 * Does the file path live in an area of the repo that hosts
 * brand-specific apps/components? Matching is case-insensitive.
 * @param {string} filePath - path to test.
 * @returns {boolean} true when any branding term appears in the path.
 */
function isBrandSpecificComponent(filePath) {
  const brandingTerms = ["landing", "clawmore", "platform", "apps/"];
  const lower = filePath.toLowerCase();
  return brandingTerms.some((term) => lower.includes(term));
}
158
/**
 * True when two distinct files are both brand-specific components AND
 * both snippets contain brand styling keywords — i.e. they look like
 * intentional themed variants rather than accidental duplication.
 * @param {string} file1 - first file path.
 * @param {string} file2 - second file path.
 * @param {string} code1 - snippet from file1.
 * @param {string} code2 - snippet from file2.
 * @returns {boolean}
 */
function areBrandSpecificVariants(file1, file2, code1, code2) {
  const bothBrandPaths =
    isBrandSpecificComponent(file1) && isBrandSpecificComponent(file2);
  if (!bothBrandPaths || file1 === file2) {
    return false;
  }
  const containsBrandKeyword = (code) => {
    const lowerCode = code.toLowerCase();
    return BRAND_INDICATORS.some((indicator) => lowerCode.includes(indicator));
  };
  return containsBrandKeyword(code1) && containsBrandKeyword(code2);
}
174
/**
 * Downgrade (not remove) duplicates that are acceptable by policy.
 *
 * Despite the name, nothing is filtered out: every entry is kept, but
 * brand-themed variants and pure interface/type definitions are mutated
 * in place to Info severity with an explanatory suggestion.
 * @param {Array} duplicates - findings, mutated in place.
 * @returns {Array} a new array containing every input entry.
 */
function filterBrandSpecificVariants(duplicates) {
  return duplicates.filter((dup) => {
    // Self-pairs are left untouched.
    if (dup.file1 === dup.file2) return true;
    if (areBrandSpecificVariants(dup.file1, dup.file2, dup.code1, dup.code2)) {
      dup.severity = Severity.Info;
      dup.suggestion = "Brand-specific themed component variant (intentional)";
    }
    if (isPureInterfaceDefinition(dup.code1) && isPureInterfaceDefinition(dup.code2)) {
      dup.severity = Severity.Info;
      dup.suggestion = "Pure interface/type definition - intentional for module independence";
    }
    // Always keep the entry; only its severity/suggestion may change.
    return true;
  });
}
195
+
196
+ // src/config.ts
197
+ import { scanFiles } from "@aiready/core";
198
/**
 * Compute analysis options adapted to repository size, then overlay any
 * explicitly supplied user options (only for known option keys).
 * When `userOptions.useSmartDefaults === false`, a fixed baseline is
 * returned without scanning the repository.
 * @param {string} directory - repository root.
 * @param {object} userOptions - user-supplied overrides.
 * @returns {Promise<object>} the resolved option set.
 */
async function getSmartDefaults(directory, userOptions) {
  if (userOptions.useSmartDefaults === false) {
    // Fixed baseline: no repository scan needed.
    return {
      rootDir: directory,
      minSimilarity: 0.6,
      minLines: 8,
      batchSize: 100,
      approx: true,
      minSharedTokens: 12,
      maxCandidatesPerBlock: 5,
      streamResults: false,
      severity: "all",
      includeTests: false
    };
  }
  // Estimate repository size from the files the scan would cover.
  const files = await scanFiles({
    rootDir: directory,
    include: userOptions.include || ["**/*.{ts,tsx,js,jsx,py,java}"],
    exclude: userOptions.exclude
  });
  const estimatedBlocks = files.length * 5;
  // Bigger repos: demand longer blocks (6..20 lines) and higher
  // similarity (0.45..0.75) to keep the result set manageable.
  const minLines = Math.max(
    6,
    Math.min(20, 6 + Math.floor(estimatedBlocks / 1000) * 2)
  );
  const minSimilarity = Math.min(0.75, 0.45 + (estimatedBlocks / 10000) * 0.3);
  const batchSize = estimatedBlocks > 1000 ? 200 : 100;
  const severity = estimatedBlocks > 3000 ? "high" : "all";
  const maxCandidatesPerBlock = Math.max(
    5,
    Math.min(100, Math.floor(1000000 / estimatedBlocks))
  );
  const defaults = {
    rootDir: directory,
    minSimilarity,
    minLines,
    batchSize,
    approx: true,
    minSharedTokens: 10,
    maxCandidatesPerBlock,
    streamResults: false,
    severity,
    includeTests: false
  };
  // Explicit user values win, but only for keys the defaults define.
  const result = { ...defaults };
  for (const key of Object.keys(defaults)) {
    if (key in userOptions && userOptions[key] !== undefined) {
      result[key] = userOptions[key];
    }
  }
  return result;
}
252
/**
 * Print the effective configuration to stdout, one line per setting.
 * Optional settings (exclude patterns, confidence threshold, ignore
 * whitelist) are shown only when present. No-op when
 * `config.suppressToolConfig` is set.
 * @param {object} config - resolved options.
 * @param {number} estimatedBlocks - estimated code-block count.
 */
function logConfiguration(config, estimatedBlocks) {
  if (config.suppressToolConfig) return;
  const lines = [
    "\u{1F4CB} Configuration:",
    ` Repository size: ~${estimatedBlocks} code blocks`,
    ` Similarity threshold: ${config.minSimilarity}`,
    ` Minimum lines: ${config.minLines}`,
    ` Approximate mode: ${config.approx ? "enabled" : "disabled"}`,
    ` Max candidates per block: ${config.maxCandidatesPerBlock}`,
    ` Min shared tokens: ${config.minSharedTokens}`,
    ` Severity filter: ${config.severity}`,
    ` Include tests: ${config.includeTests}`
  ];
  if (config.excludePatterns && config.excludePatterns.length > 0) {
    lines.push(` Exclude patterns: ${config.excludePatterns.length} active`);
  }
  if (config.confidenceThreshold && config.confidenceThreshold > 0) {
    lines.push(` Confidence threshold: ${config.confidenceThreshold}`);
  }
  if (config.ignoreWhitelist && config.ignoreWhitelist.length > 0) {
    lines.push(` Ignore whitelist: ${config.ignoreWhitelist.length} entries`);
  }
  lines.push("");
  for (const line of lines) {
    console.log(line);
  }
}
276
+
277
+ // src/summary.ts
278
+ import { Severity as Severity2 } from "@aiready/core";
279
/**
 * Map a detected pattern type to a concrete refactoring suggestion,
 * escalating the wording when similarity is very high.
 * @param {string} patternType - pattern category (e.g. "validator", "utility").
 * @param {number} similarity - similarity score in [0, 1].
 * @returns {string} actionable suggestion text.
 */
function getRefactoringSuggestion(patternType, similarity) {
  const baseMessages = {
    "api-handler": "Extract common middleware or create a base handler class",
    validator: "Consolidate validation logic into shared schema validators (Zod/Yup)",
    utility: "Move to a shared utilities file and reuse across modules",
    "class-method": "Consider inheritance or composition to share behavior",
    component: "Extract shared logic into a custom hook or HOC",
    function: "Extract into a shared helper function",
    unknown: "Extract common logic into a reusable module"
  };
  // Fall back to the generic message for unrecognized pattern types;
  // the previous direct lookup produced "undefined..." strings and
  // could leak Object.prototype members (e.g. "constructor").
  const base = Object.hasOwn(baseMessages, patternType)
    ? baseMessages[patternType]
    : baseMessages.unknown;
  const urgency = similarity > 0.95 ? " (CRITICAL: Nearly identical code)" : similarity > 0.9 ? " (HIGH: Very similar, refactor soon)" : "";
  return base + urgency;
}
292
/**
 * Aggregate per-file analysis results into a repo-level summary:
 * total pattern count, summed token cost, counts by pattern type, and
 * the first ten duplicate issues decoded back from their message text.
 * Non-array input yields an all-zero summary.
 * Messages look like: "<type> pattern NN% similar to <file> (MM tokens wasted)".
 * @param {Array|*} results - per-file results with `issues` and `metrics`.
 * @returns {object} { totalPatterns, totalTokenCost, patternsByType, topDuplicates }.
 */
function generateSummary(results) {
  const emptyByType = () => ({
    "api-handler": 0,
    validator: 0,
    utility: 0,
    "class-method": 0,
    component: 0,
    function: 0,
    unknown: 0
  });
  if (!Array.isArray(results)) {
    return {
      totalPatterns: 0,
      totalTokenCost: 0,
      patternsByType: emptyByType(),
      topDuplicates: []
    };
  }
  const allIssues = results.flatMap((r) => r.issues || []);
  const totalTokenCost = results.reduce(
    (sum, r) => sum + (r.metrics?.tokenCost || 0),
    0
  );
  const patternsByType = emptyByType();
  for (const issue of allIssues) {
    const typeMatch = issue.message.match(/^(\S+(?:-\S+)*) pattern/);
    if (typeMatch) {
      const type = typeMatch[1] || "unknown";
      patternsByType[type] = (patternsByType[type] || 0) + 1;
    }
  }
  // Decode the first ten issues back into structured duplicate records.
  // (Line/file info for the second file is not carried on the issue, so
  // it is re-parsed from the message; missing pieces default to 0/"unknown".)
  const topDuplicates = allIssues.slice(0, 10).map((issue) => {
    const similarityMatch = issue.message.match(/(\d+)% similar/);
    const tokenMatch = issue.message.match(/\((\d+) tokens/);
    const typeMatch = issue.message.match(/^(\S+(?:-\S+)*) pattern/);
    const fileMatch = issue.message.match(/similar to (.+?) \(/);
    return {
      files: [
        {
          path: issue.location.file,
          startLine: issue.location.line,
          endLine: 0
        },
        {
          path: fileMatch?.[1] || "unknown",
          startLine: 0,
          endLine: 0
        }
      ],
      similarity: similarityMatch ? Number.parseInt(similarityMatch[1], 10) / 100 : 0,
      patternType: typeMatch?.[1] || "unknown",
      tokenCost: tokenMatch ? Number.parseInt(tokenMatch[1], 10) : 0
    };
  });
  return {
    totalPatterns: allIssues.length,
    totalTokenCost,
    patternsByType,
    topDuplicates
  };
}
360
/**
 * Drop issues below the requested severity threshold.
 * "all" passes the input array through untouched (same reference);
 * unrecognized threshold names behave like "medium".
 * @param {Array} issues - issues carrying a `severity` field.
 * @param {string} severity - "all" | "critical" | "high" | "medium".
 * @returns {Array} the permitted issues.
 */
function filterBySeverity(issues, severity) {
  if (severity === "all") return issues;
  const severityMap = {
    critical: [Severity2.Critical],
    high: [Severity2.Critical, Severity2.Major],
    medium: [Severity2.Critical, Severity2.Major, Severity2.Minor]
  };
  const fallback = [Severity2.Critical, Severity2.Major, Severity2.Minor];
  const allowed = severityMap[severity] || fallback;
  return issues.filter((issue) => allowed.includes(issue.severity));
}
374
/**
 * Human-readable label for a Severity value; unrecognized values map
 * to "UNKNOWN". Note: Critical/Major/Minor/Info map to
 * CRITICAL/HIGH/MEDIUM/LOW respectively.
 * @param {*} severity - a Severity enum member.
 * @returns {string} uppercase display label.
 */
function getSeverityLabel(severity) {
  if (severity === Severity2.Critical) return "CRITICAL";
  if (severity === Severity2.Major) return "HIGH";
  if (severity === Severity2.Minor) return "MEDIUM";
  if (severity === Severity2.Info) return "LOW";
  return "UNKNOWN";
}
388
/**
 * Map a similarity score to a severity bucket:
 * > 0.95 -> Critical, > 0.9 -> Major, otherwise Minor.
 * @param {number} similarity - similarity score in [0, 1].
 * @returns {*} a Severity enum member.
 */
function calculateSeverity2(similarity) {
  return similarity > 0.95
    ? Severity2.Critical
    : similarity > 0.9
      ? Severity2.Major
      : Severity2.Minor;
}
393
+
394
+ // src/analyzer.ts
395
+ import { scanFiles as scanFiles2, readFileContent, Severity as Severity3, IssueType } from "@aiready/core";
396
/**
 * End-to-end duplicate-pattern analysis pipeline:
 *   resolve smart defaults -> scan files -> read contents in batches ->
 *   detect duplicates -> downgrade policy-accepted duplicates in place ->
 *   build per-file issue reports -> optional pair grouping and clustering.
 * @param {object} options - user options; explicit values override smart defaults.
 * @returns {Promise<object>} { results, duplicates, files, groups, clusters, config }.
 */
async function analyzePatterns(options) {
  const smartDefaults = await getSmartDefaults(options.rootDir || ".", options);
  // Explicit user options take precedence over computed defaults.
  const finalOptions = { ...smartDefaults, ...options };
  const {
    minSimilarity = 0.4,
    minLines = 5,
    batchSize = 100,
    approx = true,
    minSharedTokens = 8,
    maxCandidatesPerBlock = 100,
    streamResults = false,
    severity = "all",
    groupByFilePair = true,
    createClusters = true,
    minClusterTokenCost = 1000,
    minClusterFiles = 3,
    excludePatterns = [],
    confidenceThreshold = 0,
    ignoreWhitelist = [],
    ...scanOptions
  } = finalOptions;
  const files = await scanFiles2(scanOptions);
  logConfiguration(finalOptions, files.length * 3);
  // Read contents in bounded batches to cap concurrent open file handles.
  const READ_BATCH_SIZE = 50;
  const fileContents = [];
  for (let start = 0; start < files.length; start += READ_BATCH_SIZE) {
    const batch = files.slice(start, start + READ_BATCH_SIZE);
    const loaded = await Promise.all(
      batch.map(async (file) => ({
        file,
        content: await readFileContent(file)
      }))
    );
    fileContents.push(...loaded);
  }
  const duplicates = await detectDuplicatePatterns(fileContents, {
    minSimilarity,
    minLines,
    batchSize,
    approx,
    minSharedTokens,
    maxCandidatesPerBlock,
    streamResults,
    excludePatterns,
    confidenceThreshold,
    ignoreWhitelist,
    onProgress: options.onProgress
  });
  // Mutates severities in place for policy-accepted duplicates
  // (brand variants, pure type definitions); nothing is removed.
  filterBrandSpecificVariants(duplicates);
  const results = [];
  for (const file of files) {
    const fileDuplicates = duplicates.filter(
      (dup) => dup.file1 === file || dup.file2 === file
    );
    const issues = fileDuplicates.map((dup) => {
      const otherFile = dup.file1 === file ? dup.file2 : dup.file1;
      // Respect rule-assigned Info severity; otherwise derive from similarity.
      const severityLevel =
        dup.severity === "info" || dup.severity === "Info"
          ? Severity3.Info
          : calculateSeverity2(dup.similarity);
      return {
        type: IssueType.DuplicatePattern,
        severity: severityLevel,
        message: `${dup.patternType} pattern ${Math.round(dup.similarity * 100)}% similar to ${otherFile} (${dup.tokenCost} tokens wasted)`,
        location: {
          file,
          line: dup.file1 === file ? dup.line1 : dup.line2
        },
        suggestion: getRefactoringSuggestion(dup.patternType, dup.similarity)
      };
    });
    const totalTokenCost = fileDuplicates.reduce(
      (sum, dup) => sum + dup.tokenCost,
      0
    );
    results.push({
      fileName: file,
      issues: filterBySeverity(issues, severity || "all"),
      metrics: {
        tokenCost: totalTokenCost,
        // Each duplicate shaves 10% off the consistency score, floored at 0.
        consistencyScore: Math.max(0, 1 - fileDuplicates.length * 0.1)
      }
    });
  }
  let groups;
  if (groupByFilePair) {
    groups = groupDuplicatesByFilePair(duplicates);
  }
  let clusters;
  if (createClusters) {
    clusters = filterClustersByImpact(
      createRefactorClusters(duplicates),
      minClusterTokenCost,
      minClusterFiles
    );
  }
  return { results, duplicates, files, groups, clusters, config: finalOptions };
}
499
+
500
+ export {
501
+ groupDuplicatesByFilePair,
502
+ createRefactorClusters,
503
+ filterClustersByImpact,
504
+ areBrandSpecificVariants,
505
+ filterBrandSpecificVariants,
506
+ getSmartDefaults,
507
+ logConfiguration,
508
+ generateSummary,
509
+ filterBySeverity,
510
+ getSeverityLabel,
511
+ calculateSeverity2 as calculateSeverity,
512
+ Severity3 as Severity,
513
+ analyzePatterns
514
+ };