@aiready/pattern-detect 0.17.14 → 0.17.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analyzer-entry/index.js +85 -0
- package/dist/analyzer-entry/index.mjs +3 -3
- package/dist/chunk-4SKGAZEW.mjs +514 -0
- package/dist/chunk-ATXO4JL7.mjs +404 -0
- package/dist/chunk-F42Q2M4O.mjs +143 -0
- package/dist/chunk-JWP5TCDM.mjs +143 -0
- package/dist/chunk-KDXWIT6W.mjs +408 -0
- package/dist/chunk-KZQXBBR3.mjs +143 -0
- package/dist/chunk-NVV4UFIV.mjs +514 -0
- package/dist/chunk-PFA2DO73.mjs +392 -0
- package/dist/chunk-WQX7IHAN.mjs +514 -0
- package/dist/cli.js +89 -4
- package/dist/cli.mjs +8 -8
- package/dist/context-rules-entry/index.js +85 -0
- package/dist/context-rules-entry/index.mjs +1 -1
- package/dist/detector-entry/index.js +85 -0
- package/dist/detector-entry/index.mjs +2 -2
- package/dist/index.js +85 -0
- package/dist/index.mjs +5 -5
- package/package.json +5 -5
|
@@ -174,12 +174,67 @@ var INFRA_RULES = [
|
|
|
174
174
|
severity: import_core3.Severity.Info,
|
|
175
175
|
reason: "CLI command definitions follow standard Commander.js patterns and are intentionally similar",
|
|
176
176
|
suggestion: "Command boilerplate duplication is acceptable for CLI interfaces"
|
|
177
|
+
},
|
|
178
|
+
// DynamoDB Single-Table Design - Standard single-table patterns with prefixed keys
|
|
179
|
+
{
|
|
180
|
+
name: "dynamodb-single-table",
|
|
181
|
+
detect: (file, code) => {
|
|
182
|
+
const hasDynamoDBPattern = code.includes("docClient") || code.includes("dynamodb") || code.includes("DynamoDB") || code.includes("queryItems") || code.includes("putItem") || code.includes("getItem") || code.includes("updateItem") || code.includes("deleteItem");
|
|
183
|
+
const hasKeyPrefix = code.includes("userId:") && code.includes("#") || code.includes("pk:") && code.includes("#") || code.includes("Key:") && code.includes("#") || /[A-Z]+#/.test(code);
|
|
184
|
+
const hasSingleTablePattern = code.includes("KeyConditionExpression") || code.includes("pk =") || code.includes("sk =") || code.includes("userId") && code.includes("timestamp");
|
|
185
|
+
return hasDynamoDBPattern && (hasKeyPrefix || hasSingleTablePattern);
|
|
186
|
+
},
|
|
187
|
+
severity: import_core3.Severity.Info,
|
|
188
|
+
reason: "DynamoDB single-table design with prefixed keys is a standard pattern for efficient data access",
|
|
189
|
+
suggestion: "Single-table query patterns are intentionally similar and should not be refactored"
|
|
190
|
+
},
|
|
191
|
+
// CLI Main Function Boilerplate - Standard argument parsing patterns
|
|
192
|
+
{
|
|
193
|
+
name: "cli-main-boilerplate",
|
|
194
|
+
detect: (file, code) => {
|
|
195
|
+
const basename = file.split("/").pop() || "";
|
|
196
|
+
const isCliFile = file.includes("/cli/") || file.includes("/commands/") || basename.startsWith("cli") || basename.endsWith(".cli.ts") || basename.endsWith(".cli.js");
|
|
197
|
+
const hasMainFunction = code.includes("function main()") || code.includes("async function main()") || code.includes("const main =") || code.includes("main()");
|
|
198
|
+
const hasArgParsing = code.includes("process.argv") || code.includes("yargs") || code.includes("commander") || code.includes("minimist") || code.includes(".parse(") || code.includes("args") && code.includes("._");
|
|
199
|
+
return isCliFile && hasMainFunction && hasArgParsing;
|
|
200
|
+
},
|
|
201
|
+
severity: import_core3.Severity.Info,
|
|
202
|
+
reason: "CLI main functions with argument parsing follow standard boilerplate patterns",
|
|
203
|
+
suggestion: "CLI argument parsing boilerplate is acceptable and should not be flagged as duplication"
|
|
177
204
|
}
|
|
178
205
|
];
|
|
179
206
|
|
|
180
207
|
// src/rules/categories/logic-rules.ts
|
|
181
208
|
var import_core4 = require("@aiready/core");
|
|
182
209
|
var LOGIC_RULES = [
|
|
210
|
+
// Enum Semantic Difference - Different enum names indicate different semantic meanings
|
|
211
|
+
{
|
|
212
|
+
name: "enum-semantic-difference",
|
|
213
|
+
detect: (file, code) => {
|
|
214
|
+
const enumRegex = /(?:export\s+)?(?:const\s+)?enum\s+([A-Z][a-zA-Z0-9]*)/g;
|
|
215
|
+
const enums = [];
|
|
216
|
+
let match;
|
|
217
|
+
while ((match = enumRegex.exec(code)) !== null) {
|
|
218
|
+
enums.push(match[1]);
|
|
219
|
+
}
|
|
220
|
+
return enums.length > 0;
|
|
221
|
+
},
|
|
222
|
+
severity: import_core4.Severity.Info,
|
|
223
|
+
reason: "Enums with different names represent different semantic domain concepts, even if they share similar values",
|
|
224
|
+
suggestion: "Different enums (e.g., EscalationPriority vs HealthSeverity) serve different purposes and should not be merged"
|
|
225
|
+
},
|
|
226
|
+
// Enum Value Similarity - Common enum values like LOW, MEDIUM, HIGH are standard
|
|
227
|
+
{
|
|
228
|
+
name: "enum-value-similarity",
|
|
229
|
+
detect: (file, code) => {
|
|
230
|
+
const hasCommonEnumValues = (code.includes("LOW = 'low'") || code.includes("LOW = 0") || code.includes("LOW = 'LOW'")) && (code.includes("HIGH = 'high'") || code.includes("HIGH = 2") || code.includes("HIGH = 'HIGH'")) && (code.includes("MEDIUM = 'medium'") || code.includes("MEDIUM = 1") || code.includes("MEDIUM = 'MEDIUM'"));
|
|
231
|
+
const isEnumDefinition = /(?:export\s+)?(?:const\s+)?enum\s+/.test(code) || code.includes("enum ") && code.includes("{") && code.includes("}");
|
|
232
|
+
return hasCommonEnumValues && isEnumDefinition;
|
|
233
|
+
},
|
|
234
|
+
severity: import_core4.Severity.Info,
|
|
235
|
+
reason: "Common enum values (LOW, MEDIUM, HIGH, CRITICAL) are standard patterns used across different domain enums",
|
|
236
|
+
suggestion: "Enum value similarity is expected for severity/priority enums and should not be flagged as duplication"
|
|
237
|
+
},
|
|
183
238
|
// Re-export / Barrel files - Intentional API surface consolidation
|
|
184
239
|
{
|
|
185
240
|
name: "re-export-files",
|
|
@@ -209,6 +264,20 @@ var LOGIC_RULES = [
|
|
|
209
264
|
reason: "Type/interface definitions are intentionally duplicated for module independence",
|
|
210
265
|
suggestion: "Extract to shared types package only if causing maintenance burden"
|
|
211
266
|
},
|
|
267
|
+
// Cross-Package Type Definitions - Different packages may have similar types
|
|
268
|
+
{
|
|
269
|
+
name: "cross-package-types",
|
|
270
|
+
detect: (file, code) => {
|
|
271
|
+
const hasTypeDefinition = code.includes("interface ") || code.includes("type ") || code.includes("enum ");
|
|
272
|
+
const isPackageOrApp = file.includes("/packages/") || file.includes("/apps/") || file.includes("/core/");
|
|
273
|
+
const packageMatch = file.match(/\/(packages|apps|core)\/([^/]+)\//);
|
|
274
|
+
const hasPackageStructure = packageMatch !== null;
|
|
275
|
+
return hasTypeDefinition && isPackageOrApp && hasPackageStructure;
|
|
276
|
+
},
|
|
277
|
+
severity: import_core4.Severity.Info,
|
|
278
|
+
reason: "Types in different packages/modules are often intentionally similar for module independence",
|
|
279
|
+
suggestion: "Cross-package type duplication is acceptable for decoupled module architecture"
|
|
280
|
+
},
|
|
212
281
|
// Utility Functions - Small helpers in dedicated utility files
|
|
213
282
|
{
|
|
214
283
|
name: "utility-functions",
|
|
@@ -291,6 +360,22 @@ var LOGIC_RULES = [
|
|
|
291
360
|
severity: import_core4.Severity.Info,
|
|
292
361
|
reason: "Validation functions are inherently similar and often intentionally duplicated for domain clarity",
|
|
293
362
|
suggestion: "Consider extracting to shared validators only if validation logic becomes complex"
|
|
363
|
+
},
|
|
364
|
+
// Singleton Getter Pattern - Standard singleton initialization pattern
|
|
365
|
+
{
|
|
366
|
+
name: "singleton-getter",
|
|
367
|
+
detect: (file, code) => {
|
|
368
|
+
const hasSingletonGetter = /(?:export\s+)?(?:async\s+)?function\s+get[A-Z][a-zA-Z0-9]*\s*\(/.test(
|
|
369
|
+
code
|
|
370
|
+
) || /(?:export\s+)?const\s+get[A-Z][a-zA-Z0-9]*\s*=\s*(?:async\s+)?\(\)\s*=>/.test(
|
|
371
|
+
code
|
|
372
|
+
);
|
|
373
|
+
const hasSingletonPattern = code.includes("if (!") && code.includes("instance") && code.includes(" = ") || code.includes("if (!_") && code.includes(" = new ") || code.includes("if (") && code.includes(" === null") && code.includes(" = new ");
|
|
374
|
+
return hasSingletonGetter && hasSingletonPattern;
|
|
375
|
+
},
|
|
376
|
+
severity: import_core4.Severity.Info,
|
|
377
|
+
reason: "Singleton getter functions follow standard initialization pattern and are intentionally similar",
|
|
378
|
+
suggestion: "Singleton getters are boilerplate and acceptable duplication for lazy initialization"
|
|
294
379
|
}
|
|
295
380
|
];
|
|
296
381
|
|
|
@@ -2,10 +2,10 @@ import {
|
|
|
2
2
|
analyzePatterns,
|
|
3
3
|
generateSummary,
|
|
4
4
|
getSmartDefaults
|
|
5
|
-
} from "../chunk-
|
|
6
|
-
import "../chunk-
|
|
5
|
+
} from "../chunk-WQX7IHAN.mjs";
|
|
6
|
+
import "../chunk-JWP5TCDM.mjs";
|
|
7
|
+
import "../chunk-KDXWIT6W.mjs";
|
|
7
8
|
import "../chunk-G3GZFYRI.mjs";
|
|
8
|
-
import "../chunk-UKQFCUQA.mjs";
|
|
9
9
|
export {
|
|
10
10
|
analyzePatterns,
|
|
11
11
|
generateSummary,
|
|
@@ -0,0 +1,514 @@
|
|
|
1
|
+
import {
|
|
2
|
+
detectDuplicatePatterns
|
|
3
|
+
} from "./chunk-KZQXBBR3.mjs";
|
|
4
|
+
import {
|
|
5
|
+
calculateSeverity
|
|
6
|
+
} from "./chunk-ATXO4JL7.mjs";
|
|
7
|
+
|
|
8
|
+
// src/grouping.ts
|
|
9
|
+
import { Severity, getSeverityLevel } from "@aiready/core";
|
|
10
|
+
import path from "path";
|
|
11
|
+
/**
 * Groups duplicate records by the unordered pair of files they involve.
 *
 * The two file names are sorted before the "::"-joined key is built, so a
 * record for (a, b) and a record for (b, a) end up in the same group.
 * Line ranges are stored in that same sorted order: `lineRanges[i].file1`
 * always describes the first file named in `filePair`, regardless of which
 * side of the individual record that file was on.
 *
 * @param {Array<object>} duplicates - Records exposing file1/file2,
 *   line1/endLine1, line2/endLine2, similarity, tokenCost, patternType and
 *   severity.
 * @returns {Array<object>} One entry per file pair with occurrences,
 *   totalTokenCost, averageSimilarity (mean over the group), patternTypes
 *   (a Set), lineRanges and the highest severity seen in the group as ranked
 *   by getSeverityLevel.
 */
function groupDuplicatesByFilePair(duplicates) {
  const groups = new Map();
  for (const dup of duplicates) {
    const files = [dup.file1, dup.file2].sort();
    const key = files.join("::");
    let group = groups.get(key);
    if (!group) {
      group = {
        filePair: key,
        severity: dup.severity,
        occurrences: 0,
        totalTokenCost: 0,
        averageSimilarity: 0, // running sum; divided by occurrences below
        patternTypes: new Set(),
        lineRanges: []
      };
      groups.set(key, group);
    }
    group.occurrences++;
    group.totalTokenCost += dup.tokenCost;
    group.averageSimilarity += dup.similarity;
    group.patternTypes.add(dup.patternType);

    // Keep the ranges aligned with the sorted order used for the key;
    // otherwise records reported as (b, a) would attribute b's lines to a.
    const range1 = { start: dup.line1, end: dup.endLine1 };
    const range2 = { start: dup.line2, end: dup.endLine2 };
    const swapped = files[0] !== dup.file1;
    group.lineRanges.push(
      swapped ? { file1: range2, file2: range1 } : { file1: range1, file2: range2 }
    );

    if (getSeverityLevel(dup.severity) > getSeverityLevel(group.severity)) {
      group.severity = dup.severity;
    }
  }
  return Array.from(groups.values()).map((g) => ({
    ...g,
    averageSimilarity: g.averageSimilarity / g.occurrences
  }));
}
|
|
46
|
+
/**
 * Builds refactoring clusters from pairwise duplicate records.
 *
 * Files are treated as nodes of an undirected graph with one edge per
 * duplicate record; every connected component containing at least two files
 * becomes a cluster. Files inside a cluster are listed in breadth-first
 * discovery order.
 *
 * @param {Array<object>} duplicates - Records exposing file1, file2,
 *   tokenCost and similarity.
 * @returns {Array<object>} Clusters with id, name, files, severity,
 *   duplicateCount, totalTokenCost, averageSimilarity, reason and suggestion.
 */
function createRefactorClusters(duplicates) {
  const adjacency = new Map();
  const link = (from, to) => {
    let neighbors = adjacency.get(from);
    if (!neighbors) {
      neighbors = new Set();
      adjacency.set(from, neighbors);
    }
    neighbors.add(to);
  };
  for (const dup of duplicates) {
    link(dup.file1, dup.file2);
    link(dup.file2, dup.file1);
  }

  // Breadth-first search. The component array doubles as the queue (walked
  // with an index) so no O(n) shift() is needed per dequeued node.
  const visited = new Set();
  const components = [];
  for (const start of adjacency.keys()) {
    if (visited.has(start)) continue;
    visited.add(start);
    const component = [start];
    for (let head = 0; head < component.length; head++) {
      for (const neighbor of adjacency.get(component[head])) {
        if (!visited.has(neighbor)) {
          visited.add(neighbor);
          component.push(neighbor);
        }
      }
    }
    components.push(component);
  }

  const clusters = [];
  for (const component of components) {
    // A single-file component can only come from a self-duplicate.
    if (component.length < 2) continue;
    // Set membership keeps this filter linear in the number of duplicates.
    const members = new Set(component);
    const componentDups = duplicates.filter(
      (d) => members.has(d.file1) && members.has(d.file2)
    );
    let totalTokenCost = 0;
    let similaritySum = 0;
    for (const d of componentDups) {
      totalTokenCost += d.tokenCost;
      similaritySum += d.similarity;
    }
    const avgSimilarity = similaritySum / Math.max(1, componentDups.length);
    const name = determineClusterName(component);
    const { severity, reason, suggestion } = calculateSeverity(
      component[0],
      component[1],
      "",
      // Code not available here
      avgSimilarity,
      30
      // Assume substantial if clustered
    );
    clusters.push({
      id: `cluster-${clusters.length}`,
      name,
      files: component,
      severity,
      duplicateCount: componentDups.length,
      totalTokenCost,
      averageSimilarity: avgSimilarity,
      reason,
      suggestion
    });
  }
  return clusters;
}
|
|
108
|
+
/**
 * Picks a display name for a cluster of files.
 *
 * A few path keywords map to fixed names; otherwise the name is derived from
 * the directory that directly contains the first file.
 *
 * @param {string[]} files - File paths belonging to the cluster.
 * @returns {string} The cluster name.
 */
function determineClusterName(files) {
  if (files.length === 0) return "Unknown Cluster";

  // Keyword rules are checked in order; the first keyword found in any path wins.
  const keywordNames = [
    ["blog", "Blog SEO Boilerplate"],
    ["buttons", "Button Component Variants"],
    ["cards", "Card Component Variants"],
    ["login.test", "E2E Test Patterns"]
  ];
  for (const [keyword, label] of keywordNames) {
    if (files.some((f) => f.includes(keyword))) return label;
  }

  // basename(dirname(...)) instead of split(path.sep): on Windows, paths
  // written with "/" would not be split on path.sep ("\\"), whereas the path
  // module accepts both separators there.
  const dirName = path.basename(path.dirname(files[0]));
  if (dirName && dirName !== "." && dirName !== "..") {
    return `${dirName.charAt(0).toUpperCase() + dirName.slice(1)} Domain Group`;
  }
  return "Shared Pattern Group";
}
|
|
122
|
+
/**
 * Keeps only the clusters that are large enough to be worth reporting.
 *
 * @param {Array<object>} clusters - Clusters exposing totalTokenCost and files.
 * @param {number} [minTokenCost=1000] - Inclusive lower bound on totalTokenCost.
 * @param {number} [minFiles=3] - Inclusive lower bound on the number of files.
 * @returns {Array<object>} The clusters meeting both thresholds.
 */
function filterClustersByImpact(clusters, minTokenCost = 1e3, minFiles = 3) {
  const isImpactful = (cluster) => {
    if (!(cluster.totalTokenCost >= minTokenCost)) return false;
    return cluster.files.length >= minFiles;
  };
  return clusters.filter(isImpactful);
}
|
|
127
|
+
/**
 * Heuristically decides whether a snippet is nothing more than a short
 * interface / type / enum declaration.
 *
 * @param {string} code - Source snippet to inspect.
 * @returns {boolean} True when the trimmed snippet starts with one of the
 *   declaration keywords, contains none of the implementation markers and is
 *   at most 200 characters long.
 */
function isPureInterfaceDefinition(code) {
  const snippet = code.trim();

  const declarationPrefixes = [
    "interface ",
    "type ",
    "export interface ",
    "export type ",
    "enum ",
    "export enum "
  ];
  const startsAsDeclaration = declarationPrefixes.some((prefix) => snippet.startsWith(prefix));
  if (!startsAsDeclaration) return false;

  // Any of these substrings suggests executable code rather than a bare shape.
  const implementationMarkers = ["={", "=> {", "function ", "() {", " implements "];
  const hasImplementation = implementationMarkers.some((marker) => snippet.includes(marker));
  if (hasImplementation) return false;

  return snippet.length <= 200;
}
|
|
138
|
+
// Lower-case substrings that areBrandSpecificVariants looks for in snippet
// text to decide that a snippet carries brand/theme styling.
var BRAND_INDICATORS = [
  // theme names
  "cyberpunk", "cyber-blue", "cyber-purple",
  // colour tokens and prefixes
  "slate-900", "slate-400", "zinc-", "indigo-", "neon-",
  // visual effects
  "glassmorphism", "backdrop-blur"
];
|
|
150
|
+
/**
 * Reports whether a file path contains one of the branding path terms.
 * The comparison is case-insensitive.
 *
 * @param {string} filePath - Path to inspect.
 * @returns {boolean} True when the lower-cased path contains any term.
 */
function isBrandSpecificComponent(filePath) {
  const normalizedPath = filePath.toLowerCase();
  return ["landing", "clawmore", "platform", "apps/"].some(
    (term) => normalizedPath.includes(term)
  );
}
|
|
158
|
+
/**
 * Decides whether two snippets are brand-themed variants of each other.
 *
 * Both paths must be flagged by isBrandSpecificComponent, the paths must
 * differ, and both snippets must contain (case-insensitively) at least one
 * entry of BRAND_INDICATORS.
 *
 * @param {string} file1 - Path of the first snippet.
 * @param {string} file2 - Path of the second snippet.
 * @param {string} code1 - Text of the first snippet.
 * @param {string} code2 - Text of the second snippet.
 * @returns {boolean} True only when all of the conditions above hold.
 */
function areBrandSpecificVariants(file1, file2, code1, code2) {
  const firstPathIsBrand = isBrandSpecificComponent(file1);
  const secondPathIsBrand = isBrandSpecificComponent(file2);
  if (!firstPathIsBrand || !secondPathIsBrand || file1 === file2) {
    return false;
  }

  const mentionsBrand = (snippet) => {
    const haystack = snippet.toLowerCase();
    return BRAND_INDICATORS.some((indicator) => haystack.includes(indicator));
  };
  const firstMentionsBrand = mentionsBrand(code1);
  const secondMentionsBrand = mentionsBrand(code2);
  return firstMentionsBrand && secondMentionsBrand;
}
|
|
174
|
+
/**
 * Downgrades intentional duplicates to informational severity.
 *
 * Despite the name, no record is dropped: the returned array holds every
 * input record. Records whose two files differ are mutated in place when
 * they are brand-specific variants or pure interface/type definitions; the
 * interface check runs second, so its suggestion wins when both apply.
 *
 * @param {Array<object>} duplicates - Records exposing file1, file2, code1,
 *   code2, severity and suggestion.
 * @returns {Array<object>} A new array containing the same record objects.
 */
function filterBrandSpecificVariants(duplicates) {
  const markIntentional = (record, suggestion) => {
    record.severity = Severity.Info;
    record.suggestion = suggestion;
  };

  const annotate = (record) => {
    // Self-duplicates are passed through untouched.
    if (record.file1 === record.file2) return;

    if (areBrandSpecificVariants(record.file1, record.file2, record.code1, record.code2)) {
      markIntentional(record, "Brand-specific themed component variant (intentional)");
    }
    if (isPureInterfaceDefinition(record.code1) && isPureInterfaceDefinition(record.code2)) {
      markIntentional(record, "Pure interface/type definition - intentional for module independence");
    }
  };

  return duplicates.filter((record) => {
    annotate(record);
    return true;
  });
}
|
|
195
|
+
|
|
196
|
+
// src/config.ts
|
|
197
|
+
import { scanFiles } from "@aiready/core";
|
|
198
|
+
/**
 * Derives detection settings from the size of the repository.
 *
 * The matching files are counted via scanFiles and roughly five code blocks
 * per file are assumed; thresholds are then scaled from that estimate. Any
 * key of the computed defaults that the caller supplied with a non-undefined
 * value overrides the computed one. Keys outside the defaults set are not
 * copied into the result.
 *
 * @param {string} directory - Root directory to scan.
 * @param {object} [userOptions] - Caller options; may be omitted or null.
 *   `useSmartDefaults: false` skips the scan and returns fixed defaults.
 *   `include` / `exclude` are forwarded to scanFiles.
 * @returns {Promise<object>} The resolved settings object.
 */
async function getSmartDefaults(directory, userOptions) {
  // Tolerate a missing options argument instead of throwing on the first
  // property read.
  const opts = userOptions ?? {};

  if (opts.useSmartDefaults === false) {
    return {
      rootDir: directory,
      minSimilarity: 0.6,
      minLines: 8,
      batchSize: 100,
      approx: true,
      minSharedTokens: 12,
      maxCandidatesPerBlock: 5,
      streamResults: false,
      severity: "all",
      includeTests: false
    };
  }

  const scanOptions = {
    rootDir: directory,
    include: opts.include || ["**/*.{ts,tsx,js,jsx,py,java}"],
    exclude: opts.exclude
  };
  const files = await scanFiles(scanOptions);
  const fileCount = files.length;
  const estimatedBlocks = fileCount * 5;

  // Thresholds rise with repository size, each clamped to a fixed range.
  const minLines = Math.max(
    6,
    Math.min(20, 6 + Math.floor(estimatedBlocks / 1e3) * 2)
  );
  const minSimilarity = Math.min(0.75, 0.45 + estimatedBlocks / 1e4 * 0.3);
  const batchSize = estimatedBlocks > 1e3 ? 200 : 100;
  const severity = estimatedBlocks > 3e3 ? "high" : "all";
  // For an empty repository the division yields Infinity, which the
  // Math.min clamp turns into 100.
  const maxCandidatesPerBlock = Math.max(
    5,
    Math.min(100, Math.floor(1e6 / estimatedBlocks))
  );

  const defaults = {
    rootDir: directory,
    minSimilarity,
    minLines,
    batchSize,
    approx: true,
    minSharedTokens: 10,
    maxCandidatesPerBlock,
    streamResults: false,
    severity,
    includeTests: false
  };

  const result = { ...defaults };
  for (const key of Object.keys(defaults)) {
    if (key in opts && opts[key] !== void 0) {
      result[key] = opts[key];
    }
  }
  return result;
}
|
|
252
|
+
/**
 * Prints the effective configuration to the console, one setting per line.
 * Nothing is printed when `config.suppressToolConfig` is truthy.
 *
 * @param {object} config - Resolved configuration to describe.
 * @param {number} estimatedBlocks - Estimated number of code blocks, shown
 *   as the repository size.
 * @returns {void}
 */
function logConfiguration(config, estimatedBlocks) {
  if (config.suppressToolConfig) return;

  const approxLabel = config.approx ? "enabled" : "disabled";
  const lines = [
    "\u{1F4CB} Configuration:",
    ` Repository size: ~${estimatedBlocks} code blocks`,
    ` Similarity threshold: ${config.minSimilarity}`,
    ` Minimum lines: ${config.minLines}`,
    ` Approximate mode: ${approxLabel}`,
    ` Max candidates per block: ${config.maxCandidatesPerBlock}`,
    ` Min shared tokens: ${config.minSharedTokens}`,
    ` Severity filter: ${config.severity}`,
    ` Include tests: ${config.includeTests}`
  ];

  // Optional settings are listed only when they are actually in effect.
  if (config.excludePatterns?.length > 0) {
    lines.push(` Exclude patterns: ${config.excludePatterns.length} active`);
  }
  if (config.confidenceThreshold > 0) {
    lines.push(` Confidence threshold: ${config.confidenceThreshold}`);
  }
  if (config.ignoreWhitelist?.length > 0) {
    lines.push(` Ignore whitelist: ${config.ignoreWhitelist.length} entries`);
  }
  lines.push("");

  for (const line of lines) {
    console.log(line);
  }
}
|
|
276
|
+
|
|
277
|
+
// src/summary.ts
|
|
278
|
+
import { Severity as Severity2 } from "@aiready/core";
|
|
279
|
+
/**
 * Builds the refactoring hint shown next to a duplicate-pattern issue.
 *
 * @param {string} patternType - Detected pattern category. Categories
 *   without a dedicated message use the generic "unknown" message.
 * @param {number} similarity - Similarity ratio; values above 0.9 and above
 *   0.95 append an urgency suffix.
 * @returns {string} The base message for the category plus the urgency
 *   suffix, if any.
 */
function getRefactoringSuggestion(patternType, similarity) {
  const baseMessages = {
    "api-handler": "Extract common middleware or create a base handler class",
    validator: "Consolidate validation logic into shared schema validators (Zod/Yup)",
    utility: "Move to a shared utilities file and reuse across modules",
    "class-method": "Consider inheritance or composition to share behavior",
    component: "Extract shared logic into a custom hook or HOC",
    function: "Extract into a shared helper function",
    unknown: "Extract common logic into a reusable module"
  };

  // Own-property check: an unrecognised category (or an inherited key such
  // as "constructor") must not leak "undefined" or a function into the text.
  const isKnownType = Object.prototype.hasOwnProperty.call(baseMessages, patternType);
  const baseMessage = isKnownType ? baseMessages[patternType] : baseMessages.unknown;

  let urgency = "";
  if (similarity > 0.95) {
    urgency = " (CRITICAL: Nearly identical code)";
  } else if (similarity > 0.9) {
    urgency = " (HIGH: Very similar, refactor soon)";
  }
  return baseMessage + urgency;
}
|
|
292
|
+
/**
 * Aggregates per-file analysis results into a single summary.
 *
 * Pattern type, similarity, token cost and the other file's path are parsed
 * back out of each issue message, which is expected to look like
 * "<type> pattern <N>% similar to <file> (<M> tokens wasted)". Parts that
 * cannot be parsed fall back to "unknown" / 0.
 *
 * @param {Array<object>} results - Per-file results exposing `issues` and
 *   `metrics.tokenCost`. Any non-array value yields an empty summary.
 * @returns {{totalPatterns: number, totalTokenCost: number,
 *   patternsByType: object, topDuplicates: Array<object>}} The summary;
 *   topDuplicates describes the first ten issues in input order.
 */
function generateSummary(results) {
  const createEmptyCounts = () => ({
    "api-handler": 0,
    validator: 0,
    utility: 0,
    "class-method": 0,
    component: 0,
    function: 0,
    unknown: 0
  });

  if (!Array.isArray(results)) {
    return {
      totalPatterns: 0,
      totalTokenCost: 0,
      patternsByType: createEmptyCounts(),
      topDuplicates: []
    };
  }

  // "\S+" already covers hyphenated names; the former "(?:-\S+)*" suffix only
  // added exponential backtracking on non-matching hyphen-heavy messages.
  const typePattern = /^(\S+) pattern/;
  const similarityPattern = /(\d+)% similar/;
  const tokenPattern = /\((\d+) tokens/;
  // Greedy capture anchored on the token count, so a path that itself
  // contains " (" is not cut short.
  const filePattern = /similar to (.+) \(\d+ tokens/;
  const messageOf = (issue) => typeof issue.message === "string" ? issue.message : "";

  const allIssues = results.flatMap((r) => r.issues || []);
  const totalTokenCost = results.reduce(
    (sum, r) => sum + (r.metrics?.tokenCost || 0),
    0
  );

  const patternsByType = createEmptyCounts();
  for (const issue of allIssues) {
    const match = messageOf(issue).match(typePattern);
    if (!match) continue;
    const type = match[1];
    // Own-property check keeps inherited keys (e.g. "constructor") from
    // being used as the starting count.
    const previous = Object.prototype.hasOwnProperty.call(patternsByType, type) ? patternsByType[type] : 0;
    patternsByType[type] = previous + 1;
  }

  const topDuplicates = allIssues.slice(0, 10).map((issue) => {
    const message = messageOf(issue);
    const similarityMatch = message.match(similarityPattern);
    const tokenMatch = message.match(tokenPattern);
    const typeMatch = message.match(typePattern);
    const fileMatch = message.match(filePattern);
    return {
      files: [
        {
          path: issue.location.file,
          startLine: issue.location.line,
          endLine: 0
        },
        {
          path: fileMatch?.[1] || "unknown",
          startLine: 0,
          endLine: 0
        }
      ],
      similarity: similarityMatch ? Number.parseInt(similarityMatch[1], 10) / 100 : 0,
      patternType: typeMatch?.[1] || "unknown",
      tokenCost: tokenMatch ? Number.parseInt(tokenMatch[1], 10) : 0
    };
  });

  return {
    totalPatterns: allIssues.length,
    totalTokenCost,
    patternsByType,
    topDuplicates
  };
}
|
|
360
|
+
/**
 * Restricts a list of issues to the requested severity band.
 *
 * @param {Array<object>} issues - Issues exposing a `severity` field.
 * @param {string} severity - "all" returns the input array itself;
 *   "critical", "high" and "medium" each admit that level and everything
 *   more severe. Any other value is treated like "medium".
 * @returns {Array<object>} The issues whose severity is admitted.
 */
function filterBySeverity(issues, severity) {
  if (severity === "all") return issues;

  const { Critical, Major, Minor } = Severity2;
  const mediumAndAbove = [Critical, Major, Minor];
  const allowedByFilter = {
    critical: [Critical],
    high: [Critical, Major],
    medium: [Critical, Major, Minor]
  };
  const allowed = allowedByFilter[severity] || mediumAndAbove;

  return issues.filter(({ severity: level }) => allowed.includes(level));
}
|
|
374
|
+
/**
 * Maps a severity value to its upper-case display label.
 *
 * @param {*} severity - One of the Severity members.
 * @returns {string} "CRITICAL", "HIGH", "MEDIUM" or "LOW"; "UNKNOWN" for any
 *   other value.
 */
function getSeverityLabel(severity) {
  if (severity === Severity2.Critical) return "CRITICAL";
  if (severity === Severity2.Major) return "HIGH";
  if (severity === Severity2.Minor) return "MEDIUM";
  if (severity === Severity2.Info) return "LOW";
  return "UNKNOWN";
}
|
|
388
|
+
/**
 * Derives a severity from a similarity ratio alone.
 *
 * @param {number} similarity - Similarity ratio.
 * @returns {*} Critical above 0.95, Major above 0.9, otherwise Minor.
 */
function calculateSeverity2(similarity) {
  return similarity > 0.95
    ? Severity2.Critical
    : similarity > 0.9
      ? Severity2.Major
      : Severity2.Minor;
}
|
|
393
|
+
|
|
394
|
+
// src/analyzer.ts
|
|
395
|
+
import { scanFiles as scanFiles2, readFileContent, Severity as Severity3, IssueType } from "@aiready/core";
|
|
396
|
+
/**
 * Runs the duplicate-pattern analysis for a directory.
 *
 * Steps: resolve options (smart defaults overlaid with the caller's
 * explicitly set options), scan for files, read them in batches, detect
 * duplicates, downgrade intentional duplicates, build per-file issue lists
 * and optionally group by file pair and build refactor clusters.
 *
 * @param {object} options - Analysis options. `rootDir` defaults to ".".
 *   Options whose value is undefined are treated as not provided. Keys that
 *   are not detection settings are forwarded to the file scanner.
 * @returns {Promise<{results: Array<object>, duplicates: Array<object>,
 *   files: string[], groups: (Array<object>|undefined),
 *   clusters: (Array<object>|undefined), config: object}>} `groups` and
 *   `clusters` are undefined when the respective option is disabled.
 */
async function analyzePatterns(options) {
  const smartDefaults = await getSmartDefaults(options.rootDir || ".", options);
  // Drop undefined-valued entries before merging: spreading them as-is would
  // overwrite the smart defaults with undefined and silently fall back to
  // the hard-coded destructuring defaults below.
  const explicitOptions = Object.fromEntries(
    Object.entries(options).filter(([, value]) => value !== void 0)
  );
  const finalOptions = { ...smartDefaults, ...explicitOptions };
  const {
    minSimilarity = 0.4,
    minLines = 5,
    batchSize = 100,
    approx = true,
    minSharedTokens = 8,
    maxCandidatesPerBlock = 100,
    streamResults = false,
    severity = "all",
    groupByFilePair = true,
    createClusters = true,
    minClusterTokenCost = 1e3,
    minClusterFiles = 3,
    excludePatterns = [],
    confidenceThreshold = 0,
    ignoreWhitelist = [],
    ...scanOptions
  } = finalOptions;

  const files = await scanFiles2(scanOptions);
  const estimatedBlocks = files.length * 3;
  logConfiguration(finalOptions, estimatedBlocks);

  // Read files in fixed-size batches so that at most READ_BATCH_SIZE reads
  // are in flight at once.
  const READ_BATCH_SIZE = 50;
  const fileContents = [];
  for (let i = 0; i < files.length; i += READ_BATCH_SIZE) {
    const batch = files.slice(i, i + READ_BATCH_SIZE);
    const batchContents = await Promise.all(
      batch.map(async (file) => ({
        file,
        content: await readFileContent(file)
      }))
    );
    fileContents.push(...batchContents);
  }

  const duplicates = await detectDuplicatePatterns(fileContents, {
    minSimilarity,
    minLines,
    batchSize,
    approx,
    minSharedTokens,
    maxCandidatesPerBlock,
    streamResults,
    excludePatterns,
    confidenceThreshold,
    ignoreWhitelist,
    onProgress: options.onProgress
  });
  // Called for its in-place severity/suggestion updates; the returned array
  // is not needed.
  filterBrandSpecificVariants(duplicates);

  // Index duplicates by file once instead of re-scanning the whole list for
  // every file. Detection order is preserved within each bucket.
  const duplicatesByFile = new Map();
  const indexUnder = (file, dup) => {
    const bucket = duplicatesByFile.get(file);
    if (bucket) {
      bucket.push(dup);
    } else {
      duplicatesByFile.set(file, [dup]);
    }
  };
  for (const dup of duplicates) {
    indexUnder(dup.file1, dup);
    // A self-duplicate is listed once for its file.
    if (dup.file2 !== dup.file1) indexUnder(dup.file2, dup);
  }

  const results = [];
  for (const file of files) {
    const fileDuplicates = duplicatesByFile.get(file) ?? [];
    const issues = fileDuplicates.map((dup) => {
      const isFirstSide = dup.file1 === file;
      const otherFile = isFirstSide ? dup.file2 : dup.file1;
      // Duplicates already downgraded to info keep that level; all others
      // are graded by similarity alone.
      const isInfo = dup.severity === "info" || dup.severity === "Info";
      const severityLevel = isInfo ? Severity3.Info : calculateSeverity2(dup.similarity);
      return {
        type: IssueType.DuplicatePattern,
        severity: severityLevel,
        message: `${dup.patternType} pattern ${Math.round(dup.similarity * 100)}% similar to ${otherFile} (${dup.tokenCost} tokens wasted)`,
        location: {
          file,
          line: isFirstSide ? dup.line1 : dup.line2
        },
        suggestion: getRefactoringSuggestion(dup.patternType, dup.similarity)
      };
    });
    const filteredIssues = filterBySeverity(issues, severity || "all");
    const totalTokenCost = fileDuplicates.reduce(
      (sum, dup) => sum + dup.tokenCost,
      0
    );
    results.push({
      fileName: file,
      issues: filteredIssues,
      metrics: {
        tokenCost: totalTokenCost,
        consistencyScore: Math.max(0, 1 - fileDuplicates.length * 0.1)
      }
    });
  }

  let groups;
  let clusters;
  if (groupByFilePair) {
    groups = groupDuplicatesByFilePair(duplicates);
  }
  if (createClusters) {
    const allClusters = createRefactorClusters(duplicates);
    clusters = filterClustersByImpact(
      allClusters,
      minClusterTokenCost,
      minClusterFiles
    );
  }
  return { results, duplicates, files, groups, clusters, config: finalOptions };
}
|
|
499
|
+
|
|
500
|
+
export {
|
|
501
|
+
groupDuplicatesByFilePair,
|
|
502
|
+
createRefactorClusters,
|
|
503
|
+
filterClustersByImpact,
|
|
504
|
+
areBrandSpecificVariants,
|
|
505
|
+
filterBrandSpecificVariants,
|
|
506
|
+
getSmartDefaults,
|
|
507
|
+
logConfiguration,
|
|
508
|
+
generateSummary,
|
|
509
|
+
filterBySeverity,
|
|
510
|
+
getSeverityLabel,
|
|
511
|
+
calculateSeverity2 as calculateSeverity,
|
|
512
|
+
Severity3 as Severity,
|
|
513
|
+
analyzePatterns
|
|
514
|
+
};
|