@aiready/pattern-detect 0.12.0 → 0.12.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-2R7HOR5H.mjs +777 -0
- package/dist/chunk-3VRQYFW3.mjs +782 -0
- package/dist/chunk-5LYDB7DY.mjs +771 -0
- package/dist/chunk-7EJGNGXM.mjs +771 -0
- package/dist/chunk-A76JUWER.mjs +786 -0
- package/dist/chunk-CCHM2VLK.mjs +1051 -0
- package/dist/chunk-DQSLTL7J.mjs +788 -0
- package/dist/chunk-EZT3NZGB.mjs +1057 -0
- package/dist/chunk-H5FB2USZ.mjs +762 -0
- package/dist/chunk-INJ4SBTV.mjs +754 -0
- package/dist/chunk-JAFZCZAP.mjs +776 -0
- package/dist/chunk-JWR3AHKO.mjs +788 -0
- package/dist/chunk-LUA5FXSZ.mjs +771 -0
- package/dist/chunk-QX2BQJEO.mjs +1058 -0
- package/dist/chunk-RMGDSNLE.mjs +770 -0
- package/dist/chunk-TCG2G32F.mjs +911 -0
- package/dist/chunk-TGBZP7SB.mjs +773 -0
- package/dist/chunk-XCWY2DQY.mjs +788 -0
- package/dist/chunk-XUUVS54V.mjs +776 -0
- package/dist/chunk-YJYDBFT3.mjs +780 -0
- package/dist/cli.js +279 -698
- package/dist/cli.mjs +35 -29
- package/dist/index.d.mts +41 -54
- package/dist/index.d.ts +41 -54
- package/dist/index.js +248 -666
- package/dist/index.mjs +3 -1
- package/package.json +2 -2
package/dist/cli.js
CHANGED
|
@@ -6,13 +6,6 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
|
6
6
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
7
7
|
var __getProtoOf = Object.getPrototypeOf;
|
|
8
8
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
9
|
-
var __esm = (fn, res) => function __init() {
|
|
10
|
-
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
|
|
11
|
-
};
|
|
12
|
-
var __export = (target, all) => {
|
|
13
|
-
for (var name in all)
|
|
14
|
-
__defProp(target, name, { get: all[name], enumerable: true });
|
|
15
|
-
};
|
|
16
9
|
var __copyProps = (to, from, except, desc) => {
|
|
17
10
|
if (from && typeof from === "object" || typeof from === "function") {
|
|
18
11
|
for (let key of __getOwnPropNames(from))
|
|
@@ -30,164 +23,17 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
30
23
|
mod
|
|
31
24
|
));
|
|
32
25
|
|
|
33
|
-
// src/extractors/python-extractor.ts
|
|
34
|
-
var python_extractor_exports = {};
|
|
35
|
-
__export(python_extractor_exports, {
|
|
36
|
-
calculatePythonSimilarity: () => calculatePythonSimilarity,
|
|
37
|
-
detectPythonAntiPatterns: () => detectPythonAntiPatterns,
|
|
38
|
-
extractPythonPatterns: () => extractPythonPatterns
|
|
39
|
-
});
|
|
40
|
-
async function extractPythonPatterns(files) {
|
|
41
|
-
const patterns = [];
|
|
42
|
-
const parser = (0, import_core.getParser)("dummy.py");
|
|
43
|
-
if (!parser) {
|
|
44
|
-
console.warn("Python parser not available");
|
|
45
|
-
return patterns;
|
|
46
|
-
}
|
|
47
|
-
const pythonFiles = files.filter((f) => f.toLowerCase().endsWith(".py"));
|
|
48
|
-
for (const file of pythonFiles) {
|
|
49
|
-
try {
|
|
50
|
-
const fs = await import("fs");
|
|
51
|
-
const code = await fs.promises.readFile(file, "utf-8");
|
|
52
|
-
const result = parser.parse(code, file);
|
|
53
|
-
for (const exp of result.exports) {
|
|
54
|
-
if (exp.type === "function") {
|
|
55
|
-
patterns.push({
|
|
56
|
-
file,
|
|
57
|
-
name: exp.name,
|
|
58
|
-
type: "function",
|
|
59
|
-
startLine: exp.loc?.start.line || 0,
|
|
60
|
-
endLine: exp.loc?.end.line || 0,
|
|
61
|
-
imports: exp.imports || [],
|
|
62
|
-
dependencies: exp.dependencies || [],
|
|
63
|
-
signature: generatePythonSignature(exp),
|
|
64
|
-
language: "python"
|
|
65
|
-
});
|
|
66
|
-
} else if (exp.type === "class") {
|
|
67
|
-
patterns.push({
|
|
68
|
-
file,
|
|
69
|
-
name: exp.name,
|
|
70
|
-
type: "class",
|
|
71
|
-
startLine: exp.loc?.start.line || 0,
|
|
72
|
-
endLine: exp.loc?.end.line || 0,
|
|
73
|
-
imports: exp.imports || [],
|
|
74
|
-
dependencies: exp.dependencies || [],
|
|
75
|
-
signature: `class ${exp.name}`,
|
|
76
|
-
language: "python"
|
|
77
|
-
});
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
} catch (error) {
|
|
81
|
-
console.warn(`Failed to extract patterns from ${file}:`, error);
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
return patterns;
|
|
85
|
-
}
|
|
86
|
-
function generatePythonSignature(exp) {
|
|
87
|
-
const params = exp.parameters?.join(", ") || "";
|
|
88
|
-
return `def ${exp.name}(${params})`;
|
|
89
|
-
}
|
|
90
|
-
function calculatePythonSimilarity(pattern1, pattern2) {
|
|
91
|
-
let similarity = 0;
|
|
92
|
-
let factors = 0;
|
|
93
|
-
const nameSimilarity = calculateNameSimilarity(pattern1.name, pattern2.name);
|
|
94
|
-
similarity += nameSimilarity * 0.3;
|
|
95
|
-
factors += 0.3;
|
|
96
|
-
const importSimilarity = calculateImportSimilarity(
|
|
97
|
-
pattern1.imports || [],
|
|
98
|
-
pattern2.imports || []
|
|
99
|
-
);
|
|
100
|
-
similarity += importSimilarity * 0.4;
|
|
101
|
-
factors += 0.4;
|
|
102
|
-
if (pattern1.type === pattern2.type) {
|
|
103
|
-
similarity += 0.1;
|
|
104
|
-
}
|
|
105
|
-
factors += 0.1;
|
|
106
|
-
const sigSimilarity = calculateSignatureSimilarity(
|
|
107
|
-
pattern1.signature,
|
|
108
|
-
pattern2.signature
|
|
109
|
-
);
|
|
110
|
-
similarity += sigSimilarity * 0.2;
|
|
111
|
-
factors += 0.2;
|
|
112
|
-
return factors > 0 ? similarity / factors : 0;
|
|
113
|
-
}
|
|
114
|
-
function calculateNameSimilarity(name1, name2) {
|
|
115
|
-
if (name1 === name2) return 1;
|
|
116
|
-
const clean1 = name1.replace(
|
|
117
|
-
/^(get|set|is|has|create|delete|update|fetch)_?/,
|
|
118
|
-
""
|
|
119
|
-
);
|
|
120
|
-
const clean2 = name2.replace(
|
|
121
|
-
/^(get|set|is|has|create|delete|update|fetch)_?/,
|
|
122
|
-
""
|
|
123
|
-
);
|
|
124
|
-
if (clean1 === clean2) return 0.9;
|
|
125
|
-
if (clean1.includes(clean2) || clean2.includes(clean1)) {
|
|
126
|
-
return 0.7;
|
|
127
|
-
}
|
|
128
|
-
const set1 = new Set(clean1.split("_"));
|
|
129
|
-
const set2 = new Set(clean2.split("_"));
|
|
130
|
-
const intersection = new Set([...set1].filter((x) => set2.has(x)));
|
|
131
|
-
const union = /* @__PURE__ */ new Set([...set1, ...set2]);
|
|
132
|
-
return intersection.size / union.size;
|
|
133
|
-
}
|
|
134
|
-
function calculateImportSimilarity(imports1, imports2) {
|
|
135
|
-
if (imports1.length === 0 && imports2.length === 0) return 1;
|
|
136
|
-
if (imports1.length === 0 || imports2.length === 0) return 0;
|
|
137
|
-
const set1 = new Set(imports1);
|
|
138
|
-
const set2 = new Set(imports2);
|
|
139
|
-
const intersection = new Set([...set1].filter((x) => set2.has(x)));
|
|
140
|
-
const union = /* @__PURE__ */ new Set([...set1, ...set2]);
|
|
141
|
-
return intersection.size / union.size;
|
|
142
|
-
}
|
|
143
|
-
function calculateSignatureSimilarity(sig1, sig2) {
|
|
144
|
-
if (sig1 === sig2) return 1;
|
|
145
|
-
const params1 = (sig1.match(/\([^)]*\)/)?.[0] || "").split(",").filter(Boolean).length;
|
|
146
|
-
const params2 = (sig2.match(/\([^)]*\)/)?.[0] || "").split(",").filter(Boolean).length;
|
|
147
|
-
if (params1 === params2) return 0.8;
|
|
148
|
-
if (Math.abs(params1 - params2) === 1) return 0.5;
|
|
149
|
-
return 0;
|
|
150
|
-
}
|
|
151
|
-
function detectPythonAntiPatterns(patterns) {
|
|
152
|
-
const antiPatterns = [];
|
|
153
|
-
const nameGroups = /* @__PURE__ */ new Map();
|
|
154
|
-
for (const pattern of patterns) {
|
|
155
|
-
const baseName = pattern.name.replace(
|
|
156
|
-
/^(get|set|create|delete|update)_/,
|
|
157
|
-
""
|
|
158
|
-
);
|
|
159
|
-
if (!nameGroups.has(baseName)) {
|
|
160
|
-
nameGroups.set(baseName, []);
|
|
161
|
-
}
|
|
162
|
-
nameGroups.get(baseName).push(pattern);
|
|
163
|
-
}
|
|
164
|
-
for (const [baseName, group] of nameGroups) {
|
|
165
|
-
if (group.length >= 3) {
|
|
166
|
-
antiPatterns.push(
|
|
167
|
-
`Found ${group.length} functions with similar names (${baseName}): Consider consolidating`
|
|
168
|
-
);
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
return antiPatterns;
|
|
172
|
-
}
|
|
173
|
-
var import_core;
|
|
174
|
-
var init_python_extractor = __esm({
|
|
175
|
-
"src/extractors/python-extractor.ts"() {
|
|
176
|
-
"use strict";
|
|
177
|
-
import_core = require("@aiready/core");
|
|
178
|
-
}
|
|
179
|
-
});
|
|
180
|
-
|
|
181
26
|
// src/cli.ts
|
|
182
27
|
var import_commander = require("commander");
|
|
183
28
|
|
|
184
29
|
// src/index.ts
|
|
185
|
-
var
|
|
30
|
+
var import_core5 = require("@aiready/core");
|
|
186
31
|
|
|
187
32
|
// src/detector.ts
|
|
188
33
|
var import_core2 = require("@aiready/core");
|
|
189
34
|
|
|
190
35
|
// src/context-rules.ts
|
|
36
|
+
var import_core = require("@aiready/core");
|
|
191
37
|
var CONTEXT_RULES = [
|
|
192
38
|
// Test Fixtures - Intentional duplication for test isolation
|
|
193
39
|
{
|
|
@@ -197,7 +43,7 @@ var CONTEXT_RULES = [
|
|
|
197
43
|
const hasTestFixtures = code.includes("beforeAll") || code.includes("afterAll") || code.includes("beforeEach") || code.includes("afterEach") || code.includes("setUp") || code.includes("tearDown");
|
|
198
44
|
return isTestFile && hasTestFixtures;
|
|
199
45
|
},
|
|
200
|
-
severity:
|
|
46
|
+
severity: import_core.Severity.Info,
|
|
201
47
|
reason: "Test fixture duplication is intentional for test isolation",
|
|
202
48
|
suggestion: "Consider if shared test setup would improve maintainability without coupling tests"
|
|
203
49
|
},
|
|
@@ -209,7 +55,7 @@ var CONTEXT_RULES = [
|
|
|
209
55
|
const hasTemplateContent = (code.includes("return") || code.includes("export")) && (code.includes("html") || code.includes("subject") || code.includes("body"));
|
|
210
56
|
return isTemplate && hasTemplateContent;
|
|
211
57
|
},
|
|
212
|
-
severity:
|
|
58
|
+
severity: import_core.Severity.Minor,
|
|
213
59
|
reason: "Template duplication may be intentional for maintainability and branding consistency",
|
|
214
60
|
suggestion: "Extract shared structure only if templates become hard to maintain"
|
|
215
61
|
},
|
|
@@ -221,7 +67,7 @@ var CONTEXT_RULES = [
|
|
|
221
67
|
const hasPageObjectPatterns = code.includes("page.") || code.includes("await page") || code.includes("locator") || code.includes("getBy") || code.includes("selector") || code.includes("click(") || code.includes("fill(");
|
|
222
68
|
return isE2ETest && hasPageObjectPatterns;
|
|
223
69
|
},
|
|
224
|
-
severity:
|
|
70
|
+
severity: import_core.Severity.Minor,
|
|
225
71
|
reason: "E2E test duplication ensures test independence and reduces coupling",
|
|
226
72
|
suggestion: "Consider page object pattern only if duplication causes maintenance issues"
|
|
227
73
|
},
|
|
@@ -231,7 +77,7 @@ var CONTEXT_RULES = [
|
|
|
231
77
|
detect: (file) => {
|
|
232
78
|
return file.endsWith(".config.ts") || file.endsWith(".config.js") || file.includes("jest.config") || file.includes("vite.config") || file.includes("webpack.config") || file.includes("rollup.config") || file.includes("tsconfig");
|
|
233
79
|
},
|
|
234
|
-
severity:
|
|
80
|
+
severity: import_core.Severity.Minor,
|
|
235
81
|
reason: "Configuration files often have similar structure by design",
|
|
236
82
|
suggestion: "Consider shared config base only if configurations become hard to maintain"
|
|
237
83
|
},
|
|
@@ -243,7 +89,7 @@ var CONTEXT_RULES = [
|
|
|
243
89
|
const hasTypeDefinitions = code.includes("interface ") || code.includes("type ") || code.includes("enum ");
|
|
244
90
|
return isTypeFile && hasTypeDefinitions;
|
|
245
91
|
},
|
|
246
|
-
severity:
|
|
92
|
+
severity: import_core.Severity.Info,
|
|
247
93
|
reason: "Type duplication may be intentional for module independence and type safety",
|
|
248
94
|
suggestion: "Extract to shared types package only if causing maintenance burden"
|
|
249
95
|
},
|
|
@@ -253,7 +99,7 @@ var CONTEXT_RULES = [
|
|
|
253
99
|
detect: (file) => {
|
|
254
100
|
return file.includes("/migrations/") || file.includes("/migrate/") || file.includes(".migration.");
|
|
255
101
|
},
|
|
256
|
-
severity:
|
|
102
|
+
severity: import_core.Severity.Info,
|
|
257
103
|
reason: "Migration scripts are typically one-off and intentionally similar",
|
|
258
104
|
suggestion: "Duplication is acceptable for migration scripts"
|
|
259
105
|
},
|
|
@@ -265,7 +111,7 @@ var CONTEXT_RULES = [
|
|
|
265
111
|
const hasMockData = code.includes("mock") || code.includes("Mock") || code.includes("fixture") || code.includes("stub") || code.includes("export const");
|
|
266
112
|
return isMockFile && hasMockData;
|
|
267
113
|
},
|
|
268
|
-
severity:
|
|
114
|
+
severity: import_core.Severity.Info,
|
|
269
115
|
reason: "Mock data duplication is expected for comprehensive test coverage",
|
|
270
116
|
suggestion: "Consider shared factories only for complex mock generation"
|
|
271
117
|
}
|
|
@@ -283,38 +129,43 @@ function calculateSeverity(file1, file2, code, similarity, linesOfCode) {
|
|
|
283
129
|
}
|
|
284
130
|
if (similarity >= 0.95 && linesOfCode >= 30) {
|
|
285
131
|
return {
|
|
286
|
-
severity:
|
|
132
|
+
severity: import_core.Severity.Critical,
|
|
287
133
|
reason: "Large nearly-identical code blocks waste tokens and create maintenance burden",
|
|
288
134
|
suggestion: "Extract to shared utility module immediately"
|
|
289
135
|
};
|
|
290
136
|
} else if (similarity >= 0.95 && linesOfCode >= 15) {
|
|
291
137
|
return {
|
|
292
|
-
severity:
|
|
138
|
+
severity: import_core.Severity.Major,
|
|
293
139
|
reason: "Nearly identical code should be consolidated",
|
|
294
140
|
suggestion: "Move to shared utility file"
|
|
295
141
|
};
|
|
296
142
|
} else if (similarity >= 0.85) {
|
|
297
143
|
return {
|
|
298
|
-
severity:
|
|
144
|
+
severity: import_core.Severity.Major,
|
|
299
145
|
reason: "High similarity indicates significant duplication",
|
|
300
146
|
suggestion: "Extract common logic to shared function"
|
|
301
147
|
};
|
|
302
148
|
} else if (similarity >= 0.7) {
|
|
303
149
|
return {
|
|
304
|
-
severity:
|
|
150
|
+
severity: import_core.Severity.Minor,
|
|
305
151
|
reason: "Moderate similarity detected",
|
|
306
152
|
suggestion: "Consider extracting shared patterns if code evolves together"
|
|
307
153
|
};
|
|
308
154
|
} else {
|
|
309
155
|
return {
|
|
310
|
-
severity:
|
|
156
|
+
severity: import_core.Severity.Minor,
|
|
311
157
|
reason: "Minor similarity detected",
|
|
312
158
|
suggestion: "Monitor but refactoring may not be worthwhile"
|
|
313
159
|
};
|
|
314
160
|
}
|
|
315
161
|
}
|
|
316
162
|
function filterBySeverity(duplicates, minSeverity) {
|
|
317
|
-
const severityOrder = [
|
|
163
|
+
const severityOrder = [
|
|
164
|
+
import_core.Severity.Info,
|
|
165
|
+
import_core.Severity.Minor,
|
|
166
|
+
import_core.Severity.Major,
|
|
167
|
+
import_core.Severity.Critical
|
|
168
|
+
];
|
|
318
169
|
const minIndex = severityOrder.indexOf(minSeverity);
|
|
319
170
|
if (minIndex === -1) return duplicates;
|
|
320
171
|
return duplicates.filter((dup) => {
|
|
@@ -323,261 +174,127 @@ function filterBySeverity(duplicates, minSeverity) {
|
|
|
323
174
|
});
|
|
324
175
|
}
|
|
325
176
|
|
|
326
|
-
// src/
|
|
327
|
-
function
|
|
328
|
-
|
|
329
|
-
if (lower.includes("request") && lower.includes("response") || lower.includes("router.") || lower.includes("app.get") || lower.includes("app.post") || lower.includes("express") || lower.includes("ctx.body")) {
|
|
330
|
-
return "api-handler";
|
|
331
|
-
}
|
|
332
|
-
if (lower.includes("validate") || lower.includes("schema") || lower.includes("zod") || lower.includes("yup") || lower.includes("if") && lower.includes("throw")) {
|
|
333
|
-
return "validator";
|
|
334
|
-
}
|
|
335
|
-
if (lower.includes("return (") || lower.includes("jsx") || lower.includes("component") || lower.includes("props")) {
|
|
336
|
-
return "component";
|
|
337
|
-
}
|
|
338
|
-
if (lower.includes("class ") || lower.includes("this.")) {
|
|
339
|
-
return "class-method";
|
|
340
|
-
}
|
|
341
|
-
if (lower.includes("return ") && !lower.includes("this") && !lower.includes("new ")) {
|
|
342
|
-
return "utility";
|
|
343
|
-
}
|
|
344
|
-
if (lower.includes("function") || lower.includes("=>")) {
|
|
345
|
-
return "function";
|
|
346
|
-
}
|
|
347
|
-
return "unknown";
|
|
177
|
+
// src/detector.ts
|
|
178
|
+
function normalizeCode(code) {
|
|
179
|
+
return code.replace(/\/\/.*/g, "").replace(/\/\*[\s\S]*?\*\//g, "").replace(/['"`]/g, '"').replace(/\s+/g, " ").trim().toLowerCase();
|
|
348
180
|
}
|
|
349
|
-
function
|
|
350
|
-
const lines = content.split("\n");
|
|
181
|
+
function extractBlocks(file, content) {
|
|
351
182
|
const blocks = [];
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
let
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
if (
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
}
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
if (char === "}") braceDepth--;
|
|
183
|
+
const lines = content.split("\n");
|
|
184
|
+
const blockRegex = /^\s*(?:export\s+)?(?:async\s+)?(function|class|const|interface|type)\s+([a-zA-Z0-9_]+)|^\s*(app\.(?:get|post|put|delete|patch|use))\(/gm;
|
|
185
|
+
let match;
|
|
186
|
+
while ((match = blockRegex.exec(content)) !== null) {
|
|
187
|
+
const startLine = content.substring(0, match.index).split("\n").length;
|
|
188
|
+
let type;
|
|
189
|
+
let name;
|
|
190
|
+
if (match[1]) {
|
|
191
|
+
type = match[1];
|
|
192
|
+
name = match[2];
|
|
193
|
+
} else {
|
|
194
|
+
type = "handler";
|
|
195
|
+
name = match[3];
|
|
366
196
|
}
|
|
367
|
-
|
|
368
|
-
|
|
197
|
+
let endLine = -1;
|
|
198
|
+
let openBraces = 0;
|
|
199
|
+
let foundStart = false;
|
|
200
|
+
for (let i = match.index; i < content.length; i++) {
|
|
201
|
+
if (content[i] === "{") {
|
|
202
|
+
openBraces++;
|
|
203
|
+
foundStart = true;
|
|
204
|
+
} else if (content[i] === "}") {
|
|
205
|
+
openBraces--;
|
|
206
|
+
}
|
|
207
|
+
if (foundStart && openBraces === 0) {
|
|
208
|
+
endLine = content.substring(0, i + 1).split("\n").length;
|
|
209
|
+
break;
|
|
210
|
+
}
|
|
369
211
|
}
|
|
370
|
-
if (
|
|
371
|
-
const
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
content: blockContent,
|
|
378
|
-
startLine: blockStart + 1,
|
|
379
|
-
endLine: i + 1,
|
|
380
|
-
patternType: categorizePattern(blockContent),
|
|
381
|
-
linesOfCode: loc
|
|
382
|
-
});
|
|
212
|
+
if (endLine === -1) {
|
|
213
|
+
const remaining = content.slice(match.index);
|
|
214
|
+
const nextLineMatch = remaining.indexOf("\n");
|
|
215
|
+
if (nextLineMatch !== -1) {
|
|
216
|
+
endLine = startLine;
|
|
217
|
+
} else {
|
|
218
|
+
endLine = lines.length;
|
|
383
219
|
}
|
|
384
|
-
currentBlock = [];
|
|
385
|
-
inFunction = false;
|
|
386
|
-
} else if (inFunction && braceDepth === 0) {
|
|
387
|
-
currentBlock = [];
|
|
388
|
-
inFunction = false;
|
|
389
220
|
}
|
|
221
|
+
endLine = Math.max(startLine, endLine);
|
|
222
|
+
const blockCode = lines.slice(startLine - 1, endLine).join("\n");
|
|
223
|
+
const tokens = (0, import_core2.estimateTokens)(blockCode);
|
|
224
|
+
blocks.push({
|
|
225
|
+
file,
|
|
226
|
+
startLine,
|
|
227
|
+
endLine,
|
|
228
|
+
code: blockCode,
|
|
229
|
+
tokens,
|
|
230
|
+
patternType: inferPatternType(type, name)
|
|
231
|
+
});
|
|
390
232
|
}
|
|
391
233
|
return blocks;
|
|
392
234
|
}
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
return code.replace(/\/\/.*$/gm, "").replace(/\/\*[\s\S]*?\*\//g, "").replace(/"[^"]*"/g, '"STR"').replace(/'[^']*'/g, "'STR'").replace(/`[^`]*`/g, "`STR`").replace(/\b\d+\b/g, "NUM").replace(/\s+/g, " ").trim();
|
|
398
|
-
}
|
|
399
|
-
var stopwords = /* @__PURE__ */ new Set([
|
|
400
|
-
"return",
|
|
401
|
-
"const",
|
|
402
|
-
"let",
|
|
403
|
-
"var",
|
|
404
|
-
"function",
|
|
405
|
-
"class",
|
|
406
|
-
"new",
|
|
407
|
-
"if",
|
|
408
|
-
"else",
|
|
409
|
-
"for",
|
|
410
|
-
"while",
|
|
411
|
-
"async",
|
|
412
|
-
"await",
|
|
413
|
-
"try",
|
|
414
|
-
"catch",
|
|
415
|
-
"switch",
|
|
416
|
-
"case",
|
|
417
|
-
"default",
|
|
418
|
-
"import",
|
|
419
|
-
"export",
|
|
420
|
-
"from",
|
|
421
|
-
"true",
|
|
422
|
-
"false",
|
|
423
|
-
"null",
|
|
424
|
-
"undefined",
|
|
425
|
-
"this"
|
|
426
|
-
]);
|
|
427
|
-
function tokenize(norm) {
|
|
428
|
-
const punctuation = "(){}[];.,";
|
|
429
|
-
const cleaned = norm.split("").map((ch) => punctuation.includes(ch) ? " " : ch).join("");
|
|
430
|
-
return cleaned.split(/\s+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
|
|
431
|
-
}
|
|
432
|
-
|
|
433
|
-
// src/core/similarity.ts
|
|
434
|
-
function jaccardSimilarity(tokens1, tokens2) {
|
|
435
|
-
const set1 = new Set(tokens1);
|
|
436
|
-
const set2 = new Set(tokens2);
|
|
437
|
-
if (set1.size === 0 && set2.size === 0) return 0;
|
|
438
|
-
let intersection = 0;
|
|
439
|
-
for (const token of set1) {
|
|
440
|
-
if (set2.has(token)) intersection++;
|
|
235
|
+
function inferPatternType(keyword, name) {
|
|
236
|
+
const n = name.toLowerCase();
|
|
237
|
+
if (keyword === "handler" || n.includes("handler") || n.includes("controller") || n.startsWith("app.")) {
|
|
238
|
+
return "api-handler";
|
|
441
239
|
}
|
|
442
|
-
|
|
443
|
-
|
|
240
|
+
if (n.includes("validate") || n.includes("schema")) return "validator";
|
|
241
|
+
if (n.includes("util") || n.includes("helper")) return "utility";
|
|
242
|
+
if (keyword === "class") return "class-method";
|
|
243
|
+
if (n.match(/^[A-Z]/)) return "component";
|
|
244
|
+
if (keyword === "function") return "function";
|
|
245
|
+
return "unknown";
|
|
444
246
|
}
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
arr.push(i);
|
|
463
|
-
}
|
|
464
|
-
}
|
|
465
|
-
}
|
|
466
|
-
findCandidates(blockIdx, minSharedTokens, maxCandidates) {
|
|
467
|
-
const block1 = this.allBlocks[blockIdx];
|
|
468
|
-
const block1Tokens = this.blockTokens[blockIdx];
|
|
469
|
-
const counts = /* @__PURE__ */ new Map();
|
|
470
|
-
const rareTokens = block1Tokens.filter((tok) => {
|
|
471
|
-
const freq = this.invertedIndex.get(tok)?.length || 0;
|
|
472
|
-
return freq < this.allBlocks.length * 0.1;
|
|
473
|
-
});
|
|
474
|
-
for (const tok of rareTokens) {
|
|
475
|
-
const ids = this.invertedIndex.get(tok);
|
|
476
|
-
if (!ids) continue;
|
|
477
|
-
for (const j of ids) {
|
|
478
|
-
if (j <= blockIdx) continue;
|
|
479
|
-
if (this.allBlocks[j].file === block1.file) continue;
|
|
480
|
-
counts.set(j, (counts.get(j) || 0) + 1);
|
|
481
|
-
}
|
|
482
|
-
}
|
|
483
|
-
return Array.from(counts.entries()).filter(([j, shared]) => {
|
|
484
|
-
const block2Size = this.blockTokens[j].length;
|
|
485
|
-
const minSize = Math.min(block1Tokens.length, block2Size);
|
|
486
|
-
return shared >= minSharedTokens && shared / minSize >= 0.3;
|
|
487
|
-
}).sort((a, b) => b[1] - a[1]).slice(0, maxCandidates).map(([j, shared]) => ({ j, shared }));
|
|
247
|
+
function calculateSimilarity(a, b) {
|
|
248
|
+
if (a === b) return 1;
|
|
249
|
+
const tokensA = a.split(/[^a-zA-Z0-9]+/).filter((t) => t.length > 0);
|
|
250
|
+
const tokensB = b.split(/[^a-zA-Z0-9]+/).filter((t) => t.length > 0);
|
|
251
|
+
if (tokensA.length === 0 || tokensB.length === 0) return 0;
|
|
252
|
+
const setA = new Set(tokensA);
|
|
253
|
+
const setB = new Set(tokensB);
|
|
254
|
+
const intersection = new Set([...setA].filter((x) => setB.has(x)));
|
|
255
|
+
const union = /* @__PURE__ */ new Set([...setA, ...setB]);
|
|
256
|
+
return intersection.size / union.size;
|
|
257
|
+
}
|
|
258
|
+
async function detectDuplicatePatterns(fileContents, options) {
|
|
259
|
+
const { minSimilarity, minLines, streamResults } = options;
|
|
260
|
+
const allBlocks = [];
|
|
261
|
+
for (const { file, content } of fileContents) {
|
|
262
|
+
const blocks = extractBlocks(file, content);
|
|
263
|
+
allBlocks.push(...blocks.filter((b) => b.endLine - b.startLine + 1 >= minLines));
|
|
488
264
|
}
|
|
489
|
-
};
|
|
490
|
-
|
|
491
|
-
// src/detector.ts
|
|
492
|
-
async function detectDuplicatePatterns(files, options) {
|
|
493
|
-
const {
|
|
494
|
-
minSimilarity,
|
|
495
|
-
minLines,
|
|
496
|
-
batchSize = 100,
|
|
497
|
-
approx = true,
|
|
498
|
-
minSharedTokens = 8,
|
|
499
|
-
maxCandidatesPerBlock = 100,
|
|
500
|
-
streamResults = false
|
|
501
|
-
} = options;
|
|
502
265
|
const duplicates = [];
|
|
503
|
-
const maxComparisons = approx ? Infinity : 5e5;
|
|
504
|
-
const allBlocks = files.flatMap(
|
|
505
|
-
(file) => extractCodeBlocks(file.content, minLines).filter(
|
|
506
|
-
(block) => block && block.content && block.content.trim().length > 0
|
|
507
|
-
).map((block) => ({
|
|
508
|
-
...block,
|
|
509
|
-
file: file.file,
|
|
510
|
-
normalized: normalizeCode(block.content),
|
|
511
|
-
tokenCost: block.content ? (0, import_core2.estimateTokens)(block.content) : 0
|
|
512
|
-
}))
|
|
513
|
-
);
|
|
514
|
-
const pythonFiles = files.filter((f) => f.file.endsWith(".py"));
|
|
515
|
-
if (pythonFiles.length > 0) {
|
|
516
|
-
const { extractPythonPatterns: extractPythonPatterns2 } = await Promise.resolve().then(() => (init_python_extractor(), python_extractor_exports));
|
|
517
|
-
const pythonPatterns = await extractPythonPatterns2(
|
|
518
|
-
pythonFiles.map((f) => f.file)
|
|
519
|
-
);
|
|
520
|
-
allBlocks.push(
|
|
521
|
-
...pythonPatterns.map((p) => ({
|
|
522
|
-
content: p.code,
|
|
523
|
-
startLine: p.startLine,
|
|
524
|
-
endLine: p.endLine,
|
|
525
|
-
file: p.file,
|
|
526
|
-
normalized: normalizeCode(p.code),
|
|
527
|
-
patternType: p.type,
|
|
528
|
-
tokenCost: p.code ? (0, import_core2.estimateTokens)(p.code) : 0,
|
|
529
|
-
linesOfCode: p.endLine - p.startLine + 1
|
|
530
|
-
}))
|
|
531
|
-
);
|
|
532
|
-
}
|
|
533
|
-
const blockTokens = allBlocks.map((b) => tokenize(b.normalized));
|
|
534
|
-
const engine = approx ? new ApproxEngine(allBlocks, blockTokens) : null;
|
|
535
|
-
let comparisonsProcessed = 0;
|
|
536
|
-
const startTime = Date.now();
|
|
537
266
|
for (let i = 0; i < allBlocks.length; i++) {
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
` Processed ${i}/${allBlocks.length} blocks (${elapsed.toFixed(1)}s, ${duplicates.length} duplicates)`
|
|
546
|
-
);
|
|
547
|
-
}
|
|
548
|
-
await new Promise((r) => setImmediate((resolve) => r(resolve)));
|
|
549
|
-
}
|
|
550
|
-
const block1 = allBlocks[i];
|
|
551
|
-
const candidates = engine ? engine.findCandidates(i, minSharedTokens, maxCandidatesPerBlock) : allBlocks.slice(i + 1).map((_, idx) => ({ j: i + 1 + idx, shared: 0 }));
|
|
552
|
-
for (const { j } of candidates) {
|
|
553
|
-
if (!approx && comparisonsProcessed >= maxComparisons) break;
|
|
554
|
-
comparisonsProcessed++;
|
|
555
|
-
const block2 = allBlocks[j];
|
|
556
|
-
if (block1.file === block2.file) continue;
|
|
557
|
-
const sim = jaccardSimilarity(blockTokens[i], blockTokens[j]);
|
|
267
|
+
for (let j = i + 1; j < allBlocks.length; j++) {
|
|
268
|
+
const b1 = allBlocks[i];
|
|
269
|
+
const b2 = allBlocks[j];
|
|
270
|
+
if (b1.file === b2.file) continue;
|
|
271
|
+
const norm1 = normalizeCode(b1.code);
|
|
272
|
+
const norm2 = normalizeCode(b2.code);
|
|
273
|
+
const sim = calculateSimilarity(norm1, norm2);
|
|
558
274
|
if (sim >= minSimilarity) {
|
|
559
|
-
const severity = calculateSeverity(
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
275
|
+
const { severity, reason, suggestion, matchedRule } = calculateSeverity(
|
|
276
|
+
b1.file,
|
|
277
|
+
b2.file,
|
|
278
|
+
b1.code,
|
|
563
279
|
sim,
|
|
564
|
-
|
|
280
|
+
b1.endLine - b1.startLine + 1
|
|
565
281
|
);
|
|
566
282
|
const dup = {
|
|
567
|
-
file1:
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
endLine2:
|
|
283
|
+
file1: b1.file,
|
|
284
|
+
line1: b1.startLine,
|
|
285
|
+
endLine1: b1.endLine,
|
|
286
|
+
file2: b2.file,
|
|
287
|
+
line2: b2.startLine,
|
|
288
|
+
endLine2: b2.endLine,
|
|
289
|
+
code1: b1.code,
|
|
290
|
+
code2: b2.code,
|
|
573
291
|
similarity: sim,
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
suggestion: severity.suggestion
|
|
292
|
+
patternType: b1.patternType,
|
|
293
|
+
tokenCost: b1.tokens + b2.tokens,
|
|
294
|
+
severity,
|
|
295
|
+
reason,
|
|
296
|
+
suggestion,
|
|
297
|
+
matchedRule
|
|
581
298
|
};
|
|
582
299
|
duplicates.push(dup);
|
|
583
300
|
if (streamResults)
|
|
@@ -587,281 +304,134 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
587
304
|
}
|
|
588
305
|
}
|
|
589
306
|
}
|
|
590
|
-
return duplicates;
|
|
307
|
+
return duplicates.sort((a, b) => b.similarity - a.similarity);
|
|
591
308
|
}
|
|
592
309
|
|
|
593
310
|
// src/grouping.ts
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
311
|
+
var import_core3 = require("@aiready/core");
|
|
312
|
+
var import_path = __toESM(require("path"));
|
|
313
|
+
function getSeverityLevel(s) {
|
|
314
|
+
if (s === import_core3.Severity.Critical || s === "critical") return 4;
|
|
315
|
+
if (s === import_core3.Severity.Major || s === "major") return 3;
|
|
316
|
+
if (s === import_core3.Severity.Minor || s === "minor") return 2;
|
|
317
|
+
if (s === import_core3.Severity.Info || s === "info") return 1;
|
|
318
|
+
return 0;
|
|
599
319
|
}
|
|
600
320
|
function groupDuplicatesByFilePair(duplicates) {
|
|
601
321
|
const groups = /* @__PURE__ */ new Map();
|
|
602
322
|
for (const dup of duplicates) {
|
|
603
|
-
const
|
|
323
|
+
const files = [dup.file1, dup.file2].sort();
|
|
324
|
+
const key = files.join("::");
|
|
604
325
|
if (!groups.has(key)) {
|
|
605
|
-
groups.set(key,
|
|
326
|
+
groups.set(key, {
|
|
327
|
+
filePair: key,
|
|
328
|
+
severity: dup.severity,
|
|
329
|
+
occurrences: 0,
|
|
330
|
+
totalTokenCost: 0,
|
|
331
|
+
averageSimilarity: 0,
|
|
332
|
+
patternTypes: /* @__PURE__ */ new Set(),
|
|
333
|
+
lineRanges: []
|
|
334
|
+
});
|
|
606
335
|
}
|
|
607
|
-
groups.get(key)
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
);
|
|
616
|
-
const averageSimilarity = deduplicated.reduce((sum, d) => sum + d.similarity, 0) / deduplicated.length;
|
|
617
|
-
const maxSimilarity = Math.max(...deduplicated.map((d) => d.similarity));
|
|
618
|
-
const severity = getHighestSeverity(deduplicated.map((d) => d.severity));
|
|
619
|
-
const patternType = getMostCommonPatternType(deduplicated);
|
|
620
|
-
const lineRanges = deduplicated.map((d) => ({
|
|
621
|
-
file1: { start: d.line1, end: d.endLine1 },
|
|
622
|
-
file2: { start: d.line2, end: d.endLine2 }
|
|
623
|
-
}));
|
|
624
|
-
result.push({
|
|
625
|
-
filePair,
|
|
626
|
-
duplicates: deduplicated,
|
|
627
|
-
totalTokenCost,
|
|
628
|
-
averageSimilarity,
|
|
629
|
-
maxSimilarity,
|
|
630
|
-
severity,
|
|
631
|
-
patternType,
|
|
632
|
-
occurrences: deduplicated.length,
|
|
633
|
-
lineRanges
|
|
336
|
+
const group = groups.get(key);
|
|
337
|
+
group.occurrences++;
|
|
338
|
+
group.totalTokenCost += dup.tokenCost;
|
|
339
|
+
group.averageSimilarity += dup.similarity;
|
|
340
|
+
group.patternTypes.add(dup.patternType);
|
|
341
|
+
group.lineRanges.push({
|
|
342
|
+
file1: { start: dup.line1, end: dup.endLine1 },
|
|
343
|
+
file2: { start: dup.line2, end: dup.endLine2 }
|
|
634
344
|
});
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
function deduplicateOverlappingRanges(duplicates) {
|
|
639
|
-
if (duplicates.length === 0) return [];
|
|
640
|
-
const sorted = [...duplicates].sort((a, b) => {
|
|
641
|
-
if (a.line1 !== b.line1) return a.line1 - b.line1;
|
|
642
|
-
return b.similarity - a.similarity;
|
|
643
|
-
});
|
|
644
|
-
const result = [];
|
|
645
|
-
let current = null;
|
|
646
|
-
for (const dup of sorted) {
|
|
647
|
-
if (!current) {
|
|
648
|
-
current = dup;
|
|
649
|
-
result.push(dup);
|
|
650
|
-
continue;
|
|
651
|
-
}
|
|
652
|
-
const overlapsFile1 = rangesOverlap(
|
|
653
|
-
current.line1,
|
|
654
|
-
current.endLine1,
|
|
655
|
-
dup.line1,
|
|
656
|
-
dup.endLine1
|
|
657
|
-
);
|
|
658
|
-
const overlapsFile2 = rangesOverlap(
|
|
659
|
-
current.line2,
|
|
660
|
-
current.endLine2,
|
|
661
|
-
dup.line2,
|
|
662
|
-
dup.endLine2
|
|
663
|
-
);
|
|
664
|
-
if (overlapsFile1 && overlapsFile2) {
|
|
665
|
-
current = {
|
|
666
|
-
...current,
|
|
667
|
-
endLine1: Math.max(current.endLine1, dup.endLine1),
|
|
668
|
-
endLine2: Math.max(current.endLine2, dup.endLine2),
|
|
669
|
-
tokenCost: Math.max(current.tokenCost, dup.tokenCost)
|
|
670
|
-
};
|
|
671
|
-
result[result.length - 1] = current;
|
|
672
|
-
} else {
|
|
673
|
-
current = dup;
|
|
674
|
-
result.push(dup);
|
|
345
|
+
const currentSev = dup.severity;
|
|
346
|
+
if (getSeverityLevel(currentSev) > getSeverityLevel(group.severity)) {
|
|
347
|
+
group.severity = currentSev;
|
|
675
348
|
}
|
|
676
349
|
}
|
|
677
|
-
return
|
|
350
|
+
return Array.from(groups.values()).map((g) => ({
|
|
351
|
+
...g,
|
|
352
|
+
averageSimilarity: g.averageSimilarity / g.occurrences
|
|
353
|
+
}));
|
|
678
354
|
}
|
|
679
355
|
function createRefactorClusters(duplicates) {
|
|
680
|
-
const
|
|
356
|
+
const adjacency = /* @__PURE__ */ new Map();
|
|
357
|
+
const visited = /* @__PURE__ */ new Set();
|
|
358
|
+
const components = [];
|
|
681
359
|
for (const dup of duplicates) {
|
|
682
|
-
|
|
683
|
-
if (!
|
|
684
|
-
|
|
360
|
+
if (!adjacency.has(dup.file1)) adjacency.set(dup.file1, /* @__PURE__ */ new Set());
|
|
361
|
+
if (!adjacency.has(dup.file2)) adjacency.set(dup.file2, /* @__PURE__ */ new Set());
|
|
362
|
+
adjacency.get(dup.file1).add(dup.file2);
|
|
363
|
+
adjacency.get(dup.file2).add(dup.file1);
|
|
364
|
+
}
|
|
365
|
+
for (const file of adjacency.keys()) {
|
|
366
|
+
if (visited.has(file)) continue;
|
|
367
|
+
const component = [];
|
|
368
|
+
const queue = [file];
|
|
369
|
+
visited.add(file);
|
|
370
|
+
while (queue.length > 0) {
|
|
371
|
+
const curr = queue.shift();
|
|
372
|
+
component.push(curr);
|
|
373
|
+
for (const neighbor of adjacency.get(curr) || []) {
|
|
374
|
+
if (!visited.has(neighbor)) {
|
|
375
|
+
visited.add(neighbor);
|
|
376
|
+
queue.push(neighbor);
|
|
377
|
+
}
|
|
378
|
+
}
|
|
685
379
|
}
|
|
686
|
-
|
|
380
|
+
components.push(component);
|
|
687
381
|
}
|
|
688
|
-
const
|
|
689
|
-
for (const
|
|
690
|
-
if (
|
|
691
|
-
const
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
const
|
|
695
|
-
const
|
|
696
|
-
const
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
382
|
+
const clusters = [];
|
|
383
|
+
for (const component of components) {
|
|
384
|
+
if (component.length < 2) continue;
|
|
385
|
+
const componentDups = duplicates.filter(
|
|
386
|
+
(d) => component.includes(d.file1) && component.includes(d.file2)
|
|
387
|
+
);
|
|
388
|
+
const totalTokenCost = componentDups.reduce((sum, d) => sum + d.tokenCost, 0);
|
|
389
|
+
const avgSimilarity = componentDups.reduce((sum, d) => sum + d.similarity, 0) / Math.max(1, componentDups.length);
|
|
390
|
+
const name = determineClusterName(component);
|
|
391
|
+
const { severity, reason, suggestion } = calculateSeverity(
|
|
392
|
+
component[0],
|
|
393
|
+
component[1],
|
|
394
|
+
"",
|
|
395
|
+
// Code not available here
|
|
396
|
+
avgSimilarity,
|
|
397
|
+
30
|
|
398
|
+
// Assume substantial if clustered
|
|
399
|
+
);
|
|
400
|
+
clusters.push({
|
|
401
|
+
id: `cluster-${clusters.length}`,
|
|
402
|
+
name,
|
|
403
|
+
files: component,
|
|
702
404
|
severity,
|
|
405
|
+
duplicateCount: componentDups.length,
|
|
703
406
|
totalTokenCost,
|
|
704
|
-
averageSimilarity,
|
|
705
|
-
|
|
706
|
-
suggestion
|
|
707
|
-
reason: clusterInfo.reason
|
|
407
|
+
averageSimilarity: avgSimilarity,
|
|
408
|
+
reason,
|
|
409
|
+
suggestion
|
|
708
410
|
});
|
|
709
411
|
}
|
|
710
|
-
return
|
|
412
|
+
return clusters;
|
|
711
413
|
}
|
|
712
|
-
function
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
if (
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
`Component check: ${dup.file1} -> ${component1}, ${dup.file2} -> ${component2}`
|
|
723
|
-
);
|
|
724
|
-
if (component1 && component2 && areSimilarComponents(component1, component2)) {
|
|
725
|
-
const category = getComponentCategory(component1);
|
|
726
|
-
console.log(`Creating cluster: component-${category}`);
|
|
727
|
-
return `component-${category}`;
|
|
728
|
-
}
|
|
414
|
+
function determineClusterName(files) {
|
|
415
|
+
if (files.length === 0) return "Unknown Cluster";
|
|
416
|
+
if (files.some((f) => f.includes("blog"))) return "Blog SEO Boilerplate";
|
|
417
|
+
if (files.some((f) => f.includes("buttons"))) return "Button Component Variants";
|
|
418
|
+
if (files.some((f) => f.includes("cards"))) return "Card Component Variants";
|
|
419
|
+
if (files.some((f) => f.includes("login.test"))) return "E2E Test Patterns";
|
|
420
|
+
const first = files[0];
|
|
421
|
+
const dirName = import_path.default.dirname(first).split(import_path.default.sep).pop();
|
|
422
|
+
if (dirName && dirName !== "." && dirName !== "..") {
|
|
423
|
+
return `${dirName.charAt(0).toUpperCase() + dirName.slice(1)} Domain Group`;
|
|
729
424
|
}
|
|
730
|
-
|
|
731
|
-
return "e2e-test-patterns";
|
|
732
|
-
}
|
|
733
|
-
if (dup.patternType === "api-handler") {
|
|
734
|
-
return "api-handlers";
|
|
735
|
-
}
|
|
736
|
-
if (dup.patternType === "validator") {
|
|
737
|
-
return "validators";
|
|
738
|
-
}
|
|
739
|
-
if ((file1.includes("/scripts/") || file1.startsWith("scripts/") || file1.includes("/infra/") || file1.startsWith("infra/")) && (file2.includes("/scripts/") || file2.startsWith("scripts/") || file2.includes("/infra/") || file2.startsWith("infra/"))) {
|
|
740
|
-
return "infrastructure-scripts";
|
|
741
|
-
}
|
|
742
|
-
return `${dup.patternType}-patterns`;
|
|
743
|
-
}
|
|
744
|
-
function extractComponentName(filePath) {
|
|
745
|
-
const match = filePath.match(/[/\\]?([A-Z][a-zA-Z0-9]*)\.(tsx|jsx|ts|js)$/);
|
|
746
|
-
return match ? match[1] : null;
|
|
425
|
+
return "Shared Pattern Group";
|
|
747
426
|
}
|
|
748
|
-
function
|
|
749
|
-
const category1 = getComponentCategory(name1);
|
|
750
|
-
const category2 = getComponentCategory(name2);
|
|
751
|
-
return category1 === category2;
|
|
752
|
-
}
|
|
753
|
-
function getComponentCategory(name) {
|
|
754
|
-
name = name.toLowerCase();
|
|
755
|
-
if (name.includes("button") || name.includes("btn")) return "button";
|
|
756
|
-
if (name.includes("card")) return "card";
|
|
757
|
-
if (name.includes("modal") || name.includes("dialog")) return "modal";
|
|
758
|
-
if (name.includes("form")) return "form";
|
|
759
|
-
if (name.includes("input") || name.includes("field")) return "input";
|
|
760
|
-
if (name.includes("table") || name.includes("grid")) return "table";
|
|
761
|
-
if (name.includes("nav") || name.includes("menu")) return "navigation";
|
|
762
|
-
if (name.includes("header") || name.includes("footer")) return "layout";
|
|
763
|
-
return "misc";
|
|
764
|
-
}
|
|
765
|
-
function getUniqueFiles(duplicates) {
|
|
766
|
-
const files = /* @__PURE__ */ new Set();
|
|
767
|
-
for (const dup of duplicates) {
|
|
768
|
-
files.add(dup.file1);
|
|
769
|
-
files.add(dup.file2);
|
|
770
|
-
}
|
|
771
|
-
return Array.from(files).sort();
|
|
772
|
-
}
|
|
773
|
-
function getHighestSeverity(severities) {
|
|
774
|
-
const order = {
|
|
775
|
-
critical: 4,
|
|
776
|
-
major: 3,
|
|
777
|
-
minor: 2,
|
|
778
|
-
info: 1
|
|
779
|
-
};
|
|
780
|
-
let highest = "info";
|
|
781
|
-
let highestValue = 0;
|
|
782
|
-
for (const severity of severities) {
|
|
783
|
-
if (order[severity] > highestValue) {
|
|
784
|
-
highestValue = order[severity];
|
|
785
|
-
highest = severity;
|
|
786
|
-
}
|
|
787
|
-
}
|
|
788
|
-
return highest;
|
|
789
|
-
}
|
|
790
|
-
function getMostCommonPatternType(duplicates) {
|
|
791
|
-
const counts = /* @__PURE__ */ new Map();
|
|
792
|
-
for (const dup of duplicates) {
|
|
793
|
-
counts.set(dup.patternType, (counts.get(dup.patternType) || 0) + 1);
|
|
794
|
-
}
|
|
795
|
-
let mostCommon = "unknown";
|
|
796
|
-
let maxCount = 0;
|
|
797
|
-
for (const [type, count] of counts.entries()) {
|
|
798
|
-
if (count > maxCount) {
|
|
799
|
-
maxCount = count;
|
|
800
|
-
mostCommon = type;
|
|
801
|
-
}
|
|
802
|
-
}
|
|
803
|
-
return mostCommon;
|
|
804
|
-
}
|
|
805
|
-
function getClusterInfo(clusterId, patternType, fileCount) {
|
|
806
|
-
const templates = {
|
|
807
|
-
"blog-seo-boilerplate": {
|
|
808
|
-
name: `Blog SEO Boilerplate (${fileCount} files)`,
|
|
809
|
-
suggestion: "Create BlogPageLayout component with SEO schema generator, breadcrumb component, and metadata helpers",
|
|
810
|
-
reason: "SEO boilerplate duplication increases maintenance burden and schema consistency risk"
|
|
811
|
-
},
|
|
812
|
-
"e2e-test-patterns": {
|
|
813
|
-
name: `E2E Test Patterns (${fileCount} files)`,
|
|
814
|
-
suggestion: "Extract page object helpers and common test utilities (waitFor, fillForm, etc.)",
|
|
815
|
-
reason: "Test helper extraction improves maintainability while preserving test independence"
|
|
816
|
-
},
|
|
817
|
-
"api-handlers": {
|
|
818
|
-
name: `API Handler Patterns (${fileCount} files)`,
|
|
819
|
-
suggestion: "Extract common middleware, error handling, and response formatting",
|
|
820
|
-
reason: "API handler duplication leads to inconsistent error handling and response formats"
|
|
821
|
-
},
|
|
822
|
-
validators: {
|
|
823
|
-
name: `Validator Patterns (${fileCount} files)`,
|
|
824
|
-
suggestion: "Consolidate into shared schema validators (Zod/Yup) with reusable rules",
|
|
825
|
-
reason: "Validator duplication causes inconsistent validation and harder maintenance"
|
|
826
|
-
},
|
|
827
|
-
"infrastructure-scripts": {
|
|
828
|
-
name: `Infrastructure Scripts (${fileCount} files)`,
|
|
829
|
-
suggestion: "Extract common CLI parsing, file I/O, and error handling utilities",
|
|
830
|
-
reason: "Script duplication is often acceptable for one-off tasks, but common patterns can be shared"
|
|
831
|
-
},
|
|
832
|
-
"component-button": {
|
|
833
|
-
name: `Button Component Variants (${fileCount} files)`,
|
|
834
|
-
suggestion: "Create unified Button component with variant props",
|
|
835
|
-
reason: "Multiple button variants should share base styles and behavior"
|
|
836
|
-
},
|
|
837
|
-
"component-card": {
|
|
838
|
-
name: `Card Component Variants (${fileCount} files)`,
|
|
839
|
-
suggestion: "Create unified Card component with composition pattern",
|
|
840
|
-
reason: "Card variants should share layout structure and styling"
|
|
841
|
-
},
|
|
842
|
-
"component-modal": {
|
|
843
|
-
name: `Modal Component Variants (${fileCount} files)`,
|
|
844
|
-
suggestion: "Create base Modal component with customizable content",
|
|
845
|
-
reason: "Modal variants should share overlay, animation, and accessibility logic"
|
|
846
|
-
}
|
|
847
|
-
};
|
|
848
|
-
if (templates[clusterId]) {
|
|
849
|
-
return templates[clusterId];
|
|
850
|
-
}
|
|
851
|
-
return {
|
|
852
|
-
name: `${patternType} Cluster (${fileCount} files)`,
|
|
853
|
-
suggestion: `Extract common ${patternType} patterns into shared utilities`,
|
|
854
|
-
reason: `Multiple similar ${patternType} patterns detected across ${fileCount} files`
|
|
855
|
-
};
|
|
856
|
-
}
|
|
857
|
-
function filterClustersByImpact(clusters, minTokenCost = 1e3, minFileCount = 3) {
|
|
427
|
+
function filterClustersByImpact(clusters, minTokenCost = 1e3, minFiles = 3) {
|
|
858
428
|
return clusters.filter(
|
|
859
|
-
(
|
|
429
|
+
(c) => c.totalTokenCost >= minTokenCost && c.files.length >= minFiles
|
|
860
430
|
);
|
|
861
431
|
}
|
|
862
432
|
|
|
863
433
|
// src/scoring.ts
|
|
864
|
-
var
|
|
434
|
+
var import_core4 = require("@aiready/core");
|
|
865
435
|
|
|
866
436
|
// src/index.ts
|
|
867
437
|
function getRefactoringSuggestion(patternType, similarity) {
|
|
@@ -972,12 +542,18 @@ async function analyzePatterns(options) {
|
|
|
972
542
|
const estimatedBlocks = files.length * 3;
|
|
973
543
|
logConfiguration(finalOptions, estimatedBlocks);
|
|
974
544
|
const results = [];
|
|
975
|
-
const
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
545
|
+
const BATCH_SIZE = 50;
|
|
546
|
+
const fileContents = [];
|
|
547
|
+
for (let i = 0; i < files.length; i += BATCH_SIZE) {
|
|
548
|
+
const batch = files.slice(i, i + BATCH_SIZE);
|
|
549
|
+
const batchContents = await Promise.all(
|
|
550
|
+
batch.map(async (file) => ({
|
|
551
|
+
file,
|
|
552
|
+
content: await (0, import_core5.readFileContent)(file)
|
|
553
|
+
}))
|
|
554
|
+
);
|
|
555
|
+
fileContents.push(...batchContents);
|
|
556
|
+
}
|
|
981
557
|
const duplicates = await detectDuplicatePatterns(fileContents, {
|
|
982
558
|
minSimilarity,
|
|
983
559
|
minLines,
|
|
@@ -994,9 +570,9 @@ async function analyzePatterns(options) {
|
|
|
994
570
|
);
|
|
995
571
|
const issues = fileDuplicates.map((dup) => {
|
|
996
572
|
const otherFile = dup.file1 === file ? dup.file2 : dup.file1;
|
|
997
|
-
const severity2 = dup.similarity > 0.95 ?
|
|
573
|
+
const severity2 = dup.similarity > 0.95 ? import_core5.Severity.Critical : dup.similarity > 0.9 ? import_core5.Severity.Major : import_core5.Severity.Minor;
|
|
998
574
|
return {
|
|
999
|
-
type:
|
|
575
|
+
type: import_core5.IssueType.DuplicatePattern,
|
|
1000
576
|
severity: severity2,
|
|
1001
577
|
message: `${dup.patternType} pattern ${Math.round(dup.similarity * 100)}% similar to ${otherFile} (${dup.tokenCost} tokens wasted)`,
|
|
1002
578
|
location: {
|
|
@@ -1009,11 +585,11 @@ async function analyzePatterns(options) {
|
|
|
1009
585
|
let filteredIssues = issues;
|
|
1010
586
|
if (severity !== "all") {
|
|
1011
587
|
const severityMap = {
|
|
1012
|
-
critical: [
|
|
1013
|
-
high: [
|
|
1014
|
-
medium: [
|
|
588
|
+
critical: [import_core5.Severity.Critical],
|
|
589
|
+
high: [import_core5.Severity.Critical, import_core5.Severity.Major],
|
|
590
|
+
medium: [import_core5.Severity.Critical, import_core5.Severity.Major, import_core5.Severity.Minor]
|
|
1015
591
|
};
|
|
1016
|
-
const allowedSeverities = severityMap[severity] || [
|
|
592
|
+
const allowedSeverities = severityMap[severity] || [import_core5.Severity.Critical, import_core5.Severity.Major, import_core5.Severity.Minor];
|
|
1017
593
|
filteredIssues = issues.filter(
|
|
1018
594
|
(issue) => allowedSeverities.includes(issue.severity)
|
|
1019
595
|
);
|
|
@@ -1105,8 +681,8 @@ function generateSummary(results) {
|
|
|
1105
681
|
// src/cli.ts
|
|
1106
682
|
var import_chalk = __toESM(require("chalk"));
|
|
1107
683
|
var import_fs = require("fs");
|
|
1108
|
-
var
|
|
1109
|
-
var
|
|
684
|
+
var import_path2 = require("path");
|
|
685
|
+
var import_core6 = require("@aiready/core");
|
|
1110
686
|
var program = new import_commander.Command();
|
|
1111
687
|
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").addHelpText(
|
|
1112
688
|
"after",
|
|
@@ -1160,7 +736,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1160
736
|
).option("--output-file <path>", "Output file path (for json/html)").action(async (directory, options) => {
|
|
1161
737
|
console.log(import_chalk.default.blue("\u{1F50D} Analyzing patterns...\n"));
|
|
1162
738
|
const startTime = Date.now();
|
|
1163
|
-
const config = await (0,
|
|
739
|
+
const config = await (0, import_core6.loadConfig)(directory);
|
|
1164
740
|
const defaults = {
|
|
1165
741
|
minSimilarity: 0.4,
|
|
1166
742
|
minLines: 5,
|
|
@@ -1171,7 +747,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1171
747
|
streamResults: true,
|
|
1172
748
|
include: void 0,
|
|
1173
749
|
exclude: void 0,
|
|
1174
|
-
minSeverity:
|
|
750
|
+
minSeverity: import_core6.Severity.Minor,
|
|
1175
751
|
excludeTestFixtures: false,
|
|
1176
752
|
excludeTemplates: false,
|
|
1177
753
|
includeTests: false,
|
|
@@ -1182,7 +758,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1182
758
|
minClusterFiles: 3,
|
|
1183
759
|
showRawDuplicates: false
|
|
1184
760
|
};
|
|
1185
|
-
const mergedConfig = (0,
|
|
761
|
+
const mergedConfig = (0, import_core6.mergeConfigWithDefaults)(config, defaults);
|
|
1186
762
|
const finalOptions = {
|
|
1187
763
|
rootDir: directory,
|
|
1188
764
|
minSimilarity: options.similarity ? parseFloat(options.similarity) : mergedConfig.minSimilarity,
|
|
@@ -1200,10 +776,10 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1200
776
|
excludeTemplates: options.excludeTemplates || mergedConfig.excludeTemplates,
|
|
1201
777
|
includeTests: options.includeTests || mergedConfig.includeTests,
|
|
1202
778
|
maxResults: options.maxResults ? parseInt(options.maxResults) : mergedConfig.maxResults,
|
|
1203
|
-
groupByFilePair: options.
|
|
1204
|
-
createClusters: options.
|
|
1205
|
-
minClusterTokenCost: options.
|
|
1206
|
-
minClusterFiles: options.
|
|
779
|
+
groupByFilePair: options.groupBy_file_pair !== false && mergedConfig.groupByFilePair,
|
|
780
|
+
createClusters: options.create_clusters !== false && mergedConfig.createClusters,
|
|
781
|
+
minClusterTokenCost: options.min_cluster_tokens ? parseInt(options.min_cluster_tokens) : mergedConfig.minClusterTokenCost,
|
|
782
|
+
minClusterFiles: options.min_cluster_files ? parseInt(options.min_cluster_files) : mergedConfig.minClusterFiles,
|
|
1207
783
|
showRawDuplicates: options.showRawDuplicates || mergedConfig.showRawDuplicates
|
|
1208
784
|
};
|
|
1209
785
|
if (finalOptions.includeTests && finalOptions.exclude) {
|
|
@@ -1254,12 +830,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1254
830
|
clusters: clusters || [],
|
|
1255
831
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
1256
832
|
};
|
|
1257
|
-
const outputPath = (0,
|
|
833
|
+
const outputPath = (0, import_core6.resolveOutputPath)(
|
|
1258
834
|
options.outputFile,
|
|
1259
835
|
`pattern-report-${(/* @__PURE__ */ new Date()).toISOString().split("T")[0]}.json`,
|
|
1260
836
|
directory
|
|
1261
837
|
);
|
|
1262
|
-
const dir = (0,
|
|
838
|
+
const dir = (0, import_path2.dirname)(outputPath);
|
|
1263
839
|
if (!(0, import_fs.existsSync)(dir)) {
|
|
1264
840
|
(0, import_fs.mkdirSync)(dir, { recursive: true });
|
|
1265
841
|
}
|
|
@@ -1270,12 +846,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1270
846
|
}
|
|
1271
847
|
if (options.output === "html") {
|
|
1272
848
|
const html = generateHTMLReport(summary, results);
|
|
1273
|
-
const outputPath = (0,
|
|
849
|
+
const outputPath = (0, import_core6.resolveOutputPath)(
|
|
1274
850
|
options.outputFile,
|
|
1275
851
|
`pattern-report-${(/* @__PURE__ */ new Date()).toISOString().split("T")[0]}.html`,
|
|
1276
852
|
directory
|
|
1277
853
|
);
|
|
1278
|
-
const dir = (0,
|
|
854
|
+
const dir = (0, import_path2.dirname)(outputPath);
|
|
1279
855
|
if (!(0, import_fs.existsSync)(dir)) {
|
|
1280
856
|
(0, import_fs.mkdirSync)(dir, { recursive: true });
|
|
1281
857
|
}
|
|
@@ -1324,14 +900,10 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1324
900
|
import_chalk.default.bold.white(` \u{1F4E6} DUPLICATE GROUPS (${groups.length} file pairs)`)
|
|
1325
901
|
);
|
|
1326
902
|
console.log(import_chalk.default.cyan(divider) + "\n");
|
|
1327
|
-
const severityOrder = {
|
|
1328
|
-
critical: 4,
|
|
1329
|
-
major: 3,
|
|
1330
|
-
minor: 2,
|
|
1331
|
-
info: 1
|
|
1332
|
-
};
|
|
1333
903
|
const topGroups = groups.sort((a, b) => {
|
|
1334
|
-
const
|
|
904
|
+
const bVal = getSeverityValue(b.severity);
|
|
905
|
+
const aVal = getSeverityValue(a.severity);
|
|
906
|
+
const severityDiff = bVal - aVal;
|
|
1335
907
|
if (severityDiff !== 0) return severityDiff;
|
|
1336
908
|
return b.totalTokenCost - a.totalTokenCost;
|
|
1337
909
|
}).slice(0, finalOptions.maxResults);
|
|
@@ -1405,14 +977,10 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1405
977
|
console.log(import_chalk.default.cyan("\n" + divider));
|
|
1406
978
|
console.log(import_chalk.default.bold.white(" TOP DUPLICATE PATTERNS"));
|
|
1407
979
|
console.log(import_chalk.default.cyan(divider) + "\n");
|
|
1408
|
-
const severityOrder = {
|
|
1409
|
-
critical: 4,
|
|
1410
|
-
major: 3,
|
|
1411
|
-
minor: 2,
|
|
1412
|
-
info: 1
|
|
1413
|
-
};
|
|
1414
980
|
const topDuplicates = filteredDuplicates.sort((a, b) => {
|
|
1415
|
-
const
|
|
981
|
+
const bVal = getSeverityValue(b.severity);
|
|
982
|
+
const aVal = getSeverityValue(a.severity);
|
|
983
|
+
const severityDiff = bVal - aVal;
|
|
1416
984
|
if (severityDiff !== 0) return severityDiff;
|
|
1417
985
|
return b.similarity - a.similarity;
|
|
1418
986
|
}).slice(0, finalOptions.maxResults);
|
|
@@ -1452,7 +1020,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1452
1020
|
(r) => r.issues.map((issue) => ({ ...issue, file: r.fileName }))
|
|
1453
1021
|
);
|
|
1454
1022
|
const criticalIssues = allIssues.filter(
|
|
1455
|
-
(issue) => issue.severity ===
|
|
1023
|
+
(issue) => getSeverityValue(issue.severity) === 4
|
|
1456
1024
|
);
|
|
1457
1025
|
if (criticalIssues.length > 0) {
|
|
1458
1026
|
console.log(import_chalk.default.cyan(divider));
|
|
@@ -1607,12 +1175,25 @@ function generateHTMLReport(summary, results) {
|
|
|
1607
1175
|
</html>`;
|
|
1608
1176
|
}
|
|
1609
1177
|
program.parse();
|
|
1178
|
+
function getSeverityValue(s) {
|
|
1179
|
+
if (s === import_core6.Severity.Critical || s === "critical") return 4;
|
|
1180
|
+
if (s === import_core6.Severity.Major || s === "major") return 3;
|
|
1181
|
+
if (s === import_core6.Severity.Minor || s === "minor") return 2;
|
|
1182
|
+
if (s === import_core6.Severity.Info || s === "info") return 1;
|
|
1183
|
+
return 0;
|
|
1184
|
+
}
|
|
1610
1185
|
function getSeverityBadge(severity) {
|
|
1611
|
-
const
|
|
1612
|
-
|
|
1613
|
-
|
|
1614
|
-
|
|
1615
|
-
|
|
1616
|
-
|
|
1617
|
-
|
|
1186
|
+
const val = getSeverityValue(severity);
|
|
1187
|
+
switch (val) {
|
|
1188
|
+
case 4:
|
|
1189
|
+
return import_chalk.default.bgRed.white.bold(" CRITICAL ");
|
|
1190
|
+
case 3:
|
|
1191
|
+
return import_chalk.default.bgYellow.black.bold(" MAJOR ");
|
|
1192
|
+
case 2:
|
|
1193
|
+
return import_chalk.default.bgBlue.white.bold(" MINOR ");
|
|
1194
|
+
case 1:
|
|
1195
|
+
return import_chalk.default.bgCyan.black(" INFO ");
|
|
1196
|
+
default:
|
|
1197
|
+
return import_chalk.default.bgCyan.black(" INFO ");
|
|
1198
|
+
}
|
|
1618
1199
|
}
|