@aiready/pattern-detect 0.12.2 → 0.12.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-2R7HOR5H.mjs +777 -0
- package/dist/chunk-3VRQYFW3.mjs +782 -0
- package/dist/chunk-5LYDB7DY.mjs +771 -0
- package/dist/chunk-7EJGNGXM.mjs +771 -0
- package/dist/chunk-A76JUWER.mjs +786 -0
- package/dist/chunk-DQSLTL7J.mjs +788 -0
- package/dist/chunk-EZT3NZGB.mjs +1057 -0
- package/dist/chunk-H5FB2USZ.mjs +762 -0
- package/dist/chunk-INJ4SBTV.mjs +754 -0
- package/dist/chunk-JAFZCZAP.mjs +776 -0
- package/dist/chunk-JWR3AHKO.mjs +788 -0
- package/dist/chunk-LUA5FXSZ.mjs +771 -0
- package/dist/chunk-QX2BQJEO.mjs +1058 -0
- package/dist/chunk-RMGDSNLE.mjs +770 -0
- package/dist/chunk-SNSDVGWW.mjs +783 -0
- package/dist/chunk-TCG2G32F.mjs +911 -0
- package/dist/chunk-TGBZP7SB.mjs +773 -0
- package/dist/chunk-XCWY2DQY.mjs +788 -0
- package/dist/chunk-XUUVS54V.mjs +776 -0
- package/dist/chunk-YJYDBFT3.mjs +780 -0
- package/dist/cli.js +273 -692
- package/dist/cli.mjs +35 -29
- package/dist/index.d.mts +41 -54
- package/dist/index.d.ts +41 -54
- package/dist/index.js +242 -660
- package/dist/index.mjs +3 -1
- package/package.json +2 -2
package/dist/cli.js
CHANGED
|
@@ -6,13 +6,6 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
|
6
6
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
7
7
|
var __getProtoOf = Object.getPrototypeOf;
|
|
8
8
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
9
|
-
var __esm = (fn, res) => function __init() {
|
|
10
|
-
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
|
|
11
|
-
};
|
|
12
|
-
var __export = (target, all) => {
|
|
13
|
-
for (var name in all)
|
|
14
|
-
__defProp(target, name, { get: all[name], enumerable: true });
|
|
15
|
-
};
|
|
16
9
|
var __copyProps = (to, from, except, desc) => {
|
|
17
10
|
if (from && typeof from === "object" || typeof from === "function") {
|
|
18
11
|
for (let key of __getOwnPropNames(from))
|
|
@@ -30,164 +23,17 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
30
23
|
mod
|
|
31
24
|
));
|
|
32
25
|
|
|
33
|
-
// src/extractors/python-extractor.ts
|
|
34
|
-
var python_extractor_exports = {};
|
|
35
|
-
__export(python_extractor_exports, {
|
|
36
|
-
calculatePythonSimilarity: () => calculatePythonSimilarity,
|
|
37
|
-
detectPythonAntiPatterns: () => detectPythonAntiPatterns,
|
|
38
|
-
extractPythonPatterns: () => extractPythonPatterns
|
|
39
|
-
});
|
|
40
|
-
async function extractPythonPatterns(files) {
|
|
41
|
-
const patterns = [];
|
|
42
|
-
const parser = (0, import_core.getParser)("dummy.py");
|
|
43
|
-
if (!parser) {
|
|
44
|
-
console.warn("Python parser not available");
|
|
45
|
-
return patterns;
|
|
46
|
-
}
|
|
47
|
-
const pythonFiles = files.filter((f) => f.toLowerCase().endsWith(".py"));
|
|
48
|
-
for (const file of pythonFiles) {
|
|
49
|
-
try {
|
|
50
|
-
const fs = await import("fs");
|
|
51
|
-
const code = await fs.promises.readFile(file, "utf-8");
|
|
52
|
-
const result = parser.parse(code, file);
|
|
53
|
-
for (const exp of result.exports) {
|
|
54
|
-
if (exp.type === "function") {
|
|
55
|
-
patterns.push({
|
|
56
|
-
file,
|
|
57
|
-
name: exp.name,
|
|
58
|
-
type: "function",
|
|
59
|
-
startLine: exp.loc?.start.line || 0,
|
|
60
|
-
endLine: exp.loc?.end.line || 0,
|
|
61
|
-
imports: exp.imports || [],
|
|
62
|
-
dependencies: exp.dependencies || [],
|
|
63
|
-
signature: generatePythonSignature(exp),
|
|
64
|
-
language: "python"
|
|
65
|
-
});
|
|
66
|
-
} else if (exp.type === "class") {
|
|
67
|
-
patterns.push({
|
|
68
|
-
file,
|
|
69
|
-
name: exp.name,
|
|
70
|
-
type: "class",
|
|
71
|
-
startLine: exp.loc?.start.line || 0,
|
|
72
|
-
endLine: exp.loc?.end.line || 0,
|
|
73
|
-
imports: exp.imports || [],
|
|
74
|
-
dependencies: exp.dependencies || [],
|
|
75
|
-
signature: `class ${exp.name}`,
|
|
76
|
-
language: "python"
|
|
77
|
-
});
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
} catch (error) {
|
|
81
|
-
console.warn(`Failed to extract patterns from ${file}:`, error);
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
return patterns;
|
|
85
|
-
}
|
|
86
|
-
function generatePythonSignature(exp) {
|
|
87
|
-
const params = exp.parameters?.join(", ") || "";
|
|
88
|
-
return `def ${exp.name}(${params})`;
|
|
89
|
-
}
|
|
90
|
-
function calculatePythonSimilarity(pattern1, pattern2) {
|
|
91
|
-
let similarity = 0;
|
|
92
|
-
let factors = 0;
|
|
93
|
-
const nameSimilarity = calculateNameSimilarity(pattern1.name, pattern2.name);
|
|
94
|
-
similarity += nameSimilarity * 0.3;
|
|
95
|
-
factors += 0.3;
|
|
96
|
-
const importSimilarity = calculateImportSimilarity(
|
|
97
|
-
pattern1.imports || [],
|
|
98
|
-
pattern2.imports || []
|
|
99
|
-
);
|
|
100
|
-
similarity += importSimilarity * 0.4;
|
|
101
|
-
factors += 0.4;
|
|
102
|
-
if (pattern1.type === pattern2.type) {
|
|
103
|
-
similarity += 0.1;
|
|
104
|
-
}
|
|
105
|
-
factors += 0.1;
|
|
106
|
-
const sigSimilarity = calculateSignatureSimilarity(
|
|
107
|
-
pattern1.signature,
|
|
108
|
-
pattern2.signature
|
|
109
|
-
);
|
|
110
|
-
similarity += sigSimilarity * 0.2;
|
|
111
|
-
factors += 0.2;
|
|
112
|
-
return factors > 0 ? similarity / factors : 0;
|
|
113
|
-
}
|
|
114
|
-
function calculateNameSimilarity(name1, name2) {
|
|
115
|
-
if (name1 === name2) return 1;
|
|
116
|
-
const clean1 = name1.replace(
|
|
117
|
-
/^(get|set|is|has|create|delete|update|fetch)_?/,
|
|
118
|
-
""
|
|
119
|
-
);
|
|
120
|
-
const clean2 = name2.replace(
|
|
121
|
-
/^(get|set|is|has|create|delete|update|fetch)_?/,
|
|
122
|
-
""
|
|
123
|
-
);
|
|
124
|
-
if (clean1 === clean2) return 0.9;
|
|
125
|
-
if (clean1.includes(clean2) || clean2.includes(clean1)) {
|
|
126
|
-
return 0.7;
|
|
127
|
-
}
|
|
128
|
-
const set1 = new Set(clean1.split("_"));
|
|
129
|
-
const set2 = new Set(clean2.split("_"));
|
|
130
|
-
const intersection = new Set([...set1].filter((x) => set2.has(x)));
|
|
131
|
-
const union = /* @__PURE__ */ new Set([...set1, ...set2]);
|
|
132
|
-
return intersection.size / union.size;
|
|
133
|
-
}
|
|
134
|
-
function calculateImportSimilarity(imports1, imports2) {
|
|
135
|
-
if (imports1.length === 0 && imports2.length === 0) return 1;
|
|
136
|
-
if (imports1.length === 0 || imports2.length === 0) return 0;
|
|
137
|
-
const set1 = new Set(imports1);
|
|
138
|
-
const set2 = new Set(imports2);
|
|
139
|
-
const intersection = new Set([...set1].filter((x) => set2.has(x)));
|
|
140
|
-
const union = /* @__PURE__ */ new Set([...set1, ...set2]);
|
|
141
|
-
return intersection.size / union.size;
|
|
142
|
-
}
|
|
143
|
-
function calculateSignatureSimilarity(sig1, sig2) {
|
|
144
|
-
if (sig1 === sig2) return 1;
|
|
145
|
-
const params1 = (sig1.match(/\([^)]*\)/)?.[0] || "").split(",").filter(Boolean).length;
|
|
146
|
-
const params2 = (sig2.match(/\([^)]*\)/)?.[0] || "").split(",").filter(Boolean).length;
|
|
147
|
-
if (params1 === params2) return 0.8;
|
|
148
|
-
if (Math.abs(params1 - params2) === 1) return 0.5;
|
|
149
|
-
return 0;
|
|
150
|
-
}
|
|
151
|
-
function detectPythonAntiPatterns(patterns) {
|
|
152
|
-
const antiPatterns = [];
|
|
153
|
-
const nameGroups = /* @__PURE__ */ new Map();
|
|
154
|
-
for (const pattern of patterns) {
|
|
155
|
-
const baseName = pattern.name.replace(
|
|
156
|
-
/^(get|set|create|delete|update)_/,
|
|
157
|
-
""
|
|
158
|
-
);
|
|
159
|
-
if (!nameGroups.has(baseName)) {
|
|
160
|
-
nameGroups.set(baseName, []);
|
|
161
|
-
}
|
|
162
|
-
nameGroups.get(baseName).push(pattern);
|
|
163
|
-
}
|
|
164
|
-
for (const [baseName, group] of nameGroups) {
|
|
165
|
-
if (group.length >= 3) {
|
|
166
|
-
antiPatterns.push(
|
|
167
|
-
`Found ${group.length} functions with similar names (${baseName}): Consider consolidating`
|
|
168
|
-
);
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
return antiPatterns;
|
|
172
|
-
}
|
|
173
|
-
var import_core;
|
|
174
|
-
var init_python_extractor = __esm({
|
|
175
|
-
"src/extractors/python-extractor.ts"() {
|
|
176
|
-
"use strict";
|
|
177
|
-
import_core = require("@aiready/core");
|
|
178
|
-
}
|
|
179
|
-
});
|
|
180
|
-
|
|
181
26
|
// src/cli.ts
|
|
182
27
|
var import_commander = require("commander");
|
|
183
28
|
|
|
184
29
|
// src/index.ts
|
|
185
|
-
var
|
|
30
|
+
var import_core5 = require("@aiready/core");
|
|
186
31
|
|
|
187
32
|
// src/detector.ts
|
|
188
33
|
var import_core2 = require("@aiready/core");
|
|
189
34
|
|
|
190
35
|
// src/context-rules.ts
|
|
36
|
+
var import_core = require("@aiready/core");
|
|
191
37
|
var CONTEXT_RULES = [
|
|
192
38
|
// Test Fixtures - Intentional duplication for test isolation
|
|
193
39
|
{
|
|
@@ -197,7 +43,7 @@ var CONTEXT_RULES = [
|
|
|
197
43
|
const hasTestFixtures = code.includes("beforeAll") || code.includes("afterAll") || code.includes("beforeEach") || code.includes("afterEach") || code.includes("setUp") || code.includes("tearDown");
|
|
198
44
|
return isTestFile && hasTestFixtures;
|
|
199
45
|
},
|
|
200
|
-
severity:
|
|
46
|
+
severity: import_core.Severity.Info,
|
|
201
47
|
reason: "Test fixture duplication is intentional for test isolation",
|
|
202
48
|
suggestion: "Consider if shared test setup would improve maintainability without coupling tests"
|
|
203
49
|
},
|
|
@@ -209,7 +55,7 @@ var CONTEXT_RULES = [
|
|
|
209
55
|
const hasTemplateContent = (code.includes("return") || code.includes("export")) && (code.includes("html") || code.includes("subject") || code.includes("body"));
|
|
210
56
|
return isTemplate && hasTemplateContent;
|
|
211
57
|
},
|
|
212
|
-
severity:
|
|
58
|
+
severity: import_core.Severity.Minor,
|
|
213
59
|
reason: "Template duplication may be intentional for maintainability and branding consistency",
|
|
214
60
|
suggestion: "Extract shared structure only if templates become hard to maintain"
|
|
215
61
|
},
|
|
@@ -221,7 +67,7 @@ var CONTEXT_RULES = [
|
|
|
221
67
|
const hasPageObjectPatterns = code.includes("page.") || code.includes("await page") || code.includes("locator") || code.includes("getBy") || code.includes("selector") || code.includes("click(") || code.includes("fill(");
|
|
222
68
|
return isE2ETest && hasPageObjectPatterns;
|
|
223
69
|
},
|
|
224
|
-
severity:
|
|
70
|
+
severity: import_core.Severity.Minor,
|
|
225
71
|
reason: "E2E test duplication ensures test independence and reduces coupling",
|
|
226
72
|
suggestion: "Consider page object pattern only if duplication causes maintenance issues"
|
|
227
73
|
},
|
|
@@ -231,7 +77,7 @@ var CONTEXT_RULES = [
|
|
|
231
77
|
detect: (file) => {
|
|
232
78
|
return file.endsWith(".config.ts") || file.endsWith(".config.js") || file.includes("jest.config") || file.includes("vite.config") || file.includes("webpack.config") || file.includes("rollup.config") || file.includes("tsconfig");
|
|
233
79
|
},
|
|
234
|
-
severity:
|
|
80
|
+
severity: import_core.Severity.Minor,
|
|
235
81
|
reason: "Configuration files often have similar structure by design",
|
|
236
82
|
suggestion: "Consider shared config base only if configurations become hard to maintain"
|
|
237
83
|
},
|
|
@@ -243,7 +89,7 @@ var CONTEXT_RULES = [
|
|
|
243
89
|
const hasTypeDefinitions = code.includes("interface ") || code.includes("type ") || code.includes("enum ");
|
|
244
90
|
return isTypeFile && hasTypeDefinitions;
|
|
245
91
|
},
|
|
246
|
-
severity:
|
|
92
|
+
severity: import_core.Severity.Info,
|
|
247
93
|
reason: "Type duplication may be intentional for module independence and type safety",
|
|
248
94
|
suggestion: "Extract to shared types package only if causing maintenance burden"
|
|
249
95
|
},
|
|
@@ -253,7 +99,7 @@ var CONTEXT_RULES = [
|
|
|
253
99
|
detect: (file) => {
|
|
254
100
|
return file.includes("/migrations/") || file.includes("/migrate/") || file.includes(".migration.");
|
|
255
101
|
},
|
|
256
|
-
severity:
|
|
102
|
+
severity: import_core.Severity.Info,
|
|
257
103
|
reason: "Migration scripts are typically one-off and intentionally similar",
|
|
258
104
|
suggestion: "Duplication is acceptable for migration scripts"
|
|
259
105
|
},
|
|
@@ -265,7 +111,7 @@ var CONTEXT_RULES = [
|
|
|
265
111
|
const hasMockData = code.includes("mock") || code.includes("Mock") || code.includes("fixture") || code.includes("stub") || code.includes("export const");
|
|
266
112
|
return isMockFile && hasMockData;
|
|
267
113
|
},
|
|
268
|
-
severity:
|
|
114
|
+
severity: import_core.Severity.Info,
|
|
269
115
|
reason: "Mock data duplication is expected for comprehensive test coverage",
|
|
270
116
|
suggestion: "Consider shared factories only for complex mock generation"
|
|
271
117
|
}
|
|
@@ -283,38 +129,43 @@ function calculateSeverity(file1, file2, code, similarity, linesOfCode) {
|
|
|
283
129
|
}
|
|
284
130
|
if (similarity >= 0.95 && linesOfCode >= 30) {
|
|
285
131
|
return {
|
|
286
|
-
severity:
|
|
132
|
+
severity: import_core.Severity.Critical,
|
|
287
133
|
reason: "Large nearly-identical code blocks waste tokens and create maintenance burden",
|
|
288
134
|
suggestion: "Extract to shared utility module immediately"
|
|
289
135
|
};
|
|
290
136
|
} else if (similarity >= 0.95 && linesOfCode >= 15) {
|
|
291
137
|
return {
|
|
292
|
-
severity:
|
|
138
|
+
severity: import_core.Severity.Major,
|
|
293
139
|
reason: "Nearly identical code should be consolidated",
|
|
294
140
|
suggestion: "Move to shared utility file"
|
|
295
141
|
};
|
|
296
142
|
} else if (similarity >= 0.85) {
|
|
297
143
|
return {
|
|
298
|
-
severity:
|
|
144
|
+
severity: import_core.Severity.Major,
|
|
299
145
|
reason: "High similarity indicates significant duplication",
|
|
300
146
|
suggestion: "Extract common logic to shared function"
|
|
301
147
|
};
|
|
302
148
|
} else if (similarity >= 0.7) {
|
|
303
149
|
return {
|
|
304
|
-
severity:
|
|
150
|
+
severity: import_core.Severity.Minor,
|
|
305
151
|
reason: "Moderate similarity detected",
|
|
306
152
|
suggestion: "Consider extracting shared patterns if code evolves together"
|
|
307
153
|
};
|
|
308
154
|
} else {
|
|
309
155
|
return {
|
|
310
|
-
severity:
|
|
156
|
+
severity: import_core.Severity.Minor,
|
|
311
157
|
reason: "Minor similarity detected",
|
|
312
158
|
suggestion: "Monitor but refactoring may not be worthwhile"
|
|
313
159
|
};
|
|
314
160
|
}
|
|
315
161
|
}
|
|
316
162
|
function filterBySeverity(duplicates, minSeverity) {
|
|
317
|
-
const severityOrder = [
|
|
163
|
+
const severityOrder = [
|
|
164
|
+
import_core.Severity.Info,
|
|
165
|
+
import_core.Severity.Minor,
|
|
166
|
+
import_core.Severity.Major,
|
|
167
|
+
import_core.Severity.Critical
|
|
168
|
+
];
|
|
318
169
|
const minIndex = severityOrder.indexOf(minSeverity);
|
|
319
170
|
if (minIndex === -1) return duplicates;
|
|
320
171
|
return duplicates.filter((dup) => {
|
|
@@ -323,261 +174,129 @@ function filterBySeverity(duplicates, minSeverity) {
|
|
|
323
174
|
});
|
|
324
175
|
}
|
|
325
176
|
|
|
326
|
-
// src/
|
|
327
|
-
function
|
|
328
|
-
|
|
329
|
-
if (lower.includes("request") && lower.includes("response") || lower.includes("router.") || lower.includes("app.get") || lower.includes("app.post") || lower.includes("express") || lower.includes("ctx.body")) {
|
|
330
|
-
return "api-handler";
|
|
331
|
-
}
|
|
332
|
-
if (lower.includes("validate") || lower.includes("schema") || lower.includes("zod") || lower.includes("yup") || lower.includes("if") && lower.includes("throw")) {
|
|
333
|
-
return "validator";
|
|
334
|
-
}
|
|
335
|
-
if (lower.includes("return (") || lower.includes("jsx") || lower.includes("component") || lower.includes("props")) {
|
|
336
|
-
return "component";
|
|
337
|
-
}
|
|
338
|
-
if (lower.includes("class ") || lower.includes("this.")) {
|
|
339
|
-
return "class-method";
|
|
340
|
-
}
|
|
341
|
-
if (lower.includes("return ") && !lower.includes("this") && !lower.includes("new ")) {
|
|
342
|
-
return "utility";
|
|
343
|
-
}
|
|
344
|
-
if (lower.includes("function") || lower.includes("=>")) {
|
|
345
|
-
return "function";
|
|
346
|
-
}
|
|
347
|
-
return "unknown";
|
|
177
|
+
// src/detector.ts
|
|
178
|
+
/**
 * Canonicalise a code snippet so that snippets differing only in comments,
 * quote style, whitespace or letter case normalise to the same string.
 *
 * Steps, in order: remove `//` and block comments, rewrite every quote
 * character (', ", `) to ", collapse whitespace runs to one space, trim,
 * and lower-case the result.
 *
 * Comments are removed in a single left-to-right pass that also recognises
 * string literals and keeps them untouched. Stripping `//.*` blindly would
 * truncate strings such as "http://host/path" and would break a block
 * comment that itself contains `//` (its closing marker would be eaten, so
 * the block-comment pass could then swallow real code).
 *
 * @param {string} code - Raw source text of one block.
 * @returns {string} The normalised text.
 */
function normalizeCode(code) {
  // Group 1 captures a string literal (kept as-is); the other alternatives
  // are comments (dropped). Quoted strings stop at a newline so a stray
  // apostrophe cannot pair with a quote on a later line.
  const stringOrComment = /("(?:\\[\s\S]|[^"\\\n])*"|'(?:\\[\s\S]|[^'\\\n])*'|`(?:\\[\s\S]|[^`\\])*`)|\/\/.*|\/\*[\s\S]*?\*\//g;
  return code
    .replace(stringOrComment, (whole, literal) => (literal === undefined ? "" : literal))
    .replace(/['"`]/g, '"')
    .replace(/\s+/g, " ")
    .trim()
    .toLowerCase();
}
|
|
349
|
-
function
|
|
350
|
-
const lines = content.split("\n");
|
|
181
|
+
/**
 * Scan a file's text for top-level-looking declarations and return one
 * record per declaration found.
 *
 * A declaration is anything matched by `blockRegex`: an optionally
 * `export`ed / `async` `function`, `class`, `const`, `interface` or `type`
 * followed by an identifier, or a call of the form `app.<verb>(`.
 * The block's extent is found by counting `{` / `}` from the declaration
 * until the braces balance; if no balanced pair is found the block is the
 * declaration's own line.
 *
 * NOTE(review): brace counting does not skip braces inside strings or
 * comments, and a brace-less declaration (e.g. `const x = 1;`) extends to
 * the next balanced brace pair later in the file. Left as-is here.
 *
 * @param {string} file - Path recorded on every returned block.
 * @param {string} content - Full text of the file.
 * @returns {Array<{file: string, startLine: number, endLine: number,
 *   code: string, tokens: *, patternType: string}>} Blocks with 1-based,
 *   inclusive line numbers; `tokens` is whatever `estimateTokens` returns.
 */
function extractBlocks(file, content) {
  const blocks = [];
  const lines = content.split("\n");
  const blockRegex = /^\s*(?:export\s+)?(?:async\s+)?(function|class|const|interface|type)\s+([a-zA-Z0-9_]+)|^\s*(app\.(?:get|post|put|delete|patch|use))\(/gm;
  // 1-based line number of the character at `offset`.
  const lineAt = (offset) => content.substring(0, offset).split("\n").length;
  let match;
  while ((match = blockRegex.exec(content)) !== null) {
    // `^\s*` may consume blank lines that precede the declaration, so
    // match.index can point several lines too early. Skip the leading
    // whitespace to land on the declaration itself.
    const leadingWhitespace = match[0].length - match[0].trimStart().length;
    const declIndex = match.index + leadingWhitespace;
    const startLine = lineAt(declIndex);
    let type;
    let name;
    if (match[1]) {
      type = match[1];
      name = match[2];
    } else {
      // Second alternative of the regex: an `app.<verb>(` route call.
      type = "handler";
      name = match[3];
    }
    let endLine = -1;
    let openBraces = 0;
    let foundStart = false;
    for (let i = declIndex; i < content.length; i++) {
      if (content[i] === "{") {
        openBraces++;
        foundStart = true;
      } else if (content[i] === "}") {
        openBraces--;
      }
      if (foundStart && openBraces === 0) {
        endLine = lineAt(i + 1);
        break;
      }
    }
    if (endLine === -1) {
      // No balanced brace pair: treat the declaration as a one-liner, or
      // as running to the end of the file when it sits on the last line.
      endLine = content.indexOf("\n", declIndex) !== -1 ? startLine : lines.length;
    }
    endLine = Math.max(startLine, endLine);
    const blockCode = lines.slice(startLine - 1, endLine).join("\n");
    const tokens = (0, import_core2.estimateTokens)(blockCode);
    blocks.push({
      file,
      startLine,
      endLine,
      code: blockCode,
      tokens,
      patternType: inferPatternType(type, name)
    });
  }
  return blocks;
}
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
return code.replace(/\/\/.*$/gm, "").replace(/\/\*[\s\S]*?\*\//g, "").replace(/"[^"]*"/g, '"STR"').replace(/'[^']*'/g, "'STR'").replace(/`[^`]*`/g, "`STR`").replace(/\b\d+\b/g, "NUM").replace(/\s+/g, " ").trim();
|
|
398
|
-
}
|
|
399
|
-
var stopwords = /* @__PURE__ */ new Set([
|
|
400
|
-
"return",
|
|
401
|
-
"const",
|
|
402
|
-
"let",
|
|
403
|
-
"var",
|
|
404
|
-
"function",
|
|
405
|
-
"class",
|
|
406
|
-
"new",
|
|
407
|
-
"if",
|
|
408
|
-
"else",
|
|
409
|
-
"for",
|
|
410
|
-
"while",
|
|
411
|
-
"async",
|
|
412
|
-
"await",
|
|
413
|
-
"try",
|
|
414
|
-
"catch",
|
|
415
|
-
"switch",
|
|
416
|
-
"case",
|
|
417
|
-
"default",
|
|
418
|
-
"import",
|
|
419
|
-
"export",
|
|
420
|
-
"from",
|
|
421
|
-
"true",
|
|
422
|
-
"false",
|
|
423
|
-
"null",
|
|
424
|
-
"undefined",
|
|
425
|
-
"this"
|
|
426
|
-
]);
|
|
427
|
-
function tokenize(norm) {
|
|
428
|
-
const punctuation = "(){}[];.,";
|
|
429
|
-
const cleaned = norm.split("").map((ch) => punctuation.includes(ch) ? " " : ch).join("");
|
|
430
|
-
return cleaned.split(/\s+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
|
|
431
|
-
}
|
|
432
|
-
|
|
433
|
-
// src/core/similarity.ts
|
|
434
|
-
function jaccardSimilarity(tokens1, tokens2) {
|
|
435
|
-
const set1 = new Set(tokens1);
|
|
436
|
-
const set2 = new Set(tokens2);
|
|
437
|
-
if (set1.size === 0 && set2.size === 0) return 0;
|
|
438
|
-
let intersection = 0;
|
|
439
|
-
for (const token of set1) {
|
|
440
|
-
if (set2.has(token)) intersection++;
|
|
235
|
+
/**
 * Classify a declaration into a coarse pattern category from its keyword
 * and identifier alone.
 *
 * Checks run in priority order; the first one that matches wins:
 * api-handler, validator, utility, class-method, component, function,
 * and finally "unknown".
 *
 * @param {string} keyword - Declaration keyword ("function", "class",
 *   "const", ...) or the synthetic value "handler".
 * @param {string} name - Declared identifier (or e.g. "app.get").
 * @returns {string} One of "api-handler", "validator", "utility",
 *   "class-method", "component", "function", "unknown".
 */
function inferPatternType(keyword, name) {
  const rawName = typeof name === "string" ? name : "";
  const n = rawName.toLowerCase();
  if (keyword === "handler" || n.includes("handler") || n.includes("controller") || n.startsWith("app.")) {
    return "api-handler";
  }
  if (n.includes("validate") || n.includes("schema")) return "validator";
  if (n.includes("util") || n.includes("helper")) return "utility";
  if (keyword === "class") return "class-method";
  // The capital-letter test must look at the original identifier: the
  // lower-cased copy can never start with A-Z, which made this branch dead.
  if (/^[A-Z]/.test(rawName)) return "component";
  if (keyword === "function") return "function";
  return "unknown";
}
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
}
|
|
462
|
-
arr.push(i);
|
|
463
|
-
}
|
|
464
|
-
}
|
|
465
|
-
}
|
|
466
|
-
findCandidates(blockIdx, minSharedTokens, maxCandidates) {
|
|
467
|
-
const block1 = this.allBlocks[blockIdx];
|
|
468
|
-
const block1Tokens = this.blockTokens[blockIdx];
|
|
469
|
-
const counts = /* @__PURE__ */ new Map();
|
|
470
|
-
const rareTokens = block1Tokens.filter((tok) => {
|
|
471
|
-
const freq = this.invertedIndex.get(tok)?.length || 0;
|
|
472
|
-
return freq < this.allBlocks.length * 0.1;
|
|
473
|
-
});
|
|
474
|
-
for (const tok of rareTokens) {
|
|
475
|
-
const ids = this.invertedIndex.get(tok);
|
|
476
|
-
if (!ids) continue;
|
|
477
|
-
for (const j of ids) {
|
|
478
|
-
if (j <= blockIdx) continue;
|
|
479
|
-
if (this.allBlocks[j].file === block1.file) continue;
|
|
480
|
-
counts.set(j, (counts.get(j) || 0) + 1);
|
|
481
|
-
}
|
|
482
|
-
}
|
|
483
|
-
return Array.from(counts.entries()).filter(([j, shared]) => {
|
|
484
|
-
const block2Size = this.blockTokens[j].length;
|
|
485
|
-
const minSize = Math.min(block1Tokens.length, block2Size);
|
|
486
|
-
return shared >= minSharedTokens && shared / minSize >= 0.3;
|
|
487
|
-
}).sort((a, b) => b[1] - a[1]).slice(0, maxCandidates).map(([j, shared]) => ({ j, shared }));
|
|
488
|
-
}
|
|
489
|
-
};
|
|
490
|
-
|
|
491
|
-
// src/detector.ts
|
|
492
|
-
async function detectDuplicatePatterns(files, options) {
|
|
493
|
-
const {
|
|
494
|
-
minSimilarity,
|
|
495
|
-
minLines,
|
|
496
|
-
batchSize = 100,
|
|
497
|
-
approx = true,
|
|
498
|
-
minSharedTokens = 8,
|
|
499
|
-
maxCandidatesPerBlock = 100,
|
|
500
|
-
streamResults = false
|
|
501
|
-
} = options;
|
|
502
|
-
const duplicates = [];
|
|
503
|
-
const maxComparisons = approx ? Infinity : 5e5;
|
|
504
|
-
const allBlocks = files.flatMap(
|
|
505
|
-
(file) => extractCodeBlocks(file.content, minLines).filter(
|
|
506
|
-
(block) => block && block.content && block.content.trim().length > 0
|
|
507
|
-
).map((block) => ({
|
|
508
|
-
...block,
|
|
509
|
-
file: file.file,
|
|
510
|
-
normalized: normalizeCode(block.content),
|
|
511
|
-
tokenCost: block.content ? (0, import_core2.estimateTokens)(block.content) : 0
|
|
512
|
-
}))
|
|
513
|
-
);
|
|
514
|
-
const pythonFiles = files.filter((f) => f.file.endsWith(".py"));
|
|
515
|
-
if (pythonFiles.length > 0) {
|
|
516
|
-
const { extractPythonPatterns: extractPythonPatterns2 } = await Promise.resolve().then(() => (init_python_extractor(), python_extractor_exports));
|
|
517
|
-
const pythonPatterns = await extractPythonPatterns2(
|
|
518
|
-
pythonFiles.map((f) => f.file)
|
|
519
|
-
);
|
|
247
|
+
/**
 * Jaccard similarity of the alphanumeric token sets of two strings.
 *
 * Each string is split on runs of non-alphanumeric characters; the score is
 * |A ∩ B| / |A ∪ B| over the resulting sets of distinct tokens.
 *
 * @param {string} a - First text.
 * @param {string} b - Second text.
 * @returns {number} 1 when the strings are identical, 0 when either has no
 *   tokens, otherwise a value in [0, 1].
 */
function calculateSimilarity(a, b) {
  if (a === b) return 1;
  const toTokenSet = (text) => new Set(text.split(/[^a-zA-Z0-9]+/).filter((t) => t.length > 0));
  const setA = toTokenSet(a);
  const setB = toTokenSet(b);
  if (setA.size === 0 || setB.size === 0) return 0;
  // Count the overlap by walking the smaller set, then derive the union
  // size by inclusion-exclusion instead of materialising both sets.
  const [smaller, larger] = setA.size <= setB.size ? [setA, setB] : [setB, setA];
  let shared = 0;
  for (const token of smaller) {
    if (larger.has(token)) shared++;
  }
  return shared / (setA.size + setB.size - shared);
}
|
|
258
|
+
async function detectDuplicatePatterns(fileContents, options) {
|
|
259
|
+
const { minSimilarity, minLines, streamResults } = options;
|
|
260
|
+
const allBlocks = [];
|
|
261
|
+
for (const { file, content } of fileContents) {
|
|
262
|
+
const blocks = extractBlocks(file, content);
|
|
520
263
|
allBlocks.push(
|
|
521
|
-
...
|
|
522
|
-
content: p.code,
|
|
523
|
-
startLine: p.startLine,
|
|
524
|
-
endLine: p.endLine,
|
|
525
|
-
file: p.file,
|
|
526
|
-
normalized: normalizeCode(p.code),
|
|
527
|
-
patternType: p.type,
|
|
528
|
-
tokenCost: p.code ? (0, import_core2.estimateTokens)(p.code) : 0,
|
|
529
|
-
linesOfCode: p.endLine - p.startLine + 1
|
|
530
|
-
}))
|
|
264
|
+
...blocks.filter((b) => b.endLine - b.startLine + 1 >= minLines)
|
|
531
265
|
);
|
|
532
266
|
}
|
|
533
|
-
const
|
|
534
|
-
const engine = approx ? new ApproxEngine(allBlocks, blockTokens) : null;
|
|
535
|
-
let comparisonsProcessed = 0;
|
|
536
|
-
const startTime = Date.now();
|
|
267
|
+
const duplicates = [];
|
|
537
268
|
for (let i = 0; i < allBlocks.length; i++) {
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
` Processed ${i}/${allBlocks.length} blocks (${elapsed.toFixed(1)}s, ${duplicates.length} duplicates)`
|
|
546
|
-
);
|
|
547
|
-
}
|
|
548
|
-
await new Promise((r) => setImmediate((resolve) => r(resolve)));
|
|
549
|
-
}
|
|
550
|
-
const block1 = allBlocks[i];
|
|
551
|
-
const candidates = engine ? engine.findCandidates(i, minSharedTokens, maxCandidatesPerBlock) : allBlocks.slice(i + 1).map((_, idx) => ({ j: i + 1 + idx, shared: 0 }));
|
|
552
|
-
for (const { j } of candidates) {
|
|
553
|
-
if (!approx && comparisonsProcessed >= maxComparisons) break;
|
|
554
|
-
comparisonsProcessed++;
|
|
555
|
-
const block2 = allBlocks[j];
|
|
556
|
-
if (block1.file === block2.file) continue;
|
|
557
|
-
const sim = jaccardSimilarity(blockTokens[i], blockTokens[j]);
|
|
269
|
+
for (let j = i + 1; j < allBlocks.length; j++) {
|
|
270
|
+
const b1 = allBlocks[i];
|
|
271
|
+
const b2 = allBlocks[j];
|
|
272
|
+
if (b1.file === b2.file) continue;
|
|
273
|
+
const norm1 = normalizeCode(b1.code);
|
|
274
|
+
const norm2 = normalizeCode(b2.code);
|
|
275
|
+
const sim = calculateSimilarity(norm1, norm2);
|
|
558
276
|
if (sim >= minSimilarity) {
|
|
559
|
-
const severity = calculateSeverity(
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
277
|
+
const { severity, reason, suggestion, matchedRule } = calculateSeverity(
|
|
278
|
+
b1.file,
|
|
279
|
+
b2.file,
|
|
280
|
+
b1.code,
|
|
563
281
|
sim,
|
|
564
|
-
|
|
282
|
+
b1.endLine - b1.startLine + 1
|
|
565
283
|
);
|
|
566
284
|
const dup = {
|
|
567
|
-
file1:
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
endLine2:
|
|
285
|
+
file1: b1.file,
|
|
286
|
+
line1: b1.startLine,
|
|
287
|
+
endLine1: b1.endLine,
|
|
288
|
+
file2: b2.file,
|
|
289
|
+
line2: b2.startLine,
|
|
290
|
+
endLine2: b2.endLine,
|
|
291
|
+
code1: b1.code,
|
|
292
|
+
code2: b2.code,
|
|
573
293
|
similarity: sim,
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
suggestion: severity.suggestion
|
|
294
|
+
patternType: b1.patternType,
|
|
295
|
+
tokenCost: b1.tokens + b2.tokens,
|
|
296
|
+
severity,
|
|
297
|
+
reason,
|
|
298
|
+
suggestion,
|
|
299
|
+
matchedRule
|
|
581
300
|
};
|
|
582
301
|
duplicates.push(dup);
|
|
583
302
|
if (streamResults)
|
|
@@ -587,281 +306,138 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
587
306
|
}
|
|
588
307
|
}
|
|
589
308
|
}
|
|
590
|
-
return duplicates;
|
|
309
|
+
return duplicates.sort((a, b) => b.similarity - a.similarity);
|
|
591
310
|
}
|
|
592
311
|
|
|
593
312
|
// src/grouping.ts
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
313
|
+
var import_core3 = require("@aiready/core");
|
|
314
|
+
var import_path = __toESM(require("path"));
|
|
315
|
+
/**
 * Map a severity value to a numeric rank for comparison.
 *
 * Accepts either a member of the `Severity` object exported by the core
 * package or the matching lower-case string literal.
 *
 * @param {*} s - Severity value to rank.
 * @returns {number} 4 for critical, 3 for major, 2 for minor, 1 for info,
 *   and 0 for anything else.
 */
function getSeverityLevel(s) {
  const { Severity } = import_core3;
  // Highest rank first, mirroring the order in which values are tested.
  const ranking = [
    [4, Severity.Critical, "critical"],
    [3, Severity.Major, "major"],
    [2, Severity.Minor, "minor"],
    [1, Severity.Info, "info"]
  ];
  for (const [level, enumValue, literal] of ranking) {
    if (s === enumValue || s === literal) {
      return level;
    }
  }
  return 0;
}
|
|
600
322
|
/**
 * Aggregate individual duplicate findings into one summary per pair of
 * files.
 *
 * The pair is order-independent: the two paths are sorted and joined with
 * "::" to form `filePair`. Line ranges are stored in that same sorted
 * orientation, so `lineRanges[i].file1` always refers to the first path in
 * `filePair` even when the incoming duplicate listed the files the other
 * way round.
 *
 * @param {Array<Object>} duplicates - Findings carrying `file1`, `file2`,
 *   `line1`, `endLine1`, `line2`, `endLine2`, `similarity`, `tokenCost`,
 *   `patternType` and `severity`.
 * @returns {Array<Object>} One entry per file pair with `filePair`,
 *   `severity` (highest seen, ranked via `getSeverityLevel`), `occurrences`,
 *   `totalTokenCost`, `averageSimilarity`, `patternTypes` (a Set) and
 *   `lineRanges`.
 */
function groupDuplicatesByFilePair(duplicates) {
  const groups = new Map();
  for (const dup of duplicates) {
    const files = [dup.file1, dup.file2].sort();
    const key = files.join("::");
    // True when sorting reversed the pair relative to this duplicate.
    const swapped = files[0] !== dup.file1;
    if (!groups.has(key)) {
      groups.set(key, {
        filePair: key,
        severity: dup.severity,
        occurrences: 0,
        totalTokenCost: 0,
        // Holds the running sum while grouping; divided by the count below.
        averageSimilarity: 0,
        patternTypes: new Set(),
        lineRanges: []
      });
    }
    const group = groups.get(key);
    group.occurrences++;
    group.totalTokenCost += dup.tokenCost;
    group.averageSimilarity += dup.similarity;
    group.patternTypes.add(dup.patternType);
    const rangeOfFile1 = { start: dup.line1, end: dup.endLine1 };
    const rangeOfFile2 = { start: dup.line2, end: dup.endLine2 };
    // Keep ranges aligned with the sorted order used in `filePair`.
    group.lineRanges.push(
      swapped
        ? { file1: rangeOfFile2, file2: rangeOfFile1 }
        : { file1: rangeOfFile1, file2: rangeOfFile2 }
    );
    const currentSev = dup.severity;
    if (getSeverityLevel(currentSev) > getSeverityLevel(group.severity)) {
      group.severity = currentSev;
    }
  }
  return Array.from(groups.values()).map((g) => ({
    ...g,
    averageSimilarity: g.averageSimilarity / g.occurrences
  }));
}
|
|
679
357
|
/**
 * Groups files that are linked by duplicate findings into refactor clusters.
 *
 * Every duplicate is treated as an undirected edge between `file1` and
 * `file2`. Each connected component of that graph that contains at least two
 * files becomes one cluster.
 *
 * @param {Array<{file1: string, file2: string, tokenCost: number, similarity: number}>} duplicates
 *   Duplicate findings to cluster.
 * @returns {Array<object>} One entry per cluster with `id`, `name`, `files`,
 *   `severity`, `duplicateCount`, `totalTokenCost`, `averageSimilarity`,
 *   `reason` and `suggestion`.
 */
function createRefactorClusters(duplicates) {
  // Undirected adjacency list: file -> set of files it shares a duplicate with.
  const adjacency = new Map();
  for (const dup of duplicates) {
    if (!adjacency.has(dup.file1)) adjacency.set(dup.file1, new Set());
    if (!adjacency.has(dup.file2)) adjacency.set(dup.file2, new Set());
    adjacency.get(dup.file1).add(dup.file2);
    adjacency.get(dup.file2).add(dup.file1);
  }

  // Breadth-first walk per unvisited file. The component array doubles as the
  // queue (read by index), so no O(n) `shift()` is needed and the resulting
  // file order is the BFS visit order.
  const visited = new Set();
  const components = [];
  for (const start of adjacency.keys()) {
    if (visited.has(start)) continue;
    visited.add(start);
    const component = [start];
    for (let i = 0; i < component.length; i++) {
      for (const neighbor of adjacency.get(component[i]) || []) {
        if (!visited.has(neighbor)) {
          visited.add(neighbor);
          component.push(neighbor);
        }
      }
    }
    components.push(component);
  }

  const clusters = [];
  for (const component of components) {
    // A component of one file can only come from a self-referencing duplicate.
    if (component.length < 2) continue;

    // Set membership keeps the filter linear in the number of duplicates.
    const members = new Set(component);
    const componentDups = duplicates.filter(
      (d) => members.has(d.file1) && members.has(d.file2)
    );
    const totalTokenCost = componentDups.reduce(
      (sum, d) => sum + d.tokenCost,
      0
    );
    const avgSimilarity =
      componentDups.reduce((sum, d) => sum + d.similarity, 0) /
      Math.max(1, componentDups.length);
    const name = determineClusterName(component);
    const { severity, reason, suggestion } = calculateSeverity(
      component[0],
      component[1],
      "",
      // Code not available here
      avgSimilarity,
      30
      // Assume substantial if clustered
    );
    clusters.push({
      id: `cluster-${clusters.length}`,
      name,
      files: component,
      severity,
      duplicateCount: componentDups.length,
      totalTokenCost,
      averageSimilarity: avgSimilarity,
      reason,
      suggestion
    });
  }
  return clusters;
}
|
|
744
|
-
function
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
755
|
-
if (name.includes("button") || name.includes("btn")) return "button";
|
|
756
|
-
if (name.includes("card")) return "card";
|
|
757
|
-
if (name.includes("modal") || name.includes("dialog")) return "modal";
|
|
758
|
-
if (name.includes("form")) return "form";
|
|
759
|
-
if (name.includes("input") || name.includes("field")) return "input";
|
|
760
|
-
if (name.includes("table") || name.includes("grid")) return "table";
|
|
761
|
-
if (name.includes("nav") || name.includes("menu")) return "navigation";
|
|
762
|
-
if (name.includes("header") || name.includes("footer")) return "layout";
|
|
763
|
-
return "misc";
|
|
764
|
-
}
|
|
765
|
-
function getUniqueFiles(duplicates) {
|
|
766
|
-
const files = /* @__PURE__ */ new Set();
|
|
767
|
-
for (const dup of duplicates) {
|
|
768
|
-
files.add(dup.file1);
|
|
769
|
-
files.add(dup.file2);
|
|
419
|
+
/**
 * Picks a human-readable label for a cluster of related files.
 *
 * Known substrings found in any file path are checked first, in this order:
 * "blog", "buttons", "cards", "login.test". If none match, the label is built
 * from the name of the directory that contains the first file.
 *
 * @param {string[]} files - Paths of the files in the cluster.
 * @returns {string} Display name for the cluster.
 */
function determineClusterName(files) {
  if (files.length === 0) return "Unknown Cluster";
  if (files.some((f) => f.includes("blog"))) return "Blog SEO Boilerplate";
  if (files.some((f) => f.includes("buttons")))
    return "Button Component Variants";
  if (files.some((f) => f.includes("cards"))) return "Card Component Variants";
  if (files.some((f) => f.includes("login.test"))) return "E2E Test Patterns";
  const first = files[0];
  // basename(dirname(...)) rather than splitting on path.sep: a split only
  // works when the path uses the platform separator, so a forward-slash path
  // on Windows would come back as one segment ("src/utils").
  const dirName = import_path.default.basename(
    import_path.default.dirname(first)
  );
  // "." / ".." / "" mean the file has no meaningful parent directory name.
  if (dirName && dirName !== "." && dirName !== "..") {
    return `${dirName.charAt(0).toUpperCase() + dirName.slice(1)} Domain Group`;
  }
  return "Shared Pattern Group";
}
|
|
773
|
-
function
|
|
774
|
-
const order = {
|
|
775
|
-
critical: 4,
|
|
776
|
-
major: 3,
|
|
777
|
-
minor: 2,
|
|
778
|
-
info: 1
|
|
779
|
-
};
|
|
780
|
-
let highest = "info";
|
|
781
|
-
let highestValue = 0;
|
|
782
|
-
for (const severity of severities) {
|
|
783
|
-
if (order[severity] > highestValue) {
|
|
784
|
-
highestValue = order[severity];
|
|
785
|
-
highest = severity;
|
|
786
|
-
}
|
|
787
|
-
}
|
|
788
|
-
return highest;
|
|
789
|
-
}
|
|
790
|
-
function getMostCommonPatternType(duplicates) {
|
|
791
|
-
const counts = /* @__PURE__ */ new Map();
|
|
792
|
-
for (const dup of duplicates) {
|
|
793
|
-
counts.set(dup.patternType, (counts.get(dup.patternType) || 0) + 1);
|
|
794
|
-
}
|
|
795
|
-
let mostCommon = "unknown";
|
|
796
|
-
let maxCount = 0;
|
|
797
|
-
for (const [type, count] of counts.entries()) {
|
|
798
|
-
if (count > maxCount) {
|
|
799
|
-
maxCount = count;
|
|
800
|
-
mostCommon = type;
|
|
801
|
-
}
|
|
802
|
-
}
|
|
803
|
-
return mostCommon;
|
|
804
|
-
}
|
|
805
|
-
function getClusterInfo(clusterId, patternType, fileCount) {
|
|
806
|
-
const templates = {
|
|
807
|
-
"blog-seo-boilerplate": {
|
|
808
|
-
name: `Blog SEO Boilerplate (${fileCount} files)`,
|
|
809
|
-
suggestion: "Create BlogPageLayout component with SEO schema generator, breadcrumb component, and metadata helpers",
|
|
810
|
-
reason: "SEO boilerplate duplication increases maintenance burden and schema consistency risk"
|
|
811
|
-
},
|
|
812
|
-
"e2e-test-patterns": {
|
|
813
|
-
name: `E2E Test Patterns (${fileCount} files)`,
|
|
814
|
-
suggestion: "Extract page object helpers and common test utilities (waitFor, fillForm, etc.)",
|
|
815
|
-
reason: "Test helper extraction improves maintainability while preserving test independence"
|
|
816
|
-
},
|
|
817
|
-
"api-handlers": {
|
|
818
|
-
name: `API Handler Patterns (${fileCount} files)`,
|
|
819
|
-
suggestion: "Extract common middleware, error handling, and response formatting",
|
|
820
|
-
reason: "API handler duplication leads to inconsistent error handling and response formats"
|
|
821
|
-
},
|
|
822
|
-
validators: {
|
|
823
|
-
name: `Validator Patterns (${fileCount} files)`,
|
|
824
|
-
suggestion: "Consolidate into shared schema validators (Zod/Yup) with reusable rules",
|
|
825
|
-
reason: "Validator duplication causes inconsistent validation and harder maintenance"
|
|
826
|
-
},
|
|
827
|
-
"infrastructure-scripts": {
|
|
828
|
-
name: `Infrastructure Scripts (${fileCount} files)`,
|
|
829
|
-
suggestion: "Extract common CLI parsing, file I/O, and error handling utilities",
|
|
830
|
-
reason: "Script duplication is often acceptable for one-off tasks, but common patterns can be shared"
|
|
831
|
-
},
|
|
832
|
-
"component-button": {
|
|
833
|
-
name: `Button Component Variants (${fileCount} files)`,
|
|
834
|
-
suggestion: "Create unified Button component with variant props",
|
|
835
|
-
reason: "Multiple button variants should share base styles and behavior"
|
|
836
|
-
},
|
|
837
|
-
"component-card": {
|
|
838
|
-
name: `Card Component Variants (${fileCount} files)`,
|
|
839
|
-
suggestion: "Create unified Card component with composition pattern",
|
|
840
|
-
reason: "Card variants should share layout structure and styling"
|
|
841
|
-
},
|
|
842
|
-
"component-modal": {
|
|
843
|
-
name: `Modal Component Variants (${fileCount} files)`,
|
|
844
|
-
suggestion: "Create base Modal component with customizable content",
|
|
845
|
-
reason: "Modal variants should share overlay, animation, and accessibility logic"
|
|
846
|
-
}
|
|
847
|
-
};
|
|
848
|
-
if (templates[clusterId]) {
|
|
849
|
-
return templates[clusterId];
|
|
850
|
-
}
|
|
851
|
-
return {
|
|
852
|
-
name: `${patternType} Cluster (${fileCount} files)`,
|
|
853
|
-
suggestion: `Extract common ${patternType} patterns into shared utilities`,
|
|
854
|
-
reason: `Multiple similar ${patternType} patterns detected across ${fileCount} files`
|
|
855
|
-
};
|
|
856
|
-
}
|
|
857
|
-
function filterClustersByImpact(clusters, minTokenCost = 1e3, minFileCount = 3) {
|
|
433
|
+
/**
 * Keeps only the clusters that are large enough to be worth reporting.
 *
 * @param {Array<{totalTokenCost: number, files: string[]}>} clusters - Candidate clusters.
 * @param {number} [minTokenCost=1e3] - Minimum `totalTokenCost` (inclusive).
 * @param {number} [minFiles=3] - Minimum number of files (inclusive).
 * @returns {Array<object>} New array with the clusters that meet both thresholds.
 */
function filterClustersByImpact(clusters, minTokenCost = 1e3, minFiles = 3) {
  // The file count is only inspected once the token threshold is met.
  const isImpactful = ({ totalTokenCost, files }) =>
    totalTokenCost >= minTokenCost && files.length >= minFiles;
  return clusters.filter(isImpactful);
}
|
|
862
438
|
|
|
863
439
|
// src/scoring.ts
|
|
864
|
-
var
|
|
440
|
+
var import_core4 = require("@aiready/core");
|
|
865
441
|
|
|
866
442
|
// src/index.ts
|
|
867
443
|
function getRefactoringSuggestion(patternType, similarity) {
|
|
@@ -979,7 +555,7 @@ async function analyzePatterns(options) {
|
|
|
979
555
|
const batchContents = await Promise.all(
|
|
980
556
|
batch.map(async (file) => ({
|
|
981
557
|
file,
|
|
982
|
-
content: await (0,
|
|
558
|
+
content: await (0, import_core5.readFileContent)(file)
|
|
983
559
|
}))
|
|
984
560
|
);
|
|
985
561
|
fileContents.push(...batchContents);
|
|
@@ -1000,9 +576,9 @@ async function analyzePatterns(options) {
|
|
|
1000
576
|
);
|
|
1001
577
|
const issues = fileDuplicates.map((dup) => {
|
|
1002
578
|
const otherFile = dup.file1 === file ? dup.file2 : dup.file1;
|
|
1003
|
-
const severity2 = dup.similarity > 0.95 ?
|
|
579
|
+
const severity2 = dup.similarity > 0.95 ? import_core5.Severity.Critical : dup.similarity > 0.9 ? import_core5.Severity.Major : import_core5.Severity.Minor;
|
|
1004
580
|
return {
|
|
1005
|
-
type:
|
|
581
|
+
type: import_core5.IssueType.DuplicatePattern,
|
|
1006
582
|
severity: severity2,
|
|
1007
583
|
message: `${dup.patternType} pattern ${Math.round(dup.similarity * 100)}% similar to ${otherFile} (${dup.tokenCost} tokens wasted)`,
|
|
1008
584
|
location: {
|
|
@@ -1015,11 +591,11 @@ async function analyzePatterns(options) {
|
|
|
1015
591
|
let filteredIssues = issues;
|
|
1016
592
|
if (severity !== "all") {
|
|
1017
593
|
const severityMap = {
|
|
1018
|
-
critical: [
|
|
1019
|
-
high: [
|
|
1020
|
-
medium: [
|
|
594
|
+
critical: [import_core5.Severity.Critical],
|
|
595
|
+
high: [import_core5.Severity.Critical, import_core5.Severity.Major],
|
|
596
|
+
medium: [import_core5.Severity.Critical, import_core5.Severity.Major, import_core5.Severity.Minor]
|
|
1021
597
|
};
|
|
1022
|
-
const allowedSeverities = severityMap[severity] || [
|
|
598
|
+
const allowedSeverities = severityMap[severity] || [import_core5.Severity.Critical, import_core5.Severity.Major, import_core5.Severity.Minor];
|
|
1023
599
|
filteredIssues = issues.filter(
|
|
1024
600
|
(issue) => allowedSeverities.includes(issue.severity)
|
|
1025
601
|
);
|
|
@@ -1111,8 +687,8 @@ function generateSummary(results) {
|
|
|
1111
687
|
// src/cli.ts
|
|
1112
688
|
var import_chalk = __toESM(require("chalk"));
|
|
1113
689
|
var import_fs = require("fs");
|
|
1114
|
-
var
|
|
1115
|
-
var
|
|
690
|
+
var import_path2 = require("path");
|
|
691
|
+
var import_core6 = require("@aiready/core");
|
|
1116
692
|
var program = new import_commander.Command();
|
|
1117
693
|
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").addHelpText(
|
|
1118
694
|
"after",
|
|
@@ -1166,7 +742,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1166
742
|
).option("--output-file <path>", "Output file path (for json/html)").action(async (directory, options) => {
|
|
1167
743
|
console.log(import_chalk.default.blue("\u{1F50D} Analyzing patterns...\n"));
|
|
1168
744
|
const startTime = Date.now();
|
|
1169
|
-
const config = await (0,
|
|
745
|
+
const config = await (0, import_core6.loadConfig)(directory);
|
|
1170
746
|
const defaults = {
|
|
1171
747
|
minSimilarity: 0.4,
|
|
1172
748
|
minLines: 5,
|
|
@@ -1177,7 +753,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1177
753
|
streamResults: true,
|
|
1178
754
|
include: void 0,
|
|
1179
755
|
exclude: void 0,
|
|
1180
|
-
minSeverity:
|
|
756
|
+
minSeverity: import_core6.Severity.Minor,
|
|
1181
757
|
excludeTestFixtures: false,
|
|
1182
758
|
excludeTemplates: false,
|
|
1183
759
|
includeTests: false,
|
|
@@ -1188,7 +764,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1188
764
|
minClusterFiles: 3,
|
|
1189
765
|
showRawDuplicates: false
|
|
1190
766
|
};
|
|
1191
|
-
const mergedConfig = (0,
|
|
767
|
+
const mergedConfig = (0, import_core6.mergeConfigWithDefaults)(config, defaults);
|
|
1192
768
|
const finalOptions = {
|
|
1193
769
|
rootDir: directory,
|
|
1194
770
|
minSimilarity: options.similarity ? parseFloat(options.similarity) : mergedConfig.minSimilarity,
|
|
@@ -1206,10 +782,10 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1206
782
|
excludeTemplates: options.excludeTemplates || mergedConfig.excludeTemplates,
|
|
1207
783
|
includeTests: options.includeTests || mergedConfig.includeTests,
|
|
1208
784
|
maxResults: options.maxResults ? parseInt(options.maxResults) : mergedConfig.maxResults,
|
|
1209
|
-
groupByFilePair: options.
|
|
1210
|
-
createClusters: options.
|
|
1211
|
-
minClusterTokenCost: options.
|
|
1212
|
-
minClusterFiles: options.
|
|
785
|
+
groupByFilePair: options.groupBy_file_pair !== false && mergedConfig.groupByFilePair,
|
|
786
|
+
createClusters: options.create_clusters !== false && mergedConfig.createClusters,
|
|
787
|
+
minClusterTokenCost: options.min_cluster_tokens ? parseInt(options.min_cluster_tokens) : mergedConfig.minClusterTokenCost,
|
|
788
|
+
minClusterFiles: options.min_cluster_files ? parseInt(options.min_cluster_files) : mergedConfig.minClusterFiles,
|
|
1213
789
|
showRawDuplicates: options.showRawDuplicates || mergedConfig.showRawDuplicates
|
|
1214
790
|
};
|
|
1215
791
|
if (finalOptions.includeTests && finalOptions.exclude) {
|
|
@@ -1260,12 +836,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1260
836
|
clusters: clusters || [],
|
|
1261
837
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
1262
838
|
};
|
|
1263
|
-
const outputPath = (0,
|
|
839
|
+
const outputPath = (0, import_core6.resolveOutputPath)(
|
|
1264
840
|
options.outputFile,
|
|
1265
841
|
`pattern-report-${(/* @__PURE__ */ new Date()).toISOString().split("T")[0]}.json`,
|
|
1266
842
|
directory
|
|
1267
843
|
);
|
|
1268
|
-
const dir = (0,
|
|
844
|
+
const dir = (0, import_path2.dirname)(outputPath);
|
|
1269
845
|
if (!(0, import_fs.existsSync)(dir)) {
|
|
1270
846
|
(0, import_fs.mkdirSync)(dir, { recursive: true });
|
|
1271
847
|
}
|
|
@@ -1276,12 +852,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1276
852
|
}
|
|
1277
853
|
if (options.output === "html") {
|
|
1278
854
|
const html = generateHTMLReport(summary, results);
|
|
1279
|
-
const outputPath = (0,
|
|
855
|
+
const outputPath = (0, import_core6.resolveOutputPath)(
|
|
1280
856
|
options.outputFile,
|
|
1281
857
|
`pattern-report-${(/* @__PURE__ */ new Date()).toISOString().split("T")[0]}.html`,
|
|
1282
858
|
directory
|
|
1283
859
|
);
|
|
1284
|
-
const dir = (0,
|
|
860
|
+
const dir = (0, import_path2.dirname)(outputPath);
|
|
1285
861
|
if (!(0, import_fs.existsSync)(dir)) {
|
|
1286
862
|
(0, import_fs.mkdirSync)(dir, { recursive: true });
|
|
1287
863
|
}
|
|
@@ -1330,14 +906,10 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1330
906
|
import_chalk.default.bold.white(` \u{1F4E6} DUPLICATE GROUPS (${groups.length} file pairs)`)
|
|
1331
907
|
);
|
|
1332
908
|
console.log(import_chalk.default.cyan(divider) + "\n");
|
|
1333
|
-
const severityOrder = {
|
|
1334
|
-
critical: 4,
|
|
1335
|
-
major: 3,
|
|
1336
|
-
minor: 2,
|
|
1337
|
-
info: 1
|
|
1338
|
-
};
|
|
1339
909
|
const topGroups = groups.sort((a, b) => {
|
|
1340
|
-
const
|
|
910
|
+
const bVal = getSeverityValue(b.severity);
|
|
911
|
+
const aVal = getSeverityValue(a.severity);
|
|
912
|
+
const severityDiff = bVal - aVal;
|
|
1341
913
|
if (severityDiff !== 0) return severityDiff;
|
|
1342
914
|
return b.totalTokenCost - a.totalTokenCost;
|
|
1343
915
|
}).slice(0, finalOptions.maxResults);
|
|
@@ -1411,14 +983,10 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1411
983
|
console.log(import_chalk.default.cyan("\n" + divider));
|
|
1412
984
|
console.log(import_chalk.default.bold.white(" TOP DUPLICATE PATTERNS"));
|
|
1413
985
|
console.log(import_chalk.default.cyan(divider) + "\n");
|
|
1414
|
-
const severityOrder = {
|
|
1415
|
-
critical: 4,
|
|
1416
|
-
major: 3,
|
|
1417
|
-
minor: 2,
|
|
1418
|
-
info: 1
|
|
1419
|
-
};
|
|
1420
986
|
const topDuplicates = filteredDuplicates.sort((a, b) => {
|
|
1421
|
-
const
|
|
987
|
+
const bVal = getSeverityValue(b.severity);
|
|
988
|
+
const aVal = getSeverityValue(a.severity);
|
|
989
|
+
const severityDiff = bVal - aVal;
|
|
1422
990
|
if (severityDiff !== 0) return severityDiff;
|
|
1423
991
|
return b.similarity - a.similarity;
|
|
1424
992
|
}).slice(0, finalOptions.maxResults);
|
|
@@ -1458,7 +1026,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1458
1026
|
(r) => r.issues.map((issue) => ({ ...issue, file: r.fileName }))
|
|
1459
1027
|
);
|
|
1460
1028
|
const criticalIssues = allIssues.filter(
|
|
1461
|
-
(issue) => issue.severity ===
|
|
1029
|
+
(issue) => getSeverityValue(issue.severity) === 4
|
|
1462
1030
|
);
|
|
1463
1031
|
if (criticalIssues.length > 0) {
|
|
1464
1032
|
console.log(import_chalk.default.cyan(divider));
|
|
@@ -1613,12 +1181,25 @@ function generateHTMLReport(summary, results) {
|
|
|
1613
1181
|
</html>`;
|
|
1614
1182
|
}
|
|
1615
1183
|
program.parse();
|
|
1184
|
+
/**
 * Maps a severity to a numeric rank so severities can be compared and sorted.
 *
 * Accepts either a member of the `Severity` object from `@aiready/core` or
 * one of the plain strings "critical", "major", "minor", "info".
 *
 * @param {*} s - Severity value to rank.
 * @returns {number} 4 critical, 3 major, 2 minor, 1 info, 0 for anything else.
 */
function getSeverityValue(s) {
  const { Severity } = import_core6;
  switch (s) {
    case Severity.Critical:
    case "critical":
      return 4;
    case Severity.Major:
    case "major":
      return 3;
    case Severity.Minor:
    case "minor":
      return 2;
    case Severity.Info:
    case "info":
      return 1;
    default:
      return 0;
  }
}
|
|
1616
1191
|
/**
 * Builds the coloured terminal badge for a severity.
 *
 * The severity is first ranked with `getSeverityValue`; ranks 4, 3 and 2 get
 * their own badge, and every other rank is shown as INFO.
 *
 * @param {*} severity - Severity value accepted by `getSeverityValue`.
 * @returns {string} Badge text styled by chalk.
 */
function getSeverityBadge(severity) {
  const rank = getSeverityValue(severity);
  const chalk = import_chalk.default;
  if (rank === 4) {
    return chalk.bgRed.white.bold(" CRITICAL ");
  }
  if (rank === 3) {
    return chalk.bgYellow.black.bold(" MAJOR ");
  }
  if (rank === 2) {
    return chalk.bgBlue.white.bold(" MINOR ");
  }
  // Rank 1 and unrecognised severities share the INFO badge.
  return chalk.bgCyan.black(" INFO ");
}
|