@aiready/pattern-detect 0.12.2 → 0.12.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-2R7HOR5H.mjs +777 -0
- package/dist/chunk-3VRQYFW3.mjs +782 -0
- package/dist/chunk-5LYDB7DY.mjs +771 -0
- package/dist/chunk-7EJGNGXM.mjs +771 -0
- package/dist/chunk-A76JUWER.mjs +786 -0
- package/dist/chunk-DQSLTL7J.mjs +788 -0
- package/dist/chunk-EZT3NZGB.mjs +1057 -0
- package/dist/chunk-H5FB2USZ.mjs +762 -0
- package/dist/chunk-INJ4SBTV.mjs +754 -0
- package/dist/chunk-JAFZCZAP.mjs +776 -0
- package/dist/chunk-JWR3AHKO.mjs +788 -0
- package/dist/chunk-LUA5FXSZ.mjs +771 -0
- package/dist/chunk-QX2BQJEO.mjs +1058 -0
- package/dist/chunk-RMGDSNLE.mjs +770 -0
- package/dist/chunk-TCG2G32F.mjs +911 -0
- package/dist/chunk-TGBZP7SB.mjs +773 -0
- package/dist/chunk-XCWY2DQY.mjs +788 -0
- package/dist/chunk-XUUVS54V.mjs +776 -0
- package/dist/chunk-YJYDBFT3.mjs +780 -0
- package/dist/cli.js +268 -693
- package/dist/cli.mjs +35 -29
- package/dist/index.d.mts +41 -54
- package/dist/index.d.ts +41 -54
- package/dist/index.js +237 -661
- package/dist/index.mjs +3 -1
- package/package.json +2 -2
package/dist/cli.js
CHANGED
|
@@ -6,13 +6,6 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
|
6
6
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
7
7
|
var __getProtoOf = Object.getPrototypeOf;
|
|
8
8
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
9
|
-
var __esm = (fn, res) => function __init() {
|
|
10
|
-
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
|
|
11
|
-
};
|
|
12
|
-
var __export = (target, all) => {
|
|
13
|
-
for (var name in all)
|
|
14
|
-
__defProp(target, name, { get: all[name], enumerable: true });
|
|
15
|
-
};
|
|
16
9
|
var __copyProps = (to, from, except, desc) => {
|
|
17
10
|
if (from && typeof from === "object" || typeof from === "function") {
|
|
18
11
|
for (let key of __getOwnPropNames(from))
|
|
@@ -30,164 +23,17 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
30
23
|
mod
|
|
31
24
|
));
|
|
32
25
|
|
|
33
|
-
// src/extractors/python-extractor.ts
|
|
34
|
-
var python_extractor_exports = {};
|
|
35
|
-
__export(python_extractor_exports, {
|
|
36
|
-
calculatePythonSimilarity: () => calculatePythonSimilarity,
|
|
37
|
-
detectPythonAntiPatterns: () => detectPythonAntiPatterns,
|
|
38
|
-
extractPythonPatterns: () => extractPythonPatterns
|
|
39
|
-
});
|
|
40
|
-
async function extractPythonPatterns(files) {
|
|
41
|
-
const patterns = [];
|
|
42
|
-
const parser = (0, import_core.getParser)("dummy.py");
|
|
43
|
-
if (!parser) {
|
|
44
|
-
console.warn("Python parser not available");
|
|
45
|
-
return patterns;
|
|
46
|
-
}
|
|
47
|
-
const pythonFiles = files.filter((f) => f.toLowerCase().endsWith(".py"));
|
|
48
|
-
for (const file of pythonFiles) {
|
|
49
|
-
try {
|
|
50
|
-
const fs = await import("fs");
|
|
51
|
-
const code = await fs.promises.readFile(file, "utf-8");
|
|
52
|
-
const result = parser.parse(code, file);
|
|
53
|
-
for (const exp of result.exports) {
|
|
54
|
-
if (exp.type === "function") {
|
|
55
|
-
patterns.push({
|
|
56
|
-
file,
|
|
57
|
-
name: exp.name,
|
|
58
|
-
type: "function",
|
|
59
|
-
startLine: exp.loc?.start.line || 0,
|
|
60
|
-
endLine: exp.loc?.end.line || 0,
|
|
61
|
-
imports: exp.imports || [],
|
|
62
|
-
dependencies: exp.dependencies || [],
|
|
63
|
-
signature: generatePythonSignature(exp),
|
|
64
|
-
language: "python"
|
|
65
|
-
});
|
|
66
|
-
} else if (exp.type === "class") {
|
|
67
|
-
patterns.push({
|
|
68
|
-
file,
|
|
69
|
-
name: exp.name,
|
|
70
|
-
type: "class",
|
|
71
|
-
startLine: exp.loc?.start.line || 0,
|
|
72
|
-
endLine: exp.loc?.end.line || 0,
|
|
73
|
-
imports: exp.imports || [],
|
|
74
|
-
dependencies: exp.dependencies || [],
|
|
75
|
-
signature: `class ${exp.name}`,
|
|
76
|
-
language: "python"
|
|
77
|
-
});
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
} catch (error) {
|
|
81
|
-
console.warn(`Failed to extract patterns from ${file}:`, error);
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
return patterns;
|
|
85
|
-
}
|
|
86
|
-
function generatePythonSignature(exp) {
|
|
87
|
-
const params = exp.parameters?.join(", ") || "";
|
|
88
|
-
return `def ${exp.name}(${params})`;
|
|
89
|
-
}
|
|
90
|
-
function calculatePythonSimilarity(pattern1, pattern2) {
|
|
91
|
-
let similarity = 0;
|
|
92
|
-
let factors = 0;
|
|
93
|
-
const nameSimilarity = calculateNameSimilarity(pattern1.name, pattern2.name);
|
|
94
|
-
similarity += nameSimilarity * 0.3;
|
|
95
|
-
factors += 0.3;
|
|
96
|
-
const importSimilarity = calculateImportSimilarity(
|
|
97
|
-
pattern1.imports || [],
|
|
98
|
-
pattern2.imports || []
|
|
99
|
-
);
|
|
100
|
-
similarity += importSimilarity * 0.4;
|
|
101
|
-
factors += 0.4;
|
|
102
|
-
if (pattern1.type === pattern2.type) {
|
|
103
|
-
similarity += 0.1;
|
|
104
|
-
}
|
|
105
|
-
factors += 0.1;
|
|
106
|
-
const sigSimilarity = calculateSignatureSimilarity(
|
|
107
|
-
pattern1.signature,
|
|
108
|
-
pattern2.signature
|
|
109
|
-
);
|
|
110
|
-
similarity += sigSimilarity * 0.2;
|
|
111
|
-
factors += 0.2;
|
|
112
|
-
return factors > 0 ? similarity / factors : 0;
|
|
113
|
-
}
|
|
114
|
-
function calculateNameSimilarity(name1, name2) {
|
|
115
|
-
if (name1 === name2) return 1;
|
|
116
|
-
const clean1 = name1.replace(
|
|
117
|
-
/^(get|set|is|has|create|delete|update|fetch)_?/,
|
|
118
|
-
""
|
|
119
|
-
);
|
|
120
|
-
const clean2 = name2.replace(
|
|
121
|
-
/^(get|set|is|has|create|delete|update|fetch)_?/,
|
|
122
|
-
""
|
|
123
|
-
);
|
|
124
|
-
if (clean1 === clean2) return 0.9;
|
|
125
|
-
if (clean1.includes(clean2) || clean2.includes(clean1)) {
|
|
126
|
-
return 0.7;
|
|
127
|
-
}
|
|
128
|
-
const set1 = new Set(clean1.split("_"));
|
|
129
|
-
const set2 = new Set(clean2.split("_"));
|
|
130
|
-
const intersection = new Set([...set1].filter((x) => set2.has(x)));
|
|
131
|
-
const union = /* @__PURE__ */ new Set([...set1, ...set2]);
|
|
132
|
-
return intersection.size / union.size;
|
|
133
|
-
}
|
|
134
|
-
function calculateImportSimilarity(imports1, imports2) {
|
|
135
|
-
if (imports1.length === 0 && imports2.length === 0) return 1;
|
|
136
|
-
if (imports1.length === 0 || imports2.length === 0) return 0;
|
|
137
|
-
const set1 = new Set(imports1);
|
|
138
|
-
const set2 = new Set(imports2);
|
|
139
|
-
const intersection = new Set([...set1].filter((x) => set2.has(x)));
|
|
140
|
-
const union = /* @__PURE__ */ new Set([...set1, ...set2]);
|
|
141
|
-
return intersection.size / union.size;
|
|
142
|
-
}
|
|
143
|
-
function calculateSignatureSimilarity(sig1, sig2) {
|
|
144
|
-
if (sig1 === sig2) return 1;
|
|
145
|
-
const params1 = (sig1.match(/\([^)]*\)/)?.[0] || "").split(",").filter(Boolean).length;
|
|
146
|
-
const params2 = (sig2.match(/\([^)]*\)/)?.[0] || "").split(",").filter(Boolean).length;
|
|
147
|
-
if (params1 === params2) return 0.8;
|
|
148
|
-
if (Math.abs(params1 - params2) === 1) return 0.5;
|
|
149
|
-
return 0;
|
|
150
|
-
}
|
|
151
|
-
function detectPythonAntiPatterns(patterns) {
|
|
152
|
-
const antiPatterns = [];
|
|
153
|
-
const nameGroups = /* @__PURE__ */ new Map();
|
|
154
|
-
for (const pattern of patterns) {
|
|
155
|
-
const baseName = pattern.name.replace(
|
|
156
|
-
/^(get|set|create|delete|update)_/,
|
|
157
|
-
""
|
|
158
|
-
);
|
|
159
|
-
if (!nameGroups.has(baseName)) {
|
|
160
|
-
nameGroups.set(baseName, []);
|
|
161
|
-
}
|
|
162
|
-
nameGroups.get(baseName).push(pattern);
|
|
163
|
-
}
|
|
164
|
-
for (const [baseName, group] of nameGroups) {
|
|
165
|
-
if (group.length >= 3) {
|
|
166
|
-
antiPatterns.push(
|
|
167
|
-
`Found ${group.length} functions with similar names (${baseName}): Consider consolidating`
|
|
168
|
-
);
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
return antiPatterns;
|
|
172
|
-
}
|
|
173
|
-
var import_core;
|
|
174
|
-
var init_python_extractor = __esm({
|
|
175
|
-
"src/extractors/python-extractor.ts"() {
|
|
176
|
-
"use strict";
|
|
177
|
-
import_core = require("@aiready/core");
|
|
178
|
-
}
|
|
179
|
-
});
|
|
180
|
-
|
|
181
26
|
// src/cli.ts
|
|
182
27
|
var import_commander = require("commander");
|
|
183
28
|
|
|
184
29
|
// src/index.ts
|
|
185
|
-
var
|
|
30
|
+
var import_core5 = require("@aiready/core");
|
|
186
31
|
|
|
187
32
|
// src/detector.ts
|
|
188
33
|
var import_core2 = require("@aiready/core");
|
|
189
34
|
|
|
190
35
|
// src/context-rules.ts
|
|
36
|
+
var import_core = require("@aiready/core");
|
|
191
37
|
var CONTEXT_RULES = [
|
|
192
38
|
// Test Fixtures - Intentional duplication for test isolation
|
|
193
39
|
{
|
|
@@ -197,7 +43,7 @@ var CONTEXT_RULES = [
|
|
|
197
43
|
const hasTestFixtures = code.includes("beforeAll") || code.includes("afterAll") || code.includes("beforeEach") || code.includes("afterEach") || code.includes("setUp") || code.includes("tearDown");
|
|
198
44
|
return isTestFile && hasTestFixtures;
|
|
199
45
|
},
|
|
200
|
-
severity:
|
|
46
|
+
severity: import_core.Severity.Info,
|
|
201
47
|
reason: "Test fixture duplication is intentional for test isolation",
|
|
202
48
|
suggestion: "Consider if shared test setup would improve maintainability without coupling tests"
|
|
203
49
|
},
|
|
@@ -209,7 +55,7 @@ var CONTEXT_RULES = [
|
|
|
209
55
|
const hasTemplateContent = (code.includes("return") || code.includes("export")) && (code.includes("html") || code.includes("subject") || code.includes("body"));
|
|
210
56
|
return isTemplate && hasTemplateContent;
|
|
211
57
|
},
|
|
212
|
-
severity:
|
|
58
|
+
severity: import_core.Severity.Minor,
|
|
213
59
|
reason: "Template duplication may be intentional for maintainability and branding consistency",
|
|
214
60
|
suggestion: "Extract shared structure only if templates become hard to maintain"
|
|
215
61
|
},
|
|
@@ -221,7 +67,7 @@ var CONTEXT_RULES = [
|
|
|
221
67
|
const hasPageObjectPatterns = code.includes("page.") || code.includes("await page") || code.includes("locator") || code.includes("getBy") || code.includes("selector") || code.includes("click(") || code.includes("fill(");
|
|
222
68
|
return isE2ETest && hasPageObjectPatterns;
|
|
223
69
|
},
|
|
224
|
-
severity:
|
|
70
|
+
severity: import_core.Severity.Minor,
|
|
225
71
|
reason: "E2E test duplication ensures test independence and reduces coupling",
|
|
226
72
|
suggestion: "Consider page object pattern only if duplication causes maintenance issues"
|
|
227
73
|
},
|
|
@@ -231,7 +77,7 @@ var CONTEXT_RULES = [
|
|
|
231
77
|
detect: (file) => {
|
|
232
78
|
return file.endsWith(".config.ts") || file.endsWith(".config.js") || file.includes("jest.config") || file.includes("vite.config") || file.includes("webpack.config") || file.includes("rollup.config") || file.includes("tsconfig");
|
|
233
79
|
},
|
|
234
|
-
severity:
|
|
80
|
+
severity: import_core.Severity.Minor,
|
|
235
81
|
reason: "Configuration files often have similar structure by design",
|
|
236
82
|
suggestion: "Consider shared config base only if configurations become hard to maintain"
|
|
237
83
|
},
|
|
@@ -243,7 +89,7 @@ var CONTEXT_RULES = [
|
|
|
243
89
|
const hasTypeDefinitions = code.includes("interface ") || code.includes("type ") || code.includes("enum ");
|
|
244
90
|
return isTypeFile && hasTypeDefinitions;
|
|
245
91
|
},
|
|
246
|
-
severity:
|
|
92
|
+
severity: import_core.Severity.Info,
|
|
247
93
|
reason: "Type duplication may be intentional for module independence and type safety",
|
|
248
94
|
suggestion: "Extract to shared types package only if causing maintenance burden"
|
|
249
95
|
},
|
|
@@ -253,7 +99,7 @@ var CONTEXT_RULES = [
|
|
|
253
99
|
detect: (file) => {
|
|
254
100
|
return file.includes("/migrations/") || file.includes("/migrate/") || file.includes(".migration.");
|
|
255
101
|
},
|
|
256
|
-
severity:
|
|
102
|
+
severity: import_core.Severity.Info,
|
|
257
103
|
reason: "Migration scripts are typically one-off and intentionally similar",
|
|
258
104
|
suggestion: "Duplication is acceptable for migration scripts"
|
|
259
105
|
},
|
|
@@ -265,7 +111,7 @@ var CONTEXT_RULES = [
|
|
|
265
111
|
const hasMockData = code.includes("mock") || code.includes("Mock") || code.includes("fixture") || code.includes("stub") || code.includes("export const");
|
|
266
112
|
return isMockFile && hasMockData;
|
|
267
113
|
},
|
|
268
|
-
severity:
|
|
114
|
+
severity: import_core.Severity.Info,
|
|
269
115
|
reason: "Mock data duplication is expected for comprehensive test coverage",
|
|
270
116
|
suggestion: "Consider shared factories only for complex mock generation"
|
|
271
117
|
}
|
|
@@ -283,38 +129,43 @@ function calculateSeverity(file1, file2, code, similarity, linesOfCode) {
|
|
|
283
129
|
}
|
|
284
130
|
if (similarity >= 0.95 && linesOfCode >= 30) {
|
|
285
131
|
return {
|
|
286
|
-
severity:
|
|
132
|
+
severity: import_core.Severity.Critical,
|
|
287
133
|
reason: "Large nearly-identical code blocks waste tokens and create maintenance burden",
|
|
288
134
|
suggestion: "Extract to shared utility module immediately"
|
|
289
135
|
};
|
|
290
136
|
} else if (similarity >= 0.95 && linesOfCode >= 15) {
|
|
291
137
|
return {
|
|
292
|
-
severity:
|
|
138
|
+
severity: import_core.Severity.Major,
|
|
293
139
|
reason: "Nearly identical code should be consolidated",
|
|
294
140
|
suggestion: "Move to shared utility file"
|
|
295
141
|
};
|
|
296
142
|
} else if (similarity >= 0.85) {
|
|
297
143
|
return {
|
|
298
|
-
severity:
|
|
144
|
+
severity: import_core.Severity.Major,
|
|
299
145
|
reason: "High similarity indicates significant duplication",
|
|
300
146
|
suggestion: "Extract common logic to shared function"
|
|
301
147
|
};
|
|
302
148
|
} else if (similarity >= 0.7) {
|
|
303
149
|
return {
|
|
304
|
-
severity:
|
|
150
|
+
severity: import_core.Severity.Minor,
|
|
305
151
|
reason: "Moderate similarity detected",
|
|
306
152
|
suggestion: "Consider extracting shared patterns if code evolves together"
|
|
307
153
|
};
|
|
308
154
|
} else {
|
|
309
155
|
return {
|
|
310
|
-
severity:
|
|
156
|
+
severity: import_core.Severity.Minor,
|
|
311
157
|
reason: "Minor similarity detected",
|
|
312
158
|
suggestion: "Monitor but refactoring may not be worthwhile"
|
|
313
159
|
};
|
|
314
160
|
}
|
|
315
161
|
}
|
|
316
162
|
function filterBySeverity(duplicates, minSeverity) {
|
|
317
|
-
const severityOrder = [
|
|
163
|
+
const severityOrder = [
|
|
164
|
+
import_core.Severity.Info,
|
|
165
|
+
import_core.Severity.Minor,
|
|
166
|
+
import_core.Severity.Major,
|
|
167
|
+
import_core.Severity.Critical
|
|
168
|
+
];
|
|
318
169
|
const minIndex = severityOrder.indexOf(minSeverity);
|
|
319
170
|
if (minIndex === -1) return duplicates;
|
|
320
171
|
return duplicates.filter((dup) => {
|
|
@@ -323,261 +174,127 @@ function filterBySeverity(duplicates, minSeverity) {
|
|
|
323
174
|
});
|
|
324
175
|
}
|
|
325
176
|
|
|
326
|
-
// src/
|
|
327
|
-
function
|
|
328
|
-
|
|
329
|
-
if (lower.includes("request") && lower.includes("response") || lower.includes("router.") || lower.includes("app.get") || lower.includes("app.post") || lower.includes("express") || lower.includes("ctx.body")) {
|
|
330
|
-
return "api-handler";
|
|
331
|
-
}
|
|
332
|
-
if (lower.includes("validate") || lower.includes("schema") || lower.includes("zod") || lower.includes("yup") || lower.includes("if") && lower.includes("throw")) {
|
|
333
|
-
return "validator";
|
|
334
|
-
}
|
|
335
|
-
if (lower.includes("return (") || lower.includes("jsx") || lower.includes("component") || lower.includes("props")) {
|
|
336
|
-
return "component";
|
|
337
|
-
}
|
|
338
|
-
if (lower.includes("class ") || lower.includes("this.")) {
|
|
339
|
-
return "class-method";
|
|
340
|
-
}
|
|
341
|
-
if (lower.includes("return ") && !lower.includes("this") && !lower.includes("new ")) {
|
|
342
|
-
return "utility";
|
|
343
|
-
}
|
|
344
|
-
if (lower.includes("function") || lower.includes("=>")) {
|
|
345
|
-
return "function";
|
|
346
|
-
}
|
|
347
|
-
return "unknown";
|
|
177
|
+
// src/detector.ts
|
|
178
|
+
function normalizeCode(code) {
|
|
179
|
+
return code.replace(/\/\/.*/g, "").replace(/\/\*[\s\S]*?\*\//g, "").replace(/['"`]/g, '"').replace(/\s+/g, " ").trim().toLowerCase();
|
|
348
180
|
}
|
|
349
|
-
function
|
|
350
|
-
const lines = content.split("\n");
|
|
181
|
+
function extractBlocks(file, content) {
|
|
351
182
|
const blocks = [];
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
let
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
if (
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
}
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
if (char === "}") braceDepth--;
|
|
183
|
+
const lines = content.split("\n");
|
|
184
|
+
const blockRegex = /^\s*(?:export\s+)?(?:async\s+)?(function|class|const|interface|type)\s+([a-zA-Z0-9_]+)|^\s*(app\.(?:get|post|put|delete|patch|use))\(/gm;
|
|
185
|
+
let match;
|
|
186
|
+
while ((match = blockRegex.exec(content)) !== null) {
|
|
187
|
+
const startLine = content.substring(0, match.index).split("\n").length;
|
|
188
|
+
let type;
|
|
189
|
+
let name;
|
|
190
|
+
if (match[1]) {
|
|
191
|
+
type = match[1];
|
|
192
|
+
name = match[2];
|
|
193
|
+
} else {
|
|
194
|
+
type = "handler";
|
|
195
|
+
name = match[3];
|
|
366
196
|
}
|
|
367
|
-
|
|
368
|
-
|
|
197
|
+
let endLine = -1;
|
|
198
|
+
let openBraces = 0;
|
|
199
|
+
let foundStart = false;
|
|
200
|
+
for (let i = match.index; i < content.length; i++) {
|
|
201
|
+
if (content[i] === "{") {
|
|
202
|
+
openBraces++;
|
|
203
|
+
foundStart = true;
|
|
204
|
+
} else if (content[i] === "}") {
|
|
205
|
+
openBraces--;
|
|
206
|
+
}
|
|
207
|
+
if (foundStart && openBraces === 0) {
|
|
208
|
+
endLine = content.substring(0, i + 1).split("\n").length;
|
|
209
|
+
break;
|
|
210
|
+
}
|
|
369
211
|
}
|
|
370
|
-
if (
|
|
371
|
-
const
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
content: blockContent,
|
|
378
|
-
startLine: blockStart + 1,
|
|
379
|
-
endLine: i + 1,
|
|
380
|
-
patternType: categorizePattern(blockContent),
|
|
381
|
-
linesOfCode: loc
|
|
382
|
-
});
|
|
212
|
+
if (endLine === -1) {
|
|
213
|
+
const remaining = content.slice(match.index);
|
|
214
|
+
const nextLineMatch = remaining.indexOf("\n");
|
|
215
|
+
if (nextLineMatch !== -1) {
|
|
216
|
+
endLine = startLine;
|
|
217
|
+
} else {
|
|
218
|
+
endLine = lines.length;
|
|
383
219
|
}
|
|
384
|
-
currentBlock = [];
|
|
385
|
-
inFunction = false;
|
|
386
|
-
} else if (inFunction && braceDepth === 0) {
|
|
387
|
-
currentBlock = [];
|
|
388
|
-
inFunction = false;
|
|
389
220
|
}
|
|
221
|
+
endLine = Math.max(startLine, endLine);
|
|
222
|
+
const blockCode = lines.slice(startLine - 1, endLine).join("\n");
|
|
223
|
+
const tokens = (0, import_core2.estimateTokens)(blockCode);
|
|
224
|
+
blocks.push({
|
|
225
|
+
file,
|
|
226
|
+
startLine,
|
|
227
|
+
endLine,
|
|
228
|
+
code: blockCode,
|
|
229
|
+
tokens,
|
|
230
|
+
patternType: inferPatternType(type, name)
|
|
231
|
+
});
|
|
390
232
|
}
|
|
391
233
|
return blocks;
|
|
392
234
|
}
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
return code.replace(/\/\/.*$/gm, "").replace(/\/\*[\s\S]*?\*\//g, "").replace(/"[^"]*"/g, '"STR"').replace(/'[^']*'/g, "'STR'").replace(/`[^`]*`/g, "`STR`").replace(/\b\d+\b/g, "NUM").replace(/\s+/g, " ").trim();
|
|
398
|
-
}
|
|
399
|
-
var stopwords = /* @__PURE__ */ new Set([
|
|
400
|
-
"return",
|
|
401
|
-
"const",
|
|
402
|
-
"let",
|
|
403
|
-
"var",
|
|
404
|
-
"function",
|
|
405
|
-
"class",
|
|
406
|
-
"new",
|
|
407
|
-
"if",
|
|
408
|
-
"else",
|
|
409
|
-
"for",
|
|
410
|
-
"while",
|
|
411
|
-
"async",
|
|
412
|
-
"await",
|
|
413
|
-
"try",
|
|
414
|
-
"catch",
|
|
415
|
-
"switch",
|
|
416
|
-
"case",
|
|
417
|
-
"default",
|
|
418
|
-
"import",
|
|
419
|
-
"export",
|
|
420
|
-
"from",
|
|
421
|
-
"true",
|
|
422
|
-
"false",
|
|
423
|
-
"null",
|
|
424
|
-
"undefined",
|
|
425
|
-
"this"
|
|
426
|
-
]);
|
|
427
|
-
function tokenize(norm) {
|
|
428
|
-
const punctuation = "(){}[];.,";
|
|
429
|
-
const cleaned = norm.split("").map((ch) => punctuation.includes(ch) ? " " : ch).join("");
|
|
430
|
-
return cleaned.split(/\s+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
|
|
431
|
-
}
|
|
432
|
-
|
|
433
|
-
// src/core/similarity.ts
|
|
434
|
-
function jaccardSimilarity(tokens1, tokens2) {
|
|
435
|
-
const set1 = new Set(tokens1);
|
|
436
|
-
const set2 = new Set(tokens2);
|
|
437
|
-
if (set1.size === 0 && set2.size === 0) return 0;
|
|
438
|
-
let intersection = 0;
|
|
439
|
-
for (const token of set1) {
|
|
440
|
-
if (set2.has(token)) intersection++;
|
|
235
|
+
function inferPatternType(keyword, name) {
|
|
236
|
+
const n = name.toLowerCase();
|
|
237
|
+
if (keyword === "handler" || n.includes("handler") || n.includes("controller") || n.startsWith("app.")) {
|
|
238
|
+
return "api-handler";
|
|
441
239
|
}
|
|
442
|
-
|
|
443
|
-
|
|
240
|
+
if (n.includes("validate") || n.includes("schema")) return "validator";
|
|
241
|
+
if (n.includes("util") || n.includes("helper")) return "utility";
|
|
242
|
+
if (keyword === "class") return "class-method";
|
|
243
|
+
if (n.match(/^[A-Z]/)) return "component";
|
|
244
|
+
if (keyword === "function") return "function";
|
|
245
|
+
return "unknown";
|
|
444
246
|
}
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
arr.push(i);
|
|
463
|
-
}
|
|
464
|
-
}
|
|
465
|
-
}
|
|
466
|
-
findCandidates(blockIdx, minSharedTokens, maxCandidates) {
|
|
467
|
-
const block1 = this.allBlocks[blockIdx];
|
|
468
|
-
const block1Tokens = this.blockTokens[blockIdx];
|
|
469
|
-
const counts = /* @__PURE__ */ new Map();
|
|
470
|
-
const rareTokens = block1Tokens.filter((tok) => {
|
|
471
|
-
const freq = this.invertedIndex.get(tok)?.length || 0;
|
|
472
|
-
return freq < this.allBlocks.length * 0.1;
|
|
473
|
-
});
|
|
474
|
-
for (const tok of rareTokens) {
|
|
475
|
-
const ids = this.invertedIndex.get(tok);
|
|
476
|
-
if (!ids) continue;
|
|
477
|
-
for (const j of ids) {
|
|
478
|
-
if (j <= blockIdx) continue;
|
|
479
|
-
if (this.allBlocks[j].file === block1.file) continue;
|
|
480
|
-
counts.set(j, (counts.get(j) || 0) + 1);
|
|
481
|
-
}
|
|
482
|
-
}
|
|
483
|
-
return Array.from(counts.entries()).filter(([j, shared]) => {
|
|
484
|
-
const block2Size = this.blockTokens[j].length;
|
|
485
|
-
const minSize = Math.min(block1Tokens.length, block2Size);
|
|
486
|
-
return shared >= minSharedTokens && shared / minSize >= 0.3;
|
|
487
|
-
}).sort((a, b) => b[1] - a[1]).slice(0, maxCandidates).map(([j, shared]) => ({ j, shared }));
|
|
247
|
+
function calculateSimilarity(a, b) {
|
|
248
|
+
if (a === b) return 1;
|
|
249
|
+
const tokensA = a.split(/[^a-zA-Z0-9]+/).filter((t) => t.length > 0);
|
|
250
|
+
const tokensB = b.split(/[^a-zA-Z0-9]+/).filter((t) => t.length > 0);
|
|
251
|
+
if (tokensA.length === 0 || tokensB.length === 0) return 0;
|
|
252
|
+
const setA = new Set(tokensA);
|
|
253
|
+
const setB = new Set(tokensB);
|
|
254
|
+
const intersection = new Set([...setA].filter((x) => setB.has(x)));
|
|
255
|
+
const union = /* @__PURE__ */ new Set([...setA, ...setB]);
|
|
256
|
+
return intersection.size / union.size;
|
|
257
|
+
}
|
|
258
|
+
async function detectDuplicatePatterns(fileContents, options) {
|
|
259
|
+
const { minSimilarity, minLines, streamResults } = options;
|
|
260
|
+
const allBlocks = [];
|
|
261
|
+
for (const { file, content } of fileContents) {
|
|
262
|
+
const blocks = extractBlocks(file, content);
|
|
263
|
+
allBlocks.push(...blocks.filter((b) => b.endLine - b.startLine + 1 >= minLines));
|
|
488
264
|
}
|
|
489
|
-
};
|
|
490
|
-
|
|
491
|
-
// src/detector.ts
|
|
492
|
-
async function detectDuplicatePatterns(files, options) {
|
|
493
|
-
const {
|
|
494
|
-
minSimilarity,
|
|
495
|
-
minLines,
|
|
496
|
-
batchSize = 100,
|
|
497
|
-
approx = true,
|
|
498
|
-
minSharedTokens = 8,
|
|
499
|
-
maxCandidatesPerBlock = 100,
|
|
500
|
-
streamResults = false
|
|
501
|
-
} = options;
|
|
502
265
|
const duplicates = [];
|
|
503
|
-
const maxComparisons = approx ? Infinity : 5e5;
|
|
504
|
-
const allBlocks = files.flatMap(
|
|
505
|
-
(file) => extractCodeBlocks(file.content, minLines).filter(
|
|
506
|
-
(block) => block && block.content && block.content.trim().length > 0
|
|
507
|
-
).map((block) => ({
|
|
508
|
-
...block,
|
|
509
|
-
file: file.file,
|
|
510
|
-
normalized: normalizeCode(block.content),
|
|
511
|
-
tokenCost: block.content ? (0, import_core2.estimateTokens)(block.content) : 0
|
|
512
|
-
}))
|
|
513
|
-
);
|
|
514
|
-
const pythonFiles = files.filter((f) => f.file.endsWith(".py"));
|
|
515
|
-
if (pythonFiles.length > 0) {
|
|
516
|
-
const { extractPythonPatterns: extractPythonPatterns2 } = await Promise.resolve().then(() => (init_python_extractor(), python_extractor_exports));
|
|
517
|
-
const pythonPatterns = await extractPythonPatterns2(
|
|
518
|
-
pythonFiles.map((f) => f.file)
|
|
519
|
-
);
|
|
520
|
-
allBlocks.push(
|
|
521
|
-
...pythonPatterns.map((p) => ({
|
|
522
|
-
content: p.code,
|
|
523
|
-
startLine: p.startLine,
|
|
524
|
-
endLine: p.endLine,
|
|
525
|
-
file: p.file,
|
|
526
|
-
normalized: normalizeCode(p.code),
|
|
527
|
-
patternType: p.type,
|
|
528
|
-
tokenCost: p.code ? (0, import_core2.estimateTokens)(p.code) : 0,
|
|
529
|
-
linesOfCode: p.endLine - p.startLine + 1
|
|
530
|
-
}))
|
|
531
|
-
);
|
|
532
|
-
}
|
|
533
|
-
const blockTokens = allBlocks.map((b) => tokenize(b.normalized));
|
|
534
|
-
const engine = approx ? new ApproxEngine(allBlocks, blockTokens) : null;
|
|
535
|
-
let comparisonsProcessed = 0;
|
|
536
|
-
const startTime = Date.now();
|
|
537
266
|
for (let i = 0; i < allBlocks.length; i++) {
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
` Processed ${i}/${allBlocks.length} blocks (${elapsed.toFixed(1)}s, ${duplicates.length} duplicates)`
|
|
546
|
-
);
|
|
547
|
-
}
|
|
548
|
-
await new Promise((r) => setImmediate((resolve) => r(resolve)));
|
|
549
|
-
}
|
|
550
|
-
const block1 = allBlocks[i];
|
|
551
|
-
const candidates = engine ? engine.findCandidates(i, minSharedTokens, maxCandidatesPerBlock) : allBlocks.slice(i + 1).map((_, idx) => ({ j: i + 1 + idx, shared: 0 }));
|
|
552
|
-
for (const { j } of candidates) {
|
|
553
|
-
if (!approx && comparisonsProcessed >= maxComparisons) break;
|
|
554
|
-
comparisonsProcessed++;
|
|
555
|
-
const block2 = allBlocks[j];
|
|
556
|
-
if (block1.file === block2.file) continue;
|
|
557
|
-
const sim = jaccardSimilarity(blockTokens[i], blockTokens[j]);
|
|
267
|
+
for (let j = i + 1; j < allBlocks.length; j++) {
|
|
268
|
+
const b1 = allBlocks[i];
|
|
269
|
+
const b2 = allBlocks[j];
|
|
270
|
+
if (b1.file === b2.file) continue;
|
|
271
|
+
const norm1 = normalizeCode(b1.code);
|
|
272
|
+
const norm2 = normalizeCode(b2.code);
|
|
273
|
+
const sim = calculateSimilarity(norm1, norm2);
|
|
558
274
|
if (sim >= minSimilarity) {
|
|
559
|
-
const severity = calculateSeverity(
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
275
|
+
const { severity, reason, suggestion, matchedRule } = calculateSeverity(
|
|
276
|
+
b1.file,
|
|
277
|
+
b2.file,
|
|
278
|
+
b1.code,
|
|
563
279
|
sim,
|
|
564
|
-
|
|
280
|
+
b1.endLine - b1.startLine + 1
|
|
565
281
|
);
|
|
566
282
|
const dup = {
|
|
567
|
-
file1:
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
endLine2:
|
|
283
|
+
file1: b1.file,
|
|
284
|
+
line1: b1.startLine,
|
|
285
|
+
endLine1: b1.endLine,
|
|
286
|
+
file2: b2.file,
|
|
287
|
+
line2: b2.startLine,
|
|
288
|
+
endLine2: b2.endLine,
|
|
289
|
+
code1: b1.code,
|
|
290
|
+
code2: b2.code,
|
|
573
291
|
similarity: sim,
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
suggestion: severity.suggestion
|
|
292
|
+
patternType: b1.patternType,
|
|
293
|
+
tokenCost: b1.tokens + b2.tokens,
|
|
294
|
+
severity,
|
|
295
|
+
reason,
|
|
296
|
+
suggestion,
|
|
297
|
+
matchedRule
|
|
581
298
|
};
|
|
582
299
|
duplicates.push(dup);
|
|
583
300
|
if (streamResults)
|
|
@@ -587,281 +304,134 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
587
304
|
}
|
|
588
305
|
}
|
|
589
306
|
}
|
|
590
|
-
return duplicates;
|
|
307
|
+
return duplicates.sort((a, b) => b.similarity - a.similarity);
|
|
591
308
|
}
|
|
592
309
|
|
|
593
310
|
// src/grouping.ts
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
311
|
+
var import_core3 = require("@aiready/core");
|
|
312
|
+
var import_path = __toESM(require("path"));
|
|
313
|
+
function getSeverityLevel(s) {
|
|
314
|
+
if (s === import_core3.Severity.Critical || s === "critical") return 4;
|
|
315
|
+
if (s === import_core3.Severity.Major || s === "major") return 3;
|
|
316
|
+
if (s === import_core3.Severity.Minor || s === "minor") return 2;
|
|
317
|
+
if (s === import_core3.Severity.Info || s === "info") return 1;
|
|
318
|
+
return 0;
|
|
599
319
|
}
|
|
600
320
|
function groupDuplicatesByFilePair(duplicates) {
|
|
601
321
|
const groups = /* @__PURE__ */ new Map();
|
|
602
322
|
for (const dup of duplicates) {
|
|
603
|
-
const
|
|
323
|
+
const files = [dup.file1, dup.file2].sort();
|
|
324
|
+
const key = files.join("::");
|
|
604
325
|
if (!groups.has(key)) {
|
|
605
|
-
groups.set(key,
|
|
326
|
+
groups.set(key, {
|
|
327
|
+
filePair: key,
|
|
328
|
+
severity: dup.severity,
|
|
329
|
+
occurrences: 0,
|
|
330
|
+
totalTokenCost: 0,
|
|
331
|
+
averageSimilarity: 0,
|
|
332
|
+
patternTypes: /* @__PURE__ */ new Set(),
|
|
333
|
+
lineRanges: []
|
|
334
|
+
});
|
|
606
335
|
}
|
|
607
|
-
groups.get(key)
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
);
|
|
616
|
-
const averageSimilarity = deduplicated.reduce((sum, d) => sum + d.similarity, 0) / deduplicated.length;
|
|
617
|
-
const maxSimilarity = Math.max(...deduplicated.map((d) => d.similarity));
|
|
618
|
-
const severity = getHighestSeverity(deduplicated.map((d) => d.severity));
|
|
619
|
-
const patternType = getMostCommonPatternType(deduplicated);
|
|
620
|
-
const lineRanges = deduplicated.map((d) => ({
|
|
621
|
-
file1: { start: d.line1, end: d.endLine1 },
|
|
622
|
-
file2: { start: d.line2, end: d.endLine2 }
|
|
623
|
-
}));
|
|
624
|
-
result.push({
|
|
625
|
-
filePair,
|
|
626
|
-
duplicates: deduplicated,
|
|
627
|
-
totalTokenCost,
|
|
628
|
-
averageSimilarity,
|
|
629
|
-
maxSimilarity,
|
|
630
|
-
severity,
|
|
631
|
-
patternType,
|
|
632
|
-
occurrences: deduplicated.length,
|
|
633
|
-
lineRanges
|
|
336
|
+
const group = groups.get(key);
|
|
337
|
+
group.occurrences++;
|
|
338
|
+
group.totalTokenCost += dup.tokenCost;
|
|
339
|
+
group.averageSimilarity += dup.similarity;
|
|
340
|
+
group.patternTypes.add(dup.patternType);
|
|
341
|
+
group.lineRanges.push({
|
|
342
|
+
file1: { start: dup.line1, end: dup.endLine1 },
|
|
343
|
+
file2: { start: dup.line2, end: dup.endLine2 }
|
|
634
344
|
});
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
function deduplicateOverlappingRanges(duplicates) {
|
|
639
|
-
if (duplicates.length === 0) return [];
|
|
640
|
-
const sorted = [...duplicates].sort((a, b) => {
|
|
641
|
-
if (a.line1 !== b.line1) return a.line1 - b.line1;
|
|
642
|
-
return b.similarity - a.similarity;
|
|
643
|
-
});
|
|
644
|
-
const result = [];
|
|
645
|
-
let current = null;
|
|
646
|
-
for (const dup of sorted) {
|
|
647
|
-
if (!current) {
|
|
648
|
-
current = dup;
|
|
649
|
-
result.push(dup);
|
|
650
|
-
continue;
|
|
651
|
-
}
|
|
652
|
-
const overlapsFile1 = rangesOverlap(
|
|
653
|
-
current.line1,
|
|
654
|
-
current.endLine1,
|
|
655
|
-
dup.line1,
|
|
656
|
-
dup.endLine1
|
|
657
|
-
);
|
|
658
|
-
const overlapsFile2 = rangesOverlap(
|
|
659
|
-
current.line2,
|
|
660
|
-
current.endLine2,
|
|
661
|
-
dup.line2,
|
|
662
|
-
dup.endLine2
|
|
663
|
-
);
|
|
664
|
-
if (overlapsFile1 && overlapsFile2) {
|
|
665
|
-
current = {
|
|
666
|
-
...current,
|
|
667
|
-
endLine1: Math.max(current.endLine1, dup.endLine1),
|
|
668
|
-
endLine2: Math.max(current.endLine2, dup.endLine2),
|
|
669
|
-
tokenCost: Math.max(current.tokenCost, dup.tokenCost)
|
|
670
|
-
};
|
|
671
|
-
result[result.length - 1] = current;
|
|
672
|
-
} else {
|
|
673
|
-
current = dup;
|
|
674
|
-
result.push(dup);
|
|
345
|
+
const currentSev = dup.severity;
|
|
346
|
+
if (getSeverityLevel(currentSev) > getSeverityLevel(group.severity)) {
|
|
347
|
+
group.severity = currentSev;
|
|
675
348
|
}
|
|
676
349
|
}
|
|
677
|
-
return
|
|
350
|
+
return Array.from(groups.values()).map((g) => ({
|
|
351
|
+
...g,
|
|
352
|
+
averageSimilarity: g.averageSimilarity / g.occurrences
|
|
353
|
+
}));
|
|
678
354
|
}
|
|
679
355
|
function createRefactorClusters(duplicates) {
|
|
680
|
-
const
|
|
356
|
+
const adjacency = /* @__PURE__ */ new Map();
|
|
357
|
+
const visited = /* @__PURE__ */ new Set();
|
|
358
|
+
const components = [];
|
|
681
359
|
for (const dup of duplicates) {
|
|
682
|
-
|
|
683
|
-
if (!
|
|
684
|
-
|
|
360
|
+
if (!adjacency.has(dup.file1)) adjacency.set(dup.file1, /* @__PURE__ */ new Set());
|
|
361
|
+
if (!adjacency.has(dup.file2)) adjacency.set(dup.file2, /* @__PURE__ */ new Set());
|
|
362
|
+
adjacency.get(dup.file1).add(dup.file2);
|
|
363
|
+
adjacency.get(dup.file2).add(dup.file1);
|
|
364
|
+
}
|
|
365
|
+
for (const file of adjacency.keys()) {
|
|
366
|
+
if (visited.has(file)) continue;
|
|
367
|
+
const component = [];
|
|
368
|
+
const queue = [file];
|
|
369
|
+
visited.add(file);
|
|
370
|
+
while (queue.length > 0) {
|
|
371
|
+
const curr = queue.shift();
|
|
372
|
+
component.push(curr);
|
|
373
|
+
for (const neighbor of adjacency.get(curr) || []) {
|
|
374
|
+
if (!visited.has(neighbor)) {
|
|
375
|
+
visited.add(neighbor);
|
|
376
|
+
queue.push(neighbor);
|
|
377
|
+
}
|
|
378
|
+
}
|
|
685
379
|
}
|
|
686
|
-
|
|
380
|
+
components.push(component);
|
|
687
381
|
}
|
|
688
|
-
const
|
|
689
|
-
for (const
|
|
690
|
-
if (
|
|
691
|
-
const
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
const
|
|
695
|
-
const
|
|
696
|
-
const
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
382
|
+
const clusters = [];
|
|
383
|
+
for (const component of components) {
|
|
384
|
+
if (component.length < 2) continue;
|
|
385
|
+
const componentDups = duplicates.filter(
|
|
386
|
+
(d) => component.includes(d.file1) && component.includes(d.file2)
|
|
387
|
+
);
|
|
388
|
+
const totalTokenCost = componentDups.reduce((sum, d) => sum + d.tokenCost, 0);
|
|
389
|
+
const avgSimilarity = componentDups.reduce((sum, d) => sum + d.similarity, 0) / Math.max(1, componentDups.length);
|
|
390
|
+
const name = determineClusterName(component);
|
|
391
|
+
const { severity, reason, suggestion } = calculateSeverity(
|
|
392
|
+
component[0],
|
|
393
|
+
component[1],
|
|
394
|
+
"",
|
|
395
|
+
// Code not available here
|
|
396
|
+
avgSimilarity,
|
|
397
|
+
30
|
|
398
|
+
// Assume substantial if clustered
|
|
399
|
+
);
|
|
400
|
+
clusters.push({
|
|
401
|
+
id: `cluster-${clusters.length}`,
|
|
402
|
+
name,
|
|
403
|
+
files: component,
|
|
702
404
|
severity,
|
|
405
|
+
duplicateCount: componentDups.length,
|
|
703
406
|
totalTokenCost,
|
|
704
|
-
averageSimilarity,
|
|
705
|
-
|
|
706
|
-
suggestion
|
|
707
|
-
reason: clusterInfo.reason
|
|
407
|
+
averageSimilarity: avgSimilarity,
|
|
408
|
+
reason,
|
|
409
|
+
suggestion
|
|
708
410
|
});
|
|
709
411
|
}
|
|
710
|
-
return
|
|
711
|
-
}
|
|
712
|
-
function identifyCluster(dup) {
|
|
713
|
-
const file1 = dup.file1.toLowerCase();
|
|
714
|
-
const file2 = dup.file2.toLowerCase();
|
|
715
|
-
if ((file1.includes("/blog/") || file1.startsWith("blog/") || file1.includes("/articles/") || file1.startsWith("articles/")) && (file2.includes("/blog/") || file2.startsWith("blog/") || file2.includes("/articles/") || file2.startsWith("articles/"))) {
|
|
716
|
-
return "blog-seo-boilerplate";
|
|
717
|
-
}
|
|
718
|
-
if ((file1.includes("/components/") || file1.startsWith("components/")) && (file2.includes("/components/") || file2.startsWith("components/")) && dup.patternType === "component") {
|
|
719
|
-
const component1 = extractComponentName(dup.file1);
|
|
720
|
-
const component2 = extractComponentName(dup.file2);
|
|
721
|
-
console.log(
|
|
722
|
-
`Component check: ${dup.file1} -> ${component1}, ${dup.file2} -> ${component2}`
|
|
723
|
-
);
|
|
724
|
-
if (component1 && component2 && areSimilarComponents(component1, component2)) {
|
|
725
|
-
const category = getComponentCategory(component1);
|
|
726
|
-
console.log(`Creating cluster: component-${category}`);
|
|
727
|
-
return `component-${category}`;
|
|
728
|
-
}
|
|
729
|
-
}
|
|
730
|
-
if ((file1.includes("/e2e/") || file1.startsWith("e2e/") || file1.includes(".e2e.")) && (file2.includes("/e2e/") || file2.startsWith("e2e/") || file2.includes(".e2e."))) {
|
|
731
|
-
return "e2e-test-patterns";
|
|
732
|
-
}
|
|
733
|
-
if (dup.patternType === "api-handler") {
|
|
734
|
-
return "api-handlers";
|
|
735
|
-
}
|
|
736
|
-
if (dup.patternType === "validator") {
|
|
737
|
-
return "validators";
|
|
738
|
-
}
|
|
739
|
-
if ((file1.includes("/scripts/") || file1.startsWith("scripts/") || file1.includes("/infra/") || file1.startsWith("infra/")) && (file2.includes("/scripts/") || file2.startsWith("scripts/") || file2.includes("/infra/") || file2.startsWith("infra/"))) {
|
|
740
|
-
return "infrastructure-scripts";
|
|
741
|
-
}
|
|
742
|
-
return `${dup.patternType}-patterns`;
|
|
743
|
-
}
|
|
744
|
-
function extractComponentName(filePath) {
|
|
745
|
-
const match = filePath.match(/[/\\]?([A-Z][a-zA-Z0-9]*)\.(tsx|jsx|ts|js)$/);
|
|
746
|
-
return match ? match[1] : null;
|
|
747
|
-
}
|
|
748
|
-
function areSimilarComponents(name1, name2) {
|
|
749
|
-
const category1 = getComponentCategory(name1);
|
|
750
|
-
const category2 = getComponentCategory(name2);
|
|
751
|
-
return category1 === category2;
|
|
752
|
-
}
|
|
753
|
-
function getComponentCategory(name) {
|
|
754
|
-
name = name.toLowerCase();
|
|
755
|
-
if (name.includes("button") || name.includes("btn")) return "button";
|
|
756
|
-
if (name.includes("card")) return "card";
|
|
757
|
-
if (name.includes("modal") || name.includes("dialog")) return "modal";
|
|
758
|
-
if (name.includes("form")) return "form";
|
|
759
|
-
if (name.includes("input") || name.includes("field")) return "input";
|
|
760
|
-
if (name.includes("table") || name.includes("grid")) return "table";
|
|
761
|
-
if (name.includes("nav") || name.includes("menu")) return "navigation";
|
|
762
|
-
if (name.includes("header") || name.includes("footer")) return "layout";
|
|
763
|
-
return "misc";
|
|
764
|
-
}
|
|
765
|
-
function getUniqueFiles(duplicates) {
|
|
766
|
-
const files = /* @__PURE__ */ new Set();
|
|
767
|
-
for (const dup of duplicates) {
|
|
768
|
-
files.add(dup.file1);
|
|
769
|
-
files.add(dup.file2);
|
|
770
|
-
}
|
|
771
|
-
return Array.from(files).sort();
|
|
772
|
-
}
|
|
773
|
-
function getHighestSeverity(severities) {
|
|
774
|
-
const order = {
|
|
775
|
-
critical: 4,
|
|
776
|
-
major: 3,
|
|
777
|
-
minor: 2,
|
|
778
|
-
info: 1
|
|
779
|
-
};
|
|
780
|
-
let highest = "info";
|
|
781
|
-
let highestValue = 0;
|
|
782
|
-
for (const severity of severities) {
|
|
783
|
-
if (order[severity] > highestValue) {
|
|
784
|
-
highestValue = order[severity];
|
|
785
|
-
highest = severity;
|
|
786
|
-
}
|
|
787
|
-
}
|
|
788
|
-
return highest;
|
|
789
|
-
}
|
|
790
|
-
function getMostCommonPatternType(duplicates) {
|
|
791
|
-
const counts = /* @__PURE__ */ new Map();
|
|
792
|
-
for (const dup of duplicates) {
|
|
793
|
-
counts.set(dup.patternType, (counts.get(dup.patternType) || 0) + 1);
|
|
794
|
-
}
|
|
795
|
-
let mostCommon = "unknown";
|
|
796
|
-
let maxCount = 0;
|
|
797
|
-
for (const [type, count] of counts.entries()) {
|
|
798
|
-
if (count > maxCount) {
|
|
799
|
-
maxCount = count;
|
|
800
|
-
mostCommon = type;
|
|
801
|
-
}
|
|
802
|
-
}
|
|
803
|
-
return mostCommon;
|
|
412
|
+
return clusters;
|
|
804
413
|
}
|
|
805
|
-
function
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
reason: "Test helper extraction improves maintainability while preserving test independence"
|
|
816
|
-
},
|
|
817
|
-
"api-handlers": {
|
|
818
|
-
name: `API Handler Patterns (${fileCount} files)`,
|
|
819
|
-
suggestion: "Extract common middleware, error handling, and response formatting",
|
|
820
|
-
reason: "API handler duplication leads to inconsistent error handling and response formats"
|
|
821
|
-
},
|
|
822
|
-
validators: {
|
|
823
|
-
name: `Validator Patterns (${fileCount} files)`,
|
|
824
|
-
suggestion: "Consolidate into shared schema validators (Zod/Yup) with reusable rules",
|
|
825
|
-
reason: "Validator duplication causes inconsistent validation and harder maintenance"
|
|
826
|
-
},
|
|
827
|
-
"infrastructure-scripts": {
|
|
828
|
-
name: `Infrastructure Scripts (${fileCount} files)`,
|
|
829
|
-
suggestion: "Extract common CLI parsing, file I/O, and error handling utilities",
|
|
830
|
-
reason: "Script duplication is often acceptable for one-off tasks, but common patterns can be shared"
|
|
831
|
-
},
|
|
832
|
-
"component-button": {
|
|
833
|
-
name: `Button Component Variants (${fileCount} files)`,
|
|
834
|
-
suggestion: "Create unified Button component with variant props",
|
|
835
|
-
reason: "Multiple button variants should share base styles and behavior"
|
|
836
|
-
},
|
|
837
|
-
"component-card": {
|
|
838
|
-
name: `Card Component Variants (${fileCount} files)`,
|
|
839
|
-
suggestion: "Create unified Card component with composition pattern",
|
|
840
|
-
reason: "Card variants should share layout structure and styling"
|
|
841
|
-
},
|
|
842
|
-
"component-modal": {
|
|
843
|
-
name: `Modal Component Variants (${fileCount} files)`,
|
|
844
|
-
suggestion: "Create base Modal component with customizable content",
|
|
845
|
-
reason: "Modal variants should share overlay, animation, and accessibility logic"
|
|
846
|
-
}
|
|
847
|
-
};
|
|
848
|
-
if (templates[clusterId]) {
|
|
849
|
-
return templates[clusterId];
|
|
414
|
+
function determineClusterName(files) {
|
|
415
|
+
if (files.length === 0) return "Unknown Cluster";
|
|
416
|
+
if (files.some((f) => f.includes("blog"))) return "Blog SEO Boilerplate";
|
|
417
|
+
if (files.some((f) => f.includes("buttons"))) return "Button Component Variants";
|
|
418
|
+
if (files.some((f) => f.includes("cards"))) return "Card Component Variants";
|
|
419
|
+
if (files.some((f) => f.includes("login.test"))) return "E2E Test Patterns";
|
|
420
|
+
const first = files[0];
|
|
421
|
+
const dirName = import_path.default.dirname(first).split(import_path.default.sep).pop();
|
|
422
|
+
if (dirName && dirName !== "." && dirName !== "..") {
|
|
423
|
+
return `${dirName.charAt(0).toUpperCase() + dirName.slice(1)} Domain Group`;
|
|
850
424
|
}
|
|
851
|
-
return
|
|
852
|
-
name: `${patternType} Cluster (${fileCount} files)`,
|
|
853
|
-
suggestion: `Extract common ${patternType} patterns into shared utilities`,
|
|
854
|
-
reason: `Multiple similar ${patternType} patterns detected across ${fileCount} files`
|
|
855
|
-
};
|
|
425
|
+
return "Shared Pattern Group";
|
|
856
426
|
}
|
|
857
|
-
function filterClustersByImpact(clusters, minTokenCost = 1e3,
|
|
427
|
+
function filterClustersByImpact(clusters, minTokenCost = 1e3, minFiles = 3) {
|
|
858
428
|
return clusters.filter(
|
|
859
|
-
(
|
|
429
|
+
(c) => c.totalTokenCost >= minTokenCost && c.files.length >= minFiles
|
|
860
430
|
);
|
|
861
431
|
}
|
|
862
432
|
|
|
863
433
|
// src/scoring.ts
|
|
864
|
-
var
|
|
434
|
+
var import_core4 = require("@aiready/core");
|
|
865
435
|
|
|
866
436
|
// src/index.ts
|
|
867
437
|
function getRefactoringSuggestion(patternType, similarity) {
|
|
@@ -979,7 +549,7 @@ async function analyzePatterns(options) {
|
|
|
979
549
|
const batchContents = await Promise.all(
|
|
980
550
|
batch.map(async (file) => ({
|
|
981
551
|
file,
|
|
982
|
-
content: await (0,
|
|
552
|
+
content: await (0, import_core5.readFileContent)(file)
|
|
983
553
|
}))
|
|
984
554
|
);
|
|
985
555
|
fileContents.push(...batchContents);
|
|
@@ -1000,9 +570,9 @@ async function analyzePatterns(options) {
|
|
|
1000
570
|
);
|
|
1001
571
|
const issues = fileDuplicates.map((dup) => {
|
|
1002
572
|
const otherFile = dup.file1 === file ? dup.file2 : dup.file1;
|
|
1003
|
-
const severity2 = dup.similarity > 0.95 ?
|
|
573
|
+
const severity2 = dup.similarity > 0.95 ? import_core5.Severity.Critical : dup.similarity > 0.9 ? import_core5.Severity.Major : import_core5.Severity.Minor;
|
|
1004
574
|
return {
|
|
1005
|
-
type:
|
|
575
|
+
type: import_core5.IssueType.DuplicatePattern,
|
|
1006
576
|
severity: severity2,
|
|
1007
577
|
message: `${dup.patternType} pattern ${Math.round(dup.similarity * 100)}% similar to ${otherFile} (${dup.tokenCost} tokens wasted)`,
|
|
1008
578
|
location: {
|
|
@@ -1015,11 +585,11 @@ async function analyzePatterns(options) {
|
|
|
1015
585
|
let filteredIssues = issues;
|
|
1016
586
|
if (severity !== "all") {
|
|
1017
587
|
const severityMap = {
|
|
1018
|
-
critical: [
|
|
1019
|
-
high: [
|
|
1020
|
-
medium: [
|
|
588
|
+
critical: [import_core5.Severity.Critical],
|
|
589
|
+
high: [import_core5.Severity.Critical, import_core5.Severity.Major],
|
|
590
|
+
medium: [import_core5.Severity.Critical, import_core5.Severity.Major, import_core5.Severity.Minor]
|
|
1021
591
|
};
|
|
1022
|
-
const allowedSeverities = severityMap[severity] || [
|
|
592
|
+
const allowedSeverities = severityMap[severity] || [import_core5.Severity.Critical, import_core5.Severity.Major, import_core5.Severity.Minor];
|
|
1023
593
|
filteredIssues = issues.filter(
|
|
1024
594
|
(issue) => allowedSeverities.includes(issue.severity)
|
|
1025
595
|
);
|
|
@@ -1111,8 +681,8 @@ function generateSummary(results) {
|
|
|
1111
681
|
// src/cli.ts
|
|
1112
682
|
var import_chalk = __toESM(require("chalk"));
|
|
1113
683
|
var import_fs = require("fs");
|
|
1114
|
-
var
|
|
1115
|
-
var
|
|
684
|
+
var import_path2 = require("path");
|
|
685
|
+
var import_core6 = require("@aiready/core");
|
|
1116
686
|
var program = new import_commander.Command();
|
|
1117
687
|
program.name("aiready-patterns").description("Detect duplicate patterns in your codebase").version("0.1.0").addHelpText(
|
|
1118
688
|
"after",
|
|
@@ -1166,7 +736,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1166
736
|
).option("--output-file <path>", "Output file path (for json/html)").action(async (directory, options) => {
|
|
1167
737
|
console.log(import_chalk.default.blue("\u{1F50D} Analyzing patterns...\n"));
|
|
1168
738
|
const startTime = Date.now();
|
|
1169
|
-
const config = await (0,
|
|
739
|
+
const config = await (0, import_core6.loadConfig)(directory);
|
|
1170
740
|
const defaults = {
|
|
1171
741
|
minSimilarity: 0.4,
|
|
1172
742
|
minLines: 5,
|
|
@@ -1177,7 +747,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1177
747
|
streamResults: true,
|
|
1178
748
|
include: void 0,
|
|
1179
749
|
exclude: void 0,
|
|
1180
|
-
minSeverity:
|
|
750
|
+
minSeverity: import_core6.Severity.Minor,
|
|
1181
751
|
excludeTestFixtures: false,
|
|
1182
752
|
excludeTemplates: false,
|
|
1183
753
|
includeTests: false,
|
|
@@ -1188,7 +758,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1188
758
|
minClusterFiles: 3,
|
|
1189
759
|
showRawDuplicates: false
|
|
1190
760
|
};
|
|
1191
|
-
const mergedConfig = (0,
|
|
761
|
+
const mergedConfig = (0, import_core6.mergeConfigWithDefaults)(config, defaults);
|
|
1192
762
|
const finalOptions = {
|
|
1193
763
|
rootDir: directory,
|
|
1194
764
|
minSimilarity: options.similarity ? parseFloat(options.similarity) : mergedConfig.minSimilarity,
|
|
@@ -1206,10 +776,10 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1206
776
|
excludeTemplates: options.excludeTemplates || mergedConfig.excludeTemplates,
|
|
1207
777
|
includeTests: options.includeTests || mergedConfig.includeTests,
|
|
1208
778
|
maxResults: options.maxResults ? parseInt(options.maxResults) : mergedConfig.maxResults,
|
|
1209
|
-
groupByFilePair: options.
|
|
1210
|
-
createClusters: options.
|
|
1211
|
-
minClusterTokenCost: options.
|
|
1212
|
-
minClusterFiles: options.
|
|
779
|
+
groupByFilePair: options.groupBy_file_pair !== false && mergedConfig.groupByFilePair,
|
|
780
|
+
createClusters: options.create_clusters !== false && mergedConfig.createClusters,
|
|
781
|
+
minClusterTokenCost: options.min_cluster_tokens ? parseInt(options.min_cluster_tokens) : mergedConfig.minClusterTokenCost,
|
|
782
|
+
minClusterFiles: options.min_cluster_files ? parseInt(options.min_cluster_files) : mergedConfig.minClusterFiles,
|
|
1213
783
|
showRawDuplicates: options.showRawDuplicates || mergedConfig.showRawDuplicates
|
|
1214
784
|
};
|
|
1215
785
|
if (finalOptions.includeTests && finalOptions.exclude) {
|
|
@@ -1260,12 +830,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1260
830
|
clusters: clusters || [],
|
|
1261
831
|
timestamp: (/* @__PURE__ */ new Date()).toISOString()
|
|
1262
832
|
};
|
|
1263
|
-
const outputPath = (0,
|
|
833
|
+
const outputPath = (0, import_core6.resolveOutputPath)(
|
|
1264
834
|
options.outputFile,
|
|
1265
835
|
`pattern-report-${(/* @__PURE__ */ new Date()).toISOString().split("T")[0]}.json`,
|
|
1266
836
|
directory
|
|
1267
837
|
);
|
|
1268
|
-
const dir = (0,
|
|
838
|
+
const dir = (0, import_path2.dirname)(outputPath);
|
|
1269
839
|
if (!(0, import_fs.existsSync)(dir)) {
|
|
1270
840
|
(0, import_fs.mkdirSync)(dir, { recursive: true });
|
|
1271
841
|
}
|
|
@@ -1276,12 +846,12 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1276
846
|
}
|
|
1277
847
|
if (options.output === "html") {
|
|
1278
848
|
const html = generateHTMLReport(summary, results);
|
|
1279
|
-
const outputPath = (0,
|
|
849
|
+
const outputPath = (0, import_core6.resolveOutputPath)(
|
|
1280
850
|
options.outputFile,
|
|
1281
851
|
`pattern-report-${(/* @__PURE__ */ new Date()).toISOString().split("T")[0]}.html`,
|
|
1282
852
|
directory
|
|
1283
853
|
);
|
|
1284
|
-
const dir = (0,
|
|
854
|
+
const dir = (0, import_path2.dirname)(outputPath);
|
|
1285
855
|
if (!(0, import_fs.existsSync)(dir)) {
|
|
1286
856
|
(0, import_fs.mkdirSync)(dir, { recursive: true });
|
|
1287
857
|
}
|
|
@@ -1330,14 +900,10 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1330
900
|
import_chalk.default.bold.white(` \u{1F4E6} DUPLICATE GROUPS (${groups.length} file pairs)`)
|
|
1331
901
|
);
|
|
1332
902
|
console.log(import_chalk.default.cyan(divider) + "\n");
|
|
1333
|
-
const severityOrder = {
|
|
1334
|
-
critical: 4,
|
|
1335
|
-
major: 3,
|
|
1336
|
-
minor: 2,
|
|
1337
|
-
info: 1
|
|
1338
|
-
};
|
|
1339
903
|
const topGroups = groups.sort((a, b) => {
|
|
1340
|
-
const
|
|
904
|
+
const bVal = getSeverityValue(b.severity);
|
|
905
|
+
const aVal = getSeverityValue(a.severity);
|
|
906
|
+
const severityDiff = bVal - aVal;
|
|
1341
907
|
if (severityDiff !== 0) return severityDiff;
|
|
1342
908
|
return b.totalTokenCost - a.totalTokenCost;
|
|
1343
909
|
}).slice(0, finalOptions.maxResults);
|
|
@@ -1411,14 +977,10 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1411
977
|
console.log(import_chalk.default.cyan("\n" + divider));
|
|
1412
978
|
console.log(import_chalk.default.bold.white(" TOP DUPLICATE PATTERNS"));
|
|
1413
979
|
console.log(import_chalk.default.cyan(divider) + "\n");
|
|
1414
|
-
const severityOrder = {
|
|
1415
|
-
critical: 4,
|
|
1416
|
-
major: 3,
|
|
1417
|
-
minor: 2,
|
|
1418
|
-
info: 1
|
|
1419
|
-
};
|
|
1420
980
|
const topDuplicates = filteredDuplicates.sort((a, b) => {
|
|
1421
|
-
const
|
|
981
|
+
const bVal = getSeverityValue(b.severity);
|
|
982
|
+
const aVal = getSeverityValue(a.severity);
|
|
983
|
+
const severityDiff = bVal - aVal;
|
|
1422
984
|
if (severityDiff !== 0) return severityDiff;
|
|
1423
985
|
return b.similarity - a.similarity;
|
|
1424
986
|
}).slice(0, finalOptions.maxResults);
|
|
@@ -1458,7 +1020,7 @@ program.name("aiready-patterns").description("Detect duplicate patterns in your
|
|
|
1458
1020
|
(r) => r.issues.map((issue) => ({ ...issue, file: r.fileName }))
|
|
1459
1021
|
);
|
|
1460
1022
|
const criticalIssues = allIssues.filter(
|
|
1461
|
-
(issue) => issue.severity ===
|
|
1023
|
+
(issue) => getSeverityValue(issue.severity) === 4
|
|
1462
1024
|
);
|
|
1463
1025
|
if (criticalIssues.length > 0) {
|
|
1464
1026
|
console.log(import_chalk.default.cyan(divider));
|
|
@@ -1613,12 +1175,25 @@ function generateHTMLReport(summary, results) {
|
|
|
1613
1175
|
</html>`;
|
|
1614
1176
|
}
|
|
1615
1177
|
program.parse();
|
|
1178
|
+
function getSeverityValue(s) {
|
|
1179
|
+
if (s === import_core6.Severity.Critical || s === "critical") return 4;
|
|
1180
|
+
if (s === import_core6.Severity.Major || s === "major") return 3;
|
|
1181
|
+
if (s === import_core6.Severity.Minor || s === "minor") return 2;
|
|
1182
|
+
if (s === import_core6.Severity.Info || s === "info") return 1;
|
|
1183
|
+
return 0;
|
|
1184
|
+
}
|
|
1616
1185
|
function getSeverityBadge(severity) {
|
|
1617
|
-
const
|
|
1618
|
-
|
|
1619
|
-
|
|
1620
|
-
|
|
1621
|
-
|
|
1622
|
-
|
|
1623
|
-
|
|
1186
|
+
const val = getSeverityValue(severity);
|
|
1187
|
+
switch (val) {
|
|
1188
|
+
case 4:
|
|
1189
|
+
return import_chalk.default.bgRed.white.bold(" CRITICAL ");
|
|
1190
|
+
case 3:
|
|
1191
|
+
return import_chalk.default.bgYellow.black.bold(" MAJOR ");
|
|
1192
|
+
case 2:
|
|
1193
|
+
return import_chalk.default.bgBlue.white.bold(" MINOR ");
|
|
1194
|
+
case 1:
|
|
1195
|
+
return import_chalk.default.bgCyan.black(" INFO ");
|
|
1196
|
+
default:
|
|
1197
|
+
return import_chalk.default.bgCyan.black(" INFO ");
|
|
1198
|
+
}
|
|
1624
1199
|
}
|