@aiready/pattern-detect 0.12.0 → 0.12.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-2R7HOR5H.mjs +777 -0
- package/dist/chunk-3VRQYFW3.mjs +782 -0
- package/dist/chunk-5LYDB7DY.mjs +771 -0
- package/dist/chunk-7EJGNGXM.mjs +771 -0
- package/dist/chunk-A76JUWER.mjs +786 -0
- package/dist/chunk-CCHM2VLK.mjs +1051 -0
- package/dist/chunk-DQSLTL7J.mjs +788 -0
- package/dist/chunk-EZT3NZGB.mjs +1057 -0
- package/dist/chunk-H5FB2USZ.mjs +762 -0
- package/dist/chunk-INJ4SBTV.mjs +754 -0
- package/dist/chunk-JAFZCZAP.mjs +776 -0
- package/dist/chunk-JWR3AHKO.mjs +788 -0
- package/dist/chunk-LUA5FXSZ.mjs +771 -0
- package/dist/chunk-QX2BQJEO.mjs +1058 -0
- package/dist/chunk-RMGDSNLE.mjs +770 -0
- package/dist/chunk-TCG2G32F.mjs +911 -0
- package/dist/chunk-TGBZP7SB.mjs +773 -0
- package/dist/chunk-XCWY2DQY.mjs +788 -0
- package/dist/chunk-XUUVS54V.mjs +776 -0
- package/dist/chunk-YJYDBFT3.mjs +780 -0
- package/dist/cli.js +279 -698
- package/dist/cli.mjs +35 -29
- package/dist/index.d.mts +41 -54
- package/dist/index.d.ts +41 -54
- package/dist/index.js +248 -666
- package/dist/index.mjs +3 -1
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -5,9 +5,6 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
|
5
5
|
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
6
6
|
var __getProtoOf = Object.getPrototypeOf;
|
|
7
7
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
8
|
-
var __esm = (fn, res) => function __init() {
|
|
9
|
-
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
|
|
10
|
-
};
|
|
11
8
|
var __export = (target, all) => {
|
|
12
9
|
for (var name in all)
|
|
13
10
|
__defProp(target, name, { get: all[name], enumerable: true });
|
|
@@ -30,157 +27,10 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
30
27
|
));
|
|
31
28
|
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
|
|
32
29
|
|
|
33
|
-
// src/extractors/python-extractor.ts
|
|
34
|
-
var python_extractor_exports = {};
|
|
35
|
-
__export(python_extractor_exports, {
|
|
36
|
-
calculatePythonSimilarity: () => calculatePythonSimilarity,
|
|
37
|
-
detectPythonAntiPatterns: () => detectPythonAntiPatterns,
|
|
38
|
-
extractPythonPatterns: () => extractPythonPatterns
|
|
39
|
-
});
|
|
40
|
-
async function extractPythonPatterns(files) {
|
|
41
|
-
const patterns = [];
|
|
42
|
-
const parser = (0, import_core.getParser)("dummy.py");
|
|
43
|
-
if (!parser) {
|
|
44
|
-
console.warn("Python parser not available");
|
|
45
|
-
return patterns;
|
|
46
|
-
}
|
|
47
|
-
const pythonFiles = files.filter((f) => f.toLowerCase().endsWith(".py"));
|
|
48
|
-
for (const file of pythonFiles) {
|
|
49
|
-
try {
|
|
50
|
-
const fs = await import("fs");
|
|
51
|
-
const code = await fs.promises.readFile(file, "utf-8");
|
|
52
|
-
const result = parser.parse(code, file);
|
|
53
|
-
for (const exp of result.exports) {
|
|
54
|
-
if (exp.type === "function") {
|
|
55
|
-
patterns.push({
|
|
56
|
-
file,
|
|
57
|
-
name: exp.name,
|
|
58
|
-
type: "function",
|
|
59
|
-
startLine: exp.loc?.start.line || 0,
|
|
60
|
-
endLine: exp.loc?.end.line || 0,
|
|
61
|
-
imports: exp.imports || [],
|
|
62
|
-
dependencies: exp.dependencies || [],
|
|
63
|
-
signature: generatePythonSignature(exp),
|
|
64
|
-
language: "python"
|
|
65
|
-
});
|
|
66
|
-
} else if (exp.type === "class") {
|
|
67
|
-
patterns.push({
|
|
68
|
-
file,
|
|
69
|
-
name: exp.name,
|
|
70
|
-
type: "class",
|
|
71
|
-
startLine: exp.loc?.start.line || 0,
|
|
72
|
-
endLine: exp.loc?.end.line || 0,
|
|
73
|
-
imports: exp.imports || [],
|
|
74
|
-
dependencies: exp.dependencies || [],
|
|
75
|
-
signature: `class ${exp.name}`,
|
|
76
|
-
language: "python"
|
|
77
|
-
});
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
} catch (error) {
|
|
81
|
-
console.warn(`Failed to extract patterns from ${file}:`, error);
|
|
82
|
-
}
|
|
83
|
-
}
|
|
84
|
-
return patterns;
|
|
85
|
-
}
|
|
86
|
-
function generatePythonSignature(exp) {
|
|
87
|
-
const params = exp.parameters?.join(", ") || "";
|
|
88
|
-
return `def ${exp.name}(${params})`;
|
|
89
|
-
}
|
|
90
|
-
function calculatePythonSimilarity(pattern1, pattern2) {
|
|
91
|
-
let similarity = 0;
|
|
92
|
-
let factors = 0;
|
|
93
|
-
const nameSimilarity = calculateNameSimilarity(pattern1.name, pattern2.name);
|
|
94
|
-
similarity += nameSimilarity * 0.3;
|
|
95
|
-
factors += 0.3;
|
|
96
|
-
const importSimilarity = calculateImportSimilarity(
|
|
97
|
-
pattern1.imports || [],
|
|
98
|
-
pattern2.imports || []
|
|
99
|
-
);
|
|
100
|
-
similarity += importSimilarity * 0.4;
|
|
101
|
-
factors += 0.4;
|
|
102
|
-
if (pattern1.type === pattern2.type) {
|
|
103
|
-
similarity += 0.1;
|
|
104
|
-
}
|
|
105
|
-
factors += 0.1;
|
|
106
|
-
const sigSimilarity = calculateSignatureSimilarity(
|
|
107
|
-
pattern1.signature,
|
|
108
|
-
pattern2.signature
|
|
109
|
-
);
|
|
110
|
-
similarity += sigSimilarity * 0.2;
|
|
111
|
-
factors += 0.2;
|
|
112
|
-
return factors > 0 ? similarity / factors : 0;
|
|
113
|
-
}
|
|
114
|
-
function calculateNameSimilarity(name1, name2) {
|
|
115
|
-
if (name1 === name2) return 1;
|
|
116
|
-
const clean1 = name1.replace(
|
|
117
|
-
/^(get|set|is|has|create|delete|update|fetch)_?/,
|
|
118
|
-
""
|
|
119
|
-
);
|
|
120
|
-
const clean2 = name2.replace(
|
|
121
|
-
/^(get|set|is|has|create|delete|update|fetch)_?/,
|
|
122
|
-
""
|
|
123
|
-
);
|
|
124
|
-
if (clean1 === clean2) return 0.9;
|
|
125
|
-
if (clean1.includes(clean2) || clean2.includes(clean1)) {
|
|
126
|
-
return 0.7;
|
|
127
|
-
}
|
|
128
|
-
const set1 = new Set(clean1.split("_"));
|
|
129
|
-
const set2 = new Set(clean2.split("_"));
|
|
130
|
-
const intersection = new Set([...set1].filter((x) => set2.has(x)));
|
|
131
|
-
const union = /* @__PURE__ */ new Set([...set1, ...set2]);
|
|
132
|
-
return intersection.size / union.size;
|
|
133
|
-
}
|
|
134
|
-
function calculateImportSimilarity(imports1, imports2) {
|
|
135
|
-
if (imports1.length === 0 && imports2.length === 0) return 1;
|
|
136
|
-
if (imports1.length === 0 || imports2.length === 0) return 0;
|
|
137
|
-
const set1 = new Set(imports1);
|
|
138
|
-
const set2 = new Set(imports2);
|
|
139
|
-
const intersection = new Set([...set1].filter((x) => set2.has(x)));
|
|
140
|
-
const union = /* @__PURE__ */ new Set([...set1, ...set2]);
|
|
141
|
-
return intersection.size / union.size;
|
|
142
|
-
}
|
|
143
|
-
function calculateSignatureSimilarity(sig1, sig2) {
|
|
144
|
-
if (sig1 === sig2) return 1;
|
|
145
|
-
const params1 = (sig1.match(/\([^)]*\)/)?.[0] || "").split(",").filter(Boolean).length;
|
|
146
|
-
const params2 = (sig2.match(/\([^)]*\)/)?.[0] || "").split(",").filter(Boolean).length;
|
|
147
|
-
if (params1 === params2) return 0.8;
|
|
148
|
-
if (Math.abs(params1 - params2) === 1) return 0.5;
|
|
149
|
-
return 0;
|
|
150
|
-
}
|
|
151
|
-
function detectPythonAntiPatterns(patterns) {
|
|
152
|
-
const antiPatterns = [];
|
|
153
|
-
const nameGroups = /* @__PURE__ */ new Map();
|
|
154
|
-
for (const pattern of patterns) {
|
|
155
|
-
const baseName = pattern.name.replace(
|
|
156
|
-
/^(get|set|create|delete|update)_/,
|
|
157
|
-
""
|
|
158
|
-
);
|
|
159
|
-
if (!nameGroups.has(baseName)) {
|
|
160
|
-
nameGroups.set(baseName, []);
|
|
161
|
-
}
|
|
162
|
-
nameGroups.get(baseName).push(pattern);
|
|
163
|
-
}
|
|
164
|
-
for (const [baseName, group] of nameGroups) {
|
|
165
|
-
if (group.length >= 3) {
|
|
166
|
-
antiPatterns.push(
|
|
167
|
-
`Found ${group.length} functions with similar names (${baseName}): Consider consolidating`
|
|
168
|
-
);
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
return antiPatterns;
|
|
172
|
-
}
|
|
173
|
-
var import_core;
|
|
174
|
-
var init_python_extractor = __esm({
|
|
175
|
-
"src/extractors/python-extractor.ts"() {
|
|
176
|
-
"use strict";
|
|
177
|
-
import_core = require("@aiready/core");
|
|
178
|
-
}
|
|
179
|
-
});
|
|
180
|
-
|
|
181
30
|
// src/index.ts
|
|
182
31
|
var index_exports = {};
|
|
183
32
|
__export(index_exports, {
|
|
33
|
+
Severity: () => import_core5.Severity,
|
|
184
34
|
analyzePatterns: () => analyzePatterns,
|
|
185
35
|
calculatePatternScore: () => calculatePatternScore,
|
|
186
36
|
calculateSeverity: () => calculateSeverity,
|
|
@@ -191,12 +41,13 @@ __export(index_exports, {
|
|
|
191
41
|
getSmartDefaults: () => getSmartDefaults
|
|
192
42
|
});
|
|
193
43
|
module.exports = __toCommonJS(index_exports);
|
|
194
|
-
var
|
|
44
|
+
var import_core5 = require("@aiready/core");
|
|
195
45
|
|
|
196
46
|
// src/detector.ts
|
|
197
47
|
var import_core2 = require("@aiready/core");
|
|
198
48
|
|
|
199
49
|
// src/context-rules.ts
|
|
50
|
+
var import_core = require("@aiready/core");
|
|
200
51
|
var CONTEXT_RULES = [
|
|
201
52
|
// Test Fixtures - Intentional duplication for test isolation
|
|
202
53
|
{
|
|
@@ -206,7 +57,7 @@ var CONTEXT_RULES = [
|
|
|
206
57
|
const hasTestFixtures = code.includes("beforeAll") || code.includes("afterAll") || code.includes("beforeEach") || code.includes("afterEach") || code.includes("setUp") || code.includes("tearDown");
|
|
207
58
|
return isTestFile && hasTestFixtures;
|
|
208
59
|
},
|
|
209
|
-
severity:
|
|
60
|
+
severity: import_core.Severity.Info,
|
|
210
61
|
reason: "Test fixture duplication is intentional for test isolation",
|
|
211
62
|
suggestion: "Consider if shared test setup would improve maintainability without coupling tests"
|
|
212
63
|
},
|
|
@@ -218,7 +69,7 @@ var CONTEXT_RULES = [
|
|
|
218
69
|
const hasTemplateContent = (code.includes("return") || code.includes("export")) && (code.includes("html") || code.includes("subject") || code.includes("body"));
|
|
219
70
|
return isTemplate && hasTemplateContent;
|
|
220
71
|
},
|
|
221
|
-
severity:
|
|
72
|
+
severity: import_core.Severity.Minor,
|
|
222
73
|
reason: "Template duplication may be intentional for maintainability and branding consistency",
|
|
223
74
|
suggestion: "Extract shared structure only if templates become hard to maintain"
|
|
224
75
|
},
|
|
@@ -230,7 +81,7 @@ var CONTEXT_RULES = [
|
|
|
230
81
|
const hasPageObjectPatterns = code.includes("page.") || code.includes("await page") || code.includes("locator") || code.includes("getBy") || code.includes("selector") || code.includes("click(") || code.includes("fill(");
|
|
231
82
|
return isE2ETest && hasPageObjectPatterns;
|
|
232
83
|
},
|
|
233
|
-
severity:
|
|
84
|
+
severity: import_core.Severity.Minor,
|
|
234
85
|
reason: "E2E test duplication ensures test independence and reduces coupling",
|
|
235
86
|
suggestion: "Consider page object pattern only if duplication causes maintenance issues"
|
|
236
87
|
},
|
|
@@ -240,7 +91,7 @@ var CONTEXT_RULES = [
|
|
|
240
91
|
detect: (file) => {
|
|
241
92
|
return file.endsWith(".config.ts") || file.endsWith(".config.js") || file.includes("jest.config") || file.includes("vite.config") || file.includes("webpack.config") || file.includes("rollup.config") || file.includes("tsconfig");
|
|
242
93
|
},
|
|
243
|
-
severity:
|
|
94
|
+
severity: import_core.Severity.Minor,
|
|
244
95
|
reason: "Configuration files often have similar structure by design",
|
|
245
96
|
suggestion: "Consider shared config base only if configurations become hard to maintain"
|
|
246
97
|
},
|
|
@@ -252,7 +103,7 @@ var CONTEXT_RULES = [
|
|
|
252
103
|
const hasTypeDefinitions = code.includes("interface ") || code.includes("type ") || code.includes("enum ");
|
|
253
104
|
return isTypeFile && hasTypeDefinitions;
|
|
254
105
|
},
|
|
255
|
-
severity:
|
|
106
|
+
severity: import_core.Severity.Info,
|
|
256
107
|
reason: "Type duplication may be intentional for module independence and type safety",
|
|
257
108
|
suggestion: "Extract to shared types package only if causing maintenance burden"
|
|
258
109
|
},
|
|
@@ -262,7 +113,7 @@ var CONTEXT_RULES = [
|
|
|
262
113
|
detect: (file) => {
|
|
263
114
|
return file.includes("/migrations/") || file.includes("/migrate/") || file.includes(".migration.");
|
|
264
115
|
},
|
|
265
|
-
severity:
|
|
116
|
+
severity: import_core.Severity.Info,
|
|
266
117
|
reason: "Migration scripts are typically one-off and intentionally similar",
|
|
267
118
|
suggestion: "Duplication is acceptable for migration scripts"
|
|
268
119
|
},
|
|
@@ -274,7 +125,7 @@ var CONTEXT_RULES = [
|
|
|
274
125
|
const hasMockData = code.includes("mock") || code.includes("Mock") || code.includes("fixture") || code.includes("stub") || code.includes("export const");
|
|
275
126
|
return isMockFile && hasMockData;
|
|
276
127
|
},
|
|
277
|
-
severity:
|
|
128
|
+
severity: import_core.Severity.Info,
|
|
278
129
|
reason: "Mock data duplication is expected for comprehensive test coverage",
|
|
279
130
|
suggestion: "Consider shared factories only for complex mock generation"
|
|
280
131
|
}
|
|
@@ -292,31 +143,31 @@ function calculateSeverity(file1, file2, code, similarity, linesOfCode) {
|
|
|
292
143
|
}
|
|
293
144
|
if (similarity >= 0.95 && linesOfCode >= 30) {
|
|
294
145
|
return {
|
|
295
|
-
severity:
|
|
146
|
+
severity: import_core.Severity.Critical,
|
|
296
147
|
reason: "Large nearly-identical code blocks waste tokens and create maintenance burden",
|
|
297
148
|
suggestion: "Extract to shared utility module immediately"
|
|
298
149
|
};
|
|
299
150
|
} else if (similarity >= 0.95 && linesOfCode >= 15) {
|
|
300
151
|
return {
|
|
301
|
-
severity:
|
|
152
|
+
severity: import_core.Severity.Major,
|
|
302
153
|
reason: "Nearly identical code should be consolidated",
|
|
303
154
|
suggestion: "Move to shared utility file"
|
|
304
155
|
};
|
|
305
156
|
} else if (similarity >= 0.85) {
|
|
306
157
|
return {
|
|
307
|
-
severity:
|
|
158
|
+
severity: import_core.Severity.Major,
|
|
308
159
|
reason: "High similarity indicates significant duplication",
|
|
309
160
|
suggestion: "Extract common logic to shared function"
|
|
310
161
|
};
|
|
311
162
|
} else if (similarity >= 0.7) {
|
|
312
163
|
return {
|
|
313
|
-
severity:
|
|
164
|
+
severity: import_core.Severity.Minor,
|
|
314
165
|
reason: "Moderate similarity detected",
|
|
315
166
|
suggestion: "Consider extracting shared patterns if code evolves together"
|
|
316
167
|
};
|
|
317
168
|
} else {
|
|
318
169
|
return {
|
|
319
|
-
severity:
|
|
170
|
+
severity: import_core.Severity.Minor,
|
|
320
171
|
reason: "Minor similarity detected",
|
|
321
172
|
suggestion: "Monitor but refactoring may not be worthwhile"
|
|
322
173
|
};
|
|
@@ -324,15 +175,20 @@ function calculateSeverity(file1, file2, code, similarity, linesOfCode) {
|
|
|
324
175
|
}
|
|
325
176
|
function getSeverityLabel(severity) {
|
|
326
177
|
const labels = {
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
178
|
+
[import_core.Severity.Critical]: "\u{1F534} CRITICAL",
|
|
179
|
+
[import_core.Severity.Major]: "\u{1F7E1} MAJOR",
|
|
180
|
+
[import_core.Severity.Minor]: "\u{1F535} MINOR",
|
|
181
|
+
[import_core.Severity.Info]: "\u2139\uFE0F INFO"
|
|
331
182
|
};
|
|
332
183
|
return labels[severity];
|
|
333
184
|
}
|
|
334
185
|
function filterBySeverity(duplicates, minSeverity) {
|
|
335
|
-
const severityOrder = [
|
|
186
|
+
const severityOrder = [
|
|
187
|
+
import_core.Severity.Info,
|
|
188
|
+
import_core.Severity.Minor,
|
|
189
|
+
import_core.Severity.Major,
|
|
190
|
+
import_core.Severity.Critical
|
|
191
|
+
];
|
|
336
192
|
const minIndex = severityOrder.indexOf(minSeverity);
|
|
337
193
|
if (minIndex === -1) return duplicates;
|
|
338
194
|
return duplicates.filter((dup) => {
|
|
@@ -341,261 +197,127 @@ function filterBySeverity(duplicates, minSeverity) {
|
|
|
341
197
|
});
|
|
342
198
|
}
|
|
343
199
|
|
|
344
|
-
// src/
|
|
345
|
-
function
|
|
346
|
-
|
|
347
|
-
if (lower.includes("request") && lower.includes("response") || lower.includes("router.") || lower.includes("app.get") || lower.includes("app.post") || lower.includes("express") || lower.includes("ctx.body")) {
|
|
348
|
-
return "api-handler";
|
|
349
|
-
}
|
|
350
|
-
if (lower.includes("validate") || lower.includes("schema") || lower.includes("zod") || lower.includes("yup") || lower.includes("if") && lower.includes("throw")) {
|
|
351
|
-
return "validator";
|
|
352
|
-
}
|
|
353
|
-
if (lower.includes("return (") || lower.includes("jsx") || lower.includes("component") || lower.includes("props")) {
|
|
354
|
-
return "component";
|
|
355
|
-
}
|
|
356
|
-
if (lower.includes("class ") || lower.includes("this.")) {
|
|
357
|
-
return "class-method";
|
|
358
|
-
}
|
|
359
|
-
if (lower.includes("return ") && !lower.includes("this") && !lower.includes("new ")) {
|
|
360
|
-
return "utility";
|
|
361
|
-
}
|
|
362
|
-
if (lower.includes("function") || lower.includes("=>")) {
|
|
363
|
-
return "function";
|
|
364
|
-
}
|
|
365
|
-
return "unknown";
|
|
200
|
+
// src/detector.ts
|
|
201
|
+
function normalizeCode(code) {
|
|
202
|
+
return code.replace(/\/\/.*/g, "").replace(/\/\*[\s\S]*?\*\//g, "").replace(/['"`]/g, '"').replace(/\s+/g, " ").trim().toLowerCase();
|
|
366
203
|
}
|
|
367
|
-
function
|
|
368
|
-
const lines = content.split("\n");
|
|
204
|
+
function extractBlocks(file, content) {
|
|
369
205
|
const blocks = [];
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
let
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
if (
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
}
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
if (char === "}") braceDepth--;
|
|
206
|
+
const lines = content.split("\n");
|
|
207
|
+
const blockRegex = /^\s*(?:export\s+)?(?:async\s+)?(function|class|const|interface|type)\s+([a-zA-Z0-9_]+)|^\s*(app\.(?:get|post|put|delete|patch|use))\(/gm;
|
|
208
|
+
let match;
|
|
209
|
+
while ((match = blockRegex.exec(content)) !== null) {
|
|
210
|
+
const startLine = content.substring(0, match.index).split("\n").length;
|
|
211
|
+
let type;
|
|
212
|
+
let name;
|
|
213
|
+
if (match[1]) {
|
|
214
|
+
type = match[1];
|
|
215
|
+
name = match[2];
|
|
216
|
+
} else {
|
|
217
|
+
type = "handler";
|
|
218
|
+
name = match[3];
|
|
384
219
|
}
|
|
385
|
-
|
|
386
|
-
|
|
220
|
+
let endLine = -1;
|
|
221
|
+
let openBraces = 0;
|
|
222
|
+
let foundStart = false;
|
|
223
|
+
for (let i = match.index; i < content.length; i++) {
|
|
224
|
+
if (content[i] === "{") {
|
|
225
|
+
openBraces++;
|
|
226
|
+
foundStart = true;
|
|
227
|
+
} else if (content[i] === "}") {
|
|
228
|
+
openBraces--;
|
|
229
|
+
}
|
|
230
|
+
if (foundStart && openBraces === 0) {
|
|
231
|
+
endLine = content.substring(0, i + 1).split("\n").length;
|
|
232
|
+
break;
|
|
233
|
+
}
|
|
387
234
|
}
|
|
388
|
-
if (
|
|
389
|
-
const
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
content: blockContent,
|
|
396
|
-
startLine: blockStart + 1,
|
|
397
|
-
endLine: i + 1,
|
|
398
|
-
patternType: categorizePattern(blockContent),
|
|
399
|
-
linesOfCode: loc
|
|
400
|
-
});
|
|
235
|
+
if (endLine === -1) {
|
|
236
|
+
const remaining = content.slice(match.index);
|
|
237
|
+
const nextLineMatch = remaining.indexOf("\n");
|
|
238
|
+
if (nextLineMatch !== -1) {
|
|
239
|
+
endLine = startLine;
|
|
240
|
+
} else {
|
|
241
|
+
endLine = lines.length;
|
|
401
242
|
}
|
|
402
|
-
currentBlock = [];
|
|
403
|
-
inFunction = false;
|
|
404
|
-
} else if (inFunction && braceDepth === 0) {
|
|
405
|
-
currentBlock = [];
|
|
406
|
-
inFunction = false;
|
|
407
243
|
}
|
|
244
|
+
endLine = Math.max(startLine, endLine);
|
|
245
|
+
const blockCode = lines.slice(startLine - 1, endLine).join("\n");
|
|
246
|
+
const tokens = (0, import_core2.estimateTokens)(blockCode);
|
|
247
|
+
blocks.push({
|
|
248
|
+
file,
|
|
249
|
+
startLine,
|
|
250
|
+
endLine,
|
|
251
|
+
code: blockCode,
|
|
252
|
+
tokens,
|
|
253
|
+
patternType: inferPatternType(type, name)
|
|
254
|
+
});
|
|
408
255
|
}
|
|
409
256
|
return blocks;
|
|
410
257
|
}
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
return code.replace(/\/\/.*$/gm, "").replace(/\/\*[\s\S]*?\*\//g, "").replace(/"[^"]*"/g, '"STR"').replace(/'[^']*'/g, "'STR'").replace(/`[^`]*`/g, "`STR`").replace(/\b\d+\b/g, "NUM").replace(/\s+/g, " ").trim();
|
|
416
|
-
}
|
|
417
|
-
var stopwords = /* @__PURE__ */ new Set([
|
|
418
|
-
"return",
|
|
419
|
-
"const",
|
|
420
|
-
"let",
|
|
421
|
-
"var",
|
|
422
|
-
"function",
|
|
423
|
-
"class",
|
|
424
|
-
"new",
|
|
425
|
-
"if",
|
|
426
|
-
"else",
|
|
427
|
-
"for",
|
|
428
|
-
"while",
|
|
429
|
-
"async",
|
|
430
|
-
"await",
|
|
431
|
-
"try",
|
|
432
|
-
"catch",
|
|
433
|
-
"switch",
|
|
434
|
-
"case",
|
|
435
|
-
"default",
|
|
436
|
-
"import",
|
|
437
|
-
"export",
|
|
438
|
-
"from",
|
|
439
|
-
"true",
|
|
440
|
-
"false",
|
|
441
|
-
"null",
|
|
442
|
-
"undefined",
|
|
443
|
-
"this"
|
|
444
|
-
]);
|
|
445
|
-
function tokenize(norm) {
|
|
446
|
-
const punctuation = "(){}[];.,";
|
|
447
|
-
const cleaned = norm.split("").map((ch) => punctuation.includes(ch) ? " " : ch).join("");
|
|
448
|
-
return cleaned.split(/\s+/).filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
|
|
449
|
-
}
|
|
450
|
-
|
|
451
|
-
// src/core/similarity.ts
|
|
452
|
-
function jaccardSimilarity(tokens1, tokens2) {
|
|
453
|
-
const set1 = new Set(tokens1);
|
|
454
|
-
const set2 = new Set(tokens2);
|
|
455
|
-
if (set1.size === 0 && set2.size === 0) return 0;
|
|
456
|
-
let intersection = 0;
|
|
457
|
-
for (const token of set1) {
|
|
458
|
-
if (set2.has(token)) intersection++;
|
|
258
|
+
function inferPatternType(keyword, name) {
|
|
259
|
+
const n = name.toLowerCase();
|
|
260
|
+
if (keyword === "handler" || n.includes("handler") || n.includes("controller") || n.startsWith("app.")) {
|
|
261
|
+
return "api-handler";
|
|
459
262
|
}
|
|
460
|
-
|
|
461
|
-
|
|
263
|
+
if (n.includes("validate") || n.includes("schema")) return "validator";
|
|
264
|
+
if (n.includes("util") || n.includes("helper")) return "utility";
|
|
265
|
+
if (keyword === "class") return "class-method";
|
|
266
|
+
if (n.match(/^[A-Z]/)) return "component";
|
|
267
|
+
if (keyword === "function") return "function";
|
|
268
|
+
return "unknown";
|
|
462
269
|
}
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
arr.push(i);
|
|
481
|
-
}
|
|
482
|
-
}
|
|
483
|
-
}
|
|
484
|
-
findCandidates(blockIdx, minSharedTokens, maxCandidates) {
|
|
485
|
-
const block1 = this.allBlocks[blockIdx];
|
|
486
|
-
const block1Tokens = this.blockTokens[blockIdx];
|
|
487
|
-
const counts = /* @__PURE__ */ new Map();
|
|
488
|
-
const rareTokens = block1Tokens.filter((tok) => {
|
|
489
|
-
const freq = this.invertedIndex.get(tok)?.length || 0;
|
|
490
|
-
return freq < this.allBlocks.length * 0.1;
|
|
491
|
-
});
|
|
492
|
-
for (const tok of rareTokens) {
|
|
493
|
-
const ids = this.invertedIndex.get(tok);
|
|
494
|
-
if (!ids) continue;
|
|
495
|
-
for (const j of ids) {
|
|
496
|
-
if (j <= blockIdx) continue;
|
|
497
|
-
if (this.allBlocks[j].file === block1.file) continue;
|
|
498
|
-
counts.set(j, (counts.get(j) || 0) + 1);
|
|
499
|
-
}
|
|
500
|
-
}
|
|
501
|
-
return Array.from(counts.entries()).filter(([j, shared]) => {
|
|
502
|
-
const block2Size = this.blockTokens[j].length;
|
|
503
|
-
const minSize = Math.min(block1Tokens.length, block2Size);
|
|
504
|
-
return shared >= minSharedTokens && shared / minSize >= 0.3;
|
|
505
|
-
}).sort((a, b) => b[1] - a[1]).slice(0, maxCandidates).map(([j, shared]) => ({ j, shared }));
|
|
270
|
+
function calculateSimilarity(a, b) {
|
|
271
|
+
if (a === b) return 1;
|
|
272
|
+
const tokensA = a.split(/[^a-zA-Z0-9]+/).filter((t) => t.length > 0);
|
|
273
|
+
const tokensB = b.split(/[^a-zA-Z0-9]+/).filter((t) => t.length > 0);
|
|
274
|
+
if (tokensA.length === 0 || tokensB.length === 0) return 0;
|
|
275
|
+
const setA = new Set(tokensA);
|
|
276
|
+
const setB = new Set(tokensB);
|
|
277
|
+
const intersection = new Set([...setA].filter((x) => setB.has(x)));
|
|
278
|
+
const union = /* @__PURE__ */ new Set([...setA, ...setB]);
|
|
279
|
+
return intersection.size / union.size;
|
|
280
|
+
}
|
|
281
|
+
async function detectDuplicatePatterns(fileContents, options) {
|
|
282
|
+
const { minSimilarity, minLines, streamResults } = options;
|
|
283
|
+
const allBlocks = [];
|
|
284
|
+
for (const { file, content } of fileContents) {
|
|
285
|
+
const blocks = extractBlocks(file, content);
|
|
286
|
+
allBlocks.push(...blocks.filter((b) => b.endLine - b.startLine + 1 >= minLines));
|
|
506
287
|
}
|
|
507
|
-
};
|
|
508
|
-
|
|
509
|
-
// src/detector.ts
|
|
510
|
-
async function detectDuplicatePatterns(files, options) {
|
|
511
|
-
const {
|
|
512
|
-
minSimilarity,
|
|
513
|
-
minLines,
|
|
514
|
-
batchSize = 100,
|
|
515
|
-
approx = true,
|
|
516
|
-
minSharedTokens = 8,
|
|
517
|
-
maxCandidatesPerBlock = 100,
|
|
518
|
-
streamResults = false
|
|
519
|
-
} = options;
|
|
520
288
|
const duplicates = [];
|
|
521
|
-
const maxComparisons = approx ? Infinity : 5e5;
|
|
522
|
-
const allBlocks = files.flatMap(
|
|
523
|
-
(file) => extractCodeBlocks(file.content, minLines).filter(
|
|
524
|
-
(block) => block && block.content && block.content.trim().length > 0
|
|
525
|
-
).map((block) => ({
|
|
526
|
-
...block,
|
|
527
|
-
file: file.file,
|
|
528
|
-
normalized: normalizeCode(block.content),
|
|
529
|
-
tokenCost: block.content ? (0, import_core2.estimateTokens)(block.content) : 0
|
|
530
|
-
}))
|
|
531
|
-
);
|
|
532
|
-
const pythonFiles = files.filter((f) => f.file.endsWith(".py"));
|
|
533
|
-
if (pythonFiles.length > 0) {
|
|
534
|
-
const { extractPythonPatterns: extractPythonPatterns2 } = await Promise.resolve().then(() => (init_python_extractor(), python_extractor_exports));
|
|
535
|
-
const pythonPatterns = await extractPythonPatterns2(
|
|
536
|
-
pythonFiles.map((f) => f.file)
|
|
537
|
-
);
|
|
538
|
-
allBlocks.push(
|
|
539
|
-
...pythonPatterns.map((p) => ({
|
|
540
|
-
content: p.code,
|
|
541
|
-
startLine: p.startLine,
|
|
542
|
-
endLine: p.endLine,
|
|
543
|
-
file: p.file,
|
|
544
|
-
normalized: normalizeCode(p.code),
|
|
545
|
-
patternType: p.type,
|
|
546
|
-
tokenCost: p.code ? (0, import_core2.estimateTokens)(p.code) : 0,
|
|
547
|
-
linesOfCode: p.endLine - p.startLine + 1
|
|
548
|
-
}))
|
|
549
|
-
);
|
|
550
|
-
}
|
|
551
|
-
const blockTokens = allBlocks.map((b) => tokenize(b.normalized));
|
|
552
|
-
const engine = approx ? new ApproxEngine(allBlocks, blockTokens) : null;
|
|
553
|
-
let comparisonsProcessed = 0;
|
|
554
|
-
const startTime = Date.now();
|
|
555
289
|
for (let i = 0; i < allBlocks.length; i++) {
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
` Processed ${i}/${allBlocks.length} blocks (${elapsed.toFixed(1)}s, ${duplicates.length} duplicates)`
|
|
564
|
-
);
|
|
565
|
-
}
|
|
566
|
-
await new Promise((r) => setImmediate((resolve) => r(resolve)));
|
|
567
|
-
}
|
|
568
|
-
const block1 = allBlocks[i];
|
|
569
|
-
const candidates = engine ? engine.findCandidates(i, minSharedTokens, maxCandidatesPerBlock) : allBlocks.slice(i + 1).map((_, idx) => ({ j: i + 1 + idx, shared: 0 }));
|
|
570
|
-
for (const { j } of candidates) {
|
|
571
|
-
if (!approx && comparisonsProcessed >= maxComparisons) break;
|
|
572
|
-
comparisonsProcessed++;
|
|
573
|
-
const block2 = allBlocks[j];
|
|
574
|
-
if (block1.file === block2.file) continue;
|
|
575
|
-
const sim = jaccardSimilarity(blockTokens[i], blockTokens[j]);
|
|
290
|
+
for (let j = i + 1; j < allBlocks.length; j++) {
|
|
291
|
+
const b1 = allBlocks[i];
|
|
292
|
+
const b2 = allBlocks[j];
|
|
293
|
+
if (b1.file === b2.file) continue;
|
|
294
|
+
const norm1 = normalizeCode(b1.code);
|
|
295
|
+
const norm2 = normalizeCode(b2.code);
|
|
296
|
+
const sim = calculateSimilarity(norm1, norm2);
|
|
576
297
|
if (sim >= minSimilarity) {
|
|
577
|
-
const severity = calculateSeverity(
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
298
|
+
const { severity, reason, suggestion, matchedRule } = calculateSeverity(
|
|
299
|
+
b1.file,
|
|
300
|
+
b2.file,
|
|
301
|
+
b1.code,
|
|
581
302
|
sim,
|
|
582
|
-
|
|
303
|
+
b1.endLine - b1.startLine + 1
|
|
583
304
|
);
|
|
584
305
|
const dup = {
|
|
585
|
-
file1:
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
endLine2:
|
|
306
|
+
file1: b1.file,
|
|
307
|
+
line1: b1.startLine,
|
|
308
|
+
endLine1: b1.endLine,
|
|
309
|
+
file2: b2.file,
|
|
310
|
+
line2: b2.startLine,
|
|
311
|
+
endLine2: b2.endLine,
|
|
312
|
+
code1: b1.code,
|
|
313
|
+
code2: b2.code,
|
|
591
314
|
similarity: sim,
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
suggestion: severity.suggestion
|
|
315
|
+
patternType: b1.patternType,
|
|
316
|
+
tokenCost: b1.tokens + b2.tokens,
|
|
317
|
+
severity,
|
|
318
|
+
reason,
|
|
319
|
+
suggestion,
|
|
320
|
+
matchedRule
|
|
599
321
|
};
|
|
600
322
|
duplicates.push(dup);
|
|
601
323
|
if (streamResults)
|
|
@@ -605,281 +327,134 @@ async function detectDuplicatePatterns(files, options) {
|
|
|
605
327
|
}
|
|
606
328
|
}
|
|
607
329
|
}
|
|
608
|
-
return duplicates;
|
|
330
|
+
return duplicates.sort((a, b) => b.similarity - a.similarity);
|
|
609
331
|
}
|
|
610
332
|
|
|
611
333
|
// src/grouping.ts
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
334
|
+
var import_core3 = require("@aiready/core");
|
|
335
|
+
var import_path = __toESM(require("path"));
|
|
336
|
+
function getSeverityLevel(s) {
|
|
337
|
+
if (s === import_core3.Severity.Critical || s === "critical") return 4;
|
|
338
|
+
if (s === import_core3.Severity.Major || s === "major") return 3;
|
|
339
|
+
if (s === import_core3.Severity.Minor || s === "minor") return 2;
|
|
340
|
+
if (s === import_core3.Severity.Info || s === "info") return 1;
|
|
341
|
+
return 0;
|
|
617
342
|
}
|
|
618
343
|
function groupDuplicatesByFilePair(duplicates) {
  // Aggregate duplicate findings per unordered file pair. The key is
  // order-independent ("a::b" no matter which file was reported first), and a
  // group's severity is promoted to the highest severity seen for that pair.
  const byPair = /* @__PURE__ */ new Map();
  for (const dup of duplicates) {
    const key = [dup.file1, dup.file2].sort().join("::");
    let bucket = byPair.get(key);
    if (bucket === undefined) {
      bucket = {
        filePair: key,
        severity: dup.severity,
        occurrences: 0,
        totalTokenCost: 0,
        // Running similarity sum; divided by occurrences in the final map below.
        averageSimilarity: 0,
        patternTypes: /* @__PURE__ */ new Set(),
        lineRanges: []
      };
      byPair.set(key, bucket);
    }
    bucket.occurrences += 1;
    bucket.totalTokenCost += dup.tokenCost;
    bucket.averageSimilarity += dup.similarity;
    bucket.patternTypes.add(dup.patternType);
    bucket.lineRanges.push({
      file1: { start: dup.line1, end: dup.endLine1 },
      file2: { start: dup.line2, end: dup.endLine2 }
    });
    // Keep the most severe classification observed for this pair.
    if (getSeverityLevel(dup.severity) > getSeverityLevel(bucket.severity)) {
      bucket.severity = dup.severity;
    }
  }
  // Turn the running similarity sum into a true per-group average.
  return [...byPair.values()].map((bucket) => ({
    ...bucket,
    averageSimilarity: bucket.averageSimilarity / bucket.occurrences
  }));
}
|
|
697
378
|
function createRefactorClusters(duplicates) {
  // Group duplicate findings into refactor clusters: files that share
  // duplicates form an undirected graph, and each connected component of
  // 2+ files becomes one cluster with aggregated cost/similarity stats.
  const adjacency = /* @__PURE__ */ new Map();
  for (const dup of duplicates) {
    if (!adjacency.has(dup.file1)) adjacency.set(dup.file1, /* @__PURE__ */ new Set());
    if (!adjacency.has(dup.file2)) adjacency.set(dup.file2, /* @__PURE__ */ new Set());
    adjacency.get(dup.file1).add(dup.file2);
    adjacency.get(dup.file2).add(dup.file1);
  }
  // BFS over the file graph to collect connected components.
  const visited = /* @__PURE__ */ new Set();
  const components = [];
  for (const file of adjacency.keys()) {
    if (visited.has(file)) continue;
    const component = [];
    const queue = [file];
    visited.add(file);
    while (queue.length > 0) {
      const curr = queue.shift();
      component.push(curr);
      for (const neighbor of adjacency.get(curr) || []) {
        if (!visited.has(neighbor)) {
          visited.add(neighbor);
          queue.push(neighbor);
        }
      }
    }
    components.push(component);
  }
  const clusters = [];
  for (const component of components) {
    if (component.length < 2) continue;
    // Set membership is O(1); Array#includes inside the filter was an
    // accidental O(files * duplicates) scan per component.
    const memberSet = new Set(component);
    const componentDups = duplicates.filter(
      (d) => memberSet.has(d.file1) && memberSet.has(d.file2)
    );
    const totalTokenCost = componentDups.reduce((sum, d) => sum + d.tokenCost, 0);
    // Guard against division by zero for (theoretically) empty componentDups.
    const avgSimilarity = componentDups.reduce((sum, d) => sum + d.similarity, 0) / Math.max(1, componentDups.length);
    const name = determineClusterName(component);
    const { severity, reason, suggestion } = calculateSeverity(
      component[0],
      component[1],
      "",
      // Code not available here
      avgSimilarity,
      30
      // Assume substantial if clustered
    );
    clusters.push({
      id: `cluster-${clusters.length}`,
      name,
      files: component,
      severity,
      duplicateCount: componentDups.length,
      totalTokenCost,
      averageSimilarity: avgSimilarity,
      reason,
      suggestion
    });
  }
  return clusters;
}
|
|
730
|
-
function
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
if (
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
`Component check: ${dup.file1} -> ${component1}, ${dup.file2} -> ${component2}`
|
|
741
|
-
);
|
|
742
|
-
if (component1 && component2 && areSimilarComponents(component1, component2)) {
|
|
743
|
-
const category = getComponentCategory(component1);
|
|
744
|
-
console.log(`Creating cluster: component-${category}`);
|
|
745
|
-
return `component-${category}`;
|
|
746
|
-
}
|
|
747
|
-
}
|
|
748
|
-
if ((file1.includes("/e2e/") || file1.startsWith("e2e/") || file1.includes(".e2e.")) && (file2.includes("/e2e/") || file2.startsWith("e2e/") || file2.includes(".e2e."))) {
|
|
749
|
-
return "e2e-test-patterns";
|
|
437
|
+
function determineClusterName(files) {
  // Derive a human-readable cluster label from the member file paths.
  // Known product areas are matched first by substring; otherwise the label
  // falls back to the parent directory of the first file, and finally to a
  // generic shared-pattern label.
  if (files.length === 0) return "Unknown Cluster";
  const anyPathContains = (needle) => files.some((f) => f.includes(needle));
  if (anyPathContains("blog")) return "Blog SEO Boilerplate";
  if (anyPathContains("buttons")) return "Button Component Variants";
  if (anyPathContains("cards")) return "Card Component Variants";
  if (anyPathContains("login.test")) return "E2E Test Patterns";
  const dirName = import_path.default.dirname(files[0]).split(import_path.default.sep).pop();
  if (dirName && dirName !== "." && dirName !== "..") {
    const capitalized = dirName.charAt(0).toUpperCase() + dirName.slice(1);
    return `${capitalized} Domain Group`;
  }
  return "Shared Pattern Group";
}
|
|
875
|
-
function filterClustersByImpact(clusters, minTokenCost = 1e3, minFiles = 3) {
  // Keep only clusters worth refactoring: both the token-cost threshold and
  // the minimum file-count threshold must be met (inclusive comparisons).
  const isImpactful = (cluster) =>
    cluster.totalTokenCost >= minTokenCost && cluster.files.length >= minFiles;
  return clusters.filter(isImpactful);
}
|
|
880
455
|
|
|
881
456
|
// src/scoring.ts
|
|
882
|
-
var
|
|
457
|
+
var import_core4 = require("@aiready/core");
|
|
883
458
|
function calculatePatternScore(duplicates, totalFilesAnalyzed, costConfig) {
|
|
884
459
|
const totalDuplicates = duplicates.length;
|
|
885
460
|
const totalTokenCost = duplicates.reduce((sum, d) => sum + d.tokenCost, 0);
|
|
@@ -957,12 +532,12 @@ function calculatePatternScore(duplicates, totalFilesAnalyzed, costConfig) {
|
|
|
957
532
|
priority: totalTokenCost > 1e4 ? "high" : "medium"
|
|
958
533
|
});
|
|
959
534
|
}
|
|
960
|
-
const cfg = { ...
|
|
961
|
-
const estimatedMonthlyCost = (0,
|
|
535
|
+
const cfg = { ...import_core4.DEFAULT_COST_CONFIG, ...costConfig };
|
|
536
|
+
const estimatedMonthlyCost = (0, import_core4.calculateMonthlyCost)(totalTokenCost, cfg);
|
|
962
537
|
const issues = duplicates.map((d) => ({
|
|
963
538
|
severity: d.severity === "critical" ? "critical" : d.severity === "major" ? "major" : "minor"
|
|
964
539
|
}));
|
|
965
|
-
const productivityImpact = (0,
|
|
540
|
+
const productivityImpact = (0, import_core4.calculateProductivityImpact)(issues);
|
|
966
541
|
return {
|
|
967
542
|
toolName: "pattern-detect",
|
|
968
543
|
score: finalScore,
|
|
@@ -1091,12 +666,18 @@ async function analyzePatterns(options) {
|
|
|
1091
666
|
const estimatedBlocks = files.length * 3;
|
|
1092
667
|
logConfiguration(finalOptions, estimatedBlocks);
|
|
1093
668
|
const results = [];
|
|
1094
|
-
const
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
669
|
+
const BATCH_SIZE = 50;
|
|
670
|
+
const fileContents = [];
|
|
671
|
+
for (let i = 0; i < files.length; i += BATCH_SIZE) {
|
|
672
|
+
const batch = files.slice(i, i + BATCH_SIZE);
|
|
673
|
+
const batchContents = await Promise.all(
|
|
674
|
+
batch.map(async (file) => ({
|
|
675
|
+
file,
|
|
676
|
+
content: await (0, import_core5.readFileContent)(file)
|
|
677
|
+
}))
|
|
678
|
+
);
|
|
679
|
+
fileContents.push(...batchContents);
|
|
680
|
+
}
|
|
1100
681
|
const duplicates = await detectDuplicatePatterns(fileContents, {
|
|
1101
682
|
minSimilarity,
|
|
1102
683
|
minLines,
|
|
@@ -1113,9 +694,9 @@ async function analyzePatterns(options) {
|
|
|
1113
694
|
);
|
|
1114
695
|
const issues = fileDuplicates.map((dup) => {
|
|
1115
696
|
const otherFile = dup.file1 === file ? dup.file2 : dup.file1;
|
|
1116
|
-
const severity2 = dup.similarity > 0.95 ?
|
|
697
|
+
const severity2 = dup.similarity > 0.95 ? import_core5.Severity.Critical : dup.similarity > 0.9 ? import_core5.Severity.Major : import_core5.Severity.Minor;
|
|
1117
698
|
return {
|
|
1118
|
-
type:
|
|
699
|
+
type: import_core5.IssueType.DuplicatePattern,
|
|
1119
700
|
severity: severity2,
|
|
1120
701
|
message: `${dup.patternType} pattern ${Math.round(dup.similarity * 100)}% similar to ${otherFile} (${dup.tokenCost} tokens wasted)`,
|
|
1121
702
|
location: {
|
|
@@ -1128,11 +709,11 @@ async function analyzePatterns(options) {
|
|
|
1128
709
|
let filteredIssues = issues;
|
|
1129
710
|
if (severity !== "all") {
|
|
1130
711
|
const severityMap = {
|
|
1131
|
-
critical: [
|
|
1132
|
-
high: [
|
|
1133
|
-
medium: [
|
|
712
|
+
critical: [import_core5.Severity.Critical],
|
|
713
|
+
high: [import_core5.Severity.Critical, import_core5.Severity.Major],
|
|
714
|
+
medium: [import_core5.Severity.Critical, import_core5.Severity.Major, import_core5.Severity.Minor]
|
|
1134
715
|
};
|
|
1135
|
-
const allowedSeverities = severityMap[severity] || [
|
|
716
|
+
const allowedSeverities = severityMap[severity] || [import_core5.Severity.Critical, import_core5.Severity.Major, import_core5.Severity.Minor];
|
|
1136
717
|
filteredIssues = issues.filter(
|
|
1137
718
|
(issue) => allowedSeverities.includes(issue.severity)
|
|
1138
719
|
);
|
|
@@ -1222,6 +803,7 @@ function generateSummary(results) {
|
|
|
1222
803
|
}
|
|
1223
804
|
// Annotate the CommonJS export names for ESM import in node:
|
|
1224
805
|
0 && (module.exports = {
|
|
806
|
+
Severity,
|
|
1225
807
|
analyzePatterns,
|
|
1226
808
|
calculatePatternScore,
|
|
1227
809
|
calculateSeverity,
|