@aiready/pattern-detect 0.17.13 → 0.17.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analyzer-entry/index.js +119 -1
- package/dist/analyzer-entry/index.mjs +4 -4
- package/dist/chunk-3CNHAYOD.mjs +499 -0
- package/dist/chunk-4SKGAZEW.mjs +514 -0
- package/dist/chunk-6JNGAY7M.mjs +514 -0
- package/dist/chunk-ATXO4JL7.mjs +404 -0
- package/dist/chunk-C4ZGC4KA.mjs +514 -0
- package/dist/chunk-F42Q2M4O.mjs +143 -0
- package/dist/chunk-G3GZFYRI.mjs +144 -0
- package/dist/chunk-JWP5TCDM.mjs +143 -0
- package/dist/chunk-KDXWIT6W.mjs +408 -0
- package/dist/chunk-KZQXBBR3.mjs +143 -0
- package/dist/chunk-NVV4UFIV.mjs +514 -0
- package/dist/chunk-PFA2DO73.mjs +392 -0
- package/dist/chunk-RH5JPWEC.mjs +143 -0
- package/dist/chunk-UKQFCUQA.mjs +323 -0
- package/dist/chunk-WQX7IHAN.mjs +514 -0
- package/dist/cli.js +123 -5
- package/dist/cli.mjs +8 -8
- package/dist/context-rules-entry/index.js +103 -1
- package/dist/context-rules-entry/index.mjs +1 -1
- package/dist/detector-entry/index.js +103 -1
- package/dist/detector-entry/index.mjs +2 -2
- package/dist/index.js +148 -2
- package/dist/index.mjs +4 -4
- package/dist/scoring-entry/index.js +29 -1
- package/dist/scoring-entry/index.mjs +1 -1
- package/package.json +5 -5
|
@@ -0,0 +1,514 @@
|
|
|
1
|
+
import {
|
|
2
|
+
detectDuplicatePatterns
|
|
3
|
+
} from "./chunk-JWP5TCDM.mjs";
|
|
4
|
+
import {
|
|
5
|
+
calculateSeverity
|
|
6
|
+
} from "./chunk-KDXWIT6W.mjs";
|
|
7
|
+
|
|
8
|
+
// src/grouping.ts
|
|
9
|
+
import { Severity, getSeverityLevel } from "@aiready/core";
|
|
10
|
+
import path from "path";
|
|
11
|
+
/**
 * Collapses individual duplicate records into one summary per unordered file pair.
 *
 * @param {Array<object>} duplicates - Records carrying `file1`, `file2`, `severity`,
 *   `tokenCost`, `similarity`, `patternType`, `line1`/`endLine1` and `line2`/`endLine2`.
 * @returns {Array<object>} One entry per pair with `filePair` (both paths sorted and
 *   joined by "::"), the severity with the greatest `getSeverityLevel` value seen,
 *   `occurrences`, `totalTokenCost`, the mean `averageSimilarity`, a `patternTypes`
 *   Set and the collected `lineRanges`.
 */
function groupDuplicatesByFilePair(duplicates) {
  const groups = /* @__PURE__ */ new Map();
  for (const dup of duplicates) {
    // Sort the pair so (a, b) and (b, a) are accumulated into the same group.
    const files = [dup.file1, dup.file2].sort();
    const key = files.join("::");
    let group = groups.get(key);
    if (!group) {
      group = {
        filePair: key,
        severity: dup.severity,
        occurrences: 0,
        totalTokenCost: 0,
        averageSimilarity: 0, // running sum until the final pass below
        patternTypes: /* @__PURE__ */ new Set(),
        lineRanges: []
      };
      groups.set(key, group);
    }
    group.occurrences++;
    group.totalTokenCost += dup.tokenCost;
    group.averageSimilarity += dup.similarity;
    group.patternTypes.add(dup.patternType);
    // Keep every range aligned with the sorted order used in `filePair`:
    // `file1` always describes the first path of the key, even when this
    // record listed the two files the other way round.
    const swapped = files[0] !== dup.file1;
    const ownFirst = { start: dup.line1, end: dup.endLine1 };
    const ownSecond = { start: dup.line2, end: dup.endLine2 };
    group.lineRanges.push(
      swapped ? { file1: ownSecond, file2: ownFirst } : { file1: ownFirst, file2: ownSecond }
    );
    if (getSeverityLevel(dup.severity) > getSeverityLevel(group.severity)) {
      group.severity = dup.severity;
    }
  }
  // Convert the accumulated similarity sum into a mean.
  return Array.from(groups.values()).map((g) => ({
    ...g,
    averageSimilarity: g.averageSimilarity / g.occurrences
  }));
}
|
|
46
|
+
/**
 * Builds refactoring clusters from the connected components of the
 * "file A duplicates file B" graph.
 *
 * @param {Array<object>} duplicates - Records carrying `file1`, `file2`,
 *   `tokenCost` and `similarity`.
 * @returns {Array<object>} One cluster per component with at least two files:
 *   `id`, `name`, `files`, `severity`, `duplicateCount`, `totalTokenCost`,
 *   `averageSimilarity`, `reason` and `suggestion`.
 */
function createRefactorClusters(duplicates) {
  // Undirected adjacency list keyed by file path.
  const adjacency = /* @__PURE__ */ new Map();
  const link = (from, to) => {
    if (!adjacency.has(from)) adjacency.set(from, /* @__PURE__ */ new Set());
    adjacency.get(from).add(to);
  };
  for (const dup of duplicates) {
    link(dup.file1, dup.file2);
    link(dup.file2, dup.file1);
  }

  const visited = /* @__PURE__ */ new Set();
  const clusters = [];
  for (const start of adjacency.keys()) {
    if (visited.has(start)) continue;
    visited.add(start);
    // Breadth-first walk; `component` doubles as the queue so no element
    // ever has to be shifted off the front of an array.
    const component = [start];
    for (let head = 0; head < component.length; head++) {
      for (const neighbor of adjacency.get(component[head]) || []) {
        if (!visited.has(neighbor)) {
          visited.add(neighbor);
          component.push(neighbor);
        }
      }
    }
    // A file that only duplicates itself forms a single-node component.
    if (component.length < 2) continue;

    // Set lookup keeps the membership test constant-time per duplicate.
    const members = new Set(component);
    const componentDups = duplicates.filter(
      (d) => members.has(d.file1) && members.has(d.file2)
    );
    const totalTokenCost = componentDups.reduce((sum, d) => sum + d.tokenCost, 0);
    const avgSimilarity = componentDups.reduce((sum, d) => sum + d.similarity, 0) / Math.max(1, componentDups.length);
    const name = determineClusterName(component);
    const { severity, reason, suggestion } = calculateSeverity(
      component[0],
      component[1],
      "",
      // Code not available here
      avgSimilarity,
      30
      // Assume substantial if clustered
    );
    clusters.push({
      id: `cluster-${clusters.length}`,
      name,
      files: component,
      severity,
      duplicateCount: componentDups.length,
      totalTokenCost,
      averageSimilarity: avgSimilarity,
      reason,
      suggestion
    });
  }
  return clusters;
}
|
|
108
|
+
/**
 * Picks a human-readable label for a cluster of files.
 *
 * Keyword matches are tried first, in a fixed order; otherwise the label is
 * derived from the directory that contains the first file.
 *
 * @param {string[]} files - Paths of the files in the cluster.
 * @returns {string} The cluster label.
 */
function determineClusterName(files) {
  if (files.length === 0) return "Unknown Cluster";
  // Order matters: the first keyword found in any path wins.
  const keywordLabels = [
    ["blog", "Blog SEO Boilerplate"],
    ["buttons", "Button Component Variants"],
    ["cards", "Card Component Variants"],
    ["login.test", "E2E Test Patterns"]
  ];
  for (const [keyword, label] of keywordLabels) {
    if (files.some((f) => f.includes(keyword))) return label;
  }
  // basename(dirname(x)) yields the innermost directory name and, unlike
  // splitting on path.sep, also copes with "/"-separated paths on Windows.
  const dirName = path.basename(path.dirname(files[0]));
  if (dirName && dirName !== "." && dirName !== "..") {
    return `${dirName.charAt(0).toUpperCase() + dirName.slice(1)} Domain Group`;
  }
  return "Shared Pattern Group";
}
|
|
122
|
+
/**
 * Keeps only the clusters that are both expensive enough and wide enough.
 *
 * @param {Array<object>} clusters - Clusters with `totalTokenCost` and `files`.
 * @param {number} [minTokenCost=1000] - Inclusive lower bound on `totalTokenCost`.
 * @param {number} [minFiles=3] - Inclusive lower bound on `files.length`.
 * @returns {Array<object>} A new array containing the clusters that pass both bounds.
 */
function filterClustersByImpact(clusters, minTokenCost = 1e3, minFiles = 3) {
  const isImpactful = (cluster) => {
    // Reject on cost first so `files` is never read for cheap clusters.
    if (!(cluster.totalTokenCost >= minTokenCost)) return false;
    return cluster.files.length >= minFiles;
  };
  return clusters.filter(isImpactful);
}
|
|
127
|
+
/**
 * Heuristically decides whether a snippet is a small, declaration-only
 * interface / type / enum.
 *
 * @param {string} code - Source snippet to inspect.
 * @returns {boolean} True when the trimmed snippet starts with a declaration
 *   keyword, contains none of the implementation markers, and is at most
 *   200 characters long.
 */
function isPureInterfaceDefinition(code) {
  const text = code.trim();
  const declarationPrefixes = [
    "interface ",
    "type ",
    "export interface ",
    "export type ",
    "enum ",
    "export enum "
  ];
  if (!declarationPrefixes.some((prefix) => text.startsWith(prefix))) {
    return false;
  }
  // Any of these substrings suggests executable code inside the snippet.
  const implementationMarkers = ["={", "=> {", "function ", "() {", " implements "];
  if (implementationMarkers.some((marker) => text.includes(marker))) {
    return false;
  }
  return text.length <= 200;
}
|
|
138
|
+
// Lower-case substrings whose presence in a snippet marks it as brand-themed.
var BRAND_INDICATORS = [
  "cyberpunk",
  "cyber-blue",
  "cyber-purple",
  "slate-900",
  "slate-400",
  "zinc-",
  "indigo-",
  "neon-",
  "glassmorphism",
  "backdrop-blur"
];

/**
 * Reports whether a path contains one of the branding path fragments.
 *
 * @param {string} filePath - Path to test (compared case-insensitively).
 * @returns {boolean} True when the lower-cased path contains "landing",
 *   "clawmore", "platform" or "apps/".
 */
function isBrandSpecificComponent(filePath) {
  const normalized = filePath.toLowerCase();
  return ["landing", "clawmore", "platform", "apps/"].some(
    (term) => normalized.includes(term)
  );
}

/**
 * Reports whether two snippets look like brand-themed variants of each other.
 *
 * @param {string} file1 - Path of the first snippet.
 * @param {string} file2 - Path of the second snippet.
 * @param {string} code1 - Source of the first snippet.
 * @param {string} code2 - Source of the second snippet.
 * @returns {boolean} True only when the two paths differ, both paths are
 *   brand-specific, and both snippets contain a brand indicator.
 */
function areBrandSpecificVariants(file1, file2, code1, code2) {
  const firstIsBrand = isBrandSpecificComponent(file1);
  const secondIsBrand = isBrandSpecificComponent(file2);
  if (!firstIsBrand || !secondIsBrand || file1 === file2) {
    return false;
  }
  const mentionsBrand = (snippet) => {
    const haystack = snippet.toLowerCase();
    return BRAND_INDICATORS.some((indicator) => haystack.includes(indicator));
  };
  const firstMentions = mentionsBrand(code1);
  const secondMentions = mentionsBrand(code2);
  return firstMentions && secondMentions;
}
|
|
174
|
+
/**
 * Downgrades intentional duplicates to informational severity.
 *
 * Despite the name, no record is dropped: each cross-file duplicate that is a
 * brand-themed variant or a pure interface/type definition gets its `severity`
 * and `suggestion` overwritten in place. Records whose two files are identical
 * are left untouched.
 *
 * @param {Array<object>} duplicates - Records with `file1`, `file2`, `code1`, `code2`.
 * @returns {Array<object>} A new array holding the same (possibly mutated) records.
 */
function filterBrandSpecificVariants(duplicates) {
  const downgrade = (dup, suggestion) => {
    dup.severity = Severity.Info;
    dup.suggestion = suggestion;
  };
  return duplicates.filter((dup) => {
    if (dup.file1 !== dup.file2) {
      if (areBrandSpecificVariants(dup.file1, dup.file2, dup.code1, dup.code2)) {
        downgrade(dup, "Brand-specific themed component variant (intentional)");
      }
      // Checked second, so its suggestion wins when both rules apply.
      if (isPureInterfaceDefinition(dup.code1) && isPureInterfaceDefinition(dup.code2)) {
        downgrade(dup, "Pure interface/type definition - intentional for module independence");
      }
    }
    return true;
  });
}
|
|
195
|
+
|
|
196
|
+
// src/config.ts
|
|
197
|
+
import { scanFiles } from "@aiready/core";
|
|
198
|
+
/**
 * Derives detection settings from the size of the repository.
 *
 * @param {string} directory - Root directory to scan.
 * @param {object} userOptions - Caller-supplied options. `useSmartDefaults: false`
 *   short-circuits to a fixed configuration; `include` / `exclude` steer the scan.
 * @returns {Promise<object>} Settings with the keys `rootDir`, `minSimilarity`,
 *   `minLines`, `batchSize`, `approx`, `minSharedTokens`, `maxCandidatesPerBlock`,
 *   `streamResults`, `severity` and `includeTests`. In the scanned path, any of
 *   these keys that the caller set to a defined value overrides the computed one.
 */
async function getSmartDefaults(directory, userOptions) {
  if (userOptions.useSmartDefaults === false) {
    // Fixed configuration; the file system is not scanned in this path.
    return {
      rootDir: directory,
      minSimilarity: 0.6,
      minLines: 8,
      batchSize: 100,
      approx: true,
      minSharedTokens: 12,
      maxCandidatesPerBlock: 5,
      streamResults: false,
      severity: "all",
      includeTests: false
    };
  }

  const files = await scanFiles({
    rootDir: directory,
    include: userOptions.include || ["**/*.{ts,tsx,js,jsx,py,java}"],
    exclude: userOptions.exclude
  });
  // Rough size estimate: five code blocks per scanned file.
  const estimatedBlocks = files.length * 5;
  const clamp = (value, low, high) => Math.max(low, Math.min(high, value));

  const tuned = {
    rootDir: directory,
    minSimilarity: Math.min(0.75, 0.45 + estimatedBlocks / 1e4 * 0.3),
    minLines: clamp(6 + Math.floor(estimatedBlocks / 1e3) * 2, 6, 20),
    batchSize: estimatedBlocks > 1e3 ? 200 : 100,
    approx: true,
    minSharedTokens: 10,
    maxCandidatesPerBlock: clamp(Math.floor(1e6 / estimatedBlocks), 5, 100),
    streamResults: false,
    severity: estimatedBlocks > 3e3 ? "high" : "all",
    includeTests: false
  };

  // Explicit, defined user values win over the computed ones.
  const resolved = {};
  for (const [key, fallback] of Object.entries(tuned)) {
    const overridden = key in userOptions && userOptions[key] !== void 0;
    resolved[key] = overridden ? userOptions[key] : fallback;
  }
  return resolved;
}
|
|
252
|
+
/**
 * Prints the effective configuration to the console, one setting per line.
 *
 * @param {object} config - Effective options; nothing is printed when
 *   `config.suppressToolConfig` is truthy.
 * @param {number} estimatedBlocks - Estimated number of code blocks, shown as
 *   the repository size.
 * @returns {void}
 */
function logConfiguration(config, estimatedBlocks) {
  if (config.suppressToolConfig) return;

  const report = [
    "\u{1F4CB} Configuration:",
    ` Repository size: ~${estimatedBlocks} code blocks`,
    ` Similarity threshold: ${config.minSimilarity}`,
    ` Minimum lines: ${config.minLines}`,
    ` Approximate mode: ${config.approx ? "enabled" : "disabled"}`,
    ` Max candidates per block: ${config.maxCandidatesPerBlock}`,
    ` Min shared tokens: ${config.minSharedTokens}`,
    ` Severity filter: ${config.severity}`,
    ` Include tests: ${config.includeTests}`
  ];

  // Optional settings are only reported when they are actually in effect.
  const { excludePatterns, confidenceThreshold, ignoreWhitelist } = config;
  if (excludePatterns && excludePatterns.length > 0) {
    report.push(` Exclude patterns: ${excludePatterns.length} active`);
  }
  if (confidenceThreshold && confidenceThreshold > 0) {
    report.push(` Confidence threshold: ${confidenceThreshold}`);
  }
  if (ignoreWhitelist && ignoreWhitelist.length > 0) {
    report.push(` Ignore whitelist: ${ignoreWhitelist.length} entries`);
  }
  report.push("");

  for (const line of report) {
    console.log(line);
  }
}
|
|
276
|
+
|
|
277
|
+
// src/summary.ts
|
|
278
|
+
import { Severity as Severity2 } from "@aiready/core";
|
|
279
|
+
/**
 * Builds the refactoring hint shown for a duplicated pattern.
 *
 * @param {string} patternType - Pattern category; anything that is not one of
 *   the known categories is treated as "unknown".
 * @param {number} similarity - Similarity ratio; above 0.9 a HIGH note is
 *   appended, above 0.95 a CRITICAL note.
 * @returns {string} The base advice for the category plus the urgency note, if any.
 */
function getRefactoringSuggestion(patternType, similarity) {
  const baseMessages = {
    "api-handler": "Extract common middleware or create a base handler class",
    validator: "Consolidate validation logic into shared schema validators (Zod/Yup)",
    utility: "Move to a shared utilities file and reuse across modules",
    "class-method": "Consider inheritance or composition to share behavior",
    component: "Extract shared logic into a custom hook or HOC",
    function: "Extract into a shared helper function",
    unknown: "Extract common logic into a reusable module"
  };
  // Own-property check: unrecognised (or inherited, e.g. "constructor") keys
  // must fall back to the generic advice instead of yielding "undefined...".
  const isKnown = Object.prototype.hasOwnProperty.call(baseMessages, patternType);
  const base = isKnown ? baseMessages[patternType] : baseMessages.unknown;

  let urgency = "";
  if (similarity > 0.95) {
    urgency = " (CRITICAL: Nearly identical code)";
  } else if (similarity > 0.9) {
    urgency = " (HIGH: Very similar, refactor soon)";
  }
  return base + urgency;
}
|
|
292
|
+
/**
 * Aggregates per-file analysis results into an overall summary.
 *
 * Pattern type, similarity, token cost and the other file are recovered by
 * parsing each issue's `message` text.
 *
 * @param {Array<object>} results - Per-file results with optional `issues`
 *   (each having `message` and `location.file` / `location.line`) and optional
 *   `metrics.tokenCost`. Any non-array value yields an empty summary.
 * @returns {{totalPatterns: number, totalTokenCost: number,
 *   patternsByType: object, topDuplicates: Array<object>}} Totals, per-type
 *   counts, and details for the first ten issues in input order.
 */
function generateSummary(results) {
  const emptyCounts = () => ({
    "api-handler": 0,
    validator: 0,
    utility: 0,
    "class-method": 0,
    component: 0,
    function: 0,
    unknown: 0
  });
  if (!Array.isArray(results)) {
    return {
      totalPatterns: 0,
      totalTokenCost: 0,
      patternsByType: emptyCounts(),
      topDuplicates: []
    };
  }

  // First whitespace-free token followed by " pattern". A single `\S+` accepts
  // exactly the same strings as `\S+(?:-\S+)*` (since `\S` already covers "-")
  // but cannot backtrack exponentially on hyphen-heavy, non-matching text.
  const typePattern = /^(\S+) pattern/;

  const allIssues = results.flatMap((r) => r.issues || []);
  const totalTokenCost = results.reduce(
    (sum, r) => sum + (r.metrics?.tokenCost || 0),
    0
  );

  const patternsByType = emptyCounts();
  for (const issue of allIssues) {
    const match = issue.message.match(typePattern);
    if (match) {
      const type = match[1] || "unknown";
      patternsByType[type] = (patternsByType[type] || 0) + 1;
    }
  }

  const topDuplicates = allIssues.slice(0, 10).map((issue) => {
    const similarityMatch = issue.message.match(/(\d+)% similar/);
    const tokenMatch = issue.message.match(/\((\d+) tokens/);
    const typeMatch = issue.message.match(typePattern);
    const fileMatch = issue.message.match(/similar to (.+?) \(/);
    return {
      files: [
        {
          path: issue.location.file,
          startLine: issue.location.line,
          endLine: 0
        },
        {
          // The message carries only the other file's path, not its lines.
          path: fileMatch?.[1] || "unknown",
          startLine: 0,
          endLine: 0
        }
      ],
      similarity: similarityMatch ? parseInt(similarityMatch[1], 10) / 100 : 0,
      patternType: typeMatch?.[1] || "unknown",
      tokenCost: tokenMatch ? parseInt(tokenMatch[1], 10) : 0
    };
  });

  return {
    totalPatterns: allIssues.length,
    totalTokenCost,
    patternsByType,
    topDuplicates
  };
}
|
|
360
|
+
/**
 * Restricts a list of issues to the requested severity band.
 *
 * @param {Array<object>} issues - Issues carrying a `severity` field.
 * @param {string} severity - "all" returns the input array itself; "critical",
 *   "high" and "medium" each admit that level and everything above it; any
 *   other value behaves like "medium".
 * @returns {Array<object>} The issues whose severity is admitted.
 */
function filterBySeverity(issues, severity) {
  if (severity === "all") return issues;

  // Ordered from most to least severe so each band is a prefix of this list.
  const upToMinor = [Severity2.Critical, Severity2.Major, Severity2.Minor];
  const bands = {
    critical: upToMinor.slice(0, 1),
    high: upToMinor.slice(0, 2),
    medium: upToMinor
  };
  const allowed = bands[severity] || upToMinor;
  return issues.filter(({ severity: level }) => allowed.includes(level));
}
|
|
374
|
+
/**
 * Maps a severity value to its upper-case display label.
 *
 * @param {*} severity - A `Severity` member.
 * @returns {string} "CRITICAL", "HIGH", "MEDIUM" or "LOW" for the Critical,
 *   Major, Minor and Info members respectively; "UNKNOWN" for anything else.
 */
function getSeverityLabel(severity) {
  const labelled = [
    [Severity2.Critical, "CRITICAL"],
    [Severity2.Major, "HIGH"],
    [Severity2.Minor, "MEDIUM"],
    [Severity2.Info, "LOW"]
  ];
  for (const [level, label] of labelled) {
    if (severity === level) return label;
  }
  return "UNKNOWN";
}
|
|
388
|
+
/**
 * Derives a severity from a similarity ratio alone.
 *
 * @param {number} similarity - Similarity ratio of the duplicate.
 * @returns {*} `Severity.Critical` above 0.95, `Severity.Major` above 0.9,
 *   otherwise `Severity.Minor`.
 */
function calculateSeverity2(similarity) {
  return similarity > 0.95
    ? Severity2.Critical
    : similarity > 0.9
      ? Severity2.Major
      : Severity2.Minor;
}
|
|
393
|
+
|
|
394
|
+
// src/analyzer.ts
|
|
395
|
+
import { scanFiles as scanFiles2, readFileContent, Severity as Severity3, IssueType } from "@aiready/core";
|
|
396
|
+
/**
 * Scans a directory, detects duplicated patterns and reports them per file.
 *
 * @param {object} options - Analysis options. Detection settings that are
 *   missing (or explicitly `undefined`) are taken from `getSmartDefaults`;
 *   any option that is not a detection setting is forwarded to the file scan.
 *   `options.onProgress` is passed through to the detector.
 * @returns {Promise<object>} `{ results, duplicates, files, groups, clusters, config }`
 *   where `results` has one entry per scanned file, `groups` is set only when
 *   `groupByFilePair` is truthy and `clusters` only when `createClusters` is truthy.
 */
async function analyzePatterns(options) {
  const smartDefaults = await getSmartDefaults(options.rootDir || ".", options);
  // Drop keys that are explicitly undefined so they cannot wipe out a smart
  // default; getSmartDefaults applies the same "!== undefined" rule itself.
  const explicitOptions = Object.fromEntries(
    Object.entries(options).filter(([, value]) => value !== void 0)
  );
  const finalOptions = { ...smartDefaults, ...explicitOptions };
  const {
    minSimilarity = 0.4,
    minLines = 5,
    batchSize = 100,
    approx = true,
    minSharedTokens = 8,
    maxCandidatesPerBlock = 100,
    streamResults = false,
    severity = "all",
    groupByFilePair = true,
    createClusters = true,
    minClusterTokenCost = 1e3,
    minClusterFiles = 3,
    excludePatterns = [],
    confidenceThreshold = 0,
    ignoreWhitelist = [],
    ...scanOptions
  } = finalOptions;

  const files = await scanFiles2(scanOptions);
  // Rough figure, used only for the configuration log below.
  const estimatedBlocks = files.length * 3;
  logConfiguration(finalOptions, estimatedBlocks);

  // Read file contents in bounded batches rather than all at once.
  const READ_BATCH_SIZE = 50;
  const fileContents = [];
  for (let i = 0; i < files.length; i += READ_BATCH_SIZE) {
    const batch = files.slice(i, i + READ_BATCH_SIZE);
    const batchContents = await Promise.all(
      batch.map(async (file) => ({
        file,
        content: await readFileContent(file)
      }))
    );
    fileContents.push(...batchContents);
  }

  const duplicates = await detectDuplicatePatterns(fileContents, {
    minSimilarity,
    minLines,
    batchSize,
    approx,
    minSharedTokens,
    maxCandidatesPerBlock,
    streamResults,
    excludePatterns,
    confidenceThreshold,
    ignoreWhitelist,
    onProgress: options.onProgress
  });
  // Called for its in-place severity downgrades; the returned copy is not needed.
  filterBrandSpecificVariants(duplicates);

  // Index duplicates by file once so the per-file loop below does not have
  // to rescan the whole duplicate list for every file.
  const duplicatesByFile = /* @__PURE__ */ new Map();
  const indexUnder = (file, dup) => {
    const bucket = duplicatesByFile.get(file);
    if (bucket) {
      bucket.push(dup);
    } else {
      duplicatesByFile.set(file, [dup]);
    }
  };
  for (const dup of duplicates) {
    indexUnder(dup.file1, dup);
    // A within-file duplicate must be listed only once for that file.
    if (dup.file2 !== dup.file1) indexUnder(dup.file2, dup);
  }

  const results = [];
  for (const file of files) {
    const fileDuplicates = duplicatesByFile.get(file) || [];
    const issues = fileDuplicates.map((dup) => {
      const otherFile = dup.file1 === file ? dup.file2 : dup.file1;
      // Keep an informational downgrade; otherwise grade by similarity.
      let severityLevel;
      if (dup.severity === "info" || dup.severity === "Info") {
        severityLevel = Severity3.Info;
      } else {
        severityLevel = calculateSeverity2(dup.similarity);
      }
      return {
        type: IssueType.DuplicatePattern,
        severity: severityLevel,
        message: `${dup.patternType} pattern ${Math.round(dup.similarity * 100)}% similar to ${otherFile} (${dup.tokenCost} tokens wasted)`,
        location: {
          file,
          line: dup.file1 === file ? dup.line1 : dup.line2
        },
        suggestion: getRefactoringSuggestion(dup.patternType, dup.similarity)
      };
    });
    const filteredIssues = filterBySeverity(issues, severity || "all");
    // Metrics are computed from all duplicates, not just the filtered issues.
    const totalTokenCost = fileDuplicates.reduce(
      (sum, dup) => sum + dup.tokenCost,
      0
    );
    results.push({
      fileName: file,
      issues: filteredIssues,
      metrics: {
        tokenCost: totalTokenCost,
        consistencyScore: Math.max(0, 1 - fileDuplicates.length * 0.1)
      }
    });
  }

  let groups;
  let clusters;
  if (groupByFilePair) {
    groups = groupDuplicatesByFilePair(duplicates);
  }
  if (createClusters) {
    const allClusters = createRefactorClusters(duplicates);
    clusters = filterClustersByImpact(
      allClusters,
      minClusterTokenCost,
      minClusterFiles
    );
  }
  return { results, duplicates, files, groups, clusters, config: finalOptions };
}
|
|
499
|
+
|
|
500
|
+
export {
|
|
501
|
+
groupDuplicatesByFilePair,
|
|
502
|
+
createRefactorClusters,
|
|
503
|
+
filterClustersByImpact,
|
|
504
|
+
areBrandSpecificVariants,
|
|
505
|
+
filterBrandSpecificVariants,
|
|
506
|
+
getSmartDefaults,
|
|
507
|
+
logConfiguration,
|
|
508
|
+
generateSummary,
|
|
509
|
+
filterBySeverity,
|
|
510
|
+
getSeverityLabel,
|
|
511
|
+
calculateSeverity2 as calculateSeverity,
|
|
512
|
+
Severity3 as Severity,
|
|
513
|
+
analyzePatterns
|
|
514
|
+
};
|
package/dist/cli.js
CHANGED
|
@@ -156,19 +156,91 @@ var INFRA_RULES = [
|
|
|
156
156
|
{
|
|
157
157
|
name: "cli-command-definitions",
|
|
158
158
|
detect: (file, code) => {
|
|
159
|
-
const
|
|
159
|
+
const basename = file.split("/").pop() || "";
|
|
160
|
+
const isCliFile = file.includes("/commands/") || file.includes("/cli/") || file.endsWith(".command.ts") || basename === "cli.ts" || basename === "cli.js" || basename === "cli.tsx" || basename === "cli-action.ts";
|
|
160
161
|
const hasCommandPattern = (code.includes(".command(") || code.includes("defineCommand")) && (code.includes(".description(") || code.includes(".option(")) && (code.includes(".action(") || code.includes("async"));
|
|
161
162
|
return isCliFile && hasCommandPattern;
|
|
162
163
|
},
|
|
163
164
|
severity: import_core3.Severity.Info,
|
|
164
165
|
reason: "CLI command definitions follow standard Commander.js patterns and are intentionally similar",
|
|
165
166
|
suggestion: "Command boilerplate duplication is acceptable for CLI interfaces"
|
|
167
|
+
},
|
|
168
|
+
// DynamoDB Single-Table Design - Standard single-table patterns with prefixed keys
|
|
169
|
+
{
|
|
170
|
+
name: "dynamodb-single-table",
|
|
171
|
+
detect: (file, code) => {
|
|
172
|
+
const hasDynamoDBPattern = code.includes("docClient") || code.includes("dynamodb") || code.includes("DynamoDB") || code.includes("queryItems") || code.includes("putItem") || code.includes("getItem") || code.includes("updateItem") || code.includes("deleteItem");
|
|
173
|
+
const hasKeyPrefix = code.includes("userId:") && code.includes("#") || code.includes("pk:") && code.includes("#") || code.includes("Key:") && code.includes("#") || /[A-Z]+#/.test(code);
|
|
174
|
+
const hasSingleTablePattern = code.includes("KeyConditionExpression") || code.includes("pk =") || code.includes("sk =") || code.includes("userId") && code.includes("timestamp");
|
|
175
|
+
return hasDynamoDBPattern && (hasKeyPrefix || hasSingleTablePattern);
|
|
176
|
+
},
|
|
177
|
+
severity: import_core3.Severity.Info,
|
|
178
|
+
reason: "DynamoDB single-table design with prefixed keys is a standard pattern for efficient data access",
|
|
179
|
+
suggestion: "Single-table query patterns are intentionally similar and should not be refactored"
|
|
180
|
+
},
|
|
181
|
+
// CLI Main Function Boilerplate - Standard argument parsing patterns
|
|
182
|
+
{
|
|
183
|
+
name: "cli-main-boilerplate",
|
|
184
|
+
detect: (file, code) => {
|
|
185
|
+
const basename = file.split("/").pop() || "";
|
|
186
|
+
const isCliFile = file.includes("/cli/") || file.includes("/commands/") || basename.startsWith("cli") || basename.endsWith(".cli.ts") || basename.endsWith(".cli.js");
|
|
187
|
+
const hasMainFunction = code.includes("function main()") || code.includes("async function main()") || code.includes("const main =") || code.includes("main()");
|
|
188
|
+
const hasArgParsing = code.includes("process.argv") || code.includes("yargs") || code.includes("commander") || code.includes("minimist") || code.includes(".parse(") || code.includes("args") && code.includes("._");
|
|
189
|
+
return isCliFile && hasMainFunction && hasArgParsing;
|
|
190
|
+
},
|
|
191
|
+
severity: import_core3.Severity.Info,
|
|
192
|
+
reason: "CLI main functions with argument parsing follow standard boilerplate patterns",
|
|
193
|
+
suggestion: "CLI argument parsing boilerplate is acceptable and should not be flagged as duplication"
|
|
166
194
|
}
|
|
167
195
|
];
|
|
168
196
|
|
|
169
197
|
// src/rules/categories/logic-rules.ts
|
|
170
198
|
var import_core4 = require("@aiready/core");
|
|
171
199
|
var LOGIC_RULES = [
|
|
200
|
+
// Enum Semantic Difference - Different enum names indicate different semantic meanings
|
|
201
|
+
{
|
|
202
|
+
name: "enum-semantic-difference",
|
|
203
|
+
detect: (file, code) => {
|
|
204
|
+
const enumRegex = /(?:export\s+)?(?:const\s+)?enum\s+([A-Z][a-zA-Z0-9]*)/g;
|
|
205
|
+
const enums = [];
|
|
206
|
+
let match;
|
|
207
|
+
while ((match = enumRegex.exec(code)) !== null) {
|
|
208
|
+
enums.push(match[1]);
|
|
209
|
+
}
|
|
210
|
+
return enums.length > 0;
|
|
211
|
+
},
|
|
212
|
+
severity: import_core4.Severity.Info,
|
|
213
|
+
reason: "Enums with different names represent different semantic domain concepts, even if they share similar values",
|
|
214
|
+
suggestion: "Different enums (e.g., EscalationPriority vs HealthSeverity) serve different purposes and should not be merged"
|
|
215
|
+
},
|
|
216
|
+
// Enum Value Similarity - Common enum values like LOW, MEDIUM, HIGH are standard
|
|
217
|
+
{
|
|
218
|
+
name: "enum-value-similarity",
|
|
219
|
+
detect: (file, code) => {
|
|
220
|
+
const hasCommonEnumValues = (code.includes("LOW = 'low'") || code.includes("LOW = 0") || code.includes("LOW = 'LOW'")) && (code.includes("HIGH = 'high'") || code.includes("HIGH = 2") || code.includes("HIGH = 'HIGH'")) && (code.includes("MEDIUM = 'medium'") || code.includes("MEDIUM = 1") || code.includes("MEDIUM = 'MEDIUM'"));
|
|
221
|
+
const isEnumDefinition = /(?:export\s+)?(?:const\s+)?enum\s+/.test(code) || code.includes("enum ") && code.includes("{") && code.includes("}");
|
|
222
|
+
return hasCommonEnumValues && isEnumDefinition;
|
|
223
|
+
},
|
|
224
|
+
severity: import_core4.Severity.Info,
|
|
225
|
+
reason: "Common enum values (LOW, MEDIUM, HIGH, CRITICAL) are standard patterns used across different domain enums",
|
|
226
|
+
suggestion: "Enum value similarity is expected for severity/priority enums and should not be flagged as duplication"
|
|
227
|
+
},
|
|
228
|
+
// Re-export / Barrel files - Intentional API surface consolidation
|
|
229
|
+
{
|
|
230
|
+
name: "re-export-files",
|
|
231
|
+
detect: (file, code) => {
|
|
232
|
+
const isIndexFile = file.endsWith("/index.ts") || file.endsWith("/index.js") || file.endsWith("/index.tsx") || file.endsWith("/index.jsx");
|
|
233
|
+
const lines = code.split("\n").filter((l) => l.trim());
|
|
234
|
+
if (lines.length === 0) return false;
|
|
235
|
+
const reExportLines = lines.filter(
|
|
236
|
+
(l) => /^export\s+(\{[^}]+\}|\*)\s+from\s+/.test(l.trim()) || /^export\s+\*\s+as\s+\w+\s+from\s+/.test(l.trim())
|
|
237
|
+
).length;
|
|
238
|
+
return isIndexFile && reExportLines > 0 && reExportLines / lines.length > 0.5;
|
|
239
|
+
},
|
|
240
|
+
severity: import_core4.Severity.Info,
|
|
241
|
+
reason: "Barrel/index files intentionally re-export for API surface consolidation",
|
|
242
|
+
suggestion: "Re-exports in barrel files are expected and not true duplication"
|
|
243
|
+
},
|
|
172
244
|
// Type Definitions - Duplication for type safety and module independence
|
|
173
245
|
{
|
|
174
246
|
name: "type-definitions",
|
|
@@ -182,6 +254,20 @@ var LOGIC_RULES = [
|
|
|
182
254
|
reason: "Type/interface definitions are intentionally duplicated for module independence",
|
|
183
255
|
suggestion: "Extract to shared types package only if causing maintenance burden"
|
|
184
256
|
},
|
|
257
|
+
// Cross-Package Type Definitions - Different packages may have similar types
|
|
258
|
+
{
|
|
259
|
+
name: "cross-package-types",
|
|
260
|
+
detect: (file, code) => {
|
|
261
|
+
const hasTypeDefinition = code.includes("interface ") || code.includes("type ") || code.includes("enum ");
|
|
262
|
+
const isPackageOrApp = file.includes("/packages/") || file.includes("/apps/") || file.includes("/core/");
|
|
263
|
+
const packageMatch = file.match(/\/(packages|apps|core)\/([^/]+)\//);
|
|
264
|
+
const hasPackageStructure = packageMatch !== null;
|
|
265
|
+
return hasTypeDefinition && isPackageOrApp && hasPackageStructure;
|
|
266
|
+
},
|
|
267
|
+
severity: import_core4.Severity.Info,
|
|
268
|
+
reason: "Types in different packages/modules are often intentionally similar for module independence",
|
|
269
|
+
suggestion: "Cross-package type duplication is acceptable for decoupled module architecture"
|
|
270
|
+
},
|
|
185
271
|
// Utility Functions - Small helpers in dedicated utility files
|
|
186
272
|
{
|
|
187
273
|
name: "utility-functions",
|
|
@@ -264,6 +350,22 @@ var LOGIC_RULES = [
|
|
|
264
350
|
severity: import_core4.Severity.Info,
|
|
265
351
|
reason: "Validation functions are inherently similar and often intentionally duplicated for domain clarity",
|
|
266
352
|
suggestion: "Consider extracting to shared validators only if validation logic becomes complex"
|
|
353
|
+
},
|
|
354
|
+
// Singleton Getter Pattern - Standard singleton initialization pattern
|
|
355
|
+
{
|
|
356
|
+
name: "singleton-getter",
|
|
357
|
+
detect: (file, code) => {
|
|
358
|
+
const hasSingletonGetter = /(?:export\s+)?(?:async\s+)?function\s+get[A-Z][a-zA-Z0-9]*\s*\(/.test(
|
|
359
|
+
code
|
|
360
|
+
) || /(?:export\s+)?const\s+get[A-Z][a-zA-Z0-9]*\s*=\s*(?:async\s+)?\(\)\s*=>/.test(
|
|
361
|
+
code
|
|
362
|
+
);
|
|
363
|
+
const hasSingletonPattern = code.includes("if (!") && code.includes("instance") && code.includes(" = ") || code.includes("if (!_") && code.includes(" = new ") || code.includes("if (") && code.includes(" === null") && code.includes(" = new ");
|
|
364
|
+
return hasSingletonGetter && hasSingletonPattern;
|
|
365
|
+
},
|
|
366
|
+
severity: import_core4.Severity.Info,
|
|
367
|
+
reason: "Singleton getter functions follow standard initialization pattern and are intentionally similar",
|
|
368
|
+
suggestion: "Singleton getters are boilerplate and acceptable duplication for lazy initialization"
|
|
267
369
|
}
|
|
268
370
|
];
|
|
269
371
|
|
|
@@ -732,6 +834,22 @@ function getRefactoringSuggestion(patternType, similarity) {
|
|
|
732
834
|
return baseMessages[patternType] + urgency;
|
|
733
835
|
}
|
|
734
836
|
function generateSummary(results) {
|
|
837
|
+
if (!Array.isArray(results)) {
|
|
838
|
+
return {
|
|
839
|
+
totalPatterns: 0,
|
|
840
|
+
totalTokenCost: 0,
|
|
841
|
+
patternsByType: {
|
|
842
|
+
"api-handler": 0,
|
|
843
|
+
validator: 0,
|
|
844
|
+
utility: 0,
|
|
845
|
+
"class-method": 0,
|
|
846
|
+
component: 0,
|
|
847
|
+
function: 0,
|
|
848
|
+
unknown: 0
|
|
849
|
+
},
|
|
850
|
+
topDuplicates: []
|
|
851
|
+
};
|
|
852
|
+
}
|
|
735
853
|
const allIssues = results.flatMap((r) => r.issues || []);
|
|
736
854
|
const totalTokenCost = results.reduce(
|
|
737
855
|
(sum, r) => sum + (r.metrics?.tokenCost || 0),
|
|
@@ -1047,8 +1165,8 @@ function generateHTMLReport(results, summary) {
|
|
|
1047
1165
|
{
|
|
1048
1166
|
title: "Pattern Detection Report",
|
|
1049
1167
|
packageName: "pattern-detect",
|
|
1050
|
-
packageUrl: "https://github.com/
|
|
1051
|
-
bugUrl: "https://github.com/
|
|
1168
|
+
packageUrl: "https://github.com/getaiready/aiready-pattern-detect",
|
|
1169
|
+
bugUrl: "https://github.com/getaiready/aiready-pattern-detect/issues",
|
|
1052
1170
|
version: metadata.version,
|
|
1053
1171
|
emoji: "\u{1F50D}"
|
|
1054
1172
|
},
|
|
@@ -1394,12 +1512,12 @@ function printGuidance() {
|
|
|
1394
1512
|
function printFooter() {
|
|
1395
1513
|
console.log(
|
|
1396
1514
|
import_chalk2.default.dim(
|
|
1397
|
-
"\n\u2B50 Like AIReady? Star us on GitHub: https://github.com/
|
|
1515
|
+
"\n\u2B50 Like AIReady? Star us on GitHub: https://github.com/getaiready/aiready-pattern-detect"
|
|
1398
1516
|
)
|
|
1399
1517
|
);
|
|
1400
1518
|
console.log(
|
|
1401
1519
|
import_chalk2.default.dim(
|
|
1402
|
-
"\u{1F41B} Found a bug? Report it: https://github.com/
|
|
1520
|
+
"\u{1F41B} Found a bug? Report it: https://github.com/getaiready/aiready-pattern-detect/issues\n"
|
|
1403
1521
|
)
|
|
1404
1522
|
);
|
|
1405
1523
|
}
|