@aiready/pattern-detect 0.17.22 → 0.17.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analyzer-entry/index.js +3 -2
- package/dist/analyzer-entry/index.mjs +1 -1
- package/dist/chunk-AK7XQ2XQ.mjs +517 -0
- package/dist/cli.js +3 -2
- package/dist/cli.mjs +1 -1
- package/dist/index.js +3 -2
- package/dist/index.mjs +1 -1
- package/package.json +2 -2
|
@@ -993,7 +993,7 @@ async function getSmartDefaults(directory, userOptions) {
|
|
|
993
993
|
);
|
|
994
994
|
const minSimilarity = Math.min(0.75, 0.45 + estimatedBlocks / 1e4 * 0.3);
|
|
995
995
|
const batchSize = estimatedBlocks > 1e3 ? 200 : 100;
|
|
996
|
-
const severity = estimatedBlocks >
|
|
996
|
+
const severity = estimatedBlocks > 5e3 ? "high" : "all";
|
|
997
997
|
const maxCandidatesPerBlock = Math.max(
|
|
998
998
|
5,
|
|
999
999
|
Math.min(100, Math.floor(1e6 / estimatedBlocks))
|
|
@@ -1143,7 +1143,8 @@ function filterBySeverity2(issues, severity) {
|
|
|
1143
1143
|
function calculateSeverity2(similarity) {
|
|
1144
1144
|
if (similarity > 0.95) return import_core9.Severity.Critical;
|
|
1145
1145
|
if (similarity > 0.9) return import_core9.Severity.Major;
|
|
1146
|
-
return import_core9.Severity.Minor;
|
|
1146
|
+
if (similarity > 0.7) return import_core9.Severity.Minor;
|
|
1147
|
+
return import_core9.Severity.Info;
|
|
1147
1148
|
}
|
|
1148
1149
|
|
|
1149
1150
|
// src/scoring.ts
|
|
@@ -0,0 +1,517 @@
|
|
|
1
|
+
import {
|
|
2
|
+
detectDuplicatePatterns
|
|
3
|
+
} from "./chunk-XR373Q6G.mjs";
|
|
4
|
+
import {
|
|
5
|
+
calculateSeverity
|
|
6
|
+
} from "./chunk-XWIBTD67.mjs";
|
|
7
|
+
|
|
8
|
+
// src/grouping.ts
|
|
9
|
+
import { Severity, getSeverityLevel } from "@aiready/core";
|
|
10
|
+
import path from "path";
|
|
11
|
+
/**
 * Collapses duplicate records into one summary per unordered file pair.
 *
 * The pair key is the two file names sorted and joined with "::", so
 * (a, b) and (b, a) land in the same group. Each group tracks the number
 * of occurrences, the summed token cost, the set of pattern types, every
 * line range involved, and the highest severity seen (ranked through
 * `getSeverityLevel`).
 *
 * @param {Array<object>} duplicates - Records exposing file1, file2,
 *   severity, tokenCost, similarity, patternType, line1/endLine1 and
 *   line2/endLine2.
 * @returns {Array<object>} One group per file pair, with
 *   `averageSimilarity` turned from a running sum into a mean.
 */
function groupDuplicatesByFilePair(duplicates) {
  const byPair = /* @__PURE__ */ new Map();
  duplicates.forEach((entry) => {
    const pairKey = [entry.file1, entry.file2].sort().join("::");
    let bucket = byPair.get(pairKey);
    if (bucket === undefined) {
      bucket = {
        filePair: pairKey,
        severity: entry.severity,
        occurrences: 0,
        totalTokenCost: 0,
        // Holds the running SUM until the final map() divides it.
        averageSimilarity: 0,
        patternTypes: /* @__PURE__ */ new Set(),
        lineRanges: []
      };
      byPair.set(pairKey, bucket);
    }
    bucket.occurrences += 1;
    bucket.totalTokenCost += entry.tokenCost;
    bucket.averageSimilarity += entry.similarity;
    bucket.patternTypes.add(entry.patternType);
    bucket.lineRanges.push({
      file1: { start: entry.line1, end: entry.endLine1 },
      file2: { start: entry.line2, end: entry.endLine2 }
    });
    // Keep the most severe rating observed for this pair.
    const candidate = entry.severity;
    if (getSeverityLevel(candidate) > getSeverityLevel(bucket.severity)) {
      bucket.severity = candidate;
    }
  });
  return [...byPair.values()].map((bucket) => {
    const mean = bucket.averageSimilarity / bucket.occurrences;
    return { ...bucket, averageSimilarity: mean };
  });
}
|
|
46
|
+
/**
 * Builds refactoring clusters from duplicate records.
 *
 * Files are treated as graph nodes and every duplicate as an undirected
 * edge. Each connected component containing at least two files becomes a
 * cluster that carries the component's files, the duplicates inside it,
 * their summed token cost and mean similarity, a name from
 * `determineClusterName`, and the severity/reason/suggestion returned by
 * `calculateSeverity`.
 *
 * @param {Array<object>} duplicates - Records exposing file1, file2,
 *   tokenCost and similarity.
 * @returns {Array<object>} Clusters with id, name, files, severity,
 *   duplicateCount, totalTokenCost, averageSimilarity, reason, suggestion.
 */
function createRefactorClusters(duplicates) {
  const adjacency = /* @__PURE__ */ new Map();
  const connect = (from, to) => {
    let neighbors = adjacency.get(from);
    if (!neighbors) {
      neighbors = /* @__PURE__ */ new Set();
      adjacency.set(from, neighbors);
    }
    neighbors.add(to);
  };
  for (const dup of duplicates) {
    connect(dup.file1, dup.file2);
    connect(dup.file2, dup.file1);
  }

  // Breadth-first search per unvisited file. `componentOf` remembers which
  // component each file belongs to so duplicates can be bucketed in one pass.
  const componentOf = /* @__PURE__ */ new Map();
  const components = [];
  for (const start of adjacency.keys()) {
    if (componentOf.has(start)) continue;
    const index = components.length;
    const component = [start];
    componentOf.set(start, index);
    // `component` doubles as the queue; a cursor avoids O(n) Array#shift.
    for (let cursor = 0; cursor < component.length; cursor++) {
      for (const neighbor of adjacency.get(component[cursor]) || []) {
        if (!componentOf.has(neighbor)) {
          componentOf.set(neighbor, index);
          component.push(neighbor);
        }
      }
    }
    components.push(component);
  }

  // Both endpoints of a duplicate are adjacent, hence always in the same
  // component; looking up file1 is enough to place the duplicate.
  const dupsByComponent = components.map(() => []);
  for (const dup of duplicates) {
    dupsByComponent[componentOf.get(dup.file1)].push(dup);
  }

  const clusters = [];
  components.forEach((component, index) => {
    // A single-file component only arises from self-duplicates; skip it.
    if (component.length < 2) return;
    const componentDups = dupsByComponent[index];
    const totalTokenCost = componentDups.reduce(
      (sum, d) => sum + d.tokenCost,
      0
    );
    const avgSimilarity = componentDups.reduce((sum, d) => sum + d.similarity, 0) / Math.max(1, componentDups.length);
    const name = determineClusterName(component);
    const { severity, reason, suggestion } = calculateSeverity(
      component[0],
      component[1],
      "",
      // Code not available here
      avgSimilarity,
      30
      // Assume substantial if clustered
    );
    clusters.push({
      id: `cluster-${clusters.length}`,
      name,
      files: component,
      severity,
      duplicateCount: componentDups.length,
      totalTokenCost,
      averageSimilarity: avgSimilarity,
      reason,
      suggestion
    });
  });
  return clusters;
}
|
|
108
|
+
/**
 * Picks a human-readable label for a cluster of files.
 *
 * A few substring heuristics are tried first ("blog", "buttons", "cards",
 * "login.test"). Otherwise the label is derived from the name of the
 * directory that contains the first file.
 *
 * @param {string[]} files - File paths belonging to the cluster.
 * @returns {string} The cluster label.
 */
function determineClusterName(files) {
  if (files.length === 0) return "Unknown Cluster";
  if (files.some((f) => f.includes("blog"))) return "Blog SEO Boilerplate";
  if (files.some((f) => f.includes("buttons")))
    return "Button Component Variants";
  if (files.some((f) => f.includes("cards"))) return "Card Component Variants";
  if (files.some((f) => f.includes("login.test"))) return "E2E Test Patterns";
  const first = files[0];
  // basename(dirname(p)) yields the containing directory's name and, unlike
  // splitting on path.sep, also copes with "/"-separated paths on Windows.
  const dirName = path.basename(path.dirname(first));
  if (dirName && dirName !== "." && dirName !== "..") {
    return `${dirName.charAt(0).toUpperCase() + dirName.slice(1)} Domain Group`;
  }
  return "Shared Pattern Group";
}
|
|
122
|
+
/**
 * Keeps only the clusters large enough to be worth reporting.
 *
 * @param {Array<object>} clusters - Clusters exposing `totalTokenCost`
 *   and a `files` array.
 * @param {number} [minTokenCost=1000] - Minimum summed token cost.
 * @param {number} [minFiles=3] - Minimum number of files in the cluster.
 * @returns {Array<object>} Clusters meeting both thresholds, in input order.
 */
function filterClustersByImpact(clusters, minTokenCost = 1e3, minFiles = 3) {
  return clusters.filter((cluster) => {
    if (!(cluster.totalTokenCost >= minTokenCost)) {
      return false;
    }
    return cluster.files.length >= minFiles;
  });
}
|
|
127
|
+
/**
 * Heuristically decides whether a snippet is nothing but a short
 * interface/type/enum declaration.
 *
 * The trimmed snippet must start with one of the declaration keywords
 * (optionally exported), must not contain any marker of executable code,
 * and must be at most 200 characters long.
 *
 * @param {string} code - Source snippet to inspect.
 * @returns {boolean} True when all three checks pass.
 */
function isPureInterfaceDefinition(code) {
  const text = code.trim();
  const declarationPrefixes = [
    "interface ",
    "type ",
    "export interface ",
    "export type ",
    "enum ",
    "export enum "
  ];
  const implementationMarkers = ["={", "=> {", "function ", "() {", " implements "];
  const startsAsDeclaration = declarationPrefixes.some((prefix) => text.startsWith(prefix));
  if (!startsAsDeclaration) return false;
  const hasImplementation = implementationMarkers.some((marker) => text.includes(marker));
  if (hasImplementation) return false;
  return text.length <= 200;
}
|
|
138
|
+
// Lower-case substrings whose presence in a snippet marks it as carrying
// brand/theme styling. Matched with String#includes against lower-cased
// code, so entries ending in "-" act as prefixes (e.g. "zinc-").
var BRAND_INDICATORS = [
  // Theme names
  "cyberpunk", "cyber-blue", "cyber-purple",
  // Colour utility classes
  "slate-900", "slate-400", "zinc-", "indigo-", "neon-",
  // Visual effects
  "glassmorphism", "backdrop-blur"
];
|
|
150
|
+
/**
 * Tells whether a file path points into a branded area of the repository.
 *
 * The check is a case-insensitive substring match against a fixed list of
 * path markers.
 *
 * @param {string} filePath - Path of the file to classify.
 * @returns {boolean} True when the path contains any marker.
 */
function isBrandSpecificComponent(filePath) {
  const normalized = filePath.toLowerCase();
  return ["landing", "clawmore", "platform", "apps/"].some(
    (marker) => normalized.includes(marker)
  );
}
|
|
158
|
+
/**
 * Decides whether two duplicated snippets are themed variants of each
 * other rather than accidental copies.
 *
 * That is the case when the two files are different, both paths are
 * classified as brand-specific by `isBrandSpecificComponent`, and both
 * snippets contain at least one entry of `BRAND_INDICATORS`
 * (case-insensitive).
 *
 * @param {string} file1 - Path of the first file.
 * @param {string} file2 - Path of the second file.
 * @param {string} code1 - Snippet taken from the first file.
 * @param {string} code2 - Snippet taken from the second file.
 * @returns {boolean} True when the pair is a brand variant.
 */
function areBrandSpecificVariants(file1, file2, code1, code2) {
  const firstIsBrand = isBrandSpecificComponent(file1);
  const secondIsBrand = isBrandSpecificComponent(file2);
  if (!firstIsBrand || !secondIsBrand || file1 === file2) {
    return false;
  }
  const mentionsBrand = (snippet) => {
    const haystack = snippet.toLowerCase();
    return BRAND_INDICATORS.some((indicator) => haystack.includes(indicator));
  };
  // Both snippets are inspected before combining the results.
  const firstMentions = mentionsBrand(code1);
  const secondMentions = mentionsBrand(code2);
  return firstMentions && secondMentions;
}
|
|
174
|
+
/**
 * Downgrades intentional duplicates to informational severity.
 *
 * Despite the name nothing is removed: every record is kept. Records that
 * connect two different files are mutated in place (`severity` and
 * `suggestion` are overwritten) when they are brand-specific variants or
 * when both snippets are pure interface/type definitions. If both apply,
 * the interface suggestion is the one left on the record.
 *
 * @param {Array<object>} duplicates - Records exposing file1, file2,
 *   code1 and code2.
 * @returns {Array<object>} A new array holding the same record objects.
 */
function filterBrandSpecificVariants(duplicates) {
  duplicates.forEach((dup) => {
    // Duplicates inside a single file are left untouched.
    if (dup.file1 === dup.file2) return;
    const brandVariant = areBrandSpecificVariants(
      dup.file1,
      dup.file2,
      dup.code1,
      dup.code2
    );
    if (brandVariant) {
      dup.severity = Severity.Info;
      dup.suggestion = "Brand-specific themed component variant (intentional)";
    }
    const bothInterfaces = isPureInterfaceDefinition(dup.code1) && isPureInterfaceDefinition(dup.code2);
    if (bothInterfaces) {
      dup.severity = Severity.Info;
      dup.suggestion = "Pure interface/type definition - intentional for module independence";
    }
  });
  return duplicates.filter(() => true);
}
|
|
195
|
+
|
|
196
|
+
// src/config.ts
|
|
197
|
+
import { scanFiles } from "@aiready/core";
|
|
198
|
+
/**
 * Produces detection settings, either fixed or scaled to repository size.
 *
 * When `userOptions.useSmartDefaults` is exactly `false`, a fixed
 * configuration is returned without scanning. Otherwise the files are
 * scanned, the number of code blocks is estimated as five per file, and
 * the thresholds are derived from that estimate. Any option the caller
 * supplied (and that is not `undefined`) overrides the derived value.
 *
 * @param {string} directory - Root directory to analyse.
 * @param {object} userOptions - Caller options; `include`/`exclude`
 *   steer the scan, the remaining keys may override individual defaults.
 * @returns {Promise<object>} The resolved configuration.
 */
async function getSmartDefaults(directory, userOptions) {
  if (userOptions.useSmartDefaults === false) {
    return {
      rootDir: directory,
      minSimilarity: 0.6,
      minLines: 8,
      batchSize: 100,
      approx: true,
      minSharedTokens: 12,
      maxCandidatesPerBlock: 5,
      streamResults: false,
      severity: "all",
      includeTests: false
    };
  }

  const files = await scanFiles({
    rootDir: directory,
    include: userOptions.include || ["**/*.{ts,tsx,js,jsx,py,java}"],
    exclude: userOptions.exclude
  });
  const estimatedBlocks = files.length * 5;

  // Larger repositories get stricter thresholds to keep the output manageable.
  const scaledMinLines = 6 + Math.floor(estimatedBlocks / 1e3) * 2;
  const scaledSimilarity = 0.45 + estimatedBlocks / 1e4 * 0.3;
  const candidateBudget = Math.floor(1e6 / estimatedBlocks);

  const defaults = {
    rootDir: directory,
    minSimilarity: Math.min(0.75, scaledSimilarity),
    minLines: Math.max(6, Math.min(20, scaledMinLines)),
    batchSize: estimatedBlocks > 1e3 ? 200 : 100,
    approx: true,
    minSharedTokens: 10,
    maxCandidatesPerBlock: Math.max(5, Math.min(100, candidateBudget)),
    streamResults: false,
    severity: estimatedBlocks > 5e3 ? "high" : "all",
    includeTests: false
  };

  // Only keys known to `defaults` can be overridden by the caller.
  const resolved = { ...defaults };
  Object.keys(defaults).forEach((key) => {
    if (!(key in userOptions)) return;
    if (userOptions[key] === void 0) return;
    resolved[key] = userOptions[key];
  });
  return resolved;
}
|
|
252
|
+
/**
 * Prints the effective configuration to the console.
 *
 * Nothing is printed when `config.suppressToolConfig` is truthy. The
 * exclude-pattern, confidence-threshold and ignore-whitelist lines only
 * appear when the corresponding setting is non-empty / greater than zero.
 *
 * @param {object} config - Resolved analysis configuration.
 * @param {number} estimatedBlocks - Estimated number of code blocks.
 * @returns {void}
 */
function logConfiguration(config, estimatedBlocks) {
  if (config.suppressToolConfig) {
    return;
  }
  const emit = (line) => console.log(line);

  emit("\u{1F4CB} Configuration:");
  emit(` Repository size: ~${estimatedBlocks} code blocks`);
  emit(` Similarity threshold: ${config.minSimilarity}`);
  emit(` Minimum lines: ${config.minLines}`);
  emit(` Approximate mode: ${config.approx ? "enabled" : "disabled"}`);
  emit(` Max candidates per block: ${config.maxCandidatesPerBlock}`);
  emit(` Min shared tokens: ${config.minSharedTokens}`);
  emit(` Severity filter: ${config.severity}`);
  emit(` Include tests: ${config.includeTests}`);

  // Optional sections.
  const hasEntries = (list) => Boolean(list) && list.length > 0;
  if (hasEntries(config.excludePatterns)) {
    emit(` Exclude patterns: ${config.excludePatterns.length} active`);
  }
  if (config.confidenceThreshold > 0) {
    emit(` Confidence threshold: ${config.confidenceThreshold}`);
  }
  if (hasEntries(config.ignoreWhitelist)) {
    emit(` Ignore whitelist: ${config.ignoreWhitelist.length} entries`);
  }
  emit("");
}
|
|
276
|
+
|
|
277
|
+
// src/summary.ts
|
|
278
|
+
import { Severity as Severity2 } from "@aiready/core";
|
|
279
|
+
/**
 * Returns a refactoring hint for a duplicate of the given pattern type.
 *
 * An urgency suffix is appended when the similarity exceeds 0.9 (HIGH) or
 * 0.95 (CRITICAL). Pattern types without a dedicated message fall back to
 * the generic "unknown" message instead of yielding the text "undefined".
 *
 * @param {string} patternType - Pattern category, e.g. "api-handler".
 * @param {number} similarity - Similarity score in the range 0..1.
 * @returns {string} The suggestion text.
 */
function getRefactoringSuggestion(patternType, similarity) {
  const baseMessages = {
    "api-handler": "Extract common middleware or create a base handler class",
    validator: "Consolidate validation logic into shared schema validators (Zod/Yup)",
    utility: "Move to a shared utilities file and reuse across modules",
    "class-method": "Consider inheritance or composition to share behavior",
    component: "Extract shared logic into a custom hook or HOC",
    function: "Extract into a shared helper function",
    unknown: "Extract common logic into a reusable module"
  };
  // Own-property check so inherited keys such as "toString" are not treated
  // as known pattern types.
  const isKnown = Object.prototype.hasOwnProperty.call(baseMessages, patternType);
  const base = isKnown ? baseMessages[patternType] : baseMessages.unknown;
  const urgency = similarity > 0.95 ? " (CRITICAL: Nearly identical code)" : similarity > 0.9 ? " (HIGH: Very similar, refactor soon)" : "";
  return base + urgency;
}
|
|
292
|
+
/**
 * Aggregates per-file analysis results into one summary.
 *
 * Pattern type, similarity, token cost and the counterpart file are parsed
 * back out of each issue message, which is expected to look like
 * "<type> pattern <N>% similar to <file> (<M> tokens wasted)".
 *
 * @param {Array<object>} results - Per-file results, each with an optional
 *   `issues` array and optional `metrics.tokenCost`.
 * @returns {object} `totalPatterns`, `totalTokenCost`, `patternsByType`
 *   (count per pattern type) and `topDuplicates` (the first ten issues in
 *   input order; no sorting is applied). A non-array input yields an
 *   all-zero summary.
 */
function generateSummary(results) {
  const zeroCounts = () => ({
    "api-handler": 0,
    validator: 0,
    utility: 0,
    "class-method": 0,
    component: 0,
    function: 0,
    unknown: 0
  });
  if (!Array.isArray(results)) {
    return {
      totalPatterns: 0,
      totalTokenCost: 0,
      patternsByType: zeroCounts(),
      topDuplicates: []
    };
  }
  // A plain \S+ already spans hyphenated names; the previous nested
  // quantifier accepted the same strings but could backtrack exponentially
  // on hyphen-heavy messages that do not match.
  const patternTypeRe = /^(\S+) pattern/;
  const allIssues = results.flatMap((r) => r.issues || []);
  const totalTokenCost = results.reduce(
    (sum, r) => sum + (r.metrics?.tokenCost || 0),
    0
  );
  const patternsByType = zeroCounts();
  for (const issue of allIssues) {
    const match = issue.message.match(patternTypeRe);
    if (match) {
      const type = match[1];
      patternsByType[type] = (patternsByType[type] || 0) + 1;
    }
  }
  const topDuplicates = allIssues.slice(0, 10).map((issue) => {
    const similarityMatch = issue.message.match(/(\d+)% similar/);
    const tokenMatch = issue.message.match(/\((\d+) tokens/);
    const typeMatch = issue.message.match(patternTypeRe);
    const fileMatch = issue.message.match(/similar to (.+?) \(/);
    return {
      files: [
        {
          path: issue.location.file,
          startLine: issue.location.line,
          // End lines are not recoverable from the message.
          endLine: 0
        },
        {
          path: fileMatch?.[1] || "unknown",
          startLine: 0,
          endLine: 0
        }
      ],
      similarity: similarityMatch ? Number.parseInt(similarityMatch[1], 10) / 100 : 0,
      patternType: typeMatch?.[1] || "unknown",
      tokenCost: tokenMatch ? Number.parseInt(tokenMatch[1], 10) : 0
    };
  });
  return {
    totalPatterns: allIssues.length,
    totalTokenCost,
    patternsByType,
    topDuplicates
  };
}
|
|
360
|
+
/**
 * Restricts issues to those at or above a severity setting.
 *
 * "all" returns the input array untouched. "critical" keeps critical
 * issues only, "high" keeps critical and major, and "medium" — like any
 * setting without an entry in the lookup table — keeps critical, major
 * and minor.
 *
 * @param {Array<object>} issues - Issues exposing a `severity` field.
 * @param {string} severity - The severity setting.
 * @returns {Array<object>} The matching issues.
 */
function filterBySeverity(issues, severity) {
  if (severity === "all") {
    return issues;
  }
  const { Critical, Major, Minor } = Severity2;
  const upToMinor = [Critical, Major, Minor];
  const allowedBySetting = {
    critical: [Critical],
    high: [Critical, Major],
    medium: [Critical, Major, Minor]
  };
  const allowed = allowedBySetting[severity] || upToMinor;
  return issues.filter((issue) => allowed.includes(issue.severity));
}
|
|
374
|
+
/**
 * Maps a severity value to its upper-case display label.
 *
 * @param {*} severity - One of the `Severity2` members.
 * @returns {string} "CRITICAL", "HIGH", "MEDIUM", "LOW", or "UNKNOWN" for
 *   any other value.
 */
function getSeverityLabel(severity) {
  if (severity === Severity2.Critical) return "CRITICAL";
  if (severity === Severity2.Major) return "HIGH";
  if (severity === Severity2.Minor) return "MEDIUM";
  if (severity === Severity2.Info) return "LOW";
  return "UNKNOWN";
}
|
|
388
|
+
/**
 * Converts a similarity score into a severity.
 *
 * Thresholds are exclusive: above 0.95 is critical, above 0.9 major,
 * above 0.7 minor, and anything else (including NaN) informational.
 *
 * @param {number} similarity - Similarity score in the range 0..1.
 * @returns {*} The matching `Severity2` member.
 */
function calculateSeverity2(similarity) {
  // Negated comparisons keep NaN on the "Info" path.
  if (!(similarity > 0.7)) {
    return Severity2.Info;
  }
  if (!(similarity > 0.9)) {
    return Severity2.Minor;
  }
  return similarity > 0.95 ? Severity2.Critical : Severity2.Major;
}
|
|
394
|
+
|
|
395
|
+
// src/analyzer.ts
|
|
396
|
+
import { scanFiles as scanFiles2, readFileContent, Severity as Severity3, IssueType } from "@aiready/core";
|
|
397
|
+
/**
 * Runs the full duplicate-pattern analysis for a directory.
 *
 * Steps: resolve the configuration (smart defaults overlaid with the
 * caller's options), scan and read the files in batches of 50, detect
 * duplicates, downgrade intentional duplicates, build one result entry
 * per scanned file, and optionally derive file-pair groups and refactor
 * clusters.
 *
 * @param {object} options - Analysis options. `rootDir` defaults to ".";
 *   `onProgress` is forwarded to the detector; keys not consumed here are
 *   passed to the file scanner.
 * @returns {Promise<object>} `{ results, duplicates, files, groups,
 *   clusters, config }`. `groups` / `clusters` are undefined when
 *   `groupByFilePair` / `createClusters` are disabled.
 */
async function analyzePatterns(options) {
  const smartDefaults = await getSmartDefaults(options.rootDir || ".", options);
  const finalOptions = { ...smartDefaults, ...options };
  const {
    minSimilarity = 0.4,
    minLines = 5,
    batchSize = 100,
    approx = true,
    minSharedTokens = 8,
    maxCandidatesPerBlock = 100,
    streamResults = false,
    severity = "all",
    groupByFilePair = true,
    createClusters = true,
    minClusterTokenCost = 1e3,
    minClusterFiles = 3,
    excludePatterns = [],
    excludeFiles = [],
    confidenceThreshold = 0,
    ignoreWhitelist = [],
    ...scanOptions
  } = finalOptions;
  const files = await scanFiles2(scanOptions);
  // NOTE(review): getSmartDefaults estimates 5 blocks per file while this
  // logged estimate uses 3 — confirm which factor is intended.
  const estimatedBlocks = files.length * 3;
  logConfiguration(finalOptions, estimatedBlocks);
  const results = [];
  // Read files in bounded batches so only a limited number of reads are
  // in flight at once.
  const READ_BATCH_SIZE = 50;
  const fileContents = [];
  for (let i = 0; i < files.length; i += READ_BATCH_SIZE) {
    const batch = files.slice(i, i + READ_BATCH_SIZE);
    const batchContents = await Promise.all(
      batch.map(async (file) => ({
        file,
        content: await readFileContent(file)
      }))
    );
    fileContents.push(...batchContents);
  }
  const duplicates = await detectDuplicatePatterns(fileContents, {
    minSimilarity,
    minLines,
    batchSize,
    approx,
    minSharedTokens,
    maxCandidatesPerBlock,
    streamResults,
    excludePatterns,
    excludeFiles,
    confidenceThreshold,
    ignoreWhitelist,
    onProgress: options.onProgress
  });
  // Called for its side effect: it rewrites severity/suggestion in place.
  filterBrandSpecificVariants(duplicates);

  // Index duplicates by file once instead of re-filtering the whole list
  // for every scanned file. Insertion order matches the original order.
  const duplicatesByFile = /* @__PURE__ */ new Map();
  const indexUnder = (file, dup) => {
    const bucket = duplicatesByFile.get(file);
    if (bucket) {
      bucket.push(dup);
    } else {
      duplicatesByFile.set(file, [dup]);
    }
  };
  for (const dup of duplicates) {
    indexUnder(dup.file1, dup);
    if (dup.file2 !== dup.file1) indexUnder(dup.file2, dup);
  }

  for (const file of files) {
    const fileDuplicates = duplicatesByFile.get(file) ?? [];
    const issues = fileDuplicates.map((dup) => {
      const otherFile = dup.file1 === file ? dup.file2 : dup.file1;
      let severityLevel;
      // Respect an informational rating assigned upstream; otherwise derive
      // the severity from the similarity score.
      if (dup.severity === "info" || dup.severity === "Info") {
        severityLevel = Severity3.Info;
      } else {
        severityLevel = calculateSeverity2(dup.similarity);
      }
      return {
        type: IssueType.DuplicatePattern,
        severity: severityLevel,
        message: `${dup.patternType} pattern ${Math.round(dup.similarity * 100)}% similar to ${otherFile} (${dup.tokenCost} tokens wasted)`,
        location: {
          file,
          line: dup.file1 === file ? dup.line1 : dup.line2
        },
        suggestion: getRefactoringSuggestion(dup.patternType, dup.similarity)
      };
    });
    const filteredIssues = filterBySeverity(issues, severity || "all");
    // Metrics cover every duplicate of the file, not just the issues that
    // survive the severity filter.
    const totalTokenCost = fileDuplicates.reduce(
      (sum, dup) => sum + dup.tokenCost,
      0
    );
    results.push({
      fileName: file,
      issues: filteredIssues,
      metrics: {
        tokenCost: totalTokenCost,
        consistencyScore: Math.max(0, 1 - fileDuplicates.length * 0.1)
      }
    });
  }
  let groups;
  let clusters;
  if (groupByFilePair) {
    groups = groupDuplicatesByFilePair(duplicates);
  }
  if (createClusters) {
    const allClusters = createRefactorClusters(duplicates);
    clusters = filterClustersByImpact(
      allClusters,
      minClusterTokenCost,
      minClusterFiles
    );
  }
  return { results, duplicates, files, groups, clusters, config: finalOptions };
}
|
|
502
|
+
|
|
503
|
+
export {
|
|
504
|
+
groupDuplicatesByFilePair,
|
|
505
|
+
createRefactorClusters,
|
|
506
|
+
filterClustersByImpact,
|
|
507
|
+
areBrandSpecificVariants,
|
|
508
|
+
filterBrandSpecificVariants,
|
|
509
|
+
getSmartDefaults,
|
|
510
|
+
logConfiguration,
|
|
511
|
+
generateSummary,
|
|
512
|
+
filterBySeverity,
|
|
513
|
+
getSeverityLabel,
|
|
514
|
+
calculateSeverity2 as calculateSeverity,
|
|
515
|
+
Severity3 as Severity,
|
|
516
|
+
analyzePatterns
|
|
517
|
+
};
|
package/dist/cli.js
CHANGED
|
@@ -983,7 +983,7 @@ async function getSmartDefaults(directory, userOptions) {
|
|
|
983
983
|
);
|
|
984
984
|
const minSimilarity = Math.min(0.75, 0.45 + estimatedBlocks / 1e4 * 0.3);
|
|
985
985
|
const batchSize = estimatedBlocks > 1e3 ? 200 : 100;
|
|
986
|
-
const severity = estimatedBlocks >
|
|
986
|
+
const severity = estimatedBlocks > 5e3 ? "high" : "all";
|
|
987
987
|
const maxCandidatesPerBlock = Math.max(
|
|
988
988
|
5,
|
|
989
989
|
Math.min(100, Math.floor(1e6 / estimatedBlocks))
|
|
@@ -1133,7 +1133,8 @@ function filterBySeverity2(issues, severity) {
|
|
|
1133
1133
|
function calculateSeverity2(similarity) {
|
|
1134
1134
|
if (similarity > 0.95) return import_core9.Severity.Critical;
|
|
1135
1135
|
if (similarity > 0.9) return import_core9.Severity.Major;
|
|
1136
|
-
return import_core9.Severity.Minor;
|
|
1136
|
+
if (similarity > 0.7) return import_core9.Severity.Minor;
|
|
1137
|
+
return import_core9.Severity.Info;
|
|
1137
1138
|
}
|
|
1138
1139
|
|
|
1139
1140
|
// src/scoring.ts
|
package/dist/cli.mjs
CHANGED
package/dist/index.js
CHANGED
|
@@ -1010,7 +1010,7 @@ async function getSmartDefaults(directory, userOptions) {
|
|
|
1010
1010
|
);
|
|
1011
1011
|
const minSimilarity = Math.min(0.75, 0.45 + estimatedBlocks / 1e4 * 0.3);
|
|
1012
1012
|
const batchSize = estimatedBlocks > 1e3 ? 200 : 100;
|
|
1013
|
-
const severity = estimatedBlocks >
|
|
1013
|
+
const severity = estimatedBlocks > 5e3 ? "high" : "all";
|
|
1014
1014
|
const maxCandidatesPerBlock = Math.max(
|
|
1015
1015
|
5,
|
|
1016
1016
|
Math.min(100, Math.floor(1e6 / estimatedBlocks))
|
|
@@ -1174,7 +1174,8 @@ function getSeverityLabel2(severity) {
|
|
|
1174
1174
|
function calculateSeverity2(similarity) {
|
|
1175
1175
|
if (similarity > 0.95) return import_core9.Severity.Critical;
|
|
1176
1176
|
if (similarity > 0.9) return import_core9.Severity.Major;
|
|
1177
|
-
return import_core9.Severity.Minor;
|
|
1177
|
+
if (similarity > 0.7) return import_core9.Severity.Minor;
|
|
1178
|
+
return import_core9.Severity.Info;
|
|
1178
1179
|
}
|
|
1179
1180
|
|
|
1180
1181
|
// src/scoring.ts
|
package/dist/index.mjs
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@aiready/pattern-detect",
|
|
3
|
-
"version": "0.17.22",
|
|
3
|
+
"version": "0.17.24",
|
|
4
4
|
"description": "Semantic duplicate pattern detection for AI-generated code - finds similar implementations that waste AI context tokens",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|
|
@@ -65,7 +65,7 @@
|
|
|
65
65
|
"dependencies": {
|
|
66
66
|
"chalk": "^5.6.2",
|
|
67
67
|
"commander": "^14.0.3",
|
|
68
|
-
"@aiready/core": "0.24.
|
|
68
|
+
"@aiready/core": "0.24.27"
|
|
69
69
|
},
|
|
70
70
|
"devDependencies": {
|
|
71
71
|
"@types/node": "^24.12.2",
|