@aiready/pattern-detect 0.11.4 → 0.11.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/__tests__/context-rules.test.d.ts +2 -0
- package/dist/__tests__/context-rules.test.d.ts.map +1 -0
- package/dist/__tests__/context-rules.test.js +189 -0
- package/dist/__tests__/context-rules.test.js.map +1 -0
- package/dist/__tests__/detector.test.d.ts +2 -0
- package/dist/__tests__/detector.test.d.ts.map +1 -0
- package/dist/__tests__/detector.test.js +259 -0
- package/dist/__tests__/detector.test.js.map +1 -0
- package/dist/__tests__/grouping.test.d.ts +2 -0
- package/dist/__tests__/grouping.test.d.ts.map +1 -0
- package/dist/__tests__/grouping.test.js +443 -0
- package/dist/__tests__/grouping.test.js.map +1 -0
- package/dist/__tests__/scoring.test.d.ts +2 -0
- package/dist/__tests__/scoring.test.d.ts.map +1 -0
- package/dist/__tests__/scoring.test.js +102 -0
- package/dist/__tests__/scoring.test.js.map +1 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js.map +1 -0
- package/dist/context-rules.d.ts +41 -0
- package/dist/context-rules.d.ts.map +1 -0
- package/dist/context-rules.js +225 -0
- package/dist/context-rules.js.map +1 -0
- package/dist/detector.d.ts +40 -0
- package/dist/detector.d.ts.map +1 -0
- package/dist/detector.js +385 -0
- package/dist/detector.js.map +1 -0
- package/dist/extractors/python-extractor.d.ts +19 -0
- package/dist/extractors/python-extractor.d.ts.map +1 -0
- package/dist/extractors/python-extractor.js +164 -0
- package/dist/extractors/python-extractor.js.map +1 -0
- package/dist/grouping.d.ts +54 -0
- package/dist/grouping.d.ts.map +1 -0
- package/dist/grouping.js +347 -0
- package/dist/grouping.js.map +1 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js.map +1 -0
- package/dist/scoring.d.ts +12 -0
- package/dist/scoring.d.ts.map +1 -0
- package/dist/scoring.js +116 -0
- package/dist/scoring.js.map +1 -0
- package/package.json +2 -2
package/dist/detector.js
ADDED
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
import { estimateTokens } from '@aiready/core';
|
|
2
|
+
import { calculateSeverity } from './context-rules';
|
|
3
|
+
/**
 * Classify a code snippet into a coarse pattern category.
 *
 * The snippet is lower-cased and probed for marker substrings. Rules are
 * evaluated in a fixed order and the first rule that matches decides the
 * category, so earlier categories take precedence over later ones.
 *
 * @param code Source text of the block to classify.
 * @returns One of 'api-handler', 'validator', 'component', 'class-method',
 *          'utility', 'function' or 'unknown'.
 */
function categorizePattern(code) {
    const text = code.toLowerCase();
    const has = (needle) => text.includes(needle);
    const hasAny = (...needles) => needles.some((needle) => has(needle));

    // HTTP handler markers: request/response pair or framework-specific calls.
    if ((has('request') && has('response')) ||
        hasAny('router.', 'app.get', 'app.post', 'express', 'ctx.body')) {
        return 'api-handler';
    }
    // Validation markers: schema libraries or a guard-and-throw shape.
    if (hasAny('validate', 'schema', 'zod', 'yup') || (has('if') && has('throw'))) {
        return 'validator';
    }
    // UI component markers (React, Vue, etc.).
    if (hasAny('return (', 'jsx', 'component', 'props')) {
        return 'component';
    }
    // Anything touching a class body or instance state.
    if (hasAny('class ', 'this.')) {
        return 'class-method';
    }
    // Returns a value without instance access or object construction.
    if (has('return ') && !hasAny('this', 'new ')) {
        return 'utility';
    }
    // Fallback for anything that still looks like a function.
    if (hasAny('function', '=>')) {
        return 'function';
    }
    return 'unknown';
}
|
|
48
|
+
/**
 * Pull function-like blocks out of source text using line heuristics.
 *
 * A block starts on a line that looks like a function (contains
 * 'function ', '=>' or 'async ', or matches one of the declaration
 * patterns) and ends on the first subsequent line where the running
 * '{' / '}' balance of the whole file is zero. Braces are counted
 * character by character with no awareness of strings or comments.
 * A block that closes with fewer than `minLines` lines is discarded.
 *
 * @param content  Full text of one file.
 * @param minLines Minimum number of lines a block must span to be kept.
 * @returns Array of { content, startLine, endLine, patternType, linesOfCode };
 *          line numbers are 1-based and inclusive.
 */
function extractCodeBlocks(content, minLines) {
    const FUNCTION_DECLARATION = /^(export\s+)?(async\s+)?function\s+/;
    const CONST_ARROW = /^(export\s+)?const\s+\w+\s*=\s*(async\s*)?\(/;
    const looksLikeFunctionStart = (text) => text.includes('function ') ||
        text.includes('=>') ||
        text.includes('async ') ||
        FUNCTION_DECLARATION.test(text) ||
        CONST_ARROW.test(text);
    // Non-blank lines that are not '//' comments count as lines of code.
    const isCodeLine = (text) => {
        const stripped = text.trim();
        return stripped && !stripped.startsWith('//');
    };

    const found = [];
    let pending = [];
    let firstIndex = 0;
    let depth = 0;
    let collecting = false;

    content.split('\n').forEach((rawLine, index) => {
        if (!collecting && looksLikeFunctionStart(rawLine.trim())) {
            collecting = true;
            firstIndex = index;
        }
        // The brace balance is tracked for every line, collected or not.
        for (const ch of rawLine) {
            if (ch === '{') {
                depth += 1;
            }
            if (ch === '}') {
                depth -= 1;
            }
        }
        if (!collecting) {
            return;
        }
        pending.push(rawLine);
        if (depth !== 0) {
            return;
        }
        // Balance is back to zero: keep the block if it is long enough,
        // and reset the accumulator either way.
        if (pending.length >= minLines) {
            const text = pending.join('\n');
            found.push({
                content: text,
                startLine: firstIndex + 1,
                endLine: index + 1,
                patternType: categorizePattern(text),
                linesOfCode: pending.filter(isCodeLine).length,
            });
        }
        pending = [];
        collecting = false;
    });
    return found;
}
|
|
103
|
+
/**
 * Normalize code for comparison.
 *
 * - Removes `//` line comments and block comments
 * - Replaces every string / template literal with a `STR` placeholder
 *   (keeping its quote character)
 * - Replaces standalone integer runs with `NUM`
 * - Collapses all whitespace runs to a single space and trims
 *
 * Identifiers are left untouched.
 *
 * String literals and comments are recognised in a single left-to-right
 * scan, so comment markers inside a string (e.g. "http://host/path") are
 * not treated as comments, quote characters inside a comment do not open a
 * string, and backslash-escaped quotes do not terminate a literal.
 *
 * @param code Source text; null, undefined and '' all yield ''.
 * @returns The normalized single-line text.
 */
function normalizeCode(code) {
    // Safety check for undefined/null input
    if (!code) {
        return '';
    }
    // Alternation order does not matter for correctness: at any position
    // only one alternative can match, because each starts with a distinct
    // opener (", ', `, // or /*).
    const STRING_OR_COMMENT = /"(?:[^"\\]|\\[\s\S])*"|'(?:[^'\\]|\\[\s\S])*'|`(?:[^`\\]|\\[\s\S])*`|\/\/.*$|\/\*[\s\S]*?\*\//gm;
    return (code
        .replace(STRING_OR_COMMENT, (match) => {
            const opener = match[0];
            // Comments vanish; literals keep their quote style around STR.
            return opener === '/' ? '' : `${opener}STR${opener}`;
        })
        // Normalize numbers
        .replace(/\b\d+\b/g, 'NUM')
        // Normalize whitespace but keep structure
        .replace(/\s+/g, ' ')
        .trim());
}
|
|
130
|
+
/**
 * Jaccard similarity of two token lists, treated as sets.
 *
 * Duplicates within either list are ignored. The result is
 * |A ∩ B| / |A ∪ B|, computed with one pass over the first set and
 * constant-time membership tests against the second.
 *
 * @param tokens1 First iterable of tokens.
 * @param tokens2 Second iterable of tokens.
 * @returns A value in [0, 1]; 0 when both inputs are empty.
 */
function jaccardSimilarity(tokens1, tokens2) {
    const left = new Set(tokens1);
    const right = new Set(tokens2);
    const shared = [...left].filter((token) => right.has(token)).length;
    const combined = left.size + right.size - shared;
    if (combined === 0) {
        return 0;
    }
    return shared / combined;
}
|
|
144
|
+
/**
 * Detect duplicate patterns across files with enhanced analysis.
 *
 * Blocks are extracted from every file, normalized and tokenized, then
 * compared pairwise (cross-file pairs only) with Jaccard similarity on
 * their token sets. Pairs scoring at least `minSimilarity` are reported.
 *
 * Two comparison strategies exist:
 * - approx (default): an inverted index over each block's distinct tokens
 *   selects a handful of candidate partners per block.
 * - exact (`approx: false`): every block is compared with every later
 *   block, capped at 500000 comparisons.
 *
 * Progress and (optionally) each match are written with console.log.
 *
 * @param files   Array of { file, content } entries.
 * @param options
 *   - minSimilarity: Jaccard threshold for reporting a pair.
 *   - minLines: minimum block length passed to extractCodeBlocks.
 *   - batchSize (100): progress is logged and the event loop yielded every
 *     `batchSize` blocks.
 *   - approx (true): use candidate selection instead of all pairs.
 *   - minSharedTokens (8): minimum distinct rare tokens two blocks must
 *     share to become candidates (approx mode).
 *   - maxCandidatesPerBlock (100): upper bound on candidates per block;
 *     the effective limit is min(maxCandidatesPerBlock, 5).
 *   - streamResults (false): log each duplicate as soon as it is found.
 * @returns Duplicate records sorted by similarity, then token cost, both
 *          descending.
 */
export async function detectDuplicatePatterns(files, options) {
    const { minSimilarity, minLines, batchSize = 100, approx = true, minSharedTokens = 8, maxCandidatesPerBlock = 100, streamResults = false, } = options;
    const duplicates = [];
    // Safety limit only for --no-approx mode (O(B²) worst case)
    // Approximate mode has natural limits and doesn't need budget
    const maxComparisons = approx ? Infinity : 500000;
    // Extract blocks from all files
    const allBlocks = files.flatMap((file) => extractCodeBlocks(file.content, minLines)
        .filter((block) => block.content && block.content.trim().length > 0)
        .map((block) => ({
        content: block.content,
        startLine: block.startLine,
        endLine: block.endLine,
        file: file.file,
        normalized: normalizeCode(block.content),
        patternType: block.patternType,
        tokenCost: estimateTokens(block.content),
        linesOfCode: block.linesOfCode,
    })));
    console.log(`Extracted ${allBlocks.length} code blocks for analysis`);
    // Add Python blocks if present
    const pythonFiles = files.filter(f => f.file.toLowerCase().endsWith('.py'));
    if (pythonFiles.length > 0) {
        const { extractPythonPatterns } = await import('./extractors/python-extractor');
        const patterns = await extractPythonPatterns(pythonFiles.map(f => f.file));
        const pythonBlocks = patterns
            .filter((p) => p.code && p.code.trim().length > 0)
            .map(p => ({
            content: p.code,
            startLine: p.startLine,
            endLine: p.endLine,
            file: p.file,
            normalized: normalizeCode(p.code),
            patternType: p.type,
            tokenCost: estimateTokens(p.code),
            linesOfCode: p.endLine - p.startLine + 1,
        }));
        allBlocks.push(...pythonBlocks);
        console.log(`Added ${pythonBlocks.length} Python patterns`);
    }
    // Warn about --no-approx performance implications
    if (!approx && allBlocks.length > 500) {
        console.log(`⚠️ Using --no-approx mode with ${allBlocks.length} blocks may be slow (O(B²) complexity).`);
        console.log(` Consider using approximate mode (default) for better performance.`);
    }
    // Use minLines to control scope instead of arbitrary block limits
    // Tokenize blocks for candidate selection
    const stopwords = new Set([
        'return', 'const', 'let', 'var', 'function', 'class', 'new', 'if', 'else', 'for', 'while',
        'async', 'await', 'try', 'catch', 'switch', 'case', 'default', 'import', 'export', 'from',
        'true', 'false', 'null', 'undefined', 'this'
    ]);
    const tokenize = (norm) => norm
        .split(/[\s(){}\[\];,\.]+/)
        .filter((t) => t && t.length >= 3 && !stopwords.has(t.toLowerCase()));
    const blockTokens = allBlocks.map((b) => tokenize(b.normalized));
    // Distinct tokens per block. Candidate selection must work on these, not
    // on raw occurrences: otherwise a token repeated inside one block is
    // counted as if it appeared in several blocks, and as several shared tokens.
    const blockTokenSets = blockTokens.map((tokens) => new Set(tokens));
    // Build inverted index token -> block ids (for approx mode).
    // Each block id appears at most once per token.
    const invertedIndex = new Map();
    if (approx) {
        blockTokenSets.forEach((tokens, blockId) => {
            for (const tok of tokens) {
                let ids = invertedIndex.get(tok);
                if (!ids) {
                    ids = [];
                    invertedIndex.set(tok, ids);
                }
                ids.push(blockId);
            }
        });
    }
    // Process comparisons (exact or approximate) in batches to reduce memory pressure
    const totalComparisons = approx
        ? undefined
        : (allBlocks.length * (allBlocks.length - 1)) / 2;
    if (totalComparisons !== undefined) {
        console.log(`Processing ${totalComparisons.toLocaleString()} comparisons in batches...`);
    }
    else {
        console.log(`Using approximate candidate selection to reduce comparisons...`);
    }
    // Build, store and optionally print the record for one matching pair.
    // Severity context, snippet, pattern type and line count all come from
    // the first block of the pair.
    const recordDuplicate = (block1, block2, similarity) => {
        // Calculate context-aware severity
        const { severity, reason, suggestion, matchedRule } = calculateSeverity(block1.file, block2.file, block1.content, similarity, block1.linesOfCode);
        const duplicate = {
            file1: block1.file,
            file2: block2.file,
            line1: block1.startLine,
            line2: block2.startLine,
            endLine1: block1.endLine,
            endLine2: block2.endLine,
            similarity,
            snippet: block1.content.split('\n').slice(0, 5).join('\n') + '\n...',
            patternType: block1.patternType,
            tokenCost: block1.tokenCost + block2.tokenCost,
            linesOfCode: block1.linesOfCode,
            severity,
            reason,
            suggestion,
            matchedRule,
        };
        duplicates.push(duplicate);
        if (streamResults) {
            console.log(`\n ✅ Found: ${duplicate.patternType} ${Math.round(similarity * 100)}% similar`);
            console.log(` ${duplicate.file1}:${duplicate.line1}-${duplicate.endLine1} ⇔ ${duplicate.file2}:${duplicate.line2}-${duplicate.endLine2}`);
            console.log(` Token cost: ${duplicate.tokenCost.toLocaleString()}`);
        }
    };
    let comparisonsProcessed = 0;
    let comparisonsBudgetExhausted = false;
    const startTime = Date.now();
    for (let i = 0; i < allBlocks.length; i++) {
        if (maxComparisons && comparisonsProcessed >= maxComparisons) {
            comparisonsBudgetExhausted = true;
            break;
        }
        // Progress reporting every batch
        if (i % batchSize === 0 && i > 0) {
            const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
            const duplicatesFound = duplicates.length;
            if (totalComparisons !== undefined) {
                const progress = ((comparisonsProcessed / totalComparisons) * 100).toFixed(1);
                const remaining = totalComparisons - comparisonsProcessed;
                const rate = comparisonsProcessed / parseFloat(elapsed);
                const eta = remaining > 0 ? (remaining / rate).toFixed(0) : 0;
                console.log(` ${progress}% (${comparisonsProcessed.toLocaleString()}/${totalComparisons.toLocaleString()} comparisons, ${elapsed}s elapsed, ~${eta}s remaining, ${duplicatesFound} duplicates)`);
            }
            else {
                console.log(` Processed ${i.toLocaleString()}/${allBlocks.length} blocks (${elapsed}s elapsed, ${duplicatesFound} duplicates)`);
            }
            // Allow garbage collection between batches
            await new Promise((resolve) => setImmediate(resolve));
        }
        const block1 = allBlocks[i];
        // Optional (disabled): skip cross-type comparisons unless unknown
        // if (block1.patternType !== block2.patternType &&
        //     block1.patternType !== 'unknown' && block2.patternType !== 'unknown') continue;
        if (approx) {
            // shared-token count per later block, keyed by block index
            const counts = new Map();
            const block1Size = blockTokenSets[i].size;
            // Only rare tokens (present in < 10% of blocks) vote for candidates,
            // to avoid noise from common tokens.
            // NOTE(review): a token shared by two blocks is present in at least
            // 2 blocks, so with 20 or fewer blocks in total no shared token can
            // pass this limit and approx mode yields no candidates — confirm
            // whether a minimum floor is wanted.
            const rareLimit = allBlocks.length * 0.1;
            for (const tok of blockTokenSets[i]) {
                const ids = invertedIndex.get(tok);
                if (!ids || ids.length >= rareLimit)
                    continue;
                for (const j of ids) {
                    if (j <= i)
                        continue; // only forward pairs
                    if (allBlocks[j].file === block1.file)
                        continue; // skip same-file
                    counts.set(j, (counts.get(j) || 0) + 1);
                }
            }
            // Keep a candidate only if it
            // - shares at least minSharedTokens distinct rare tokens, and
            // - those make up at least 30% of the smaller block's distinct tokens.
            // Then take the strongest few.
            const candidates = Array.from(counts.entries())
                .filter(([j, shared]) => {
                const minSize = Math.min(block1Size, blockTokenSets[j].size);
                return shared >= minSharedTokens && shared / minSize >= 0.3;
            })
                .sort((a, b) => b[1] - a[1])
                .slice(0, Math.min(maxCandidatesPerBlock, 5)) // Even more aggressive limit
                .map(([j]) => j);
            for (const j of candidates) {
                comparisonsProcessed++;
                const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
                if (similarity >= minSimilarity) {
                    recordDuplicate(block1, allBlocks[j], similarity);
                }
            }
        }
        else {
            // Exact mode: compare against all subsequent blocks
            for (let j = i + 1; j < allBlocks.length; j++) {
                if (maxComparisons && comparisonsProcessed >= maxComparisons)
                    break;
                // Same-file pairs still consume budget, matching totalComparisons.
                comparisonsProcessed++;
                const block2 = allBlocks[j];
                // Skip comparing blocks from the same file
                if (block1.file === block2.file)
                    continue;
                const similarity = jaccardSimilarity(blockTokens[i], blockTokens[j]);
                if (similarity >= minSimilarity) {
                    recordDuplicate(block1, block2, similarity);
                }
            }
        }
    }
    if (comparisonsBudgetExhausted) {
        console.log(`⚠️ Comparison budget exhausted (${maxComparisons.toLocaleString()} comparisons). Use --max-comparisons to increase.`);
    }
    // Sort by similarity descending, then by token cost
    return duplicates.sort((a, b) => b.similarity - a.similarity || b.tokenCost - a.tokenCost);
}
|
|
385
|
+
//# sourceMappingURL=detector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"detector.js","sourceRoot":"","sources":["../src/detector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAC/C,OAAO,EAAE,iBAAiB,EAAiB,MAAM,iBAAiB,CAAC;AAyDnE;;GAEG;AACH,SAAS,iBAAiB,CAAC,IAAY;IACrC,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;IAEjC,uBAAuB;IACvB,IACE,CAAC,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;QACzD,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC;QACzB,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC;QACzB,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC;QAC1B,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC;QACzB,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC,EAC1B,CAAC;QACD,OAAO,aAAa,CAAC;IACvB,CAAC;IAED,qBAAqB;IACrB,IACE,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC;QAC1B,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC;QACxB,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC;QACrB,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC;QACrB,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,EACjD,CAAC;QACD,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,wCAAwC;IACxC,IACE,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC;QAC1B,KAAK,CAAC,QAAQ,CAAC,KAAK,CAAC;QACrB,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC;QAC3B,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,EACvB,CAAC;QACD,OAAO,WAAW,CAAC;IACrB,CAAC;IAED,gBAAgB;IAChB,IAAI,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,EAAE,CAAC;QACxD,OAAO,cAAc,CAAC;IACxB,CAAC;IAED,6DAA6D;IAC7D,IACE,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC;QACzB,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC;QACvB,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,EACvB,CAAC;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,mBAAmB;IACnB,IAAI,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;QACvD,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,OAAe,EAAE,QAAgB;IAO1D,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,MAAM,GAMP,EAAE,CAAC;IAER,IAAI,YAAY,GAAa,EAAE,CAAC;IAChC,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,UAAU,GAAG,KAAK,CAAC;IAEvB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;QACtB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAE5B,wBA
AwB;QACxB,IACE,CAAC,UAAU;YACX,CAAC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC;gBAC5B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC;gBACtB,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC;gBAC1B,qCAAqC,CAAC,IAAI,CAAC,OAAO,CAAC;gBACnD,8CAA8C,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,EAC/D,CAAC;YACD,UAAU,GAAG,IAAI,CAAC;YAClB,UAAU,GAAG,CAAC,CAAC;QACjB,CAAC;QAED,oBAAoB;QACpB,KAAK,MAAM,IAAI,IAAI,IAAI,EAAE,CAAC;YACxB,IAAI,IAAI,KAAK,GAAG;gBAAE,UAAU,EAAE,CAAC;YAC/B,IAAI,IAAI,KAAK,GAAG;gBAAE,UAAU,EAAE,CAAC;QACjC,CAAC;QAED,IAAI,UAAU,EAAE,CAAC;YACf,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1B,CAAC;QAED,iCAAiC;QACjC,IAAI,UAAU,IAAI,UAAU,KAAK,CAAC,IAAI,YAAY,CAAC,MAAM,IAAI,QAAQ,EAAE,CAAC;YACtE,MAAM,YAAY,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7C,MAAM,WAAW,GAAG,YAAY,CAAC,MAAM,CACrC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,CAC9C,CAAC,MAAM,CAAC;YAET,MAAM,CAAC,IAAI,CAAC;gBACV,OAAO,EAAE,YAAY;gBACrB,SAAS,EAAE,UAAU,GAAG,CAAC;gBACzB,OAAO,EAAE,CAAC,GAAG,CAAC;gBACd,WAAW,EAAE,iBAAiB,CAAC,YAAY,CAAC;gBAC5C,WAAW;aACZ,CAAC,CAAC;YAEH,YAAY,GAAG,EAAE,CAAC;YAClB,UAAU,GAAG,KAAK,CAAC;QACrB,CAAC;aAAM,IAAI,UAAU,IAAI,UAAU,KAAK,CAAC,EAAE,CAAC;YAC1C,yCAAyC;YACzC,YAAY,GAAG,EAAE,CAAC;YAClB,UAAU,GAAG,KAAK,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;;;;GAMG;AACH,SAAS,aAAa,CAAC,IAAY;IACjC,wCAAwC;IACxC,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,OAAO,CACL,IAAI;QACF,8BAA8B;SAC7B,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC;QACzB,6BAA6B;SAC5B,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC;QACjC,mDAAmD;SAClD,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC;SAC5B,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC;SAC5B,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC;QAC7B,oBAAoB;SACnB,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC;QAC3B,0CAA0C;SACzC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CACV,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,OAAiB,EAAE,OAAiB;IAC7D,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;IAC9B,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;IAE9B,IAAI,YAAY,GAAG,CAAC,CAAC;IACrB,KAAK,MAAM,KAAK,IAAI,IAAI,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC;YAAE,YAAY,EA
AE,CAAC;IACtC,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG,YAAY,CAAC;IACnD,OAAO,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,GAAG,KAAK,CAAC;AAChD,CAAC;AAID;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,uBAAuB,CAC3C,KAAoB,EACpB,OAAyB;IAEzB,MAAM,EACJ,aAAa,EACb,QAAQ,EACR,SAAS,GAAG,GAAG,EACf,MAAM,GAAG,IAAI,EACb,eAAe,GAAG,CAAC,EACnB,qBAAqB,GAAG,GAAG,EAC3B,aAAa,GAAG,KAAK,GACtB,GAAG,OAAO,CAAC;IACZ,MAAM,UAAU,GAAuB,EAAE,CAAC;IAE1C,4DAA4D;IAC5D,8DAA8D;IAC9D,MAAM,cAAc,GAAG,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC;IAElD,gCAAgC;IAChC,MAAM,SAAS,GAAgB,KAAK,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,EAAE,CACpD,iBAAiB,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC;SACtC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,OAAO,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;SACnE,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACf,OAAO,EAAE,KAAK,CAAC,OAAO;QACtB,SAAS,EAAE,KAAK,CAAC,SAAS;QAC1B,OAAO,EAAE,KAAK,CAAC,OAAO;QACtB,IAAI,EAAE,IAAI,CAAC,IAAI;QACf,UAAU,EAAE,aAAa,CAAC,KAAK,CAAC,OAAO,CAAC;QACxC,WAAW,EAAE,KAAK,CAAC,WAAW;QAC9B,SAAS,EAAE,cAAc,CAAC,KAAK,CAAC,OAAO,CAAC;QACxC,WAAW,EAAE,KAAK,CAAC,WAAW;KAC/B,CAAC,CAAC,CACN,CAAC;IAEF,OAAO,CAAC,GAAG,CAAC,aAAa,SAAS,CAAC,MAAM,2BAA2B,CAAC,CAAC;IAEtE,+BAA+B;IAC/B,MAAM,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAC5E,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,MAAM,EAAE,qBAAqB,EAAE,GAAG,MAAM,MAAM,CAAC,+BAA+B,CAAC,CAAC;QAChF,MAAM,QAAQ,GAAG,MAAM,qBAAqB,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QAE3E,MAAM,YAAY,GAAgB,QAAQ;aACvC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;aACjD,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YACT,OAAO,EAAE,CAAC,CAAC,IAAI;YACf,SAAS,EAAE,CAAC,CAAC,SAAS;YACtB,OAAO,EAAE,CAAC,CAAC,OAAO;YAClB,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,UAAU,EAAE,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC;YACjC,WAAW,EAAE,CAAC,CAAC,IAAmB;YAClC,SAAS,EAAE,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC;YACjC,WAAW,EAAE,CAAC,CAAC,OAAO,G
AAG,CAAC,CAAC,SAAS,GAAG,CAAC;SACzC,CAAC,CAAC,CAAC;QAEN,SAAS,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;QAChC,OAAO,CAAC,GAAG,CAAC,SAAS,YAAY,CAAC,MAAM,kBAAkB,CAAC,CAAC;IAC9D,CAAC;IAED,kDAAkD;IAClD,IAAI,CAAC,MAAM,IAAI,SAAS,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;QACtC,OAAO,CAAC,GAAG,CAAC,mCAAmC,SAAS,CAAC,MAAM,yCAAyC,CAAC,CAAC;QAC1G,OAAO,CAAC,GAAG,CAAC,sEAAsE,CAAC,CAAC;IACtF,CAAC;IAED,kEAAkE;IAElE,0CAA0C;IAC1C,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC;QACxB,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,UAAU,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,OAAO;QACzF,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,QAAQ,EAAE,MAAM;QACzF,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,MAAM;KAC7C,CAAC,CAAC;IACH,MAAM,QAAQ,GAAG,CAAC,IAAY,EAAY,EAAE,CAC1C,IAAI;SACD,KAAK,CAAC,mBAAmB,CAAC;SAC1B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;IAE1E,MAAM,WAAW,GAAe,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;IAE7E,4DAA4D;IAC5D,MAAM,aAAa,GAA0B,IAAI,GAAG,EAAE,CAAC;IACvD,IAAI,MAAM,EAAE,CAAC;QACX,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5C,KAAK,MAAM,GAAG,IAAI,WAAW,CAAC,CAAC,CAAC,EAAE,CAAC;gBACjC,IAAI,GAAG,GAAG,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBACjC,IAAI,CAAC,GAAG,EAAE,CAAC;oBACT,GAAG,GAAG,EAAE,CAAC;oBACT,aAAa,CAAC,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC;gBAC9B,CAAC;gBACD,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IAED,kFAAkF;IAClF,MAAM,gBAAgB,GAAG,MAAM;QAC7B,CAAC,CAAC,SAAS;QACX,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACpD,IAAI,gBAAgB,KAAK,SAAS,EAAE,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,cAAc,gBAAgB,CAAC,cAAc,EAAE,4BAA4B,CAAC,CAAC;IAC3F,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,GAAG,CAAC,gEAAgE,CAAC,CAAC;IAChF,CAAC;IAED,IAAI,oBAAoB,GAAG,CAAC,CAAC;IAC7B,IAAI,0BAA0B,GAAG,KAAK,CAAC;IACvC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAA
C,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,IAAI,cAAc,IAAI,oBAAoB,IAAI,cAAc,EAAE,CAAC;YAC7D,0BAA0B,GAAG,IAAI,CAAC;YAClC,MAAM;QACR,CAAC;QACD,iCAAiC;QACjC,IAAI,CAAC,GAAG,SAAS,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YACjC,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;YAC7D,MAAM,eAAe,GAAG,UAAU,CAAC,MAAM,CAAC;YAC1C,IAAI,gBAAgB,KAAK,SAAS,EAAE,CAAC;gBACnC,MAAM,QAAQ,GAAG,CAAC,CAAC,oBAAoB,GAAG,gBAAgB,CAAC,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;gBAC9E,MAAM,SAAS,GAAG,gBAAgB,GAAG,oBAAoB,CAAC;gBAC1D,MAAM,IAAI,GAAG,oBAAoB,GAAG,UAAU,CAAC,OAAO,CAAC,CAAC;gBACxD,MAAM,GAAG,GAAG,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC9D,OAAO,CAAC,GAAG,CAAC,MAAM,QAAQ,MAAM,oBAAoB,CAAC,cAAc,EAAE,IAAI,gBAAgB,CAAC,cAAc,EAAE,iBAAiB,OAAO,eAAe,GAAG,gBAAgB,eAAe,cAAc,CAAC,CAAC;YACrM,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC,cAAc,EAAE,IAAI,SAAS,CAAC,MAAM,YAAY,OAAO,cAAc,eAAe,cAAc,CAAC,CAAC;YACpI,CAAC;YACD,2CAA2C;YAC3C,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC,CAAC;QACxD,CAAC;QAED,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;QAE5B,sEAAsE;QACtE,IAAI,UAAU,GAAgD,IAAI,CAAC;QACnE,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,MAAM,GAAwB,IAAI,GAAG,EAAE,CAAC;YAC9C,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;YAC7C,MAAM,UAAU,GAAG,YAAY,CAAC,IAAI,CAAC;YAErC,2EAA2E;YAC3E,MAAM,UAAU,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE;gBAC7C,MAAM,eAAe,GAAG,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,MAAM,IAAI,CAAC,CAAC;gBAC5D,OAAO,eAAe,GAAG,SAAS,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,kBAAkB;YACrE,CAAC,CAAC,CAAC;YAEH,iFAAiF;YACjF,KAAK,MAAM,GAAG,IAAI,UAAU,EAAE,CAAC;gBAC7B,MAAM,GAAG,GAAG,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;gBACnC,IAAI,CAAC,GAAG;oBAAE,SAAS;gBACnB,KAAK,MAAM,CAAC,IAAI,GAAG,EAAE,CAAC;oBACpB,IAAI,CAAC,IAAI,CAAC;wBAAE,SAAS,CAAC,qBAAqB;oBAC3C,IAAI,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,IAAI;wBAAE,SAAS,CAAC,iBAAiB;oBAClE,MAAM,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,GAA
G,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;gBAC1C,CAAC;YACH,CAAC;YAED,uCAAuC;YACvC,wCAAwC;YACxC,wCAAwC;YACxC,0FAA0F;YAC1F,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;iBACtC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,EAAE,EAAE;gBACtB,MAAM,YAAY,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;gBACpC,MAAM,UAAU,GAAG,YAAY,CAAC,MAAM,CAAC;gBACvC,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;gBACjD,MAAM,gBAAgB,GAAG,MAAM,GAAG,OAAO,CAAC;gBAC1C,OAAO,MAAM,IAAI,eAAe,IAAI,gBAAgB,IAAI,GAAG,CAAC;YAC9D,CAAC,CAAC;iBACD,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;iBAC3B,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,qBAAqB,EAAE,CAAC,CAAC,CAAC,CAAC,6BAA6B;iBAC1E,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC;QAC3C,CAAC;QAED,IAAI,MAAM,IAAI,UAAU,EAAE,CAAC;YACzB,KAAK,MAAM,EAAE,CAAC,EAAE,IAAI,UAAU,EAAE,CAAC;gBAC/B,IAAI,CAAC,MAAM,IAAI,cAAc,KAAK,QAAQ,IAAI,oBAAoB,IAAI,cAAc,EAAE,CAAC;oBACrF,OAAO,CAAC,GAAG,CAAC,wCAAwC,cAAc,CAAC,cAAc,EAAE,oCAAoC,CAAC,CAAC;oBACzH,OAAO,CAAC,GAAG,CAAC,+HAA+H,CAAC,CAAC;oBAC7I,MAAM;gBACR,CAAC;gBACD,oBAAoB,EAAE,CAAC;gBACvB,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;gBAE5B,uDAAuD;gBACvD,mDAAmD;gBACnD,sFAAsF;gBAEtF,MAAM,UAAU,GAAG,iBAAiB,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;gBACrE,IAAI,UAAU,IAAI,aAAa,EAAE,CAAC;oBAChC,mCAAmC;oBACnC,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,GAAG,iBAAiB,CACrE,MAAM,CAAC,IAAI,EACX,MAAM,CAAC,IAAI,EACX,MAAM,CAAC,OAAO,EACd,UAAU,EACV,MAAM,CAAC,WAAW,CACnB,CAAC;oBAEF,MAAM,SAAS,GAAG;wBAChB,KAAK,EAAE,MAAM,CAAC,IAAI;wBAClB,KAAK,EAAE,MAAM,CAAC,IAAI;wBAClB,KAAK,EAAE,MAAM,CAAC,SAAS;wBACvB,KAAK,EAAE,MAAM,CAAC,SAAS;wBACvB,QAAQ,EAAE,MAAM,CAAC,OAAO;wBACxB,QAAQ,EAAE,MAAM,CAAC,OAAO;wBACxB,UAAU;wBACV,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,OAAO;wBACpE,WAAW,EAAE,MAAM,CAAC,WAAW;wBAC/B,SAAS,EAAE,MAAM,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS;wBAC9C,WAAW,EAAE,MAAM,CAAC
,WAAW;wBAC/B,QAAQ;wBACR,MAAM;wBACN,UAAU;wBACV,WAAW;qBACZ,CAAC;oBACF,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;oBAE3B,IAAI,aAAa,EAAE,CAAC;wBAClB,OAAO,CAAC,GAAG,CAAC,iBAAiB,SAAS,CAAC,WAAW,IAAI,IAAI,CAAC,KAAK,CAAC,UAAU,GAAG,GAAG,CAAC,WAAW,CAAC,CAAC;wBAC/F,OAAO,CAAC,GAAG,CAAC,SAAS,SAAS,CAAC,KAAK,IAAI,SAAS,CAAC,KAAK,IAAI,SAAS,CAAC,QAAQ,MAAM,SAAS,CAAC,KAAK,IAAI,SAAS,CAAC,KAAK,IAAI,SAAS,CAAC,QAAQ,EAAE,CAAC,CAAC;wBAC/I,OAAO,CAAC,GAAG,CAAC,qBAAqB,SAAS,CAAC,SAAS,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;oBAC3E,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;aAAM,CAAC;YACN,oDAAoD;YACpD,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC9C,IAAI,cAAc,IAAI,oBAAoB,IAAI,cAAc;oBAAE,MAAM;gBACpE,oBAAoB,EAAE,CAAC;gBACvB,MAAM,MAAM,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;gBAE5B,2CAA2C;gBAC3C,IAAI,MAAM,CAAC,IAAI,KAAK,MAAM,CAAC,IAAI;oBAAE,SAAS;gBAE1C,uDAAuD;gBACvD,mDAAmD;gBACnD,sFAAsF;gBAEtF,MAAM,UAAU,GAAG,iBAAiB,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;gBACrE,IAAI,UAAU,IAAI,aAAa,EAAE,CAAC;oBAChC,mCAAmC;oBACnC,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,GAAG,iBAAiB,CACrE,MAAM,CAAC,IAAI,EACX,MAAM,CAAC,IAAI,EACX,MAAM,CAAC,OAAO,EACd,UAAU,EACV,MAAM,CAAC,WAAW,CACnB,CAAC;oBAEF,MAAM,SAAS,GAAG;wBAChB,KAAK,EAAE,MAAM,CAAC,IAAI;wBAClB,KAAK,EAAE,MAAM,CAAC,IAAI;wBAClB,KAAK,EAAE,MAAM,CAAC,SAAS;wBACvB,KAAK,EAAE,MAAM,CAAC,SAAS;wBACvB,QAAQ,EAAE,MAAM,CAAC,OAAO;wBACxB,QAAQ,EAAE,MAAM,CAAC,OAAO;wBACxB,UAAU;wBACV,OAAO,EAAE,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,OAAO;wBACpE,WAAW,EAAE,MAAM,CAAC,WAAW;wBAC/B,SAAS,EAAE,MAAM,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS;wBAC9C,WAAW,EAAE,MAAM,CAAC,WAAW;wBAC/B,QAAQ;wBACR,MAAM;wBACN,UAAU;wBACV,WAAW;qBACZ,CAAC;oBACF,UAAU,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;oBAE3B,IAAI,aAAa,EAAE,CAAC;wBAClB,OAAO,CAAC,GAAG,CAAC,iBAAiB,SAAS,CAAC,WAAW,IAAI,IAAI,CAAC,KAAK,CAAC,UAAU,GAAG,GAAG,CAAC,WAAW,CAAC,CAAC;wBAC/F,OAAO,CAAC,GAAG,CAAC,SAAS,SAAS,CAAC,KAAK,IAAI,SAAS,CAAC,KAAK,IAAI,SAAS,CAAC,QAAQ,MAAM,SAAS,CAAC,KAAK,IAAI,S
AAS,CAAC,KAAK,IAAI,SAAS,CAAC,QAAQ,EAAE,CAAC,CAAC;wBAC/I,OAAO,CAAC,GAAG,CAAC,qBAAqB,SAAS,CAAC,SAAS,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;oBAC3E,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,0BAA0B,EAAE,CAAC;QAC/B,OAAO,CAAC,GAAG,CAAC,oCAAoC,cAAc,CAAC,cAAc,EAAE,mDAAmD,CAAC,CAAC;IACtI,CAAC;IAED,oDAAoD;IACpD,OAAO,UAAU,CAAC,IAAI,CACpB,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,IAAI,CAAC,CAAC,SAAS,GAAG,CAAC,CAAC,SAAS,CACnE,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Python Pattern Extractor
|
|
3
|
+
*
|
|
4
|
+
* Extracts functions and classes from Python code for similarity analysis
|
|
5
|
+
*/
|
|
6
|
+
import type { CodePattern } from '../types';
|
|
7
|
+
/**
|
|
8
|
+
* Extract patterns from Python files
*
* Per the compiled implementation shipped alongside this declaration: only entries
* of `files` ending in `.py` (case-insensitive) are read, and every parsed export of
* type `function` or `class` yields one `CodePattern`. A file that cannot be read or
* parsed is reported with `console.warn` and skipped; if no Python parser is
* available the promise resolves to an empty array.
*
* @param files Paths of candidate files; non-Python paths are ignored.
* @returns The patterns collected from all readable Python files.
|
|
9
|
+
*/
|
|
10
|
+
export declare function extractPythonPatterns(files: string[]): Promise<CodePattern[]>;
|
|
11
|
+
/**
|
|
12
|
+
* Calculate similarity between two Python patterns
*
* Per the compiled implementation, the score is a weighted blend of four
* heuristics: name (30%), imports (40%), pattern type (10%) and signature (20%).
*
* @param pattern1 First pattern to compare.
* @param pattern2 Second pattern to compare.
* @returns The blended score; each component lies in [0, 1], so the result does too.
|
|
13
|
+
*/
|
|
14
|
+
export declare function calculatePythonSimilarity(pattern1: CodePattern, pattern2: CodePattern): number;
|
|
15
|
+
/**
|
|
16
|
+
* Detect common Python patterns that indicate duplication
*
* Per the compiled implementation, patterns are grouped by name after removing a
* leading `get_`, `set_`, `create_`, `delete_` or `update_`; every group holding
* three or more patterns produces one human-readable message.
*
* @param patterns Patterns to inspect (grouping ignores which file they came from).
* @returns One message per oversized name group; empty when none is found.
|
|
17
|
+
*/
|
|
18
|
+
export declare function detectPythonAntiPatterns(patterns: CodePattern[]): string[];
|
|
19
|
+
//# sourceMappingURL=python-extractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"python-extractor.d.ts","sourceRoot":"","sources":["../../src/extractors/python-extractor.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAGH,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAE5C;;GAEG;AACH,wBAAsB,qBAAqB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,CAmDnF;AAUD;;GAEG;AACH,wBAAgB,yBAAyB,CACvC,QAAQ,EAAE,WAAW,EACrB,QAAQ,EAAE,WAAW,GACpB,MAAM,CAgCR;AA4DD;;GAEG;AACH,wBAAgB,wBAAwB,CAAC,QAAQ,EAAE,WAAW,EAAE,GAAG,MAAM,EAAE,CAwB1E"}
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Python Pattern Extractor
|
|
3
|
+
*
|
|
4
|
+
* Extracts functions and classes from Python code for similarity analysis
|
|
5
|
+
*/
|
|
6
|
+
import { getParser } from '@aiready/core';
|
|
7
|
+
/**
 * Collect function and class patterns from the Python files in `files`.
 *
 * Paths that do not end in `.py` (case-insensitive) are ignored. Each remaining
 * file is read as UTF-8 and handed to the parser; every export whose type is
 * `function` or `class` becomes one pattern entry. A file that fails to read or
 * parse is logged with `console.warn` and skipped, so one bad file does not
 * abort the scan.
 *
 * @param files Candidate file paths.
 * @returns Promise resolving to the collected patterns (empty when the Python
 *          parser is unavailable).
 */
export async function extractPythonPatterns(files) {
    const collected = [];
    // The parser is looked up by file name, so a placeholder `.py` name selects it.
    const pyParser = getParser('dummy.py');
    if (!pyParser) {
        console.warn('Python parser not available');
        return collected;
    }
    const targets = files.filter((candidate) => candidate.toLowerCase().endsWith('.py'));
    for (const file of targets) {
        try {
            const fs = await import('fs');
            const source = await fs.promises.readFile(file, 'utf-8');
            const parsed = pyParser.parse(source, file);
            for (const exp of parsed.exports) {
                const isFunction = exp.type === 'function';
                // Anything that is neither a function nor a class is not a pattern.
                if (!isFunction && exp.type !== 'class') {
                    continue;
                }
                collected.push({
                    file,
                    name: exp.name,
                    type: isFunction ? 'function' : 'class',
                    startLine: exp.loc?.start.line || 0,
                    endLine: exp.loc?.end.line || 0,
                    imports: exp.imports || [],
                    dependencies: exp.dependencies || [],
                    signature: isFunction ? generatePythonSignature(exp) : `class ${exp.name}`,
                    language: 'python',
                });
            }
        }
        catch (error) {
            console.warn(`Failed to extract patterns from ${file}:`, error);
        }
    }
    return collected;
}
|
|
59
|
+
/**
 * Build a `def name(params)` signature string for a parsed Python function.
 *
 * @param exp Parsed export; `name` is used verbatim and `parameters`, when
 *            present, is joined with ", ".
 * @returns The signature text, with an empty parameter list when `parameters`
 *          is missing or empty.
 */
function generatePythonSignature(exp) {
    const { name, parameters } = exp;
    const joined = parameters?.join(', ');
    const paramList = joined ? joined : '';
    return `def ${name}(${paramList})`;
}
|
|
66
|
+
/**
 * Blend four heuristics into a single similarity score for two Python patterns.
 *
 * Components and weights, applied in this order: name 0.3, imports 0.4,
 * matching pattern type 0.1, signature 0.2. The weighted sum is divided by the
 * total weight.
 *
 * @param pattern1 First pattern.
 * @param pattern2 Second pattern.
 * @returns Weighted similarity of the two patterns.
 */
export function calculatePythonSimilarity(pattern1, pattern2) {
    // [component score, weight] pairs; kept in a fixed order so the
    // floating-point accumulation is stable.
    const components = [
        [calculateNameSimilarity(pattern1.name, pattern2.name), 0.3],
        [calculateImportSimilarity(pattern1.imports || [], pattern2.imports || []), 0.4],
        [pattern1.type === pattern2.type ? 1 : 0, 0.1],
        [calculateSignatureSimilarity(pattern1.signature, pattern2.signature), 0.2],
    ];
    let weightedSum = 0;
    let totalWeight = 0;
    for (const [score, weight] of components) {
        weightedSum += score * weight;
        totalWeight += weight;
    }
    return totalWeight > 0 ? weightedSum / totalWeight : 0;
}
|
|
91
|
+
/**
 * Score how alike two identifiers are, from 0 (unrelated) to 1 (identical).
 *
 * Tiers, checked in order:
 *   1.0  the names are identical;
 *   0.9  they are identical once a leading verb (get/set/is/has/create/delete/
 *        update/fetch) is removed;
 *   0.7  one verb-less name contains the other;
 *   else the Jaccard index of their underscore-separated tokens.
 *
 * The verb is only removed when it is a whole word, i.e. followed by `_`
 * (snake_case) or an upper-case letter (camelCase). Without that boundary
 * ordinary words were truncated ("settings" -> "tings", "history" -> "tory")
 * and unrelated names scored 0.9. A name that consists solely of the verb is
 * kept as-is so it never collapses to the empty string, which previously
 * matched every other name through `includes('')`.
 *
 * @param name1 First identifier.
 * @param name2 Second identifier.
 * @returns Similarity in [0, 1].
 */
function calculateNameSimilarity(name1, name2) {
    if (name1 === name2)
        return 1;
    const verbPrefix = /^(get|set|is|has|create|delete|update|fetch)(?:_|(?=[A-Z]))/;
    // Fall back to the full name when stripping would leave nothing behind.
    const stripVerb = (name) => name.replace(verbPrefix, '') || name;
    const clean1 = stripVerb(name1);
    const clean2 = stripVerb(name2);
    if (clean1 === clean2)
        return 0.9;
    // An empty name is a substring of everything; treat it as unrelated instead.
    if (clean1 === '' || clean2 === '')
        return 0;
    if (clean1.includes(clean2) || clean2.includes(clean1)) {
        return 0.7;
    }
    // Token overlap; empty tokens from leading/double underscores carry no meaning.
    const tokens1 = new Set(clean1.split('_').filter(Boolean));
    const tokens2 = new Set(clean2.split('_').filter(Boolean));
    const shared = [...tokens1].filter((token) => tokens2.has(token)).length;
    const unionSize = new Set([...tokens1, ...tokens2]).size;
    return unionSize === 0 ? 0 : shared / unionSize;
}
|
|
113
|
+
/**
 * Jaccard index of two import lists (shared entries / distinct entries).
 *
 * Two empty lists count as identical (1); exactly one empty list counts as
 * completely different (0). Duplicate entries within a list are ignored.
 *
 * @param imports1 Imports of the first pattern.
 * @param imports2 Imports of the second pattern.
 * @returns Overlap ratio in [0, 1].
 */
function calculateImportSimilarity(imports1, imports2) {
    const firstEmpty = imports1.length === 0;
    const secondEmpty = imports2.length === 0;
    if (firstEmpty || secondEmpty) {
        return firstEmpty && secondEmpty ? 1 : 0;
    }
    const left = new Set(imports1);
    const right = new Set(imports2);
    let shared = 0;
    for (const entry of left) {
        if (right.has(entry)) {
            shared += 1;
        }
    }
    // |A ∪ B| = |A| + |B| - |A ∩ B|
    const distinct = left.size + right.size - shared;
    return shared / distinct;
}
|
|
127
|
+
/**
 * Compare two signature strings by their parameter counts.
 *
 * Returns 1 for identical signatures, 0.8 when both declare the same number of
 * parameters, 0.5 when the counts differ by exactly one, and 0 otherwise. A
 * signature that is not a string cannot be compared and scores 0 (unless both
 * values are strictly equal).
 *
 * The count is taken from the text between the first `(` and the next `)`.
 * Only that inner text is split on commas: splitting the match including its
 * parentheses made `()` count as one parameter, so zero-argument and
 * one-argument functions were indistinguishable. A signature without
 * parentheses (e.g. `class Foo`) has zero parameters.
 *
 * NOTE(review): nested parentheses inside the parameter list (such as tuple
 * defaults) are not balanced by this pattern — confirm signatures never
 * contain them.
 *
 * @param sig1 First signature.
 * @param sig2 Second signature.
 * @returns One of 1, 0.8, 0.5 or 0.
 */
function calculateSignatureSimilarity(sig1, sig2) {
    if (sig1 === sig2)
        return 1;
    if (typeof sig1 !== 'string' || typeof sig2 !== 'string')
        return 0;
    const countParams = (signature) => {
        const inner = /\(([^)]*)\)/.exec(signature)?.[1] ?? '';
        return inner.split(',').filter((param) => param.trim() !== '').length;
    };
    const params1 = countParams(sig1);
    const params2 = countParams(sig2);
    if (params1 === params2)
        return 0.8;
    if (Math.abs(params1 - params2) === 1)
        return 0.5;
    return 0;
}
|
|
142
|
+
/**
 * Report groups of patterns whose names differ only by a CRUD-style prefix.
 *
 * Each pattern name has a leading `get_`, `set_`, `create_`, `delete_` or
 * `update_` removed; patterns sharing the remaining base name are grouped, and
 * every group with at least three members yields one message. Messages follow
 * the order in which base names were first encountered.
 *
 * @param patterns Patterns to inspect.
 * @returns One message per oversized group; empty when there is none.
 */
export function detectPythonAntiPatterns(patterns) {
    const membersByBaseName = new Map();
    for (const candidate of patterns) {
        const key = candidate.name.replace(/^(get|set|create|delete|update)_/, '');
        const bucket = membersByBaseName.get(key);
        if (bucket === undefined) {
            membersByBaseName.set(key, [candidate]);
        }
        else {
            bucket.push(candidate);
        }
    }
    return [...membersByBaseName]
        .filter(([, members]) => members.length >= 3)
        .map(([key, members]) => `Found ${members.length} functions with similar names (${key}): Consider consolidating`);
}
|
|
164
|
+
//# sourceMappingURL=python-extractor.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"python-extractor.js","sourceRoot":"","sources":["../../src/extractors/python-extractor.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,SAAS,EAAY,MAAM,eAAe,CAAC;AAGpD;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAAC,KAAe;IACzD,MAAM,QAAQ,GAAkB,EAAE,CAAC;IACnC,MAAM,MAAM,GAAG,SAAS,CAAC,UAAU,CAAC,CAAC;IAErC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;QAC5C,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,WAAW,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAEvE,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;QAC/B,IAAI,CAAC;YACH,MAAM,EAAE,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,CAAC;YAC9B,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;YACvD,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;YAExC,4BAA4B;YAC5B,KAAK,MAAM,GAAG,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;gBACjC,IAAI,GAAG,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;oBAC5B,QAAQ,CAAC,IAAI,CAAC;wBACZ,IAAI;wBACJ,IAAI,EAAE,GAAG,CAAC,IAAI;wBACd,IAAI,EAAE,UAAU;wBAChB,SAAS,EAAE,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC;wBACnC,OAAO,EAAE,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,IAAI,CAAC;wBAC/B,OAAO,EAAE,GAAG,CAAC,OAAO,IAAI,EAAE;wBAC1B,YAAY,EAAE,GAAG,CAAC,YAAY,IAAI,EAAE;wBACpC,SAAS,EAAE,uBAAuB,CAAC,GAAG,CAAC;wBACvC,QAAQ,EAAE,QAAQ;qBACnB,CAAC,CAAC;gBACL,CAAC;qBAAM,IAAI,GAAG,CAAC,IAAI,KAAK,OAAO,EAAE,CAAC;oBAChC,QAAQ,CAAC,IAAI,CAAC;wBACZ,IAAI;wBACJ,IAAI,EAAE,GAAG,CAAC,IAAI;wBACd,IAAI,EAAE,OAAO;wBACb,SAAS,EAAE,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC;wBACnC,OAAO,EAAE,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,IAAI,CAAC;wBAC/B,OAAO,EAAE,GAAG,CAAC,OAAO,IAAI,EAAE;wBAC1B,YAAY,EAAE,GAAG,CAAC,YAAY,IAAI,EAAE;wBACpC,SAAS,EAAE,SAAS,GAAG,CAAC,IAAI,EAAE;wBAC9B,QAAQ,EAAE,QAAQ;qBACnB,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,IAAI,CAAC,mCAAmC,IAAI,GAAG,EAAE,KAAK,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED;;GAEG;AACH,SAAS,uBAAuB,CAAC,GAAQ;IACvC,MAAM,MAAM,GAAG,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC
;IAChD,OAAO,OAAO,GAAG,CAAC,IAAI,IAAI,MAAM,GAAG,CAAC;AACtC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,yBAAyB,CACvC,QAAqB,EACrB,QAAqB;IAErB,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,OAAO,GAAG,CAAC,CAAC;IAEhB,2BAA2B;IAC3B,MAAM,cAAc,GAAG,uBAAuB,CAAC,QAAQ,CAAC,IAAI,EAAE,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC7E,UAAU,IAAI,cAAc,GAAG,GAAG,CAAC;IACnC,OAAO,IAAI,GAAG,CAAC;IAEf,6BAA6B;IAC7B,MAAM,gBAAgB,GAAG,yBAAyB,CAChD,QAAQ,CAAC,OAAO,IAAI,EAAE,EACtB,QAAQ,CAAC,OAAO,IAAI,EAAE,CACvB,CAAC;IACF,UAAU,IAAI,gBAAgB,GAAG,GAAG,CAAC;IACrC,OAAO,IAAI,GAAG,CAAC;IAEf,2BAA2B;IAC3B,IAAI,QAAQ,CAAC,IAAI,KAAK,QAAQ,CAAC,IAAI,EAAE,CAAC;QACpC,UAAU,IAAI,GAAG,CAAC;IACpB,CAAC;IACD,OAAO,IAAI,GAAG,CAAC;IAEf,gCAAgC;IAChC,MAAM,aAAa,GAAG,4BAA4B,CAChD,QAAQ,CAAC,SAAS,EAClB,QAAQ,CAAC,SAAS,CACnB,CAAC;IACF,UAAU,IAAI,aAAa,GAAG,GAAG,CAAC;IAClC,OAAO,IAAI,GAAG,CAAC;IAEf,OAAO,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,UAAU,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC;AAChD,CAAC;AAED;;GAEG;AACH,SAAS,uBAAuB,CAAC,KAAa,EAAE,KAAa;IAC3D,IAAI,KAAK,KAAK,KAAK;QAAE,OAAO,CAAC,CAAC;IAE9B,kCAAkC;IAClC,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,gDAAgD,EAAE,EAAE,CAAC,CAAC;IACnF,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,gDAAgD,EAAE,EAAE,CAAC,CAAC;IAEnF,IAAI,MAAM,KAAK,MAAM;QAAE,OAAO,GAAG,CAAC;IAElC,4BAA4B;IAC5B,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACvD,OAAO,GAAG,CAAC;IACb,CAAC;IAED,2BAA2B;IAC3B,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC;IACxC,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC;IACxC,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACjE,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,IAAI,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC;IAE1C,OAAO,YAAY,CAAC,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC;AACxC,CAAC;AAED;;GAEG;AACH,SAAS,yBAAyB,CAAC,QAAkB,EAAE,QAAkB;IACvE,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAC7D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAE7D,MAAM,IAAI,GAAG,IAAI,GA
AG,CAAC,QAAQ,CAAC,CAAC;IAC/B,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;IAE/B,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACjE,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,IAAI,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC;IAE1C,OAAO,YAAY,CAAC,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC;AACxC,CAAC;AAED;;GAEG;AACH,SAAS,4BAA4B,CAAC,IAAY,EAAE,IAAY;IAC9D,IAAI,IAAI,KAAK,IAAI;QAAE,OAAO,CAAC,CAAC;IAE5B,2BAA2B;IAC3B,MAAM,OAAO,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IACvF,MAAM,OAAO,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;IAEvF,IAAI,OAAO,KAAK,OAAO;QAAE,OAAO,GAAG,CAAC;IACpC,IAAI,IAAI,CAAC,GAAG,CAAC,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC;QAAE,OAAO,GAAG,CAAC;IAElD,OAAO,CAAC,CAAC;AACX,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,wBAAwB,CAAC,QAAuB;IAC9D,MAAM,YAAY,GAAa,EAAE,CAAC;IAElC,yBAAyB;IACzB,MAAM,UAAU,GAAG,IAAI,GAAG,EAAyB,CAAC;IAEpD,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,MAAM,QAAQ,GAAG,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,kCAAkC,EAAE,EAAE,CAAC,CAAC;QAC9E,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC9B,UAAU,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;QAC/B,CAAC;QACD,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAE,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC1C,CAAC;IAED,kDAAkD;IAClD,KAAK,MAAM,CAAC,QAAQ,EAAE,KAAK,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3C,IAAI,KAAK,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YACtB,YAAY,CAAC,IAAI,CACf,SAAS,KAAK,CAAC,MAAM,kCAAkC,QAAQ,2BAA2B,CAC3F,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO,YAAY,CAAC;AACtB,CAAC"}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Grouping and clustering utilities for duplicate patterns
|
|
3
|
+
* Reduces noise by consolidating similar duplicates and creating refactor clusters
|
|
4
|
+
*/
|
|
5
|
+
import type { DuplicatePattern, PatternType } from './detector';
|
|
6
|
+
import type { Severity } from './context-rules';
|
|
7
|
+
/**
 * Element type of the array returned by `groupDuplicatesByFilePair`: the
 * duplicates attributed to one file pair together with aggregate figures.
 *
 * NOTE(review): field meanings below are inferred from names only; the
 * implementation is not part of this declaration — confirm against grouping.js.
 */
export interface DuplicateGroup {
|
|
8
|
+
filePair: string;
|
|
9
|
+
duplicates: DuplicatePattern[];
|
|
10
|
+
totalTokenCost: number;
|
|
11
|
+
averageSimilarity: number;
|
|
12
|
+
maxSimilarity: number;
|
|
13
|
+
severity: Severity;
|
|
14
|
+
patternType: PatternType;
|
|
15
|
+
occurrences: number;
|
|
16
|
+
// One entry per recorded range; each holds a start/end pair for both files of the pair.
lineRanges: Array<{
|
|
17
|
+
file1: {
|
|
18
|
+
start: number;
|
|
19
|
+
end: number;
|
|
20
|
+
};
|
|
21
|
+
file2: {
|
|
22
|
+
start: number;
|
|
23
|
+
end: number;
|
|
24
|
+
};
|
|
25
|
+
}>;
|
|
26
|
+
}
|
|
27
|
+
/**
 * Element type of the arrays returned by `createRefactorClusters` and
 * `filterClustersByImpact`.
 *
 * NOTE(review): presumably describes one set of related duplicates that could be
 * refactored together, with `suggestion`/`reason` as human-readable text —
 * confirm against grouping.js, which is not visible from this declaration.
 */
export interface RefactorCluster {
|
|
28
|
+
id: string;
|
|
29
|
+
name: string;
|
|
30
|
+
files: string[];
|
|
31
|
+
patternType: PatternType;
|
|
32
|
+
severity: Severity;
|
|
33
|
+
totalTokenCost: number;
|
|
34
|
+
averageSimilarity: number;
|
|
35
|
+
duplicateCount: number;
|
|
36
|
+
suggestion: string;
|
|
37
|
+
reason: string;
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Group duplicates by file pair, consolidating similar line ranges
|
|
41
|
+
* Reduces "80% similar entries for the same file pairs" noise
*
* @param duplicates Detected duplicate patterns to consolidate.
* @returns The consolidated groups (presumably one per distinct file pair —
*          confirm against the implementation).
|
|
42
|
+
*/
|
|
43
|
+
export declare function groupDuplicatesByFilePair(duplicates: DuplicatePattern[]): DuplicateGroup[];
|
|
44
|
+
/**
|
|
45
|
+
* Create refactor clusters for related duplicates
|
|
46
|
+
* Groups UI patterns, components, etc. into actionable clusters
*
* @param duplicates Detected duplicate patterns to cluster.
* @returns The clusters built from `duplicates`; the clustering criteria live in
*          the implementation and are not visible from this declaration.
|
|
47
|
+
*/
|
|
48
|
+
export declare function createRefactorClusters(duplicates: DuplicatePattern[]): RefactorCluster[];
|
|
49
|
+
/**
|
|
50
|
+
* Filter clusters by minimum impact threshold
|
|
51
|
+
* Reduces noise from minor refactoring opportunities
*
* @param clusters Clusters to filter.
* @param minTokenCost Optional threshold; presumably compared against
*        `RefactorCluster.totalTokenCost`. The default is set by the
*        implementation and is not visible here — TODO confirm.
* @param minFileCount Optional threshold; presumably compared against the number
*        of entries in `RefactorCluster.files` — TODO confirm, including default.
* @returns The clusters that pass the filter.
|
|
52
|
+
*/
|
|
53
|
+
export declare function filterClustersByImpact(clusters: RefactorCluster[], minTokenCost?: number, minFileCount?: number): RefactorCluster[];
|
|
54
|
+
//# sourceMappingURL=grouping.d.ts.map
|