deep-slop 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.deep-slop/.deep-slop-ignore +13 -0
- package/LICENSE +21 -0
- package/README.md +1170 -0
- package/dist/arch-constraints-C7s1E_bc.js +450 -0
- package/dist/arch-rules-DI1SYPqu.js +358 -0
- package/dist/ast-slop-BGdr58wZ.js +1839 -0
- package/dist/config-lint-ph3vMUbg.js +371 -0
- package/dist/dead-flow-DHRkyxZT.js +1422 -0
- package/dist/deep-slop-bundled.js +33140 -0
- package/dist/discover-B_S_Fy2S.js +164 -0
- package/dist/dup-detect-DKRXM04q.js +709 -0
- package/dist/file-utils-B_HFXhCs.js +93 -0
- package/dist/format-lint-DeElllNm.js +445 -0
- package/dist/framework-lint-CqdlF9hX.js +782 -0
- package/dist/i18n-lint-CPzx7V8Q.js +605 -0
- package/dist/import-intelligence-SK4F7XpL.js +966 -0
- package/dist/index.d.ts +233 -0
- package/dist/index.js +1030 -0
- package/dist/knip-CgxnnTBZ.js +93 -0
- package/dist/lint-external-ZbW3jGvB.js +326 -0
- package/dist/markup-lint-DKVEDz9M.js +805 -0
- package/dist/mcp.js +35939 -0
- package/dist/meta-quality-Dai1W5iC.js +224 -0
- package/dist/perf-hints-BnWFMFff.js +500 -0
- package/dist/security-deep-DJRINs10.js +1198 -0
- package/dist/syntax-deep-ZQYMutky.js +624 -0
- package/dist/tree-sitter-CM-cP0nl.js +661 -0
- package/dist/type-safety-Dboj2C1t.js +519 -0
- package/package.json +92 -0
|
@@ -0,0 +1,709 @@
|
|
|
1
|
+
import { t as collectFiles } from "./discover-B_S_Fy2S.js";
|
|
2
|
+
import { i as toLines, n as extractImports, r as readFileContent } from "./file-utils-B_HFXhCs.js";
|
|
3
|
+
import { extname, relative } from "node:path";
|
|
4
|
+
|
|
5
|
+
//#region src/engines/dup-detect/index.ts
|
|
6
|
+
/** Number of lines in each sliding-window block compared for duplication. */
const IDENTICAL_BLOCK_MIN_LINES = 10;
/** Jaccard token overlap at or above which two blocks are reported as similar. */
const SIMILARITY_THRESHOLD = 0.9;
/** A module must be imported by at least this many files before it is reported. */
const DUPLICATE_IMPORT_MIN_FILES = 15;
/** String literals shorter than this many characters are not collected. */
const REPEATED_CONSTANT_MIN_CHARS = 8;
/** Minimum occurrences (and distinct files) for a repeated-constant report. */
const REPEATED_CONSTANT_MIN_OCCURRENCES = 3;
/** Distance, in lines, between the starts of consecutive sliding windows. */
const BLOCK_OVERLAP_STEP = 5;
/** Files with more lines than this are skipped by block extraction. */
const LARGE_FILE_LINE_LIMIT = 2000;
/** Number of files taken from the target list per batch. */
const FILE_BATCH_SIZE = 50;
/** Functions need strictly more non-blank body lines than this to be compared. */
const COPY_PASTE_MIN_BODY_LINES = 5;
/** Function names that are never reported by the copy-paste check. */
const COPY_PASTE_NAME_WHITELIST = new Set([
	"run", "fix", "constructor", "get",
	"set", "init", "handle", "process",
	"execute", "dispose", "close", "open",
	"start", "stop", "reset", "validate"
]);
/** File extensions this engine scans. */
const SUPPORTED_EXTS = new Set([".ts", ".tsx", ".js", ".jsx", ".mjs", ".cjs", ".py"]);
|
|
42
|
+
/**
 * Assemble a diagnostic record for this engine.
 *
 * Copies the caller-supplied fields from `opts` and stamps the two fixed
 * fields: `engine` is always "dup-detect" and `category` is always
 * "duplication".
 *
 * @param {object} opts - filePath, rule, severity, message, help, line,
 *   column, fixable, suggestion and detail for the diagnostic.
 * @returns {object} The diagnostic object.
 */
function diag(opts) {
	const { filePath, rule, severity, message, help } = opts;
	const { line, column, fixable, suggestion, detail } = opts;
	return {
		filePath,
		engine: "dup-detect",
		rule,
		severity,
		message,
		help,
		line,
		column,
		category: "duplication",
		fixable,
		suggestion,
		detail
	};
}
|
|
59
|
+
/** Lookup table from file extension (with leading dot) to language name. */
const LANG_MAP = Object.fromEntries([
	["typescript", [".ts", ".tsx"]],
	["javascript", [".js", ".jsx", ".mjs", ".cjs"]],
	["python", [".py"]]
].flatMap(([language, extensions]) => extensions.map((extension) => [extension, language])));
/**
 * Resolve the language of a file from its extension.
 *
 * @param {string} filePath - Path whose extension is inspected.
 * @returns {string|null} "typescript", "javascript" or "python"; null when
 *   the extension is not in the table.
 */
function languageFromPath(filePath) {
	const language = LANG_MAP[extname(filePath)];
	if (language === undefined || language === null) return null;
	return language;
}
|
|
72
|
+
/**
 * Normalize a source line: trim surrounding whitespace and drop a trailing
 * line comment (`#` for Python, `//` for everything else).
 *
 * The comment marker is only honoured outside of quoted strings, so text such
 * as `"http://host"` or `'#fff'` is no longer cut in half. Quote state is
 * tracked per line only. When a line ends with a quote still open (for
 * example a multi-line string or an apostrophe inside a regex), the function
 * falls back to cutting at the first marker found anywhere in the line.
 *
 * @param {string} line - Raw source line.
 * @param {string|null} lang - "python" selects `#` comments; any other value
 *   selects `//` comments.
 * @returns {string} The trimmed line without its trailing comment.
 */
function normalizeLine(line, lang) {
	const trimmed = line.trim();
	const isPython = lang === "python";
	const marker = isPython ? "#" : "//";
	// Backticks only delimit strings in JS/TS.
	const quoteChars = isPython ? "\"'" : "\"'`";
	let openQuote = null;
	for (let i = 0; i < trimmed.length; i++) {
		const ch = trimmed[i];
		if (openQuote !== null) {
			if (ch === "\\") i++; // skip the escaped character
			else if (ch === openQuote) openQuote = null;
			continue;
		}
		if (quoteChars.includes(ch)) {
			openQuote = ch;
			continue;
		}
		if (trimmed.startsWith(marker, i)) return trimmed.slice(0, i).trimEnd();
	}
	if (openQuote !== null) {
		// Unbalanced quote: quote tracking is unreliable for this line, so use
		// the plain first-marker cut instead.
		const markerIdx = trimmed.indexOf(marker);
		if (markerIdx >= 0) return trimmed.slice(0, markerIdx).trimEnd();
	}
	return trimmed;
}
|
|
84
|
+
/**
 * Normalize every line of a block and join the non-empty results.
 *
 * @param {string[]} lines - Raw source lines.
 * @param {string|null} lang - Language passed through to normalizeLine.
 * @returns {string} Normalized lines joined with "\n"; lines that normalize
 *   to the empty string are left out.
 */
function normalizeBlock(lines, lang) {
	const kept = [];
	for (const rawLine of lines) {
		const cleaned = normalizeLine(rawLine, lang);
		if (cleaned.length > 0) kept.push(cleaned);
	}
	return kept.join("\n");
}
|
|
88
|
+
/**
 * Split a line into tokens on whitespace and punctuation/operator characters.
 *
 * @param {string} line - Line to tokenize.
 * @returns {string[]} The non-empty pieces between separators, in order.
 */
function tokenizeLine(line) {
	const tokens = [];
	for (const piece of line.split(/[\s{}()\[\];,.<>:=+\-*/&|!~^%]+/)) {
		if (piece !== "") tokens.push(piece);
	}
	return tokens;
}
|
|
92
|
+
/**
 * Jaccard similarity of two sets: |a ∩ b| / |a ∪ b|.
 *
 * @param {Set<*>} a - First set.
 * @param {Set<*>} b - Second set.
 * @returns {number} 1 when both sets are empty, 0 when exactly one is empty,
 *   otherwise the ratio of shared items to distinct items.
 */
function jaccardSimilarity(a, b) {
	const sizeA = a.size;
	const sizeB = b.size;
	if (sizeA === 0 || sizeB === 0) return sizeA === sizeB ? 1 : 0;
	let shared = 0;
	a.forEach((item) => {
		if (b.has(item)) shared += 1;
	});
	const unionSize = sizeA + sizeB - shared;
	if (unionSize === 0) return 0;
	return shared / unionSize;
}
|
|
101
|
+
/**
 * Cut a file into fixed-size blocks with a sliding window.
 *
 * Windows start at index 0 and advance by `step`; only full windows are
 * produced. A window whose normalized text is shorter than 10 characters is
 * skipped.
 *
 * @param {{num: number, text: string}[]} lines - Numbered lines of the file.
 * @param {number} blockSize - Lines per window.
 * @param {number} step - Offset between consecutive window starts.
 * @param {string} filePath - Path recorded on every block.
 * @param {string|null} lang - Language passed to the normalizers.
 * @param {boolean} includeTokenSets - When truthy, each block also carries
 *   the set of tokens of its normalized lines; otherwise `tokenSet` is
 *   undefined.
 * @returns {object[]} Blocks with filePath, startLine, endLine,
 *   normalizedText and tokenSet.
 */
function extractBlocks(lines, blockSize, step, filePath, lang, includeTokenSets) {
	const collected = [];
	const lastStart = lines.length - blockSize;
	let offset = 0;
	while (offset <= lastStart) {
		const chunk = lines.slice(offset, offset + blockSize);
		offset += step;
		const texts = chunk.map(({ text }) => text);
		const normalizedText = normalizeBlock(texts, lang);
		if (normalizedText.length < 10) continue;
		let tokenSet;
		if (includeTokenSets) {
			tokenSet = new Set();
			texts.forEach((rawText) => {
				const cleaned = normalizeLine(rawText, lang);
				if (cleaned.length === 0) return;
				for (const token of tokenizeLine(cleaned)) tokenSet.add(token);
			});
		}
		const first = chunk[0];
		const last = chunk[chunk.length - 1];
		collected.push({
			filePath,
			startLine: first.num,
			endLine: last.num,
			normalizedText,
			tokenSet
		});
	}
	return collected;
}
|
|
127
|
+
/**
 * Find function definitions with line-based regexes (JS/TS/Python).
 *
 * For JS/TS a line is tested, in order, as a `function` declaration, an arrow
 * function assigned to const/let/var, and a method-style `name(...) {`
 * header; the body is then taken from the matching braces. For Python, `def`
 * lines are matched and the body is taken by indentation. A function is kept
 * only when its normalized body is longer than 20 characters.
 *
 * Control-flow headers such as `if (...) {` also match the method pattern,
 * so they are rejected by comparing the captured name against a keyword set.
 * (A prefix test on the line would also discard real methods such as
 * `format()` or `classify()`.)
 *
 * @param {string} content - Full file text.
 * @param {string} filePath - Path recorded on every result.
 * @param {string|null} lang - "typescript", "javascript" or "python"; any
 *   other value yields an empty result.
 * @returns {object[]} Records with filePath, name, startLine, endLine,
 *   bodyLineCount (non-blank body lines) and bodyNormalized.
 */
function extractFunctions(content, filePath, lang) {
	const lines = toLines(content);
	const functions = [];
	// Shared tail of both language branches: measure the body and keep it if
	// it is substantial enough.
	const record = (name, startLine, { endLine, bodyLines }) => {
		const bodyLineCount = bodyLines.filter((l) => l.trim().length > 0).length;
		const bodyNormalized = bodyLines.map((l) => normalizeLine(l, lang)).filter((l) => l.length > 0).join("\n");
		if (bodyNormalized.length > 20) functions.push({
			filePath,
			name,
			startLine,
			endLine,
			bodyLineCount,
			bodyNormalized
		});
	};
	if (lang === "typescript" || lang === "javascript") {
		const funcStartRe = /^\s*(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(/;
		const arrowFuncRe = /^\s*(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?(?:\([^)]*\)|[a-zA-Z_]\w*)\s*=>/;
		const methodRe = /^\s*(?:(?:public|private|protected|static|async|abstract)\s+)*(\w+)\s*\([^)]*\)\s*(?::\s*[^{]+)?\{/;
		// Names that satisfy methodRe but are not methods to report.
		const nonMethodNames = new Set([
			"if",
			"for",
			"while",
			"switch",
			"catch",
			"class",
			"constructor",
			"function"
		]);
		for (let i = 0; i < lines.length; i++) {
			const { num, text } = lines[i];
			const trimmed = text.trim();
			const declMatch = funcStartRe.exec(trimmed) ?? arrowFuncRe.exec(trimmed);
			let funcName = declMatch ? declMatch[1] : null;
			if (funcName === null) {
				const methodMatch = methodRe.exec(trimmed);
				if (methodMatch && !nonMethodNames.has(methodMatch[1])) funcName = methodMatch[1];
			}
			if (funcName !== null) record(funcName, num, extractBraceBody(lines, i));
		}
	}
	if (lang === "python") {
		const defRe = /^\s*def\s+(\w+)\s*\(/;
		for (let i = 0; i < lines.length; i++) {
			const { num, text } = lines[i];
			const match = defRe.exec(text);
			if (match) record(match[1], num, extractPythonBody(lines, i));
		}
	}
	return functions;
}
|
|
183
|
+
/**
 * Collect the lines of a brace-delimited body, starting the scan at
 * `startIdx`.
 *
 * Every `{` and `}` character is counted, wherever it appears on the line.
 * The scan stops at the `}` that brings the depth back to zero after at least
 * one `{` was seen. Neither the starting line nor the line holding that final
 * `}` is added to the body. If the braces never balance, the body runs to the
 * last line.
 *
 * @param {{num: number, text: string}[]} lines - Numbered lines of the file.
 * @param {number} startIdx - Index of the line where the definition starts.
 * @returns {{endLine: number, bodyLines: string[]}} Line number where the
 *   scan ended and the raw body lines.
 */
function extractBraceBody(lines, startIdx) {
	const bodyLines = [];
	let endLine = lines[startIdx].num;
	let depth = 0;
	let opened = false;
	let idx = startIdx;
	while (idx < lines.length) {
		const { num, text } = lines[idx];
		for (const ch of text) {
			if (ch === "{") {
				depth += 1;
				opened = true;
				continue;
			}
			if (ch !== "}") continue;
			depth -= 1;
			if (opened && depth === 0) return { endLine: num, bodyLines };
		}
		if (opened && idx > startIdx) bodyLines.push(text);
		endLine = num;
		idx += 1;
	}
	return { endLine, bodyLines };
}
|
|
212
|
+
/**
 * Collect the indented body that follows a Python `def` line.
 *
 * Lines after `startIdx` are taken until the first non-blank line indented no
 * deeper than the `def` itself. Blank lines are kept in the body but do not
 * move `endLine`.
 *
 * @param {{num: number, text: string}[]} lines - Numbered lines of the file.
 * @param {number} startIdx - Index of the `def` line.
 * @returns {{endLine: number, bodyLines: string[]}} Number of the last
 *   non-blank body line (the `def` line when there is none) and the raw body
 *   lines.
 */
function extractPythonBody(lines, startIdx) {
	const indentOf = (text) => text.length - text.trimStart().length;
	const header = lines[startIdx];
	const baseIndent = indentOf(header.text);
	const bodyLines = [];
	let endLine = header.num;
	for (const { num, text } of lines.slice(startIdx + 1)) {
		if (text.trim() === "") {
			bodyLines.push(text);
			continue;
		}
		if (indentOf(text) <= baseIndent) break;
		bodyLines.push(text);
		endLine = num;
	}
	return { endLine, bodyLines };
}
|
|
234
|
+
/**
 * Extract the imported symbol names from the raw text of one import.
 *
 * JS/TS: returns the default import (if any), followed by the names inside
 * `{ ... }` or the `* as ns` namespace name. For `a as b` the original name
 * `a` is returned, and an inline `type` modifier (`{ type Foo }`) is
 * stripped. The brace pattern is not anchored to `import`, so any `{ ... }`
 * list in the text is read.
 *
 * Python: handles `from m import a, b as c` and `import a, b as c`.
 * Parentheses around the list and a trailing `#` comment are removed before
 * splitting.
 *
 * @param {string} raw - Raw import text; the `import`/`from` patterns are
 *   anchored at the start of the string.
 * @param {string|null} lang - "typescript", "javascript" or "python".
 * @returns {string[]} Symbol names; empty when nothing is recognized.
 */
function extractNamedSymbols(raw, lang) {
	// "a, b as c" -> ["a", "b"]
	const splitList = (list) => list.split(",").map((part) => part.trim().split(/\s+as\s+/)[0].trim()).filter((name) => name.length > 0);
	if (lang === "typescript" || lang === "javascript") {
		const symbols = [];
		// Default import, either alone (`import X from`) or combined
		// (`import X, { ... }` / `import X, * as ns`).
		const defaultMatch = raw.match(/^import\s+(?:type\s+)?(\w+)(?:\s*,|\s+from)/);
		if (defaultMatch) symbols.push(defaultMatch[1]);
		const namedMatch = raw.match(/\{([^}]+)\}/);
		if (namedMatch) {
			// Drop the inline `type` modifier but keep a symbol literally named "type".
			for (const name of splitList(namedMatch[1])) symbols.push(name.replace(/^type\s+(?=[\w$])/, ""));
			return symbols;
		}
		const nsMatch = raw.match(/^import\s+(?:\w+\s*,\s*)?\*\s+as\s+(\w+)\s+from/);
		if (nsMatch) symbols.push(nsMatch[1]);
		return symbols;
	}
	if (lang === "python") {
		const cleanList = (list) => splitList(list.replace(/#.*$/, "").replace(/[()]/g, ""));
		const fromMatch = raw.match(/^from\s+[^\s]+\s+import\s+(.+)/);
		if (fromMatch) return cleanList(fromMatch[1]);
		const importMatch = raw.match(/^import\s+(.+)/);
		if (importMatch) return cleanList(importMatch[1]);
	}
	return [];
}
|
|
252
|
+
/** Extract string literals from a line */
|
|
253
|
+
function extractStringLiterals(line, lang) {
|
|
254
|
+
const results = [];
|
|
255
|
+
if (lang === "python") {
|
|
256
|
+
const stringRe = /(?<!\\)(?:"((?:[^"\\]|\\.)*)"|'((?:[^'\\]|\\.)*)')/g;
|
|
257
|
+
let m;
|
|
258
|
+
while ((m = stringRe.exec(line)) !== null) {
|
|
259
|
+
const value = m[1] ?? m[2] ?? "";
|
|
260
|
+
const col = m.index + 1;
|
|
261
|
+
if (value.length >= REPEATED_CONSTANT_MIN_CHARS) results.push({
|
|
262
|
+
value,
|
|
263
|
+
col
|
|
264
|
+
});
|
|
265
|
+
}
|
|
266
|
+
} else {
|
|
267
|
+
const stringRe = /(?<!\\)(?:"((?:[^"\\]|\\.)*)"|'((?:[^'\\]|\\.)*)')/g;
|
|
268
|
+
let m;
|
|
269
|
+
while ((m = stringRe.exec(line)) !== null) {
|
|
270
|
+
const value = m[1] ?? m[2] ?? "";
|
|
271
|
+
const col = m.index + 1;
|
|
272
|
+
if (value.length >= REPEATED_CONSTANT_MIN_CHARS) results.push({
|
|
273
|
+
value,
|
|
274
|
+
col
|
|
275
|
+
});
|
|
276
|
+
}
|
|
277
|
+
const templateRe = /`((?:[^`\\]|\\.)*)`/g;
|
|
278
|
+
while ((m = templateRe.exec(line)) !== null) {
|
|
279
|
+
const value = m[1] ?? "";
|
|
280
|
+
const col = m.index + 1;
|
|
281
|
+
if (value.length >= REPEATED_CONSTANT_MIN_CHARS && !value.includes("${")) results.push({
|
|
282
|
+
value,
|
|
283
|
+
col
|
|
284
|
+
});
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
return results;
|
|
288
|
+
}
|
|
289
|
+
/**
 * Decide whether a string literal is worth reporting as a repeated constant.
 *
 * @param {string} value - Literal contents.
 * @returns {boolean} False when the value matches any of the exclusion
 *   patterns below, true otherwise.
 */
function isConstantCandidate(value) {
	const exclusions = [
		/^https?:\/\//, // http(s) URL
		/^\/|^\.\.?\//, // absolute or ./ ../ path
		/^\d+$/, // digits only
		/^[.\[]/, // starts with "." or "["
		/^node_modules/,
		/^[@a-z0-9][-a-z0-9.]*\/[-a-z0-9.@/]*$/i, // package-style specifier with a slash
		/\bimport\b|\bfrom\b|\brequire\b/, // contains an import-related word
		/^\.[a-z]{1,4}$/ // short dot-extension
	];
	if (exclusions.some((pattern) => pattern.test(value))) return false;
	// Short lowercase/kebab identifiers (at most 20 characters).
	const isShortSlug = /^[a-z][-a-z0-9]{1,20}$/.test(value) && value.length <= 20;
	return !isShortSlug;
}
|
|
302
|
+
/**
 * Report blocks whose normalized text is identical in two different files.
 *
 * Blocks are grouped by normalized text. A group is considered only when it
 * has at least two blocks from at least two files. Overlapping blocks of the
 * same file are merged first, then every cross-file pair of merged regions
 * produces one "dup-detect/identical-block" warning anchored at the first
 * region of the pair.
 *
 * @param {object[]} allBlocks - Blocks with filePath, startLine, endLine and
 *   normalizedText.
 * @param {string} rootDir - Directory that reported paths are made relative to.
 * @returns {object[]} Diagnostics.
 */
function detectIdenticalBlocks(allBlocks, rootDir) {
	const byText = new Map();
	for (const block of allBlocks) {
		const bucket = byText.get(block.normalizedText);
		if (bucket) bucket.push(block);
		else byText.set(block.normalizedText, [block]);
	}
	const span = (region) => `${region.filePath}:${region.startLine}-${region.endLine}`;
	const diagnostics = [];
	for (const group of byText.values()) {
		if (group.length < 2) continue;
		const fileCount = new Set(group.map((block) => block.filePath)).size;
		if (fileCount < 2) continue;
		const regions = mergeOverlappingBlocks(group);
		if (regions.length < 2) continue;
		const reportedPairs = new Set();
		regions.forEach((a, i) => {
			for (const b of regions.slice(i + 1)) {
				if (a.filePath === b.filePath) continue;
				// Order-independent key: the lexically smaller path comes first.
				const pairKey = a.filePath < b.filePath ? `${span(a)}|${span(b)}` : `${span(b)}|${span(a)}`;
				if (reportedPairs.has(pairKey)) continue;
				reportedPairs.add(pairKey);
				const relA = relative(rootDir, a.filePath);
				const relB = relative(rootDir, b.filePath);
				const lineCount = a.endLine - a.startLine + 1;
				diagnostics.push(diag({
					filePath: relA,
					rule: "dup-detect/identical-block",
					severity: "warning",
					message: `Identical code block (${lineCount} lines) duplicated in ${relB}:${b.startLine}`,
					help: "Extract the duplicated block into a shared utility function or module to reduce maintenance burden.",
					line: a.startLine,
					column: 1,
					fixable: false,
					suggestion: {
						type: "refactor",
						text: `Extract shared logic from ${relA}:${a.startLine}-${a.endLine} and ${relB}:${b.startLine}-${b.endLine} into a common utility.`,
						confidence: .85,
						reason: "Identical code blocks across files indicate copy-paste duplication that should be consolidated."
					},
					detail: {
						duplicateLocations: [
							{ file: relA, startLine: a.startLine, endLine: a.endLine },
							{ file: relB, startLine: b.startLine, endLine: b.endLine }
						],
						lineCount
					}
				}));
			}
		});
	}
	return diagnostics;
}
|
|
360
|
+
/**
 * Merge overlapping blocks of the same file into single regions.
 *
 * Within each file the blocks are processed in order of start line. A block
 * is folded into the previous region when the lines they share amount to
 * more than half of either one; the region's end line is then extended.
 * Otherwise the block starts a new region. The input objects are copied, not
 * modified.
 *
 * @param {object[]} blocks - Blocks with filePath, startLine and endLine.
 * @returns {object[]} Merged regions, grouped by file in first-seen order.
 */
function mergeOverlappingBlocks(blocks) {
	const perFile = new Map();
	for (const block of blocks) {
		if (!perFile.has(block.filePath)) perFile.set(block.filePath, []);
		perFile.get(block.filePath).push(block);
	}
	const lineCount = (region) => region.endLine - region.startLine + 1;
	const result = [];
	for (const fileBlocks of perFile.values()) {
		fileBlocks.sort((x, y) => x.startLine - y.startLine);
		const regions = [];
		for (const block of fileBlocks) {
			const current = regions[regions.length - 1];
			if (current === undefined) {
				regions.push({ ...block });
				continue;
			}
			const sharedFrom = Math.max(block.startLine, current.startLine);
			const sharedTo = Math.min(block.endLine, current.endLine);
			const shared = Math.max(0, sharedTo - sharedFrom + 1);
			const mostlyOverlaps = shared > lineCount(block) * .5 || shared > lineCount(current) * .5;
			if (mostlyOverlaps) current.endLine = Math.max(current.endLine, block.endLine);
			else regions.push({ ...block });
		}
		result.push(...regions);
	}
	return result;
}
|
|
392
|
+
/**
 * Report near-duplicate blocks across files using token-set similarity.
 *
 * Only blocks that carry a `tokenSet` take part. Every block of a file is
 * compared with every block of each later file. Pairs with identical
 * normalized text are skipped, and pairs whose Jaccard similarity reaches
 * SIMILARITY_THRESHOLD produce one "dup-detect/similar-block" info
 * diagnostic anchored at the block of the earlier file.
 *
 * The dedupe key keeps each start line attached to its own file. (Sorting
 * the four key parts together made A:10/B:20 and A:20/B:10 look like the
 * same pair, so one of them was dropped.)
 *
 * @param {object[]} allBlocks - Blocks with filePath, startLine, endLine,
 *   normalizedText and optionally tokenSet.
 * @param {string} rootDir - Directory that reported paths are made relative to.
 * @returns {object[]} Diagnostics.
 */
function detectSimilarBlocks(allBlocks, rootDir) {
	const diagnostics = [];
	const byFile = new Map();
	for (const block of allBlocks) {
		if (!block.tokenSet) continue;
		const bucket = byFile.get(block.filePath);
		if (bucket) bucket.push(block);
		else byFile.set(block.filePath, [block]);
	}
	const files = [...byFile.keys()];
	const reported = new Set();
	for (let fi = 0; fi < files.length; fi++) for (let fj = fi + 1; fj < files.length; fj++) {
		const blocksA = byFile.get(files[fi]);
		const blocksB = byFile.get(files[fj]);
		for (const a of blocksA) for (const b of blocksB) {
			if (a.normalizedText === b.normalizedText) continue;
			const similarity = jaccardSimilarity(a.tokenSet, b.tokenSet);
			if (similarity < SIMILARITY_THRESHOLD) continue;
			// `a` always belongs to the earlier file, so the pair is already in a
			// canonical order; JSON keeps the parts unambiguous.
			const key = JSON.stringify([a.filePath, a.startLine, b.filePath, b.startLine]);
			if (reported.has(key)) continue;
			reported.add(key);
			const relA = relative(rootDir, a.filePath);
			const relB = relative(rootDir, b.filePath);
			const pct = Math.round(similarity * 100);
			diagnostics.push(diag({
				filePath: relA,
				rule: "dup-detect/similar-block",
				severity: "info",
				message: `Similar code block (${pct}% token overlap) found in ${relB}:${b.startLine}`,
				help: "Consider extracting shared logic into a common utility. Similar blocks often diverge over time, creating maintenance issues.",
				line: a.startLine,
				column: 1,
				fixable: false,
				suggestion: {
					type: "refactor",
					text: `Extract shared logic from ${relA}:${a.startLine}-${a.endLine} and ${relB}:${b.startLine}-${b.endLine} into a parameterized utility.`,
					confidence: .6,
					reason: `Jaccard similarity of ${pct}% suggests substantial code overlap that could be consolidated.`
				},
				detail: {
					similarity: pct,
					duplicateLocations: [{
						file: relA,
						startLine: a.startLine,
						endLine: a.endLine
					}, {
						file: relB,
						startLine: b.startLine,
						endLine: b.endLine
					}]
				}
			}));
		}
	}
	return diagnostics;
}
|
|
460
|
+
/**
 * Report modules that are imported by many files with overlapping symbols.
 *
 * Imports are grouped by module source. A module qualifies when at least
 * DUPLICATE_IMPORT_MIN_FILES distinct files import it. A symbol counts as
 * "common" when its number of occurrences is at least 30% of that file
 * count. One "dup-detect/duplicate-import-across-files" info diagnostic is
 * produced per qualifying module that has at least one common symbol,
 * anchored at the module's first recorded import.
 *
 * @param {object[]} allImports - Records with filePath, line, source and
 *   symbols.
 * @param {string} rootDir - Directory that reported paths are made relative to.
 * @returns {object[]} Diagnostics.
 */
function detectDuplicateImports(allImports, rootDir) {
	const bySource = new Map();
	for (const imp of allImports) {
		if (!bySource.has(imp.source)) bySource.set(imp.source, []);
		bySource.get(imp.source).push(imp);
	}
	const diagnostics = [];
	bySource.forEach((occurrences, source) => {
		const uniqueFiles = new Set(occurrences.map((occ) => occ.filePath));
		const fileCount = uniqueFiles.size;
		if (fileCount < DUPLICATE_IMPORT_MIN_FILES) return;
		const tally = new Map();
		for (const { symbols } of occurrences) {
			for (const symbol of symbols) tally.set(symbol, (tally.get(symbol) ?? 0) + 1);
		}
		const minCount = fileCount * .3;
		const commonSymbols = [...tally].filter(([, count]) => count >= minCount).map(([symbol]) => symbol).sort();
		if (commonSymbols.length === 0) return;
		const [representative] = occurrences;
		const symbolList = commonSymbols.join(", ");
		diagnostics.push(diag({
			filePath: relative(rootDir, representative.filePath),
			rule: "dup-detect/duplicate-import-across-files",
			severity: "info",
			message: `Module "${source}" imported in ${fileCount} files with common symbols: ${symbolList}`,
			help: `Create a shared re-export (barrel) file for "${source}" that re-exports the common symbols, then import from the barrel in each consumer.`,
			line: representative.line,
			column: 1,
			fixable: false,
			suggestion: {
				type: "refactor",
				text: `Create a barrel file (e.g., shared/${source.replace(/[/@]/g, "_")}.ts) with:\nexport { ${symbolList} } from "${source}";`,
				confidence: .7,
				reason: `${fileCount} files import the same common symbols from "${source}". A barrel file reduces duplication and simplifies future refactoring.`
			},
			detail: {
				module: source,
				fileCount,
				commonSymbols,
				files: [...uniqueFiles].map((file) => relative(rootDir, file))
			}
		}));
	});
	return diagnostics;
}
|
|
508
|
+
/**
 * Report string literals that are repeated across several files.
 *
 * Occurrences rejected by isConstantCandidate are ignored. A value is
 * reported when it occurs at least REPEATED_CONSTANT_MIN_OCCURRENCES times
 * and in at least that many distinct files. The
 * "dup-detect/repeated-constant" warning is anchored at the first occurrence
 * and lists up to ten locations in its detail.
 *
 * @param {object[]} allStrings - Records with filePath, line, col and value.
 * @param {string} rootDir - Directory that reported paths are made relative to.
 * @returns {object[]} Diagnostics.
 */
function detectRepeatedConstants(allStrings, rootDir) {
	const byValue = new Map();
	for (const occ of allStrings) {
		if (!isConstantCandidate(occ.value)) continue;
		const bucket = byValue.get(occ.value);
		if (bucket) bucket.push(occ);
		else byValue.set(occ.value, [occ]);
	}
	const diagnostics = [];
	byValue.forEach((occurrences, value) => {
		const count = occurrences.length;
		if (count < REPEATED_CONSTANT_MIN_OCCURRENCES) return;
		const fileCount = new Set(occurrences.map((occ) => occ.filePath)).size;
		if (fileCount < REPEATED_CONSTANT_MIN_OCCURRENCES) return;
		const [first] = occurrences;
		const locations = occurrences.slice(0, 10).map(({ filePath, line, col }) => ({
			file: relative(rootDir, filePath),
			line,
			column: col
		}));
		const suggestedName = toConstantName(value);
		const preview = value.length > 40 ? value.slice(0, 40) + "..." : value;
		diagnostics.push(diag({
			filePath: relative(rootDir, first.filePath),
			rule: "dup-detect/repeated-constant",
			severity: "warning",
			message: `String "${preview}" repeated ${count} times across ${fileCount} files`,
			help: `Extract this string to a shared constant (e.g., ${suggestedName}) to avoid duplication and ensure consistency.`,
			line: first.line,
			column: first.col,
			fixable: false,
			suggestion: {
				type: "refactor",
				text: `export const ${suggestedName} = "${value.replace(/"/g, "\\\"")}";`,
				confidence: .75,
				reason: `The same string literal appears ${count} times. Extracting it to a named constant improves maintainability and prevents typos.`
			},
			detail: {
				value,
				count,
				fileCount,
				locations
			}
		}));
	});
	return diagnostics;
}
|
|
557
|
+
/**
 * Derive a SCREAMING_SNAKE_CASE constant name from a string value.
 *
 * Non-alphanumeric characters act as word separators, purely numeric words
 * are dropped, and at most the first four remaining words are used. When no
 * word remains the fallback "SHARED_CONSTANT" is returned. A name that would
 * begin with a digit (for example from "3d model") gets a "CONST_" prefix,
 * because an identifier cannot start with a digit.
 *
 * @param {string} value - String literal contents.
 * @returns {string} Suggested constant name.
 */
function toConstantName(value) {
	const words = value.replace(/[^a-zA-Z0-9\s]/g, " ").split(/\s+/).filter((w) => w.length > 0 && !/^\d+$/.test(w)).slice(0, 4);
	if (words.length === 0) return "SHARED_CONSTANT";
	const name = words.map((w) => w.toUpperCase()).join("_");
	return /^\d/.test(name) ? `CONST_${name}` : name;
}
|
|
563
|
+
/**
 * Report functions that share an identical normalized body under different
 * names.
 *
 * Functions whose name is in COPY_PASTE_NAME_WHITELIST, or whose body has no
 * more than COPY_PASTE_MIN_BODY_LINES non-blank lines, are ignored. The rest
 * are grouped by normalized body, and every pair with different names inside
 * a group produces one "dup-detect/copy-paste-function" warning anchored at
 * the first function of the pair.
 *
 * @param {object[]} allFunctions - Records with filePath, name, startLine,
 *   endLine, bodyLineCount and bodyNormalized.
 * @param {string} rootDir - Directory that reported paths are made relative to.
 * @returns {object[]} Diagnostics.
 */
function detectCopyPasteFunctions(allFunctions, rootDir) {
	const byBody = new Map();
	for (const fn of allFunctions) {
		if (COPY_PASTE_NAME_WHITELIST.has(fn.name)) continue;
		if (!(fn.bodyLineCount > COPY_PASTE_MIN_BODY_LINES)) continue;
		const bucket = byBody.get(fn.bodyNormalized);
		if (bucket) bucket.push(fn);
		else byBody.set(fn.bodyNormalized, [fn]);
	}
	const diagnostics = [];
	for (const group of byBody.values()) {
		if (group.length < 2) continue;
		const nameCount = new Set(group.map((fn) => fn.name)).size;
		if (nameCount < 2) continue;
		group.forEach((a, i) => {
			for (const b of group.slice(i + 1)) {
				if (a.name === b.name) continue;
				const relA = relative(rootDir, a.filePath);
				const relB = relative(rootDir, b.filePath);
				diagnostics.push(diag({
					filePath: relA,
					rule: "dup-detect/copy-paste-function",
					severity: "warning",
					message: `Function "${a.name}" (${relA}:${a.startLine}) has identical body to "${b.name}" (${relB}:${b.startLine})`,
					help: "Extract the shared logic into a single utility function and call it from both locations, parameterizing any differences.",
					line: a.startLine,
					column: 1,
					fixable: false,
					suggestion: {
						type: "refactor",
						text: `Extract shared logic from "${a.name}" and "${b.name}" into a single utility function, parameterizing any behavioral differences.`,
						confidence: .9,
						reason: "Functions with identical bodies but different names are classic copy-paste duplication. This creates maintenance risk — fixes must be applied in multiple places."
					},
					detail: {
						duplicateLocations: [
							{ file: relA, name: a.name, startLine: a.startLine, endLine: a.endLine },
							{ file: relB, name: b.name, startLine: b.startLine, endLine: b.endLine }
						]
					}
				}));
			}
		});
	}
	return diagnostics;
}
|
|
615
|
+
/**
 * The dup-detect engine: scans the project's TS/JS/Python files and runs the
 * identical-block, similar-block (opt-in), duplicate-import,
 * repeated-constant and copy-paste-function checks.
 */
const dupDetectEngine = {
	name: "dup-detect",
	description: "Structural duplicate detection: identical blocks, similar blocks, duplicate imports, repeated constants, copy-paste functions",
	supportedLanguages: ["typescript", "javascript", "python"],
	/**
	 * Run every check and return the combined diagnostics.
	 *
	 * The run is skipped when none of `context.languages` is supported or when
	 * no file with a supported extension is found. Files that cannot be read
	 * are skipped. Similar-block detection only runs when the environment
	 * variable DEEPSLOP_SIMILAR_BLOCKS is "1".
	 *
	 * @param {object} context - Uses rootDirectory, languages, config.exclude
	 *   and files.
	 * @returns {Promise<object>} Result with engine, diagnostics, elapsed
	 *   (milliseconds, from performance.now) and skipped, plus skipReason when
	 *   skipped.
	 */
	async run(context) {
		const start = performance.now();
		const skip = (skipReason) => ({
			engine: this.name,
			diagnostics: [],
			elapsed: performance.now() - start,
			skipped: true,
			skipReason
		});
		const hasSupportedLanguage = context.languages.some((l) => this.supportedLanguages.includes(l));
		if (!hasSupportedLanguage) return skip("No supported languages detected (need typescript, javascript, or python)");
		const enableSimilarBlocks = process.env.DEEPSLOP_SIMILAR_BLOCKS === "1";
		const discovered = await collectFiles(context.rootDirectory, context.languages, context.config.exclude, context.files);
		const targetFiles = discovered.filter((f) => SUPPORTED_EXTS.has(extname(f)));
		if (targetFiles.length === 0) return skip("No supported files found to scan");
		const allBlocks = [];
		const allImports = [];
		const allStrings = [];
		const allFunctions = [];
		for (let batchStart = 0; batchStart < targetFiles.length; batchStart += FILE_BATCH_SIZE) {
			for (const filePath of targetFiles.slice(batchStart, batchStart + FILE_BATCH_SIZE)) {
				let content;
				try {
					content = await readFileContent(filePath);
				} catch {
					// Unreadable file: leave it out of the scan.
					continue;
				}
				const lines = toLines(content);
				const lang = languageFromPath(filePath);
				// Block extraction is skipped for files over the line limit.
				if (!(lines.length > LARGE_FILE_LINE_LIMIT)) {
					allBlocks.push(...extractBlocks(lines, IDENTICAL_BLOCK_MIN_LINES, BLOCK_OVERLAP_STEP, filePath, lang, enableSimilarBlocks));
				}
				for (const imp of extractImports(content, lang ?? "typescript")) {
					const symbols = extractNamedSymbols(imp.raw, lang);
					allImports.push({
						filePath,
						line: imp.line,
						source: imp.source,
						symbols
					});
				}
				for (const { num, text } of lines) {
					for (const { col, value } of extractStringLiterals(text, lang)) {
						allStrings.push({
							filePath,
							line: num,
							col,
							value
						});
					}
				}
				allFunctions.push(...extractFunctions(content, filePath, lang));
			}
		}
		const rootDir = context.rootDirectory;
		const diagnostics = [];
		diagnostics.push(...detectIdenticalBlocks(allBlocks, rootDir));
		if (enableSimilarBlocks) {
			const filesWithBlocks = new Set(allBlocks.map((b) => b.filePath));
			if (filesWithBlocks.size >= 2) diagnostics.push(...detectSimilarBlocks(allBlocks, rootDir));
		}
		diagnostics.push(...detectDuplicateImports(allImports, rootDir));
		diagnostics.push(...detectRepeatedConstants(allStrings, rootDir));
		diagnostics.push(...detectCopyPasteFunctions(allFunctions, rootDir));
		return {
			engine: this.name,
			diagnostics,
			elapsed: performance.now() - start,
			skipped: false
		};
	}
};
|
|
707
|
+
|
|
708
|
+
//#endregion
|
|
709
|
+
export { dupDetectEngine };
|