@aiready/pattern-detect 0.17.8 → 0.17.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analyzer-entry/index.d.mts +1 -1
- package/dist/analyzer-entry/index.d.ts +1 -1
- package/dist/analyzer-entry/index.js +370 -135
- package/dist/analyzer-entry/index.mjs +4 -3
- package/dist/chunk-2P7BQHGR.mjs +306 -0
- package/dist/{chunk-VGMM3L3O.mjs → chunk-3EORD7DC.mjs} +1 -1
- package/dist/{chunk-GREN7X5H.mjs → chunk-4PVPQMRT.mjs} +2 -2
- package/dist/{chunk-RS73WLNI.mjs → chunk-6VDL7TAS.mjs} +5 -113
- package/dist/chunk-AQIP4JGM.mjs +283 -0
- package/dist/{chunk-JBUZ6YHE.mjs → chunk-B4NLWKPZ.mjs} +85 -9
- package/dist/chunk-IPBGVPUX.mjs +143 -0
- package/dist/chunk-LUUJOUK5.mjs +283 -0
- package/dist/chunk-P3BOCGVV.mjs +498 -0
- package/dist/{scoring-entry.js → chunk-PHJE6A3J.mjs} +20 -37
- package/dist/chunk-PQXOORR4.mjs +234 -0
- package/dist/{chunk-GLKAGFKX.mjs → chunk-RDR75DVI.mjs} +85 -9
- package/dist/chunk-SXVLRPMF.mjs +143 -0
- package/dist/{chunk-DNZS4ESD.mjs → chunk-SY7RX5YQ.mjs} +85 -9
- package/dist/{context-rules-entry.js → chunk-TIBF7KST.mjs} +81 -78
- package/dist/chunk-WYYSQX5M.mjs +467 -0
- package/dist/{chunk-I6ETJC7L.mjs → chunk-X553BOMI.mjs} +56 -26
- package/dist/{chunk-K7BO57OO.mjs → chunk-Y6OB7K34.mjs} +80 -4
- package/dist/chunk-YLVV6YZ5.mjs +143 -0
- package/dist/chunk-ZUWPFVJV.mjs +115 -0
- package/dist/chunk-ZZMONVPE.mjs +467 -0
- package/dist/cli.js +402 -167
- package/dist/cli.mjs +4 -3
- package/dist/context-rules-entry/index.d.mts +35 -1
- package/dist/context-rules-entry/index.d.ts +35 -1
- package/dist/context-rules-entry/index.js +194 -48
- package/dist/context-rules-entry/index.mjs +1 -1
- package/dist/detector-entry/index.js +192 -46
- package/dist/detector-entry/index.mjs +2 -2
- package/dist/{analyzer-entry-BVz-HnZd.d.mts → index-B-pnXpgn.d.mts} +10 -1
- package/dist/{index-BwuoiCNm.d.ts → index-CWgYOKaK.d.ts} +35 -16
- package/dist/{index-BVz-HnZd.d.mts → index-Dl4BrGIT.d.mts} +35 -16
- package/dist/{analyzer-entry-BwuoiCNm.d.ts → index-DqS2e0kK.d.ts} +10 -1
- package/dist/index.d.mts +5 -6
- package/dist/index.d.ts +5 -6
- package/dist/index.js +467 -214
- package/dist/index.mjs +37 -22
- package/dist/scoring-entry/index.js +7 -3
- package/dist/scoring-entry/index.mjs +1 -1
- package/package.json +2 -2
- package/dist/analyzer-entry.d.mts +0 -100
- package/dist/analyzer-entry.d.ts +0 -100
- package/dist/analyzer-entry.js +0 -693
- package/dist/analyzer-entry.mjs +0 -12
- package/dist/chunk-262N2JB7.mjs +0 -497
- package/dist/chunk-2R7HOR5H.mjs +0 -777
- package/dist/chunk-3D7RVGHM.mjs +0 -64
- package/dist/chunk-3LS3E6MO.mjs +0 -508
- package/dist/chunk-3VRQYFW3.mjs +0 -782
- package/dist/chunk-3WK24ZOX.mjs +0 -860
- package/dist/chunk-3YYN6ZXN.mjs +0 -1038
- package/dist/chunk-4BPRGZRG.mjs +0 -1041
- package/dist/chunk-4UHDGB7U.mjs +0 -920
- package/dist/chunk-5LYDB7DY.mjs +0 -771
- package/dist/chunk-65G3HXLQ.mjs +0 -497
- package/dist/chunk-65UQ5J2J.mjs +0 -64
- package/dist/chunk-6JTVOBJX.mjs +0 -64
- package/dist/chunk-6OEHUI5J.mjs +0 -1045
- package/dist/chunk-6YUGU4P4.mjs +0 -914
- package/dist/chunk-7EJGNGXM.mjs +0 -771
- package/dist/chunk-7O2DUBSN.mjs +0 -1058
- package/dist/chunk-7S4AUL5S.mjs +0 -911
- package/dist/chunk-A76JUWER.mjs +0 -786
- package/dist/chunk-AJZUNNFH.mjs +0 -817
- package/dist/chunk-AXHGYYYZ.mjs +0 -404
- package/dist/chunk-BKRPSTT2.mjs +0 -64
- package/dist/chunk-BUBQ3W6W.mjs +0 -980
- package/dist/chunk-CCHM2VLK.mjs +0 -1051
- package/dist/chunk-CHFK6EBT.mjs +0 -419
- package/dist/chunk-CMT3MWWO.mjs +0 -948
- package/dist/chunk-CMWW24HW.mjs +0 -259
- package/dist/chunk-CTDBJP25.mjs +0 -1043
- package/dist/chunk-DGAKXYIP.mjs +0 -1041
- package/dist/chunk-DQSLTL7J.mjs +0 -788
- package/dist/chunk-DR5W7S3Z.mjs +0 -968
- package/dist/chunk-EFUKPMBE.mjs +0 -950
- package/dist/chunk-EVBFDILL.mjs +0 -927
- package/dist/chunk-EXORBAXR.mjs +0 -887
- package/dist/chunk-EZT3NZGB.mjs +0 -1057
- package/dist/chunk-FWUKMJEQ.mjs +0 -1133
- package/dist/chunk-GSJFORRO.mjs +0 -504
- package/dist/chunk-H4ADJYOG.mjs +0 -925
- package/dist/chunk-H5FB2USZ.mjs +0 -762
- package/dist/chunk-H73HEG7M.mjs +0 -670
- package/dist/chunk-HOS5Z2NC.mjs +0 -669
- package/dist/chunk-HXHQOQB5.mjs +0 -508
- package/dist/chunk-INEOYHUM.mjs +0 -911
- package/dist/chunk-INJ4SBTV.mjs +0 -754
- package/dist/chunk-J5CW6NYY.mjs +0 -64
- package/dist/chunk-JAFZCZAP.mjs +0 -776
- package/dist/chunk-JKVKOXYR.mjs +0 -407
- package/dist/chunk-JTHW7EYW.mjs +0 -1041
- package/dist/chunk-JWR3AHKO.mjs +0 -788
- package/dist/chunk-KC2CQMG2.mjs +0 -858
- package/dist/chunk-KDWGWBP5.mjs +0 -832
- package/dist/chunk-KPEK5REL.mjs +0 -919
- package/dist/chunk-KT6O2IAE.mjs +0 -861
- package/dist/chunk-KWMNN3TG.mjs +0 -391
- package/dist/chunk-LUA5FXSZ.mjs +0 -771
- package/dist/chunk-LYKRYBSM.mjs +0 -64
- package/dist/chunk-M4PQMW34.mjs +0 -480
- package/dist/chunk-MH6LBXZF.mjs +0 -816
- package/dist/chunk-MHU3CL4R.mjs +0 -64
- package/dist/chunk-MJWBS6SM.mjs +0 -1058
- package/dist/chunk-OFGMDX66.mjs +0 -402
- package/dist/chunk-P7B6Z4I2.mjs +0 -1043
- package/dist/chunk-PBCXSG7E.mjs +0 -658
- package/dist/chunk-PEEHSFDR.mjs +0 -1058
- package/dist/chunk-PSVG2NLH.mjs +0 -966
- package/dist/chunk-PWNQ6JZW.mjs +0 -508
- package/dist/chunk-QE4E3F7C.mjs +0 -410
- package/dist/chunk-QEP76HGK.mjs +0 -1039
- package/dist/chunk-QX2BQJEO.mjs +0 -1058
- package/dist/chunk-R2S73CVG.mjs +0 -503
- package/dist/chunk-RMGDSNLE.mjs +0 -770
- package/dist/chunk-S2KQFII2.mjs +0 -491
- package/dist/chunk-SLDK5PQK.mjs +0 -1129
- package/dist/chunk-SNSDVGWW.mjs +0 -783
- package/dist/chunk-SUUZMLPS.mjs +0 -391
- package/dist/chunk-SVCSIZ2A.mjs +0 -259
- package/dist/chunk-T2C6WS73.mjs +0 -670
- package/dist/chunk-TCG2G32F.mjs +0 -911
- package/dist/chunk-TGBZP7SB.mjs +0 -773
- package/dist/chunk-THF4RW63.mjs +0 -254
- package/dist/chunk-TJKDLVLN.mjs +0 -503
- package/dist/chunk-TXWPOVYU.mjs +0 -402
- package/dist/chunk-UB3CGOQ7.mjs +0 -64
- package/dist/chunk-UKIKN27B.mjs +0 -950
- package/dist/chunk-V5DP4FP6.mjs +0 -876
- package/dist/chunk-VRMXVYDZ.mjs +0 -419
- package/dist/chunk-WACZ5LFH.mjs +0 -1055
- package/dist/chunk-WC7CBAA7.mjs +0 -1058
- package/dist/chunk-WKBCNITM.mjs +0 -1072
- package/dist/chunk-WMOGJFME.mjs +0 -391
- package/dist/chunk-X4GR2N2M.mjs +0 -947
- package/dist/chunk-XCWY2DQY.mjs +0 -788
- package/dist/chunk-XJD35DS6.mjs +0 -1058
- package/dist/chunk-XNPID6FU.mjs +0 -391
- package/dist/chunk-XUUVS54V.mjs +0 -776
- package/dist/chunk-YCGV65F5.mjs +0 -508
- package/dist/chunk-YJYDBFT3.mjs +0 -780
- package/dist/chunk-YP3HEDQW.mjs +0 -859
- package/dist/chunk-YSDOUNJJ.mjs +0 -1142
- package/dist/chunk-Z6GBFFOV.mjs +0 -1040
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/context-rules-entry-y2uJSngh.d.mts +0 -60
- package/dist/context-rules-entry-y2uJSngh.d.ts +0 -60
- package/dist/context-rules-entry.d.mts +0 -55
- package/dist/context-rules-entry.d.ts +0 -55
- package/dist/context-rules-entry.mjs +0 -12
- package/dist/context-rules.d.ts +0 -41
- package/dist/context-rules.d.ts.map +0 -1
- package/dist/context-rules.js +0 -225
- package/dist/context-rules.js.map +0 -1
- package/dist/detector-entry.d.mts +0 -14
- package/dist/detector-entry.d.ts +0 -14
- package/dist/detector-entry.js +0 -301
- package/dist/detector-entry.mjs +0 -7
- package/dist/detector.d.ts +0 -40
- package/dist/detector.d.ts.map +0 -1
- package/dist/detector.js +0 -385
- package/dist/detector.js.map +0 -1
- package/dist/extractors/python-extractor.d.ts +0 -19
- package/dist/extractors/python-extractor.d.ts.map +0 -1
- package/dist/extractors/python-extractor.js +0 -164
- package/dist/extractors/python-extractor.js.map +0 -1
- package/dist/grouping.d.ts +0 -54
- package/dist/grouping.d.ts.map +0 -1
- package/dist/grouping.js +0 -347
- package/dist/grouping.js.map +0 -1
- package/dist/index-y2uJSngh.d.mts +0 -60
- package/dist/index-y2uJSngh.d.ts +0 -60
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/python-extractor-BGKGX6BK.mjs +0 -131
- package/dist/python-extractor-ELAKYK2W.mjs +0 -140
- package/dist/scoring-entry.d.mts +0 -23
- package/dist/scoring-entry.d.ts +0 -23
- package/dist/scoring-entry.mjs +0 -6
- package/dist/scoring.d.ts +0 -12
- package/dist/scoring.d.ts.map +0 -1
- package/dist/scoring.js +0 -116
- package/dist/scoring.js.map +0 -1
- package/dist/types-C4lmb2Yh.d.mts +0 -36
- package/dist/types-C4lmb2Yh.d.ts +0 -36
package/dist/chunk-CMWW24HW.mjs
DELETED
|
@@ -1,259 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
calculateSeverity
|
|
3
|
-
} from "./chunk-I6ETJC7L.mjs";
|
|
4
|
-
|
|
5
|
-
// src/detector.ts
|
|
6
|
-
import { estimateTokens } from "@aiready/core";
|
|
7
|
-
|
|
8
|
-
// src/core/normalizer.ts
|
|
9
|
-
/**
 * Normalize a code snippet so that structurally similar snippets compare equal.
 *
 * Comments are removed, every string literal is replaced by a fixed
 * placeholder ("STR", 'STR' or `STR`), standalone integer literals become
 * NUM, and whitespace runs collapse to a single space.
 *
 * Strings and comments are recognised in a single left-to-right pass. Doing
 * the two steps separately (comments first) breaks on inputs such as
 * "http://host" or '#fff': the comment marker inside the literal would cut the
 * line short and leave an unterminated quote behind. Backslash escapes inside
 * string literals are honoured so an escaped quote does not end the literal.
 *
 * @param {string} code - Source text to normalize; falsy input yields "".
 * @param {boolean} [isPython=false] - When true, `#` starts a comment;
 *   otherwise `//` line comments and block comments are stripped.
 * @returns {string} The normalized, trimmed text.
 */
function normalizeCode(code, isPython = false) {
  if (!code) return "";
  // Alternation order matters only per start position: whichever construct
  // begins first in the text wins, so quotes inside comments and comment
  // markers inside strings are both left alone.
  const stringOrComment = isPython
    ? /"(?:\\[\s\S]|[^"\\])*"|'(?:\\[\s\S]|[^'\\])*'|`(?:\\[\s\S]|[^`\\])*`|#.*/g
    : /"(?:\\[\s\S]|[^"\\])*"|'(?:\\[\s\S]|[^'\\])*'|`(?:\\[\s\S]|[^`\\])*`|\/\/.*$|\/\*[\s\S]*?\*\//gm;
  const stripped = code.replace(stringOrComment, (token) => {
    const delimiter = token[0];
    if (delimiter === '"' || delimiter === "'" || delimiter === "`") {
      return `${delimiter}STR${delimiter}`;
    }
    // Anything else matched by the pattern is a comment.
    return "";
  });
  return stripped.replace(/\b\d+\b/g, "NUM").replace(/\s+/g, " ").trim();
}
|
|
19
|
-
|
|
20
|
-
// src/detector.ts
|
|
21
|
-
/**
 * Extract candidate code blocks (functions, classes, schema constants and
 * `app.<verb>(` route handlers) from a source file.
 *
 * Files whose name ends in ".py" are delegated to `extractBlocksPython`.
 * For everything else a declaration regex locates block starts and a simple
 * brace counter finds the matching end. The brace counter does not skip
 * braces inside strings or comments.
 *
 * @param {string} file - File path; used for language detection and stored on
 *   each returned block.
 * @param {string} content - Full text of the file.
 * @returns {Array<{file: string, startLine: number, endLine: number,
 *   code: string, tokens: number, patternType: string}>} One entry per
 *   detected block, with 1-based inclusive line numbers.
 */
function extractBlocks(file, content) {
  const isPython = file.toLowerCase().endsWith(".py");
  if (isPython) {
    return extractBlocksPython(file, content);
  }
  const blocks = [];
  const lines = content.split("\n");
  // NOTE(review): without the `u` flag, `[a-zA-Z0-9_<>[]]+` parses as the
  // class `[a-zA-Z0-9_<>[]` followed by one or more literal `]`. It may have
  // been meant as `[a-zA-Z0-9_<>[\]]+`; left untouched because correcting it
  // would change which declarations match - confirm against the TS source.
  const blockRegex = /^\s*(?:export\s+)?(?:async\s+)?(?:public\s+|private\s+|protected\s+|internal\s+|static\s+|readonly\s+|virtual\s+|abstract\s+|override\s+)*(function|class|interface|type|enum|record|struct|void|func|[a-zA-Z0-9_<>[]]+)\s+([a-zA-Z0-9_]+)(?:\s*\(|(?:\s+extends|\s+implements|\s+where)?\s*\{)|^\s*(?:export\s+)?const\s+([a-zA-Z0-9_]+)\s*=\s*[a-zA-Z0-9_.]+\.object\(|^\s*(app\.(?:get|post|put|delete|patch|use))\(/gm;
  let match;
  while ((match = blockRegex.exec(content)) !== null) {
    // `^\s*` also consumes newlines, so the match may begin on blank lines
    // above the declaration. Measure from the first non-whitespace character
    // so startLine points at the declaration itself.
    const declOffset = match.index + match[0].search(/\S/);
    const startLine = content.substring(0, declOffset).split("\n").length;
    let type;
    let name;
    if (match[1]) {
      type = match[1];
      name = match[2];
    } else if (match[3]) {
      type = "const";
      name = match[3];
    } else {
      type = "handler";
      name = match[4];
    }
    let endLine = -1;
    let openBraces = 0;
    let foundStart = false;
    for (let i = declOffset; i < content.length; i++) {
      if (content[i] === "{") {
        openBraces++;
        foundStart = true;
      } else if (content[i] === "}") {
        openBraces--;
      }
      if (foundStart && openBraces === 0) {
        endLine = content.substring(0, i + 1).split("\n").length;
        break;
      }
    }
    if (endLine === -1) {
      // No balanced brace pair after the declaration: treat the block as the
      // declaration line alone.
      endLine = startLine;
    }
    const blockCode = lines.slice(startLine - 1, endLine).join("\n");
    const tokens = estimateTokens(blockCode);
    blocks.push({
      file,
      startLine,
      endLine,
      code: blockCode,
      tokens,
      patternType: inferPatternType(type, name)
    });
  }
  return blocks;
}
|
|
82
|
-
/**
 * Extract `def` / `class` blocks from Python source using indentation.
 *
 * A block starts at a (possibly `async`) `def` or `class` line and extends
 * over every following line that is blank or indented deeper than the
 * declaration; trailing blank lines are then trimmed off.
 *
 * @param {string} file - File path stored on each returned block.
 * @param {string} content - Full text of the Python file.
 * @returns {Array<{file: string, startLine: number, endLine: number,
 *   code: string, tokens: number, patternType: string}>} One entry per
 *   detected block, with 1-based inclusive line numbers.
 */
function extractBlocksPython(file, content) {
  const blocks = [];
  const lines = content.split("\n");
  const blockRegex = /^\s*(?:async\s+)?(def|class)\s+([a-zA-Z0-9_]+)/gm;
  let match;
  while ((match = blockRegex.exec(content)) !== null) {
    // `^\s*` also consumes newlines, so the match may begin on a blank line
    // above the declaration. Starting there would give an indent of -1 and
    // the dedent check below would never stop the block. Measure from the
    // first non-whitespace character instead.
    const declOffset = match.index + match[0].search(/\S/);
    const startLinePos = content.substring(0, declOffset).split("\n").length;
    const startLineIdx = startLinePos - 1;
    const initialIndent = lines[startLineIdx].search(/\S/);
    let endLineIdx = startLineIdx;
    for (let i = startLineIdx + 1; i < lines.length; i++) {
      const line = lines[i];
      if (line.trim().length === 0) {
        // Blank lines are tentatively part of the block; trimmed below if
        // they turn out to be trailing.
        endLineIdx = i;
        continue;
      }
      const currentIndent = line.search(/\S/);
      if (currentIndent <= initialIndent) {
        break;
      }
      endLineIdx = i;
    }
    while (endLineIdx > startLineIdx && lines[endLineIdx].trim().length === 0) {
      endLineIdx--;
    }
    const blockCode = lines.slice(startLineIdx, endLineIdx + 1).join("\n");
    const tokens = estimateTokens(blockCode);
    blocks.push({
      file,
      startLine: startLinePos,
      endLine: endLineIdx + 1,
      code: blockCode,
      tokens,
      patternType: inferPatternType(match[1], match[2])
    });
  }
  return blocks;
}
|
|
120
|
-
/**
 * Classify a block from its declaration keyword and identifier.
 *
 * Checks run in priority order: API handler, validator, utility, class,
 * component (identifier starts with an upper-case ASCII letter), plain
 * function, otherwise "unknown". Substring checks are case-insensitive.
 *
 * @param {string} keyword - Declaration keyword, or the synthetic values
 *   "handler" / "const" supplied by the block extractor.
 * @param {string} name - Declared identifier (or `app.<verb>` for handlers).
 * @returns {"api-handler"|"validator"|"utility"|"class-method"|"component"|"function"|"unknown"}
 */
function inferPatternType(keyword, name) {
  const n = name.toLowerCase();
  if (keyword === "handler" || n.includes("handler") || n.includes("controller") || n.startsWith("app.")) {
    return "api-handler";
  }
  if (n.includes("validate") || n.includes("schema")) return "validator";
  if (n.includes("util") || n.includes("helper")) return "utility";
  if (keyword === "class") return "class-method";
  // The capitalisation check must use the original identifier: the
  // lower-cased copy can never start with an upper-case letter.
  if (/^[A-Z]/.test(name)) return "component";
  if (keyword === "function") return "function";
  return "unknown";
}
|
|
132
|
-
/**
 * Jaccard similarity between the alphanumeric word sets of two strings.
 *
 * Identical strings score 1 without further work. Otherwise each string is
 * split on runs of non-alphanumeric characters, duplicates are discarded, and
 * the score is |shared words| / |all distinct words|. If either string has no
 * words the score is 0.
 *
 * @param {string} a - First text.
 * @param {string} b - Second text.
 * @returns {number} Similarity in the range [0, 1].
 */
function calculateSimilarity(a, b) {
  if (a === b) {
    return 1;
  }
  const toWords = (text) => text.split(/[^a-zA-Z0-9]+/).filter((word) => word.length > 0);
  const wordsA = toWords(a);
  const wordsB = toWords(b);
  if (wordsA.length === 0 || wordsB.length === 0) {
    return 0;
  }
  const uniqueA = new Set(wordsA);
  const uniqueB = new Set(wordsB);
  let sharedCount = 0;
  for (const word of uniqueA) {
    if (uniqueB.has(word)) {
      sharedCount += 1;
    }
  }
  // Size of the union by inclusion-exclusion.
  const distinctCount = uniqueA.size + uniqueB.size - sharedCount;
  return sharedCount / distinctCount;
}
|
|
143
|
-
/**
 * Turn a raw similarity score into a confidence value.
 *
 * Starts from the similarity and applies, in order: +0.05 when the block has
 * more than 20 lines, +0.05 when it has more than 200 tokens, and -0.1 when it
 * has fewer than 5 lines. The result is clamped to [0, 1].
 *
 * @param {number} similarity - Similarity score to start from.
 * @param {number} tokens - Token count of the block.
 * @param {number} lines - Line count of the block.
 * @returns {number} Confidence in the range [0, 1].
 */
function calculateConfidence(similarity, tokens, lines) {
  // Applied sequentially, in this order, so floating-point results are stable.
  const adjustments = [
    [lines > 20, 0.05],
    [tokens > 200, 0.05],
    [lines < 5, -0.1]
  ];
  let score = similarity;
  for (const [applies, delta] of adjustments) {
    if (applies) {
      score += delta;
    }
  }
  const cappedAtOne = Math.min(1, score);
  return Math.max(0, cappedAtOne);
}
|
|
150
|
-
/**
 * Find pairs of similar code blocks across different files.
 *
 * Blocks are extracted from every file, filtered by minimum length and by the
 * exclude patterns, then compared pairwise. Blocks from the same file are
 * never compared with each other. A pair is reported when its similarity
 * reaches `minSimilarity` and its confidence reaches `confidenceThreshold`.
 *
 * @param {Array<{file: string, content: string}>} fileContents - Files to scan.
 * @param {object} options
 * @param {number} options.minSimilarity - Minimum similarity for a pair to be
 *   reported.
 * @param {number} options.minLines - Blocks with fewer lines are ignored.
 * @param {boolean} [options.streamResults] - When truthy, each duplicate is
 *   also written to the console as it is found.
 * @param {(done: number, total: number, message: string) => void} [options.onProgress]
 *   - Progress callback, called at the start, every 50 outer blocks, and at
 *   the end.
 * @param {string[]} [options.excludePatterns=[]] - Regex sources (matched
 *   case-insensitively against block code); matching blocks are dropped.
 * @param {number} [options.confidenceThreshold=0] - Minimum confidence.
 * @param {string[]} [options.ignoreWhitelist=[]] - A pair is skipped when both
 *   file paths contain an entry, or an entry equals `fileA::fileB` in either
 *   order.
 * @returns {Promise<object[]>} Duplicate records sorted by similarity,
 *   highest first.
 */
async function detectDuplicatePatterns(fileContents, options) {
  const {
    minSimilarity,
    minLines,
    streamResults,
    onProgress,
    excludePatterns = [],
    confidenceThreshold = 0,
    ignoreWhitelist = []
  } = options;
  const allBlocks = [];
  const excludeRegexes = excludePatterns.map((p) => new RegExp(p, "i"));
  for (const { file, content } of fileContents) {
    const blocks = extractBlocks(file, content);
    for (const b of blocks) {
      if (b.endLine - b.startLine + 1 < minLines) continue;
      const isExcluded = excludeRegexes.some((regex) => regex.test(b.code));
      if (isExcluded) continue;
      allBlocks.push(b);
    }
  }
  // Normalize every block exactly once. Doing it inside the pair loop would
  // repeat the same work for each of the O(n^2) comparisons.
  const normalizedCode = allBlocks.map(
    (block) => normalizeCode(block.code, block.file.toLowerCase().endsWith(".py"))
  );
  const duplicates = [];
  const totalBlocks = allBlocks.length;
  let comparisons = 0;
  const totalComparisons = totalBlocks * (totalBlocks - 1) / 2;
  if (onProgress) {
    onProgress(
      0,
      totalComparisons,
      `Starting duplicate detection on ${totalBlocks} blocks...`
    );
  }
  for (let i = 0; i < allBlocks.length; i++) {
    if (i % 50 === 0 && i > 0) {
      // Yield to the event loop periodically so long scans do not starve
      // other work.
      await new Promise((resolve) => setImmediate(resolve));
      if (onProgress) {
        onProgress(
          comparisons,
          totalComparisons,
          `Analyzing blocks (${i}/${totalBlocks})...`
        );
      }
    }
    const b1 = allBlocks[i];
    const norm1 = normalizedCode[i];
    for (let j = i + 1; j < allBlocks.length; j++) {
      comparisons++;
      const b2 = allBlocks[j];
      if (b1.file === b2.file) continue;
      const isWhitelisted = ignoreWhitelist.some((pattern) => {
        return b1.file.includes(pattern) && b2.file.includes(pattern) || pattern === `${b1.file}::${b2.file}` || pattern === `${b2.file}::${b1.file}`;
      });
      if (isWhitelisted) continue;
      const sim = calculateSimilarity(norm1, normalizedCode[j]);
      if (sim >= minSimilarity) {
        // Confidence and severity are derived from the first block of the pair.
        const confidence = calculateConfidence(
          sim,
          b1.tokens,
          b1.endLine - b1.startLine + 1
        );
        if (confidence < confidenceThreshold) continue;
        const { severity, reason, suggestion, matchedRule } = calculateSeverity(
          b1.file,
          b2.file,
          b1.code,
          sim,
          b1.endLine - b1.startLine + 1
        );
        const dup = {
          file1: b1.file,
          line1: b1.startLine,
          endLine1: b1.endLine,
          file2: b2.file,
          line2: b2.startLine,
          endLine2: b2.endLine,
          code1: b1.code,
          code2: b2.code,
          similarity: sim,
          confidence,
          patternType: b1.patternType,
          tokenCost: b1.tokens + b2.tokens,
          severity,
          reason,
          suggestion,
          matchedRule
        };
        duplicates.push(dup);
        if (streamResults)
          console.log(
            `[DUPLICATE] ${dup.file1}:${dup.line1} <-> ${dup.file2}:${dup.line2} (${Math.round(sim * 100)}%, conf: ${Math.round(confidence * 100)}%)`
          );
      }
    }
  }
  if (onProgress) {
    onProgress(
      totalComparisons,
      totalComparisons,
      `Duplicate detection complete. Found ${duplicates.length} patterns.`
    );
  }
  return duplicates.sort((a, b) => b.similarity - a.similarity);
}
|
|
256
|
-
|
|
257
|
-
export {
|
|
258
|
-
detectDuplicatePatterns
|
|
259
|
-
};
|