@aiready/pattern-detect 0.17.15 → 0.17.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analyzer-entry/index.d.mts +2 -2
- package/dist/analyzer-entry/index.d.ts +2 -2
- package/dist/analyzer-entry/index.js +357 -140
- package/dist/analyzer-entry/index.mjs +4 -4
- package/dist/chunk-3LMYFYWG.mjs +514 -0
- package/dist/chunk-4YXKUW4P.mjs +143 -0
- package/dist/chunk-5A3ULAQ5.mjs +571 -0
- package/dist/chunk-5FACKJ7M.mjs +519 -0
- package/dist/chunk-6B72OWZA.mjs +143 -0
- package/dist/chunk-6SHBBRHF.mjs +600 -0
- package/dist/chunk-BKSIA7A2.mjs +516 -0
- package/dist/chunk-CM5YJR7G.mjs +516 -0
- package/dist/chunk-FSXOU23F.mjs +620 -0
- package/dist/chunk-GUYQI3AF.mjs +514 -0
- package/dist/chunk-H2TGXGMX.mjs +587 -0
- package/dist/chunk-KMAOEVRS.mjs +150 -0
- package/dist/chunk-NWG2ZIGX.mjs +146 -0
- package/dist/chunk-OFVJFGQW.mjs +514 -0
- package/dist/chunk-PCCZREHY.mjs +143 -0
- package/dist/chunk-PQS5ACTN.mjs +516 -0
- package/dist/chunk-TVE75IDM.mjs +143 -0
- package/dist/chunk-UDOGQ42Q.mjs +603 -0
- package/dist/chunk-UFI4UDQI.mjs +514 -0
- package/dist/chunk-UXV57HN3.mjs +144 -0
- package/dist/chunk-VC2BOV6R.mjs +143 -0
- package/dist/chunk-VI2OVG73.mjs +514 -0
- package/dist/chunk-VKGYNHFY.mjs +514 -0
- package/dist/chunk-WBLZYAQ2.mjs +518 -0
- package/dist/chunk-WFVXMMB3.mjs +143 -0
- package/dist/chunk-WQC43BIO.mjs +516 -0
- package/dist/chunk-WTAIM3SG.mjs +146 -0
- package/dist/chunk-XC7U55PE.mjs +514 -0
- package/dist/chunk-XR373Q6G.mjs +146 -0
- package/dist/chunk-XWIBTD67.mjs +620 -0
- package/dist/chunk-YUQ2VQVJ.mjs +514 -0
- package/dist/chunk-Z4NOH52X.mjs +143 -0
- package/dist/cli.js +357 -140
- package/dist/cli.mjs +4 -4
- package/dist/context-rules-entry/index.js +351 -139
- package/dist/context-rules-entry/index.mjs +1 -1
- package/dist/detector-entry/index.d.mts +2 -2
- package/dist/detector-entry/index.d.ts +2 -2
- package/dist/detector-entry/index.js +355 -140
- package/dist/detector-entry/index.mjs +2 -2
- package/dist/index-BGvkJ9j1.d.mts +136 -0
- package/dist/index-BJq32qmj.d.mts +137 -0
- package/dist/index-BpoJSgX-.d.mts +136 -0
- package/dist/index-C7qLPKmH.d.ts +150 -0
- package/dist/index-CThnG9hv.d.ts +155 -0
- package/dist/index-D0Hpg9nN.d.mts +150 -0
- package/dist/index-DN6XpBOW.d.mts +155 -0
- package/dist/index-F8xqZ2PS.d.ts +136 -0
- package/dist/index-HNhDr6CV.d.ts +137 -0
- package/dist/index-ux0Wo8Ps.d.ts +136 -0
- package/dist/index.d.mts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +359 -142
- package/dist/index.mjs +4 -4
- package/dist/scoring-entry/index.d.mts +1 -1
- package/dist/scoring-entry/index.d.ts +1 -1
- package/dist/scoring-entry/index.js +2 -2
- package/dist/scoring-entry/index.mjs +1 -1
- package/dist/types-tgrmUrHE.d.mts +37 -0
- package/dist/types-tgrmUrHE.d.ts +37 -0
- package/package.json +5 -3
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import {
|
|
2
|
+
calculateSeverity
|
|
3
|
+
} from "./chunk-FSXOU23F.mjs";
|
|
4
|
+
|
|
5
|
+
// src/detector.ts
|
|
6
|
+
import {
|
|
7
|
+
calculateStringSimilarity,
|
|
8
|
+
calculateHeuristicConfidence,
|
|
9
|
+
extractCodeBlocks
|
|
10
|
+
} from "@aiready/core";
|
|
11
|
+
|
|
12
|
+
// src/core/normalizer.ts
/**
 * Normalizes a code snippet for similarity comparison: strips comments,
 * collapses string literals to STR and numbers to NUM, squeezes whitespace
 * to single spaces, and lowercases the result.
 *
 * NOTE(review): comment stripping is regex-based, so a `//` or `#` inside a
 * string literal is treated as a comment start — acceptable for a heuristic
 * normalizer, but worth confirming.
 *
 * @param {string} code - Raw source text (falsy values yield "").
 * @param {boolean} [isPython=false] - Strip `#` comments instead of `//` and `/* *\/`.
 * @returns {string} The normalized, lowercased representation.
 */
function normalizeCode(code, isPython = false) {
  if (!code) return "";
  // Remove comments first so their contents never reach the literal collapsing below.
  const withoutComments = isPython
    ? code.replace(/#.*/g, "")
    : code.replace(/\/\/.*$/gm, "").replace(/\/\*[\s\S]*?\*\//g, "");
  return withoutComments
    .replace(/"[^"]*"/g, '"STR"')
    .replace(/'[^']*'/g, "'STR'")
    .replace(/`[^`]*`/g, "`STR`")
    .replace(/\b\d+\b/g, "NUM")
    .replace(/\s+/g, " ")
    .trim()
    .toLowerCase();
}
|
|
23
|
+
|
|
24
|
+
// src/detector.ts
/**
 * Extracts comparable code blocks from a file's content.
 * Thin delegation to the shared `@aiready/core` implementation.
 *
 * @param {string} file - Path of the file the content came from.
 * @param {string} content - Raw file contents.
 * @returns The blocks produced by `extractCodeBlocks`.
 */
function extractBlocks(file, content) {
  const blocks = extractCodeBlocks(file, content);
  return blocks;
}
|
|
28
|
+
/**
 * Computes a similarity score between two normalized code strings.
 * Thin delegation to `calculateStringSimilarity` from `@aiready/core`.
 *
 * @param {string} a - First normalized code string.
 * @param {string} b - Second normalized code string.
 * @returns The similarity score as produced by the core helper.
 */
function calculateSimilarity(a, b) {
  const similarity = calculateStringSimilarity(a, b);
  return similarity;
}
|
|
31
|
+
/**
 * Computes a heuristic confidence score for a candidate duplicate.
 * Thin delegation to `calculateHeuristicConfidence` from `@aiready/core`.
 *
 * @param {number} similarity - Similarity score for the pair.
 * @param {number} tokens - Token count of the block.
 * @param {number} lines - Line count of the block.
 * @returns The confidence value as produced by the core helper.
 */
function calculateConfidence(similarity, tokens, lines) {
  const confidence = calculateHeuristicConfidence(similarity, tokens, lines);
  return confidence;
}
|
|
34
|
+
/**
 * Scans all extracted code blocks pairwise and reports near-duplicates.
 *
 * @param {Array<{file: string, content: string}>} fileContents - Files to scan.
 * @param {object} options - Detection options.
 * @param {number} options.minSimilarity - Minimum similarity (0..1) for a pair to be reported.
 * @param {number} options.minLines - Minimum block length in lines to be considered.
 * @param {boolean} [options.streamResults] - Log each duplicate to the console as found.
 * @param {(done: number, total: number, msg: string) => void} [options.onProgress] - Progress callback.
 * @param {string[]} [options.excludePatterns] - Regex strings; blocks whose code matches are skipped.
 * @param {string[]} [options.excludeFiles] - Glob patterns; matching files are skipped entirely.
 * @param {number} [options.confidenceThreshold=0] - Minimum heuristic confidence to report.
 * @param {string[]} [options.ignoreWhitelist] - Path fragments or "fileA::fileB" pairs to ignore.
 * @returns {Promise<object[]>} Duplicates sorted by similarity, highest first.
 */
async function detectDuplicatePatterns(fileContents, options) {
  const {
    minSimilarity,
    minLines,
    streamResults,
    onProgress,
    excludePatterns = [],
    excludeFiles = [],
    confidenceThreshold = 0,
    ignoreWhitelist = []
  } = options;
  const allBlocks = [];
  // excludePatterns are raw regex strings matched against block code.
  const excludeRegexes = (excludePatterns || []).map((p) => new RegExp(p, "i"));
  // excludeFiles are globs: `**` spans path separators, `*` stays within one segment.
  // FIX: use the destructured `excludeFiles` binding (previously re-read
  // options.excludeFiles, leaving the destructured default unused).
  // FIX: translate `**` through a placeholder so the single-`*` pass does not
  // rewrite the `*` inside the `.*` it just produced (previously `**` became `.[^/]*`).
  // `?` is now escaped so a literal `?` in a pattern cannot act as a regex quantifier.
  const excludeFileRegexes = (excludeFiles || []).map((f) => {
    const escaped = f.replace(/[.+?^${}()|[\]\\]/g, "\\$&");
    const regexStr = escaped
      .replace(/\*\*/g, "\0")
      .replace(/\*/g, "[^/]*")
      .replace(/\0/g, ".*");
    // Match the whole path or any substring of it, case-insensitively.
    return new RegExp(`^${regexStr}$|${regexStr}`, "i");
  });
  for (const { file, content } of fileContents) {
    if (excludeFileRegexes.some((regex) => regex.test(file))) continue;
    const blocks = extractBlocks(file, content);
    for (const b of blocks) {
      if (b.endLine - b.startLine + 1 < minLines) continue;
      const isExcluded = excludeRegexes.some((regex) => regex.test(b.code));
      if (isExcluded) continue;
      allBlocks.push(b);
    }
  }
  const duplicates = [];
  const totalBlocks = allBlocks.length;
  let comparisons = 0;
  // Pairwise comparison over n blocks: n*(n-1)/2 total comparisons.
  const totalComparisons = totalBlocks * (totalBlocks - 1) / 2;
  if (onProgress) {
    onProgress(
      0,
      totalComparisons,
      `Starting duplicate detection on ${totalBlocks} blocks...`
    );
  }
  for (let i = 0; i < allBlocks.length; i++) {
    // Yield to the event loop periodically so long scans stay responsive.
    if (i % 50 === 0 && i > 0) {
      await new Promise((resolve) => setImmediate(resolve));
      if (onProgress) {
        onProgress(
          comparisons,
          totalComparisons,
          `Analyzing blocks (${i}/${totalBlocks})...`
        );
      }
    }
    const b1 = allBlocks[i];
    const isPython1 = b1.file.toLowerCase().endsWith(".py");
    const norm1 = normalizeCode(b1.code, isPython1);
    for (let j = i + 1; j < allBlocks.length; j++) {
      comparisons++;
      const b2 = allBlocks[j];
      // Never report duplicates within the same file.
      if (b1.file === b2.file) continue;
      // A whitelist entry suppresses the pair when both paths contain it,
      // or when it names the exact pair in either "a::b" order.
      const isWhitelisted = ignoreWhitelist.some((pattern) => {
        return b1.file.includes(pattern) && b2.file.includes(pattern) || pattern === `${b1.file}::${b2.file}` || pattern === `${b2.file}::${b1.file}`;
      });
      if (isWhitelisted) continue;
      const isPython2 = b2.file.toLowerCase().endsWith(".py");
      const norm2 = normalizeCode(b2.code, isPython2);
      const sim = calculateSimilarity(norm1, norm2);
      if (sim >= minSimilarity) {
        // NOTE(review): confidence is derived from b1's size only — presumably
        // acceptable because the pair is near-identical; confirm.
        const confidence = calculateConfidence(
          sim,
          b1.tokens,
          b1.endLine - b1.startLine + 1
        );
        if (confidence < confidenceThreshold) continue;
        const { severity, reason, suggestion, matchedRule } = calculateSeverity(
          b1.file,
          b2.file,
          b1.code,
          sim,
          b1.endLine - b1.startLine + 1
        );
        const dup = {
          file1: b1.file,
          line1: b1.startLine,
          endLine1: b1.endLine,
          file2: b2.file,
          line2: b2.startLine,
          endLine2: b2.endLine,
          code1: b1.code,
          code2: b2.code,
          similarity: sim,
          confidence,
          patternType: b1.patternType,
          tokenCost: b1.tokens + b2.tokens,
          severity,
          reason,
          suggestion,
          matchedRule
        };
        duplicates.push(dup);
        if (streamResults)
          console.log(
            `[DUPLICATE] ${dup.file1}:${dup.line1} <-> ${dup.file2}:${dup.line2} (${Math.round(sim * 100)}%, conf: ${Math.round(confidence * 100)}%)`
          );
      }
    }
  }
  if (onProgress) {
    onProgress(
      totalComparisons,
      totalComparisons,
      `Duplicate detection complete. Found ${duplicates.length} patterns.`
    );
  }
  return duplicates.sort((a, b) => b.similarity - a.similarity);
}

export {
  detectDuplicatePatterns
};
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import {
|
|
2
|
+
calculateSeverity
|
|
3
|
+
} from "./chunk-FSXOU23F.mjs";
|
|
4
|
+
|
|
5
|
+
// src/detector.ts
|
|
6
|
+
import {
|
|
7
|
+
calculateStringSimilarity,
|
|
8
|
+
calculateHeuristicConfidence,
|
|
9
|
+
extractCodeBlocks
|
|
10
|
+
} from "@aiready/core";
|
|
11
|
+
|
|
12
|
+
// src/core/normalizer.ts
/**
 * Normalizes a code snippet for similarity comparison: strips comments,
 * collapses string literals to STR and numbers to NUM, squeezes whitespace
 * to single spaces, and lowercases the result.
 *
 * NOTE(review): comment stripping is regex-based, so a `//` or `#` inside a
 * string literal is treated as a comment start — acceptable for a heuristic
 * normalizer, but worth confirming.
 *
 * @param {string} code - Raw source text (falsy values yield "").
 * @param {boolean} [isPython=false] - Strip `#` comments instead of `//` and `/* *\/`.
 * @returns {string} The normalized, lowercased representation.
 */
function normalizeCode(code, isPython = false) {
  if (!code) return "";
  // Remove comments first so their contents never reach the literal collapsing below.
  const withoutComments = isPython
    ? code.replace(/#.*/g, "")
    : code.replace(/\/\/.*$/gm, "").replace(/\/\*[\s\S]*?\*\//g, "");
  return withoutComments
    .replace(/"[^"]*"/g, '"STR"')
    .replace(/'[^']*'/g, "'STR'")
    .replace(/`[^`]*`/g, "`STR`")
    .replace(/\b\d+\b/g, "NUM")
    .replace(/\s+/g, " ")
    .trim()
    .toLowerCase();
}
|
|
23
|
+
|
|
24
|
+
// src/detector.ts
/**
 * Extracts comparable code blocks from a file's content.
 * Thin delegation to the shared `@aiready/core` implementation.
 *
 * @param {string} file - Path of the file the content came from.
 * @param {string} content - Raw file contents.
 * @returns The blocks produced by `extractCodeBlocks`.
 */
function extractBlocks(file, content) {
  const blocks = extractCodeBlocks(file, content);
  return blocks;
}
|
|
28
|
+
/**
 * Computes a similarity score between two normalized code strings.
 * Thin delegation to `calculateStringSimilarity` from `@aiready/core`.
 *
 * @param {string} a - First normalized code string.
 * @param {string} b - Second normalized code string.
 * @returns The similarity score as produced by the core helper.
 */
function calculateSimilarity(a, b) {
  const similarity = calculateStringSimilarity(a, b);
  return similarity;
}
|
|
31
|
+
/**
 * Computes a heuristic confidence score for a candidate duplicate.
 * Thin delegation to `calculateHeuristicConfidence` from `@aiready/core`.
 *
 * @param {number} similarity - Similarity score for the pair.
 * @param {number} tokens - Token count of the block.
 * @param {number} lines - Line count of the block.
 * @returns The confidence value as produced by the core helper.
 */
function calculateConfidence(similarity, tokens, lines) {
  const confidence = calculateHeuristicConfidence(similarity, tokens, lines);
  return confidence;
}
|
|
34
|
+
/**
 * Scans all extracted code blocks pairwise and reports near-duplicates.
 *
 * @param {Array<{file: string, content: string}>} fileContents - Files to scan.
 * @param {object} options - Detection options.
 * @param {number} options.minSimilarity - Minimum similarity (0..1) for a pair to be reported.
 * @param {number} options.minLines - Minimum block length in lines to be considered.
 * @param {boolean} [options.streamResults] - Log each duplicate to the console as found.
 * @param {(done: number, total: number, msg: string) => void} [options.onProgress] - Progress callback.
 * @param {string[]} [options.excludePatterns] - Glob patterns matched (end-anchored) against block code.
 * @param {number} [options.confidenceThreshold=0] - Minimum heuristic confidence to report.
 * @param {string[]} [options.ignoreWhitelist] - Path fragments or "fileA::fileB" pairs to ignore.
 * @returns {Promise<object[]>} Duplicates sorted by similarity, highest first.
 */
async function detectDuplicatePatterns(fileContents, options) {
  const {
    minSimilarity,
    minLines,
    streamResults,
    onProgress,
    excludePatterns = [],
    confidenceThreshold = 0,
    ignoreWhitelist = []
  } = options;
  const allBlocks = [];
  // Glob-to-regex translation: `**` matches across path separators, `*` within
  // a single segment.
  // FIX: the previous version replaced the two-character sequence `\*`, which
  // the escape step never produces (`*` is not in its escape class), so a bare
  // `*` survived as a regex quantifier — wrong matches for `foo*`, and a
  // "Nothing to repeat" SyntaxError for patterns starting with `*`.
  // `**` goes through a placeholder so the single-`*` pass cannot mangle the
  // `.*` it produces; `?` is escaped so it cannot act as a regex quantifier.
  const regexStr = (f) => f
    .replace(/[.+?^${}()|[\]\\]/g, "\\$&")
    .replace(/\*\*/g, "\0")
    .replace(/\*/g, "[^/]*")
    .replace(/\0/g, ".*");
  const excludeRegexes = excludePatterns.map(
    (p) => new RegExp(`${regexStr(p)}$`, "i")
  );
  for (const { file, content } of fileContents) {
    const blocks = extractBlocks(file, content);
    for (const b of blocks) {
      if (b.endLine - b.startLine + 1 < minLines) continue;
      const isExcluded = excludeRegexes.some((regex) => regex.test(b.code));
      if (isExcluded) continue;
      allBlocks.push(b);
    }
  }
  const duplicates = [];
  const totalBlocks = allBlocks.length;
  let comparisons = 0;
  // Pairwise comparison over n blocks: n*(n-1)/2 total comparisons.
  const totalComparisons = totalBlocks * (totalBlocks - 1) / 2;
  if (onProgress) {
    onProgress(
      0,
      totalComparisons,
      `Starting duplicate detection on ${totalBlocks} blocks...`
    );
  }
  for (let i = 0; i < allBlocks.length; i++) {
    // Yield to the event loop periodically so long scans stay responsive.
    if (i % 50 === 0 && i > 0) {
      await new Promise((resolve) => setImmediate(resolve));
      if (onProgress) {
        onProgress(
          comparisons,
          totalComparisons,
          `Analyzing blocks (${i}/${totalBlocks})...`
        );
      }
    }
    const b1 = allBlocks[i];
    const isPython1 = b1.file.toLowerCase().endsWith(".py");
    const norm1 = normalizeCode(b1.code, isPython1);
    for (let j = i + 1; j < allBlocks.length; j++) {
      comparisons++;
      const b2 = allBlocks[j];
      // Never report duplicates within the same file.
      if (b1.file === b2.file) continue;
      // A whitelist entry suppresses the pair when both paths contain it,
      // or when it names the exact pair in either "a::b" order.
      const isWhitelisted = ignoreWhitelist.some((pattern) => {
        return b1.file.includes(pattern) && b2.file.includes(pattern) || pattern === `${b1.file}::${b2.file}` || pattern === `${b2.file}::${b1.file}`;
      });
      if (isWhitelisted) continue;
      const isPython2 = b2.file.toLowerCase().endsWith(".py");
      const norm2 = normalizeCode(b2.code, isPython2);
      const sim = calculateSimilarity(norm1, norm2);
      if (sim >= minSimilarity) {
        // NOTE(review): confidence is derived from b1's size only — presumably
        // acceptable because the pair is near-identical; confirm.
        const confidence = calculateConfidence(
          sim,
          b1.tokens,
          b1.endLine - b1.startLine + 1
        );
        if (confidence < confidenceThreshold) continue;
        const { severity, reason, suggestion, matchedRule } = calculateSeverity(
          b1.file,
          b2.file,
          b1.code,
          sim,
          b1.endLine - b1.startLine + 1
        );
        const dup = {
          file1: b1.file,
          line1: b1.startLine,
          endLine1: b1.endLine,
          file2: b2.file,
          line2: b2.startLine,
          endLine2: b2.endLine,
          code1: b1.code,
          code2: b2.code,
          similarity: sim,
          confidence,
          patternType: b1.patternType,
          tokenCost: b1.tokens + b2.tokens,
          severity,
          reason,
          suggestion,
          matchedRule
        };
        duplicates.push(dup);
        if (streamResults)
          console.log(
            `[DUPLICATE] ${dup.file1}:${dup.line1} <-> ${dup.file2}:${dup.line2} (${Math.round(sim * 100)}%, conf: ${Math.round(confidence * 100)}%)`
          );
      }
    }
  }
  if (onProgress) {
    onProgress(
      totalComparisons,
      totalComparisons,
      `Duplicate detection complete. Found ${duplicates.length} patterns.`
    );
  }
  return duplicates.sort((a, b) => b.similarity - a.similarity);
}

export {
  detectDuplicatePatterns
};
|