muaddib-scanner 2.3.1 → 2.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,242 +1,246 @@
1
- const fs = require('fs');
2
- const path = require('path');
3
- const { findFiles, forEachSafeFile } = require('../utils.js');
4
-
5
- const ENTROPY_EXCLUDED_DIRS = ['.git', '.muaddib-cache', '__compiled__', '__tests__', '__test__', 'dist', 'build'];
6
-
7
- // File patterns to skip (compiled/minified/bundled)
8
- const SKIP_FILE_PATTERNS = ['.min.js', '.bundle.js', '.prod.js'];
9
-
10
- // Minimum string length to analyze (short strings naturally have low entropy)
11
- const MIN_STRING_LENGTH = 50;
12
-
13
- // Thresholds (string-level only — file-level entropy removed, see design notes)
14
- const STRING_ENTROPY_MEDIUM = 5.5;
15
- const STRING_ENTROPY_HIGH = 6.5;
16
-
17
- // Long base64 threshold (chars) — base64 payloads >200 chars outside source maps are suspicious
18
- const LONG_BASE64_THRESHOLD = 200;
19
-
20
- // Whitelist patterns for non-malicious high-entropy strings
21
- const SOURCE_MAP_REGEX = /^data:application\/json;base64,/;
22
- const SHA256_HEX_REGEX = /^[0-9a-fA-F]{64}$/;
23
- const MD5_HEX_REGEX = /^[0-9a-fA-F]{32}$/;
24
- const UUID_REGEX = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/;
25
- const JWT_REGEX = /^eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+$/;
26
-
27
- // Obfuscation pattern detection
28
- const HEX_VAR_REGEX = /_0x[a-f0-9]{4,6}/g;
29
- const BASE64_CHARS_REGEX = /^[A-Za-z0-9+/=]+$/;
30
-
31
- /**
32
- * Check if a string matches a known non-malicious pattern.
33
- * @param {string} str - The string to check
34
- * @param {string} filePath - The file path (for context-dependent checks)
35
- * @returns {boolean} true if the string is whitelisted
36
- */
37
- function isWhitelistedString(str, filePath) {
38
- if (SOURCE_MAP_REGEX.test(str)) return true;
39
- if (SHA256_HEX_REGEX.test(str)) return true;
40
- if (MD5_HEX_REGEX.test(str)) return true;
41
- if (UUID_REGEX.test(str)) return true;
42
-
43
- // JWT tokens in test files
44
- if (JWT_REGEX.test(str)) {
45
- const lowerPath = filePath.toLowerCase();
46
- if (lowerPath.includes('test') || lowerPath.includes('spec') || lowerPath.includes('mock') || lowerPath.includes('fixture')) {
47
- return true;
48
- }
49
- }
50
-
51
- return false;
52
- }
53
-
54
- /**
55
- * Calculate Shannon entropy of a string.
56
- * @param {string} str - Input string
57
- * @returns {number} Entropy in bits (0-8)
58
- */
59
- function calculateShannonEntropy(str) {
60
- if (!str || str.length === 0) return 0;
61
-
62
- const freq = {};
63
- for (let i = 0; i < str.length; i++) {
64
- const ch = str[i];
65
- freq[ch] = (freq[ch] || 0) + 1;
66
- }
67
-
68
- const len = str.length;
69
- let entropy = 0;
70
- for (const ch in freq) {
71
- const p = freq[ch] / len;
72
- if (p > 0) {
73
- entropy -= p * Math.log2(p);
74
- }
75
- }
76
-
77
- return entropy;
78
- }
79
-
80
- /**
81
- * Extract string literals from JS source code via regex.
82
- * @param {string} content - JS source code
83
- * @returns {string[]} Array of string contents (without quotes)
84
- */
85
- function extractStringLiterals(content) {
86
- const strings = [];
87
- const regex = /(?:"([^"\\]*(?:\\.[^"\\]*)*)"|'([^'\\]*(?:\\.[^'\\]*)*)'|`([^`\\]*(?:\\.[^`\\]*)*)`)/g;
88
- let match;
89
- while ((match = regex.exec(content)) !== null) {
90
- const str = match[1] || match[2] || match[3];
91
- if (str) strings.push(str);
92
- }
93
- return strings;
94
- }
95
-
96
- /**
97
- * Check if a file should be skipped based on path patterns.
98
- * @param {string} filePath - Absolute file path
99
- * @returns {boolean} true if the file should be skipped
100
- */
101
- function shouldSkipFile(filePath) {
102
- const basename = path.basename(filePath);
103
- for (const pattern of SKIP_FILE_PATTERNS) {
104
- if (basename.endsWith(pattern)) return true;
105
- }
106
- return false;
107
- }
108
-
109
- /**
110
- * Check if file content contains a source map reference.
111
- * @param {string} content - File content
112
- * @returns {boolean}
113
- */
114
- function hasSourceMap(content) {
115
- return content.includes('//# sourceMappingURL=') || content.includes('//@ sourceMappingURL=');
116
- }
117
-
118
- /**
119
- * Detect JS obfuscation patterns that are signatures of real malware.
120
- * Returns an array of threats for patterns found in the file content.
121
- * @param {string} content - File content
122
- * @param {string} relativePath - Relative file path for threat reporting
123
- * @returns {Array} threats
124
- */
125
- function detectObfuscationPatterns(content, relativePath) {
126
- const threats = [];
127
-
128
- // 1. Hex variable names: _0x[a-f0-9]{4,6} — classic JS obfuscator signature
129
- const hexVarMatches = content.match(HEX_VAR_REGEX);
130
- if (hexVarMatches && hexVarMatches.length >= 3) {
131
- const uniqueVars = new Set(hexVarMatches);
132
- if (uniqueVars.size >= 3) {
133
- threats.push({
134
- type: 'js_obfuscation_pattern',
135
- severity: 'HIGH',
136
- message: `JS obfuscator hex variables detected (${uniqueVars.size} unique _0x* vars) — signature of javascript-obfuscator/obfuscator.io`,
137
- file: relativePath
138
- });
139
- }
140
- }
141
-
142
- // 2. Encoded string arrays: arrays of 20+ string literals that look like base64/hex
143
- const strings = extractStringLiterals(content);
144
- const encodedStrings = strings.filter(s => {
145
- if (s.length < 8) return false;
146
- return BASE64_CHARS_REGEX.test(s) && calculateShannonEntropy(s) > 4.5;
147
- });
148
- if (encodedStrings.length >= 20) {
149
- threats.push({
150
- type: 'js_obfuscation_pattern',
151
- severity: 'HIGH',
152
- message: `Encoded string array detected (${encodedStrings.length} base64/hex strings) — typical of string array rotation obfuscation`,
153
- file: relativePath
154
- });
155
- }
156
-
157
- // 3. eval() or Function() called with high-entropy content
158
- // Match: eval("...high entropy...") or Function("...high entropy...")
159
- const evalFuncRegex = /(?:eval|Function)\s*\(\s*(?:"([^"]{50,})"|'([^']{50,})'|`([^`]{50,})`)/g;
160
- let evalMatch;
161
- while ((evalMatch = evalFuncRegex.exec(content)) !== null) {
162
- const arg = evalMatch[1] || evalMatch[2] || evalMatch[3];
163
- if (arg) {
164
- const argEntropy = calculateShannonEntropy(arg);
165
- if (argEntropy > STRING_ENTROPY_MEDIUM) {
166
- threats.push({
167
- type: 'js_obfuscation_pattern',
168
- severity: 'HIGH',
169
- message: `eval/Function called with high-entropy argument (${argEntropy.toFixed(2)} bits, ${arg.length} chars) — likely executing obfuscated payload`,
170
- file: relativePath
171
- });
172
- break; // One finding per file is enough
173
- }
174
- }
175
- }
176
-
177
- // 4. Long base64 strings (>200 chars) outside source maps
178
- for (const str of strings) {
179
- if (str.length > LONG_BASE64_THRESHOLD && BASE64_CHARS_REGEX.test(str)) {
180
- // Skip source map data URLs
181
- if (SOURCE_MAP_REGEX.test(str)) continue;
182
- threats.push({
183
- type: 'js_obfuscation_pattern',
184
- severity: 'HIGH',
185
- message: `Long base64 payload detected (${str.length} chars) — possible encoded malicious code`,
186
- file: relativePath
187
- });
188
- break; // One finding per file is enough
189
- }
190
- }
191
-
192
- return threats;
193
- }
194
-
195
- /**
196
- * Scan JavaScript files for high-entropy strings and JS obfuscation patterns.
197
- * @param {string} targetPath - Directory to scan
198
- * @param {object} [options] - Options
199
- * @param {number} [options.entropyThreshold] - Custom string-level entropy threshold (default: 5.5)
200
- * @returns {Array} threats
201
- */
202
- function scanEntropy(targetPath, options = {}) {
203
- const threats = [];
204
- const stringThreshold = options.entropyThreshold || STRING_ENTROPY_MEDIUM;
205
- const files = findFiles(targetPath, { extensions: ['.js', '.mjs', '.cjs'], excludedDirs: ENTROPY_EXCLUDED_DIRS });
206
-
207
- const safeFiles = files.filter(f => !shouldSkipFile(f));
208
- forEachSafeFile(safeFiles, (file, content) => {
209
- // Skip files containing source maps (legitimate compiled output)
210
- if (hasSourceMap(content)) return;
211
-
212
- const relativePath = path.relative(targetPath, file);
213
-
214
- // Obfuscation pattern detection (MUADDIB-ENTROPY-003)
215
- const obfuscationThreats = detectObfuscationPatterns(content, relativePath);
216
- threats.push(...obfuscationThreats);
217
-
218
- // String-level entropy check (MUADDIB-ENTROPY-001)
219
- const strings = extractStringLiterals(content);
220
- for (const str of strings) {
221
- if (str.length < MIN_STRING_LENGTH) continue;
222
-
223
- // Skip whitelisted patterns
224
- if (isWhitelistedString(str, relativePath)) continue;
225
-
226
- const strEntropy = calculateShannonEntropy(str);
227
- if (strEntropy > stringThreshold) {
228
- const severity = strEntropy > STRING_ENTROPY_HIGH ? 'HIGH' : 'MEDIUM';
229
- threats.push({
230
- type: 'high_entropy_string',
231
- severity,
232
- message: `High entropy string (${strEntropy.toFixed(2)} bits, ${str.length} chars) possible base64/hex/encrypted payload`,
233
- file: relativePath
234
- });
235
- }
236
- }
237
- });
238
-
239
- return threats;
240
- }
241
-
242
- module.exports = { scanEntropy, calculateShannonEntropy };
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const { findFiles, forEachSafeFile } = require('../utils.js');
4
+
5
+ const ENTROPY_EXCLUDED_DIRS = ['.git', '.muaddib-cache', '__compiled__', '__tests__', '__test__', 'dist', 'build'];
6
+
7
+ // File patterns to skip (compiled/minified/bundled)
8
+ const SKIP_FILE_PATTERNS = ['.min.js', '.bundle.js', '.prod.js'];
9
+
10
+ // Files containing encoding/character tables have legitimately high entropy
11
+ const ENCODING_TABLE_RE = /(?:encoding|tables|unicode|charmap|codepage)/i;
12
+
13
+ // Minimum string length to analyze (short strings naturally have low entropy)
14
+ const MIN_STRING_LENGTH = 50;
15
+
16
+ // Thresholds (string-level only — file-level entropy removed, see design notes)
17
+ const STRING_ENTROPY_MEDIUM = 5.5;
18
+ const STRING_ENTROPY_HIGH = 6.5;
19
+
20
+ // Long base64 threshold (chars) — base64 payloads >200 chars outside source maps are suspicious
21
+ const LONG_BASE64_THRESHOLD = 200;
22
+
23
+ // Whitelist patterns for non-malicious high-entropy strings
24
+ const SOURCE_MAP_REGEX = /^data:application\/json;base64,/;
25
+ const SHA256_HEX_REGEX = /^[0-9a-fA-F]{64}$/;
26
+ const MD5_HEX_REGEX = /^[0-9a-fA-F]{32}$/;
27
+ const UUID_REGEX = /^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$/;
28
+ const JWT_REGEX = /^eyJ[A-Za-z0-9_-]+\.eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+$/;
29
+
30
+ // Obfuscation pattern detection
31
+ const HEX_VAR_REGEX = /_0x[a-f0-9]{4,6}/g;
32
+ const BASE64_CHARS_REGEX = /^[A-Za-z0-9+/=]+$/;
33
+
34
+ /**
35
+ * Check if a string matches a known non-malicious pattern.
36
+ * @param {string} str - The string to check
37
+ * @param {string} filePath - The file path (for context-dependent checks)
38
+ * @returns {boolean} true if the string is whitelisted
39
+ */
40
+ function isWhitelistedString(str, filePath) {
41
+ if (SOURCE_MAP_REGEX.test(str)) return true;
42
+ if (SHA256_HEX_REGEX.test(str)) return true;
43
+ if (MD5_HEX_REGEX.test(str)) return true;
44
+ if (UUID_REGEX.test(str)) return true;
45
+
46
+ // JWT tokens in test files
47
+ if (JWT_REGEX.test(str)) {
48
+ const lowerPath = filePath.toLowerCase();
49
+ if (lowerPath.includes('test') || lowerPath.includes('spec') || lowerPath.includes('mock') || lowerPath.includes('fixture')) {
50
+ return true;
51
+ }
52
+ }
53
+
54
+ return false;
55
+ }
56
+
57
+ /**
58
+ * Calculate Shannon entropy of a string.
59
+ * @param {string} str - Input string
60
+ * @returns {number} Entropy in bits per character; 0 for empty input, at most log2(number of distinct characters)
61
+ */
62
+ function calculateShannonEntropy(str) {
63
+ if (!str || str.length === 0) return 0;
64
+
65
+ const freq = {};
66
+ for (let i = 0; i < str.length; i++) {
67
+ const ch = str[i];
68
+ freq[ch] = (freq[ch] || 0) + 1;
69
+ }
70
+
71
+ const len = str.length;
72
+ let entropy = 0;
73
+ for (const ch in freq) {
74
+ const p = freq[ch] / len;
75
+ if (p > 0) {
76
+ entropy -= p * Math.log2(p);
77
+ }
78
+ }
79
+
80
+ return entropy;
81
+ }
82
+
83
+ /**
84
+ * Extract string literals from JS source code via regex.
85
+ * @param {string} content - JS source code
86
+ * @returns {string[]} Array of string contents (without quotes)
87
+ */
88
+ function extractStringLiterals(content) {
89
+ const strings = [];
90
+ const regex = /(?:"([^"\\]*(?:\\.[^"\\]*)*)"|'([^'\\]*(?:\\.[^'\\]*)*)'|`([^`\\]*(?:\\.[^`\\]*)*)`)/g;
91
+ let match;
92
+ while ((match = regex.exec(content)) !== null) {
93
+ const str = match[1] || match[2] || match[3];
94
+ if (str) strings.push(str);
95
+ }
96
+ return strings;
97
+ }
98
+
99
+ /**
100
+ * Check if a file should be skipped based on path patterns.
101
+ * @param {string} filePath - Absolute file path
102
+ * @returns {boolean} true if the file should be skipped
103
+ */
104
+ function shouldSkipFile(filePath) {
105
+ const basename = path.basename(filePath);
106
+ for (const pattern of SKIP_FILE_PATTERNS) {
107
+ if (basename.endsWith(pattern)) return true;
108
+ }
109
+ return false;
110
+ }
111
+
112
+ /**
113
+ * Check if file content contains a source map reference.
114
+ * @param {string} content - File content
115
+ * @returns {boolean}
116
+ */
117
+ function hasSourceMap(content) {
118
+ return content.includes('//# sourceMappingURL=') || content.includes('//@ sourceMappingURL=');
119
+ }
120
+
121
+ /**
122
+ * Detect JS obfuscation patterns that are signatures of real malware.
123
+ * Returns an array of threats for patterns found in the file content.
124
+ * @param {string} content - File content
125
+ * @param {string} relativePath - Relative file path for threat reporting
126
+ * @returns {Array} threats
127
+ */
128
+ function detectObfuscationPatterns(content, relativePath) {
129
+ const threats = [];
130
+
131
+ // 1. Hex variable names: _0x[a-f0-9]{4,6} — classic JS obfuscator signature
132
+ const hexVarMatches = content.match(HEX_VAR_REGEX);
133
+ if (hexVarMatches && hexVarMatches.length >= 3) {
134
+ const uniqueVars = new Set(hexVarMatches);
135
+ if (uniqueVars.size >= 3) {
136
+ threats.push({
137
+ type: 'js_obfuscation_pattern',
138
+ severity: 'HIGH',
139
+ message: `JS obfuscator hex variables detected (${uniqueVars.size} unique _0x* vars) — signature of javascript-obfuscator/obfuscator.io`,
140
+ file: relativePath
141
+ });
142
+ }
143
+ }
144
+
145
+ // 2. Encoded string arrays: arrays of 20+ string literals that look like base64/hex
146
+ const strings = extractStringLiterals(content);
147
+ const encodedStrings = strings.filter(s => {
148
+ if (s.length < 8) return false;
149
+ return BASE64_CHARS_REGEX.test(s) && calculateShannonEntropy(s) > 4.5;
150
+ });
151
+ if (encodedStrings.length >= 20) {
152
+ threats.push({
153
+ type: 'js_obfuscation_pattern',
154
+ severity: 'HIGH',
155
+ message: `Encoded string array detected (${encodedStrings.length} base64/hex strings) — typical of string array rotation obfuscation`,
156
+ file: relativePath
157
+ });
158
+ }
159
+
160
+ // 3. eval() or Function() called with high-entropy content
161
+ // Match: eval("...high entropy...") or Function("...high entropy...")
162
+ const evalFuncRegex = /(?:eval|Function)\s*\(\s*(?:"([^"]{50,})"|'([^']{50,})'|`([^`]{50,})`)/g;
163
+ let evalMatch;
164
+ while ((evalMatch = evalFuncRegex.exec(content)) !== null) {
165
+ const arg = evalMatch[1] || evalMatch[2] || evalMatch[3];
166
+ if (arg) {
167
+ const argEntropy = calculateShannonEntropy(arg);
168
+ if (argEntropy > STRING_ENTROPY_MEDIUM) {
169
+ threats.push({
170
+ type: 'js_obfuscation_pattern',
171
+ severity: 'HIGH',
172
+ message: `eval/Function called with high-entropy argument (${argEntropy.toFixed(2)} bits, ${arg.length} chars) — likely executing obfuscated payload`,
173
+ file: relativePath
174
+ });
175
+ break; // One finding per file is enough
176
+ }
177
+ }
178
+ }
179
+
180
+ // 4. Long base64 strings (>200 chars) outside source maps
181
+ for (const str of strings) {
182
+ if (str.length > LONG_BASE64_THRESHOLD && BASE64_CHARS_REGEX.test(str)) {
183
+ // Skip source map data URLs
184
+ if (SOURCE_MAP_REGEX.test(str)) continue;
185
+ threats.push({
186
+ type: 'js_obfuscation_pattern',
187
+ severity: 'HIGH',
188
+ message: `Long base64 payload detected (${str.length} chars) — possible encoded malicious code`,
189
+ file: relativePath
190
+ });
191
+ break; // One finding per file is enough
192
+ }
193
+ }
194
+
195
+ return threats;
196
+ }
197
+
198
+ /**
199
+ * Scan JavaScript files for high-entropy strings and JS obfuscation patterns.
200
+ * @param {string} targetPath - Directory to scan
201
+ * @param {object} [options] - Options
202
+ * @param {number} [options.entropyThreshold] - Custom string-level entropy threshold (default: 5.5)
203
+ * @returns {Array} threats
204
+ */
205
+ function scanEntropy(targetPath, options = {}) {
206
+ const threats = [];
207
+ const stringThreshold = options.entropyThreshold || STRING_ENTROPY_MEDIUM;
208
+ const files = findFiles(targetPath, { extensions: ['.js', '.mjs', '.cjs'], excludedDirs: ENTROPY_EXCLUDED_DIRS });
209
+
210
+ const safeFiles = files.filter(f => !shouldSkipFile(f));
211
+ forEachSafeFile(safeFiles, (file, content) => {
212
+ // Skip files containing source maps (legitimate compiled output)
213
+ if (hasSourceMap(content)) return;
214
+
215
+ const relativePath = path.relative(targetPath, file);
216
+
217
+ // Obfuscation pattern detection (MUADDIB-ENTROPY-003)
218
+ const obfuscationThreats = detectObfuscationPatterns(content, relativePath);
219
+ threats.push(...obfuscationThreats);
220
+
221
+ // String-level entropy check (MUADDIB-ENTROPY-001)
222
+ const strings = extractStringLiterals(content);
223
+ for (const str of strings) {
224
+ if (str.length < MIN_STRING_LENGTH) continue;
225
+
226
+ // Skip whitelisted patterns
227
+ if (isWhitelistedString(str, relativePath)) continue;
228
+
229
+ const strEntropy = calculateShannonEntropy(str);
230
+ if (strEntropy > stringThreshold) {
231
+ const isEncodingTable = ENCODING_TABLE_RE.test(relativePath);
232
+ const severity = isEncodingTable ? 'LOW' : (strEntropy > STRING_ENTROPY_HIGH ? 'HIGH' : 'MEDIUM');
233
+ threats.push({
234
+ type: 'high_entropy_string',
235
+ severity,
236
+ message: `High entropy string (${strEntropy.toFixed(2)} bits, ${str.length} chars) — possible base64/hex/encrypted payload`,
237
+ file: relativePath
238
+ });
239
+ }
240
+ }
241
+ });
242
+
243
+ return threats;
244
+ }
245
+
246
+ module.exports = { scanEntropy, calculateShannonEntropy };
@@ -19,7 +19,8 @@ function detectObfuscation(targetPath) {
19
19
  const isBundled = basename.endsWith('.bundle.js');
20
20
  const pathParts = relativePath.split(path.sep);
21
21
  const isInDistOrBuild = pathParts.some(p => p === 'dist' || p === 'build');
22
- const isPackageOutput = isMinified || isBundled || isInDistOrBuild;
22
+ const isLargeCjsMjs = (basename.endsWith('.cjs') || basename.endsWith('.mjs')) && content.length > 100 * 1024;
23
+ const isPackageOutput = isMinified || isBundled || isInDistOrBuild || isLargeCjsMjs;
23
24
 
24
25
  // 1. Ratio of code on a single line (skip .min.js — minification, not obfuscation)
25
26
  if (!isMinified) {