muaddib-scanner 2.10.72 → 2.10.74
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/package.json +1 -1
- package/src/monitor/classify.js +14 -4
- package/src/monitor/deferred-sandbox.js +16 -6
- package/src/pipeline/executor.js +14 -5
- package/src/scanner/ast-detectors/handle-call-expression.js +39 -12
- package/src/scanner/ast-detectors/handle-variable-declarator.js +54 -0
- package/src/scanner/ast.js +5 -0
- package/src/scanner/obfuscation.js +47 -1
- package/src/scoring.js +63 -6
- package/src/shared/bundle-detect.js +148 -0
package/README.md
CHANGED
|
@@ -292,7 +292,7 @@ repos:
|
|
|
292
292
|
| **FPR** (Benign random) | **7.5%** (15/200) | 200 random npm packages, stratified sampling |
|
|
293
293
|
| **ADR** (Adversarial + Holdout) | **96.3%** (103/107) | 67 adversarial + 40 holdout (107 available on disk), global threshold=20 |
|
|
294
294
|
|
|
295
|
-
**
|
|
295
|
+
**3134 tests** across 66 files. **200 rules** (195 RULES + 5 PARANOID).
|
|
296
296
|
|
|
297
297
|
> **ML retrain methodology (v2.10.51):**
|
|
298
298
|
> - Ground truth: 377 confirmed_malicious via auto-labeler (OSSF malicious-packages, GitHub Advisory Database, npm registry takedown correlation)
|
|
@@ -340,10 +340,10 @@ npm test
|
|
|
340
340
|
|
|
341
341
|
### Testing
|
|
342
342
|
|
|
343
|
-
- **
|
|
343
|
+
- **3134 tests** across 66 modular test files
|
|
344
344
|
- **56 fuzz tests** - Malformed inputs, ReDoS, unicode, binary
|
|
345
345
|
- **Datadog 17K benchmark** - 14,587 confirmed malware samples (in-scope)
|
|
346
|
-
- **Ground truth validation** -
|
|
346
|
+
- **Ground truth validation** - 67 real-world attacks (93.75% TPR@3, 85.9% TPR@20)
|
|
347
347
|
- **False positive validation** - 14.0% FPR rules, 8.3% after ML on 532 curated npm packages, 7.5% on 200 random
|
|
348
348
|
|
|
349
349
|
---
|
package/package.json
CHANGED
package/src/monitor/classify.js
CHANGED
|
@@ -186,11 +186,21 @@ function isSuspectClassification(result) {
|
|
|
186
186
|
// sandbox queue, starving legitimate T1b/T2 candidates of the dedicated
|
|
187
187
|
// deferred slot.
|
|
188
188
|
//
|
|
189
|
-
//
|
|
190
|
-
//
|
|
191
|
-
//
|
|
189
|
+
// Threat model for this downgrade: an adversary reading the open-source
|
|
190
|
+
// rules can intentionally tune their malware to fire only LOW-severity
|
|
191
|
+
// patterns + 2 distinct non-T3 types to land in this fallback. If we
|
|
192
|
+
// downgrade ALL such cases to tier 3, a weak TIER1_TYPES match (e.g.,
|
|
193
|
+
// staged_payload at LOW, sandbox_evasion at LOW) would bypass sandbox
|
|
194
|
+
// verification entirely — TIER1_TYPES are "quasi-never legitimate" and
|
|
195
|
+
// weak matches still warrant dynamic inspection.
|
|
196
|
+
//
|
|
197
|
+
// Therefore: preserve tier 2 when EITHER (a) any finding is non-LOW
|
|
198
|
+
// severity OR (b) any finding is in TIER1_TYPES even at LOW severity.
|
|
199
|
+
// Downgrade to tier 3 only for packages with 2+ distinct LOW findings
|
|
200
|
+
// where NONE are in the quasi-never-legit TIER1 zone.
|
|
192
201
|
const hasNonLowFinding = result.threats.some(t => t.severity !== 'LOW');
|
|
193
|
-
|
|
202
|
+
const hasTier1Signal = result.threats.some(t => TIER1_TYPES.has(t.type));
|
|
203
|
+
if (hasNonLowFinding || hasTier1Signal) {
|
|
194
204
|
return { suspect: true, tier: 2 };
|
|
195
205
|
}
|
|
196
206
|
return { suspect: true, tier: 3 };
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
const fs = require('fs');
|
|
16
16
|
const path = require('path');
|
|
17
17
|
const { runSandbox } = require('../sandbox/index.js');
|
|
18
|
-
const { isCanaryEnabled } = require('./classify.js');
|
|
18
|
+
const { isCanaryEnabled, TIER1_TYPES } = require('./classify.js');
|
|
19
19
|
const { getWebhookUrl, alertedPackageRules, persistAlert, buildAlertData } = require('./webhook.js');
|
|
20
20
|
const { sendWebhook } = require('../webhook.js');
|
|
21
21
|
const { atomicWriteFileSync } = require('./state.js');
|
|
@@ -59,11 +59,21 @@ function enqueueDeferred(item) {
|
|
|
59
59
|
|
|
60
60
|
// Defense-in-depth: block low-score items regardless of tier. With the
|
|
61
61
|
// classify.js:183 fallback fix in place, no legitimate enqueue should
|
|
62
|
-
// reach this function with score < DEFERRED_MIN_SCORE
|
|
63
|
-
// console.error makes a future
|
|
64
|
-
// leaks low-score items) loud
|
|
65
|
-
|
|
66
|
-
|
|
62
|
+
// reach this function with score < DEFERRED_MIN_SCORE unless it carries
|
|
63
|
+
// a TIER1_TYPES signal. Logging with console.error makes a future
|
|
64
|
+
// regression (new classification path that leaks low-score items) loud
|
|
65
|
+
// in operator logs.
|
|
66
|
+
//
|
|
67
|
+
// Threat-model exception: packages containing any TIER1_TYPES finding
|
|
68
|
+
// (even at LOW severity) must bypass this min-score guard. TIER1_TYPES
|
|
69
|
+
// are "quasi-never legitimate in benign packages" and weak matches
|
|
70
|
+
// still warrant sandbox verification — an adversary could otherwise
|
|
71
|
+
// tune their malware to fire only LOW-severity TIER1 patterns to
|
|
72
|
+
// bypass sandbox entirely.
|
|
73
|
+
const itemThreats = (item.staticResult && item.staticResult.threats) || [];
|
|
74
|
+
const hasTier1Signal = itemThreats.some(t => TIER1_TYPES.has(t.type));
|
|
75
|
+
if ((item.riskScore || 0) < DEFERRED_MIN_SCORE && !hasTier1Signal) {
|
|
76
|
+
console.error(`[DEFERRED] REJECTED: ${item.name}@${item.version} — score=${item.riskScore || 0} below minimum ${DEFERRED_MIN_SCORE}, no TIER1 signal (possible classification regression)`);
|
|
67
77
|
return false;
|
|
68
78
|
}
|
|
69
79
|
|
package/src/pipeline/executor.js
CHANGED
|
@@ -232,11 +232,18 @@ async function execute(targetPath, options, pythonDeps, warnings) {
|
|
|
232
232
|
if (wasFilesCapped()) {
|
|
233
233
|
warnings.push('File count cap reached (500 files) — overflow files scanned in quick-scan mode (lifecycle + child_process only).');
|
|
234
234
|
const overflowFiles = getOverflowFiles();
|
|
235
|
+
// v2.10.73 P3: Quick-scan is a DEGRADED regex-based pass — no AST, no scope
|
|
236
|
+
// tracking. It cannot distinguish exec() at module top-level (CRITICAL) from
|
|
237
|
+
// exec() inside an exported route handler (LOW runtime). Forensic audit of v2.10.72:
|
|
238
|
+
// 18+ AST-007 fires on rsshub/dist-lib/*.mjs where spawn() lives inside exported
|
|
239
|
+
// route handlers. Default severity is now MEDIUM (downgraded from HIGH). Module._load
|
|
240
|
+
// remains CRITICAL — very rare outside of malware. Threats are flagged `degraded:true`
|
|
241
|
+
// so scoring.js excludes them from max_file_score (see the degraded bucket in calculateRiskScore).
|
|
235
242
|
const QUICK_SCAN_PATTERNS = [
|
|
236
|
-
{ re: /\brequire\s*\(\s*['"]child_process['"]\s*\)/, type: 'dangerous_exec', severity: '
|
|
237
|
-
{ re: /\brequire\s*\(\s*['"]node:child_process['"]\s*\)/, type: 'dangerous_exec', severity: '
|
|
238
|
-
{ re: /\b(?:exec|execSync|spawn|spawnSync)\s*\(/, type: 'dangerous_exec', severity: '
|
|
239
|
-
{ re: /\bprocess\.mainModule\b/, type: 'dynamic_require', severity: '
|
|
243
|
+
{ re: /\brequire\s*\(\s*['"]child_process['"]\s*\)/, type: 'dangerous_exec', severity: 'MEDIUM', label: 'require("child_process")' },
|
|
244
|
+
{ re: /\brequire\s*\(\s*['"]node:child_process['"]\s*\)/, type: 'dangerous_exec', severity: 'MEDIUM', label: 'require("node:child_process")' },
|
|
245
|
+
{ re: /\b(?:exec|execSync|spawn|spawnSync)\s*\(/, type: 'dangerous_exec', severity: 'MEDIUM', label: 'exec/spawn call' },
|
|
246
|
+
{ re: /\bprocess\.mainModule\b/, type: 'dynamic_require', severity: 'MEDIUM', label: 'process.mainModule' },
|
|
240
247
|
{ re: /\bModule\._load\b/, type: 'module_load_bypass', severity: 'CRITICAL', label: 'Module._load' }
|
|
241
248
|
];
|
|
242
249
|
for (const filePath of overflowFiles) {
|
|
@@ -251,7 +258,9 @@ async function execute(targetPath, options, pythonDeps, warnings) {
|
|
|
251
258
|
type: pat.type,
|
|
252
259
|
severity: pat.severity,
|
|
253
260
|
message: `[quick-scan] ${pat.label} detected in overflow file.`,
|
|
254
|
-
file: relFile
|
|
261
|
+
file: relFile,
|
|
262
|
+
degraded: true, // P3: regex-only detection, no semantic context
|
|
263
|
+
quickScan: true
|
|
255
264
|
});
|
|
256
265
|
}
|
|
257
266
|
}
|
|
@@ -89,6 +89,15 @@ function handleCallExpression(node, ctx) {
|
|
|
89
89
|
// Check if variable was reassignment-tracked to a dangerous module
|
|
90
90
|
const DANGEROUS_MODS_REQ = ['child_process', 'fs', 'net', 'dns', 'http', 'https', 'tls'];
|
|
91
91
|
const resolvedVal = ctx.stringVarValues?.get(arg.name);
|
|
92
|
+
// v2.10.73 P2: source-aware severity (AST-006 plugin loader FP fix)
|
|
93
|
+
// Distinguishes plugin loaders (LOW) from obfuscation (HIGH) from env exfil (CRITICAL).
|
|
94
|
+
// See src/scanner/ast-detectors/handle-variable-declarator.js ctx.varSource tracking.
|
|
95
|
+
const varSource = ctx.varSource?.get(arg.name) || null;
|
|
96
|
+
const isStaticSource =
|
|
97
|
+
varSource === 'string_literal' || varSource === 'array_literal' ||
|
|
98
|
+
varSource === 'object_literal' || varSource === 'fs_readdir' ||
|
|
99
|
+
varSource === 'require_json';
|
|
100
|
+
const isCriticalSource = varSource === 'env_var';
|
|
92
101
|
if (resolvedVal) {
|
|
93
102
|
const norm = resolvedVal.startsWith('node:') ? resolvedVal.slice(5) : resolvedVal;
|
|
94
103
|
if (DANGEROUS_MODS_REQ.includes(norm)) {
|
|
@@ -98,28 +107,46 @@ function handleCallExpression(node, ctx) {
|
|
|
98
107
|
file: ctx.relFile
|
|
99
108
|
});
|
|
100
109
|
} else {
|
|
101
|
-
//
|
|
102
|
-
//
|
|
103
|
-
|
|
110
|
+
// Plugin loader qualification:
|
|
111
|
+
// - string_literal/array_literal/object_literal/fs_readdir/require_json → LOW (legit plugin loader)
|
|
112
|
+
// - env_var → CRITICAL (require(process.env.X) = credential/path exfil vector)
|
|
113
|
+
// - fallback to staticAssignments for legacy coverage
|
|
114
|
+
// - else → HIGH (real obfuscation candidate)
|
|
115
|
+
let severity, message;
|
|
116
|
+
if (isCriticalSource) {
|
|
117
|
+
severity = 'CRITICAL';
|
|
118
|
+
message = `Dynamic require() with variable "${arg.name}" sourced from process.env — environment-driven module loading (credential or path exfil vector).`;
|
|
119
|
+
} else if (isStaticSource || ctx.staticAssignments.has(arg.name)) {
|
|
120
|
+
severity = 'LOW';
|
|
121
|
+
message = `Dynamic require() with statically-assigned variable "${arg.name}" (plugin loader pattern, source: ${varSource || 'static-value'}).`;
|
|
122
|
+
} else {
|
|
123
|
+
severity = 'HIGH';
|
|
124
|
+
message = 'Dynamic require() with variable argument (module name obfuscation).';
|
|
125
|
+
}
|
|
104
126
|
ctx.threats.push({
|
|
105
127
|
type: 'dynamic_require',
|
|
106
128
|
severity,
|
|
107
|
-
message
|
|
108
|
-
? `Dynamic require() with statically-assigned variable "${arg.name}" (plugin loader pattern).`
|
|
109
|
-
: 'Dynamic require() with variable argument (module name obfuscation).',
|
|
129
|
+
message,
|
|
110
130
|
file: ctx.relFile
|
|
111
131
|
});
|
|
112
132
|
}
|
|
113
133
|
} else {
|
|
114
|
-
//
|
|
115
|
-
|
|
116
|
-
|
|
134
|
+
// Same qualification flow without resolvedVal context
|
|
135
|
+
let severity, message;
|
|
136
|
+
if (isCriticalSource) {
|
|
137
|
+
severity = 'CRITICAL';
|
|
138
|
+
message = `Dynamic require() with variable "${arg.name}" sourced from process.env — environment-driven module loading (credential or path exfil vector).`;
|
|
139
|
+
} else if (isStaticSource || ctx.staticAssignments.has(arg.name)) {
|
|
140
|
+
severity = 'LOW';
|
|
141
|
+
message = `Dynamic require() with statically-assigned variable "${arg.name}" (plugin loader pattern, source: ${varSource || 'static-value'}).`;
|
|
142
|
+
} else {
|
|
143
|
+
severity = 'HIGH';
|
|
144
|
+
message = 'Dynamic require() with variable argument (module name obfuscation).';
|
|
145
|
+
}
|
|
117
146
|
ctx.threats.push({
|
|
118
147
|
type: 'dynamic_require',
|
|
119
148
|
severity,
|
|
120
|
-
message
|
|
121
|
-
? `Dynamic require() with statically-assigned variable "${arg.name}" (plugin loader pattern).`
|
|
122
|
-
: 'Dynamic require() with variable argument (module name obfuscation).',
|
|
149
|
+
message,
|
|
123
150
|
file: ctx.relFile
|
|
124
151
|
});
|
|
125
152
|
}
|
|
@@ -24,6 +24,60 @@ function handleVariableDeclarator(node, ctx) {
|
|
|
24
24
|
ctx.staticAssignments.add(node.id.name);
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
+
// v2.10.73 P2: Track WHERE the variable's value originated — used by AST-006
|
|
28
|
+
// to distinguish plugin loaders (LOW) from real obfuscation (HIGH) from
|
|
29
|
+
// credential exfil vectors (CRITICAL). See src/scanner/ast-detectors/handle-call-expression.js
|
|
30
|
+
// around line 103 for consumption.
|
|
31
|
+
if (ctx.varSource && node.init) {
|
|
32
|
+
const init = node.init;
|
|
33
|
+
let source = null;
|
|
34
|
+
if (init.type === 'Literal' && typeof init.value === 'string') {
|
|
35
|
+
source = 'string_literal';
|
|
36
|
+
} else if (init.type === 'TemplateLiteral' && (init.expressions?.length || 0) === 0) {
|
|
37
|
+
source = 'string_literal'; // template with no interpolations is effectively a literal
|
|
38
|
+
} else if (init.type === 'ArrayExpression') {
|
|
39
|
+
source = 'array_literal';
|
|
40
|
+
} else if (init.type === 'ObjectExpression') {
|
|
41
|
+
source = 'object_literal';
|
|
42
|
+
} else if (init.type === 'MemberExpression' &&
|
|
43
|
+
init.object?.type === 'MemberExpression' &&
|
|
44
|
+
init.object.object?.type === 'Identifier' &&
|
|
45
|
+
init.object.object.name === 'process' &&
|
|
46
|
+
init.object.property?.type === 'Identifier' &&
|
|
47
|
+
init.object.property.name === 'env') {
|
|
48
|
+
source = 'env_var'; // const m = process.env.MODULE_NAME
|
|
49
|
+
} else if (init.type === 'CallExpression') {
|
|
50
|
+
const callee = init.callee;
|
|
51
|
+
// fs.readdirSync / fs.readdir / fs.promises.readdir — directory listings
|
|
52
|
+
// are not attacker-controllable unless the dir itself is, which is rare.
|
|
53
|
+
if (callee?.type === 'MemberExpression') {
|
|
54
|
+
const propName = callee.property?.type === 'Identifier' ? callee.property.name : null;
|
|
55
|
+
const objName = callee.object?.type === 'Identifier' ? callee.object.name : null;
|
|
56
|
+
const objPropName = callee.object?.type === 'MemberExpression' &&
|
|
57
|
+
callee.object.property?.type === 'Identifier'
|
|
58
|
+
? callee.object.property.name : null;
|
|
59
|
+
if (objName === 'fs' && propName && /^readdir/.test(propName)) {
|
|
60
|
+
source = 'fs_readdir';
|
|
61
|
+
} else if (objPropName === 'promises' && propName === 'readdir') {
|
|
62
|
+
source = 'fs_readdir'; // fs.promises.readdir
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
// require('./config.json') or require('./cfg.json') — loading a local JSON
|
|
66
|
+
// config is a legit plugin loader pattern (consumer-owned JSON file).
|
|
67
|
+
if (!source &&
|
|
68
|
+
callee?.type === 'Identifier' && callee.name === 'require' &&
|
|
69
|
+
init.arguments?.[0]?.type === 'Literal' &&
|
|
70
|
+
typeof init.arguments[0].value === 'string' &&
|
|
71
|
+
/\.json$/.test(init.arguments[0].value)) {
|
|
72
|
+
source = 'require_json';
|
|
73
|
+
}
|
|
74
|
+
if (!source) source = 'function_call';
|
|
75
|
+
} else {
|
|
76
|
+
source = 'computed_expression';
|
|
77
|
+
}
|
|
78
|
+
ctx.varSource.set(node.id.name, source);
|
|
79
|
+
}
|
|
80
|
+
|
|
27
81
|
// Track dynamic require vars + module aliases
|
|
28
82
|
if (node.init?.type === 'CallExpression') {
|
|
29
83
|
const initCallName = getCallName(node.init);
|
package/src/scanner/ast.js
CHANGED
|
@@ -110,6 +110,11 @@ function analyzeFile(content, filePath, basePath) {
|
|
|
110
110
|
relFile: path.relative(basePath, filePath),
|
|
111
111
|
dynamicRequireVars: new Set(),
|
|
112
112
|
staticAssignments: new Set(),
|
|
113
|
+
// v2.10.73 P2: AST-006 source qualification — tracks WHERE a variable's value came from.
|
|
114
|
+
// Used by dynamic_require to distinguish plugin loaders (LOW: string_literal/array_literal/
|
|
115
|
+
// object_literal/fs_readdir/require_json) from real obfuscation (HIGH: function_call/
|
|
116
|
+
// computed_expression) or credential theft vectors (CRITICAL: env_var).
|
|
117
|
+
varSource: new Map(),
|
|
113
118
|
dangerousCmdVars: new Map(),
|
|
114
119
|
workflowPathVars: new Set(),
|
|
115
120
|
execPathVars: new Map(),
|
|
@@ -1,19 +1,61 @@
|
|
|
1
1
|
const fs = require('fs');
|
|
2
2
|
const path = require('path');
|
|
3
|
-
const { findFiles, forEachSafeFile } = require('../utils.js');
|
|
3
|
+
const { findFiles, forEachSafeFile, debugLog } = require('../utils.js');
|
|
4
4
|
|
|
5
5
|
// node_modules NOT excluded: detect obfuscated code in dependencies.
|
|
6
6
|
// dist/build/out/output excluded: bundled output is always flagged as isPackageOutput (LOW)
|
|
7
7
|
// and costs significant processing time on large SDKs.
|
|
8
8
|
const OBF_EXCLUDED_DIRS = ['.git', '.muaddib-cache', 'dist', 'build', 'out', 'output'];
|
|
9
9
|
|
|
10
|
+
// v2.10.73 P4: WASM/Emscripten artifact detection
|
|
11
|
+
// These files are high-entropy by construction (compiled WebAssembly, asm.js bytecode
|
|
12
|
+
// tables, Emscripten output). They produced 52+ ENTROPY/obfuscation FP fires in the
|
|
13
|
+
// v2.10.72 audit (e.g. node_modules/mpg123-decoder/src/EmscriptenWasm.js inside
|
|
14
|
+
// @leoqlin/openclaw-qqbot's bundled deps). Skipped from obfuscation detection only —
|
|
15
|
+
// other scanners (AST, dataflow, hash, IOC) still analyze them, so actual malware
|
|
16
|
+
// hidden in a WASM file can still be caught through those channels.
|
|
17
|
+
const WASM_BASENAME_RE = /(?:wasm|emscripten|dcmtk|ffmpeg-wasm|opus-decoder|mpg123-decoder|wasm-audio-decoders)/i;
|
|
18
|
+
const WASM_CONTENT_MARKERS = [
|
|
19
|
+
'Module["asm"]',
|
|
20
|
+
'Module.asm',
|
|
21
|
+
'WebAssembly.instantiate',
|
|
22
|
+
'WebAssembly.compile',
|
|
23
|
+
'_emscripten_',
|
|
24
|
+
'asmLibraryArg',
|
|
25
|
+
'wasmMemory',
|
|
26
|
+
'wasmTable',
|
|
27
|
+
'HEAPU8',
|
|
28
|
+
'HEAP32',
|
|
29
|
+
'AGFzbQ' // base64 of WASM magic bytes \x00asm — very specific marker
|
|
30
|
+
];
|
|
31
|
+
|
|
32
|
+
function isWasmEmscriptenArtifact(filePath, content) {
|
|
33
|
+
const basename = path.basename(filePath);
|
|
34
|
+
if (WASM_BASENAME_RE.test(basename)) return true;
|
|
35
|
+
// Sample first 64KB to avoid scanning huge files fully (WASM blobs are often >1MB)
|
|
36
|
+
const sample = content.length > 65536 ? content.slice(0, 65536) : content;
|
|
37
|
+
for (const marker of WASM_CONTENT_MARKERS) {
|
|
38
|
+
if (sample.indexOf(marker) !== -1) return true;
|
|
39
|
+
}
|
|
40
|
+
return false;
|
|
41
|
+
}
|
|
42
|
+
|
|
10
43
|
function detectObfuscation(targetPath) {
|
|
11
44
|
const threats = [];
|
|
45
|
+
let wasmSkipped = 0;
|
|
12
46
|
const files = findFiles(targetPath, { extensions: ['.js', '.mjs', '.cjs'], excludedDirs: OBF_EXCLUDED_DIRS });
|
|
13
47
|
|
|
14
48
|
forEachSafeFile(files, (file, content) => {
|
|
15
49
|
const relativePath = path.relative(targetPath, file);
|
|
16
50
|
|
|
51
|
+
// v2.10.73 P4: Skip WASM/Emscripten artifacts — high-entropy by construction,
|
|
52
|
+
// produced 52+ FP fires in v2.10.72 audit (mpg123-decoder in @leoqlin/openclaw-qqbot).
|
|
53
|
+
// Other scanners still analyze these files — this only filters obfuscation heuristics.
|
|
54
|
+
if (isWasmEmscriptenArtifact(file, content)) {
|
|
55
|
+
wasmSkipped++;
|
|
56
|
+
return;
|
|
57
|
+
}
|
|
58
|
+
|
|
17
59
|
const signals = [];
|
|
18
60
|
let score = 0;
|
|
19
61
|
const basename = path.basename(file);
|
|
@@ -103,6 +145,10 @@ function detectObfuscation(targetPath) {
|
|
|
103
145
|
}
|
|
104
146
|
});
|
|
105
147
|
|
|
148
|
+
if (wasmSkipped > 0) {
|
|
149
|
+
debugLog(`[obfuscation] skipped ${wasmSkipped} WASM/Emscripten artifact(s) — high-entropy by construction`);
|
|
150
|
+
}
|
|
151
|
+
|
|
106
152
|
return threats;
|
|
107
153
|
}
|
|
108
154
|
|
package/src/scoring.js
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
const { getRule } = require('./rules/index.js');
|
|
2
2
|
const { HIGH_CONFIDENCE_MALICE_TYPES } = require('./monitor/classify.js');
|
|
3
|
+
// v2.10.73 P1: bundle detection helpers — extended bundle path regex + veto check
|
|
4
|
+
const { BUNDLE_PATH_RE, hasBundleVetoSignal } = require('./shared/bundle-detect.js');
|
|
3
5
|
|
|
4
6
|
// ============================================
|
|
5
7
|
// SCORING CONSTANTS
|
|
@@ -258,8 +260,13 @@ const DIST_EXEMPT_TYPES = new Set([
|
|
|
258
260
|
// fetch_decrypt_exec (fetch+decrypt+eval triple) remains exempt — never coincidental.
|
|
259
261
|
]);
|
|
260
262
|
|
|
261
|
-
// Regex matching dist/build/out/output/minified/bundled file paths
|
|
263
|
+
// Regex matching dist/build/out/output/minified/bundled file paths.
|
|
262
264
|
// P7: added out/ and output/ — common build output directories (esbuild, custom build scripts)
|
|
265
|
+
// v2.10.73 P1: DIST_FILE_RE is kept as the narrow legacy regex for backwards compat
|
|
266
|
+
// with existing call sites (other rules reference it). The EXTENDED bundle match is
|
|
267
|
+
// done via BUNDLE_PATH_RE from src/shared/bundle-detect.js — used in the new gate below.
|
|
268
|
+
// BUNDLE_PATH_RE covers: .umd.js, .esm.js, .es.js, .common.js, .max.js, hash chunks,
|
|
269
|
+
// fesm*/, browser/, assets/, chunks/, _app/, lib/bundled/.
|
|
263
270
|
const DIST_FILE_RE = /(?:^|[/\\])(?:dist|build|out|output)[/\\]|\.min\.js$|\.bundle\.js$/i;
|
|
264
271
|
|
|
265
272
|
// Bundler artifact types: get two-notch downgrade in dist/ files (CRITICAL→MEDIUM, HIGH→LOW).
|
|
@@ -287,6 +294,15 @@ const DIST_BUNDLER_ARTIFACT_TYPES = new Set([
|
|
|
287
294
|
// Audit v3 B3: staged_payload (fetch+eval) in dist/ is code splitting / lazy loading,
|
|
288
295
|
// not malicious payload staging. fetch_decrypt_exec remains exempt (triple signal).
|
|
289
296
|
'staged_payload'
|
|
297
|
+
// v2.10.73 P1: credential_regex_harvest, suspicious_dataflow, string_mutation_obfuscation
|
|
298
|
+
// are NOT added here (kept in the one-notch path) — existing scoring-hardening tests
|
|
299
|
+
// (FP-P7 etc.) require these to receive a single-notch downgrade to stay visible as
|
|
300
|
+
// MEDIUM in bundles. The real benefit for these types comes from the extended
|
|
301
|
+
// BUNDLE_PATH_RE (src/shared/bundle-detect.js) which now matches .umd/.esm/.es/.common/
|
|
302
|
+
// .max suffixes, fesm*/, browser/, assets/, chunks/, hash-suffixed chunks — paths
|
|
303
|
+
// where the old narrow DIST_FILE_RE missed the bundle files entirely. One-notch
|
|
304
|
+
// downgrade on a broader set of bundle paths is enough to bring FP clusters under
|
|
305
|
+
// the webhook threshold without compromising true positive detection.
|
|
290
306
|
]);
|
|
291
307
|
|
|
292
308
|
// Types exempt from reachability downgrade — IOC matches, lifecycle, and package-level types.
|
|
@@ -644,8 +660,29 @@ function applyFPReductions(threats, reachableFiles, packageName, packageDeps) {
|
|
|
644
660
|
// Bundler artifact types (eval, dynamic_require, obfuscation) get two-notch downgrade
|
|
645
661
|
// (CRITICAL→MEDIUM, HIGH→LOW) since bundlers routinely produce these patterns.
|
|
646
662
|
// Other non-exempt types keep one-notch downgrade.
|
|
647
|
-
|
|
648
|
-
|
|
663
|
+
//
|
|
664
|
+
// v2.10.73 P1: two changes to this gate:
|
|
665
|
+
// (a) Match either the narrow legacy DIST_FILE_RE OR the extended BUNDLE_PATH_RE
|
|
666
|
+
// from src/shared/bundle-detect.js (which adds .umd.js/.esm.js/.common.js/
|
|
667
|
+
// hash-chunks/fesm*/browser/assets/chunks/_app). Rationale: the narrow regex
|
|
668
|
+
// missed babylonjs/electron/@testim/@vanwei-wcs/etc. bundle files.
|
|
669
|
+
// (b) Before applying the downgrade, call hasBundleVetoSignal() — if the same
|
|
670
|
+
// file has a threat whose type is in VETO_TYPES {reverse_shell,
|
|
671
|
+
// node_modules_write, npm_token_steal, ...} OR an env_access on a sensitive env
|
|
672
|
+
// var (NPM_TOKEN, AWS_*, SSH_*, ...), BLOCK the downgrade. This preserves
|
|
673
|
+
// detection of event-stream / flatmap-stream style injections where malware
|
|
674
|
+
// is packed inside a legitimate-looking bundle.
|
|
675
|
+
const isBundleFile = t.file && (DIST_FILE_RE.test(t.file) || BUNDLE_PATH_RE.test(t.file));
|
|
676
|
+
if (isBundleFile && !DIST_EXEMPT_TYPES.has(t.type)) {
|
|
677
|
+
// Veto check: don't downgrade if the bundle is suspected of injection
|
|
678
|
+
if (hasBundleVetoSignal(threats, t.file)) {
|
|
679
|
+
// Leave the threat at its original severity — the bundle contains a
|
|
680
|
+
// suspicious co-occurring signal (worm propagation, credential env read,
|
|
681
|
+
// reverse shell, etc.) so all threats on this file stay un-downgraded.
|
|
682
|
+
// Record it in reductions for audit trail.
|
|
683
|
+
if (!t.reductions) t.reductions = [];
|
|
684
|
+
t.reductions.push({ rule: 'bundle_veto_preserved', from: t.severity, to: t.severity });
|
|
685
|
+
} else if (DIST_BUNDLER_ARTIFACT_TYPES.has(t.type)) {
|
|
649
686
|
// Two-notch downgrade for bundler artifacts
|
|
650
687
|
const fromSev = t.severity;
|
|
651
688
|
if (t.severity === 'CRITICAL') t.severity = 'MEDIUM';
|
|
@@ -789,8 +826,15 @@ function calculateRiskScore(deduped, intentResult) {
|
|
|
789
826
|
// 1. Separate deduped threats into package-level and file-level
|
|
790
827
|
const packageLevelThreats = [];
|
|
791
828
|
const fileLevelThreats = [];
|
|
829
|
+
// v2.10.73 P3: Degraded quick-scan threats get a separate bucket so they
|
|
830
|
+
// contribute a bounded amount to the package score but never inflate max_file_score.
|
|
831
|
+
// Exception: CRITICAL degraded threats (Module._load pattern) pass through normal
|
|
832
|
+
// file-level processing — they are rare and nearly always malicious.
|
|
833
|
+
const degradedNonCriticalThreats = [];
|
|
792
834
|
for (const t of deduped) {
|
|
793
|
-
if (
|
|
835
|
+
if (t.degraded === true && t.severity !== 'CRITICAL') {
|
|
836
|
+
degradedNonCriticalThreats.push(t);
|
|
837
|
+
} else if (isPackageLevelThreat(t)) {
|
|
794
838
|
packageLevelThreats.push(t);
|
|
795
839
|
} else {
|
|
796
840
|
fileLevelThreats.push(t);
|
|
@@ -873,8 +917,21 @@ function calculateRiskScore(deduped, intentResult) {
|
|
|
873
917
|
intentBonus = Math.min(intentResult.intentScore, 30);
|
|
874
918
|
}
|
|
875
919
|
|
|
876
|
-
//
|
|
877
|
-
|
|
920
|
+
// 6b. v2.10.73 P3: Degraded (quick-scan) non-CRITICAL threats contribute a
|
|
921
|
+
// bounded bonus to the final score — they are visible in the report but never
|
|
922
|
+
// inflate max_file_score. Cap at 15 (= 5 MEDIUM threats OR 1 HIGH + small).
|
|
923
|
+
// Rationale: quick-scan is regex-only, cannot distinguish top-level from
|
|
924
|
+
// exported function scope, so detections are low-confidence by construction.
|
|
925
|
+
let degradedScore = 0;
|
|
926
|
+
if (degradedNonCriticalThreats.length > 0) {
|
|
927
|
+
for (const t of degradedNonCriticalThreats) {
|
|
928
|
+
degradedScore += _severityWeights[t.severity] || 0;
|
|
929
|
+
}
|
|
930
|
+
degradedScore = Math.min(15, degradedScore);
|
|
931
|
+
}
|
|
932
|
+
|
|
933
|
+
// 7. Final score = max file score + cross-file bonus + intent bonus + package-level score + lifecycle boost + degraded bucket, capped at 100
|
|
934
|
+
let riskScore = Math.min(MAX_RISK_SCORE, maxFileScore + crossFileBonus + intentBonus + packageScore + lifecycleBoost + degradedScore);
|
|
878
935
|
|
|
879
936
|
// 7b. MT-1: Score ceiling for packages without lifecycle scripts.
|
|
880
937
|
// 56% of real malware uses install scripts. Packages without lifecycle that score high
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Bundle file detection helpers — v2.10.73 P1 (FP cluster fix).
|
|
5
|
+
*
|
|
6
|
+
* The v2.10.72 forensic audit (2026-04-11, 78 packages deep-reviewed) revealed
|
|
7
|
+
* that the 14 packages babylonjs/electron/@kitware/vtk.js/@stencil/core/playwright/
|
|
8
|
+
* @testim/testim-cli/@vanwei-wcs/video-player-v2/@bookolosystem/engine/@epie/bi-crud/etc.
|
|
9
|
+
* scored ≥50 because the AST/dataflow/obfuscation rules were firing on
|
|
10
|
+
* standard bundler helpers (__webpack_require__, Function("return this")(),
|
|
11
|
+
* var __copyProps, .replace chains, prototype pollution for framework reactivity).
|
|
12
|
+
*
|
|
13
|
+
* Fix:
|
|
14
|
+
* 1. Extended regex `BUNDLE_PATH_RE` covering the missing patterns:
|
|
15
|
+
* .umd.js, .esm.js, .es.js, .common.js, .max.js, hash-suffixed chunks,
|
|
16
|
+
* fesm*, browser/, assets/, chunks/.
|
|
17
|
+
* 2. Veto list `VETO_TYPES` — types that indicate a malicious injection
|
|
18
|
+
* in a bundle (reverse_shell, node_modules_write, etc.). If a
|
|
19
|
+
* veto threat is present in the same file, the bundle downgrade is
|
|
20
|
+
* cancelled — the bundle is suspected of injection (event-stream style).
|
|
21
|
+
* 3. `SENSITIVE_ENV_RE` list — names of sensitive env vars. An env_access on
|
|
22
|
+
* one of these names in a bundle also cancels the downgrade (credential theft).
|
|
23
|
+
*
|
|
24
|
+
* Architecture: no file-content reads and no cache — detection is based
|
|
25
|
+
* purely on the path and on the threat types co-occurring in the
|
|
26
|
+
* same file. For v2.10.74, an `isStructuralBundle()` that reads
|
|
27
|
+
* signatures (`__webpack_require__`, `sourceMappingURL=`) could be added
|
|
28
|
+
|
|
+
* if the FPR tests show that FPs remain on bundles without a recognizable name.
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
// Extended bundle path/basename regex (supplements the narrow DIST_FILE_RE kept in scoring.js).
|
|
32
|
+
// Covers the audit findings: babylonjs, electron, @kitware/vtk.js, dprint,
|
|
33
|
+
// @jetbrains/junie, @zuplo/core, @stencil/core, playwright, @equinor/*,
|
|
34
|
+
// @alipay/*, @testim/testim-cli, @vanwei-wcs/video-player-v2, @bookolosystem/engine,
|
|
35
|
+
// @epie/bi-crud, @fairyhunter13/opentui-core, rsshub.
|
|
36
|
+
//
|
|
37
|
+
// Pattern groups:
|
|
38
|
+
// - Directory prefixes (dist/, build/, out/, output/, lib/bundled/, browser/,
|
|
39
|
+
// fesm*/, esm/, esm5/, esm2015/, esm2020/, bundles/, assets/, chunks/, _app/)
|
|
40
|
+
// - Basename suffixes (.min.js, .bundle.js, .umd.js, .esm.js, .es.js,
|
|
41
|
+
// .common.js, .max.js, .prod.js, .production.js, + .cjs / .mjs variants)
|
|
42
|
+
// - Hash-suffixed chunks (esbuild/vite/rollup/webpack convention):
|
|
43
|
+
// `basename-[a-f0-9]{6,16}.js|mjs|cjs`
|
|
44
|
+
const BUNDLE_PATH_RE = new RegExp(
|
|
45
|
+
// Path prefix group
|
|
46
|
+
'(?:^|[/\\\\])' +
|
|
47
|
+
'(?:dist|build|out|output|browser|bundles|assets|chunks|_app|' +
|
|
48
|
+
'lib[/\\\\]bundled|fesm\\d*|esm|esm5|esm2015|esm2020)' +
|
|
49
|
+
'[/\\\\]' +
|
|
50
|
+
// OR basename suffix group
|
|
51
|
+
'|\\.(?:min|bundle|umd|esm|es|common|max|prod|production)\\.(?:m?js|cjs)$' +
|
|
52
|
+
// OR hash-suffixed chunk
|
|
53
|
+
'|(?:^|[/\\\\])[\\w-]+[-.][a-f0-9]{6,16}\\.(?:m?js|cjs)$',
|
|
54
|
+
'i'
|
|
55
|
+
);
|
|
56
|
+
|
|
57
|
+
// Veto list for the bundle downgrade. If any of these threat types is reported
// on the same file as a bundle downgrade candidate, the downgrade is cancelled
// entirely: the bundle is suspected of malicious injection or of active
// C2/persistence.
//
// IMPORTANT: types that feed existing compound rules are INTENTIONALLY left out.
// The scoring pipeline already recovers downgraded signals through
// `applyCompoundBoosts` + `originalSeverity` gates (see src/scoring.js:462 and the
// compound gate at line 410). Types such as `staged_binary_payload`,
// `crypto_decipher`, `fetch_decrypt_exec` and `zlib_inflate_eval` ARE downgraded
// in bundles, but their `originalSeverity` is preserved so compound rules
// (crypto_staged_payload, etc.) can still fire. Listing them here would break the
// existing v2.9.6 test suite (compound-scoring.test.js:305 and similar) without
// adding value.
//
// Membership is therefore limited to patterns that:
//   1. Have no compound fallback (rare patterns not yet wired into a compound)
//   2. Indicate active C2, persistence, or worm propagation (structurally unique
//      to malware; a legit bundler never produces `reverse_shell` or
//      `node_modules_write`)
//   3. Are IOC hits (highest confidence, never downgraded regardless of context)
const VETO_TYPES = new Set([
  // --- Active C2 / backdoor: structurally unique to malware, no legit bundler path
  'reverse_shell',
  'node_modules_write', // worm propagation (Shai-Hulud style)
  'npm_publish_worm',
  'npm_token_steal',
  'systemd_persistence',

  // --- Unicode steganography (GlassWorm): bundlers never produce invisible unicode
  'unicode_invisible_injection',

  // --- IOC hits: never downgraded regardless of context
  'ioc_match',
  'known_malicious_package',
  'shai_hulud_marker'
]);
|
|
89
|
+
|
|
90
|
+
// Sensitive environment variable patterns. An `env_access` threat whose
// `message` contains any of these, present on the same file as a bundle
// downgrade candidate, VETOs the downgrade: the bundle reads credentials.
// NODE_ENV, NODE_OPTIONS, PATH, HOME, SHELL, CI, DEBUG etc. are NOT included
// (they are read by bundler output for legit reasons like runtime detection).
//
// Matching is case-sensitive and anchored on word boundaries, so only whole
// upper-case variable names match. The regex carries no `g`/`y` flag, which
// keeps `.test()` stateless across calls.
//
// The GCP_* and AZURE_* prefix patterns accept digits as well as letters and
// underscores (same character class as the catch-all suffix patterns), so
// names such as AZURE_AD_B2C_CLIENT_ID are covered. AZURE_CLIENT_SECRET needs
// no entry of its own: the AZURE_* prefix already matches it.
const SENSITIVE_ENV_RE = new RegExp(
  '\\b(' +
    'NPM_TOKEN|NPM_CONFIG_AUTHTOKEN|NPMRC|' +
    'AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN|' +
    'SSH_PRIVATE_KEY|SSH_KEY|SSH_AUTH_SOCK|' +
    'GITHUB_TOKEN|GH_TOKEN|GITLAB_TOKEN|' +
    'GCP_[A-Z0-9_]+|GOOGLE_APPLICATION_CREDENTIALS|' +
    'AZURE_[A-Z0-9_]+|' +
    'STRIPE_SECRET_KEY|STRIPE_LIVE|' +
    // Catch-all suffix patterns
    '[A-Z][A-Z0-9_]*_SECRET|[A-Z][A-Z0-9_]*_PRIVATE_KEY|' +
    '[A-Z][A-Z0-9_]*_API_KEY|[A-Z][A-Z0-9_]*_AUTH_TOKEN' +
  ')\\b'
);
|
|
109
|
+
|
|
110
|
+
/**
 * Tell whether a file path looks like bundler output, i.e. whether it matches
 * BUNDLE_PATH_RE.
 * @param {string} filePath - relative or absolute file path
 * @returns {boolean} false for empty or non-string input, otherwise the
 *   result of testing the path against BUNDLE_PATH_RE
 */
function isBundlePath(filePath) {
  const isNonEmptyString = typeof filePath === 'string' && filePath.length > 0;
  return isNonEmptyString ? BUNDLE_PATH_RE.test(filePath) : false;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Check if any threat in `threats` on the same file as `targetFile` is a
 * veto signal (its type is in VETO_TYPES, OR it is an `env_access` threat
 * whose message names a sensitive env var per SENSITIVE_ENV_RE). If so, the
 * bundle-downgrade gate should NOT downgrade: the bundle is suspected of
 * malicious injection (event-stream / flatmap-stream style) or credential theft.
 *
 * Entries that are `null`/`undefined` are skipped instead of throwing, so one
 * malformed entry cannot abort the whole check.
 *
 * @param {Array} threats - full threats array (all scanners combined)
 * @param {string} targetFile - the file path being evaluated for downgrade
 * @returns {boolean} - true if a veto signal is found; false otherwise,
 *   including when `threats` is not an array or `targetFile` is falsy
 */
function hasBundleVetoSignal(threats, targetFile) {
  if (!Array.isArray(threats) || !targetFile) return false;
  return threats.some((threat) => {
    // Ignore malformed entries and threats reported on other files.
    if (!threat || threat.file !== targetFile) return false;
    if (VETO_TYPES.has(threat.type)) return true;
    // env_access only vetoes when the message names a sensitive variable.
    return Boolean(
      threat.type === 'env_access' &&
        threat.message &&
        SENSITIVE_ENV_RE.test(threat.message)
    );
  });
}
|
|
141
|
+
|
|
142
|
+
module.exports = {
|
|
143
|
+
BUNDLE_PATH_RE,
|
|
144
|
+
VETO_TYPES,
|
|
145
|
+
SENSITIVE_ENV_RE,
|
|
146
|
+
isBundlePath,
|
|
147
|
+
hasBundleVetoSignal
|
|
148
|
+
};
|