muaddib-scanner 2.10.72 → 2.10.77

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -292,7 +292,7 @@ repos:
292
292
  | **FPR** (Benign random) | **7.5%** (15/200) | 200 random npm packages, stratified sampling |
293
293
  | **ADR** (Adversarial + Holdout) | **96.3%** (103/107) | 67 adversarial + 40 holdout (107 available on disk), global threshold=20 |
294
294
 
295
- **3068 tests** across 66 files. **200 rules** (195 RULES + 5 PARANOID).
295
+ **3134 tests** across 66 files. **200 rules** (195 RULES + 5 PARANOID).
296
296
 
297
297
  > **ML retrain methodology (v2.10.51):**
298
298
  > - Ground truth: 377 confirmed_malicious via auto-labeler (OSSF malicious-packages, GitHub Advisory Database, npm registry takedown correlation)
@@ -340,10 +340,10 @@ npm test
340
340
 
341
341
  ### Testing
342
342
 
343
- - **3068 tests** across 66 modular test files
343
+ - **3134 tests** across 66 modular test files
344
344
  - **56 fuzz tests** - Malformed inputs, ReDoS, unicode, binary
345
345
  - **Datadog 17K benchmark** - 14,587 confirmed malware samples (in-scope)
346
- - **Ground truth validation** - 66 real-world attacks (93.75% TPR@3, 85.9% TPR@20)
346
+ - **Ground truth validation** - 67 real-world attacks (93.75% TPR@3, 85.9% TPR@20)
347
347
  - **False positive validation** - 14.0% FPR rules, 8.3% after ML on 532 curated npm packages, 7.5% on 200 random
348
348
 
349
349
  ---
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.10.72",
3
+ "version": "2.10.77",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -186,11 +186,21 @@ function isSuspectClassification(result) {
186
186
  // sandbox queue, starving legitimate T1b/T2 candidates of the dedicated
187
187
  // deferred slot.
188
188
  //
189
- // A sandbox slot is only justified when there is real signal. Require at
190
- // least one non-LOW finding to reach tier 2 via this fallback — otherwise
191
- // downgrade to tier 3 (log only, no sandbox consumption).
189
+ // Threat model for this downgrade: an adversary reading the open-source
190
+ // rules can intentionally tune their malware to fire only LOW-severity
191
+ // patterns + 2 distinct non-T3 types to land in this fallback. If we
192
+ // downgrade ALL such cases to tier 3, a weak TIER1_TYPES match (e.g.,
193
+ // staged_payload at LOW, sandbox_evasion at LOW) would bypass sandbox
194
+ // verification entirely — TIER1_TYPES are "quasi-never legitimate" and
195
+ // weak matches still warrant dynamic inspection.
196
+ //
197
+ // Therefore: preserve tier 2 when EITHER (a) any finding is non-LOW
198
+ // severity OR (b) any finding is in TIER1_TYPES even at LOW severity.
199
+ // Downgrade to tier 3 only for packages with 2+ distinct LOW findings
200
+ // where NONE are in the quasi-never-legit TIER1 zone.
192
201
  const hasNonLowFinding = result.threats.some(t => t.severity !== 'LOW');
193
- if (hasNonLowFinding) {
202
+ const hasTier1Signal = result.threats.some(t => TIER1_TYPES.has(t.type));
203
+ if (hasNonLowFinding || hasTier1Signal) {
194
204
  return { suspect: true, tier: 2 };
195
205
  }
196
206
  return { suspect: true, tier: 3 };
@@ -15,7 +15,7 @@
15
15
  const fs = require('fs');
16
16
  const path = require('path');
17
17
  const { runSandbox } = require('../sandbox/index.js');
18
- const { isCanaryEnabled } = require('./classify.js');
18
+ const { isCanaryEnabled, TIER1_TYPES } = require('./classify.js');
19
19
  const { getWebhookUrl, alertedPackageRules, persistAlert, buildAlertData } = require('./webhook.js');
20
20
  const { sendWebhook } = require('../webhook.js');
21
21
  const { atomicWriteFileSync } = require('./state.js');
@@ -59,11 +59,21 @@ function enqueueDeferred(item) {
59
59
 
60
60
  // Defense-in-depth: block low-score items regardless of tier. With the
61
61
  // classify.js:183 fallback fix in place, no legitimate enqueue should
62
- // reach this function with score < DEFERRED_MIN_SCORE. Logging with
63
- // console.error makes a future regression (new classification path that
64
- // leaks low-score items) loud in operator logs.
65
- if ((item.riskScore || 0) < DEFERRED_MIN_SCORE) {
66
- console.error(`[DEFERRED] REJECTED: ${item.name}@${item.version} — score=${item.riskScore || 0} below minimum ${DEFERRED_MIN_SCORE} (possible classification regression)`);
62
+ // reach this function with score < DEFERRED_MIN_SCORE unless it carries
63
+ // a TIER1_TYPES signal. Logging with console.error makes a future
64
+ // regression (new classification path that leaks low-score items) loud
65
+ // in operator logs.
66
+ //
67
+ // Threat-model exception: packages containing any TIER1_TYPES finding
68
+ // (even at LOW severity) must bypass this min-score guard. TIER1_TYPES
69
+ // are "quasi-never legitimate in benign packages" and weak matches
70
+ // still warrant sandbox verification — an adversary could otherwise
71
+ // tune their malware to fire only LOW-severity TIER1 patterns to
72
+ // bypass sandbox entirely.
73
+ const itemThreats = (item.staticResult && item.staticResult.threats) || [];
74
+ const hasTier1Signal = itemThreats.some(t => TIER1_TYPES.has(t.type));
75
+ if ((item.riskScore || 0) < DEFERRED_MIN_SCORE && !hasTier1Signal) {
76
+ console.error(`[DEFERRED] REJECTED: ${item.name}@${item.version} — score=${item.riskScore || 0} below minimum ${DEFERRED_MIN_SCORE}, no TIER1 signal (possible classification regression)`);
67
77
  return false;
68
78
  }
69
79
 
@@ -232,11 +232,18 @@ async function execute(targetPath, options, pythonDeps, warnings) {
232
232
  if (wasFilesCapped()) {
233
233
  warnings.push('File count cap reached (500 files) — overflow files scanned in quick-scan mode (lifecycle + child_process only).');
234
234
  const overflowFiles = getOverflowFiles();
235
+ // v2.10.73 P3: Quick-scan is a DEGRADED regex-based pass — no AST, no scope
236
+ // tracking. It cannot distinguish exec() at module top-level (CRITICAL) from
237
+ // exec() inside an exported route handler (LOW runtime). Forensic audit v2.10.72:
238
+ // 18+ AST-007 fires on rsshub/dist-lib/*.mjs where spawn() lives inside exported
239
+ // route handlers. Default severity is now MEDIUM (downgraded from HIGH). Module._load
240
+ // remains CRITICAL — very rare outside of malware. Threats are flagged `degraded:true`
241
+ // so scoring.js excludes them from max_file_score (see applyFPReductions).
235
242
  const QUICK_SCAN_PATTERNS = [
236
- { re: /\brequire\s*\(\s*['"]child_process['"]\s*\)/, type: 'dangerous_exec', severity: 'HIGH', label: 'require("child_process")' },
237
- { re: /\brequire\s*\(\s*['"]node:child_process['"]\s*\)/, type: 'dangerous_exec', severity: 'HIGH', label: 'require("node:child_process")' },
238
- { re: /\b(?:exec|execSync|spawn|spawnSync)\s*\(/, type: 'dangerous_exec', severity: 'HIGH', label: 'exec/spawn call' },
239
- { re: /\bprocess\.mainModule\b/, type: 'dynamic_require', severity: 'HIGH', label: 'process.mainModule' },
243
+ { re: /\brequire\s*\(\s*['"]child_process['"]\s*\)/, type: 'dangerous_exec', severity: 'MEDIUM', label: 'require("child_process")' },
244
+ { re: /\brequire\s*\(\s*['"]node:child_process['"]\s*\)/, type: 'dangerous_exec', severity: 'MEDIUM', label: 'require("node:child_process")' },
245
+ { re: /\b(?:exec|execSync|spawn|spawnSync)\s*\(/, type: 'dangerous_exec', severity: 'MEDIUM', label: 'exec/spawn call' },
246
+ { re: /\bprocess\.mainModule\b/, type: 'dynamic_require', severity: 'MEDIUM', label: 'process.mainModule' },
240
247
  { re: /\bModule\._load\b/, type: 'module_load_bypass', severity: 'CRITICAL', label: 'Module._load' }
241
248
  ];
242
249
  for (const filePath of overflowFiles) {
@@ -251,7 +258,9 @@ async function execute(targetPath, options, pythonDeps, warnings) {
251
258
  type: pat.type,
252
259
  severity: pat.severity,
253
260
  message: `[quick-scan] ${pat.label} detected in overflow file.`,
254
- file: relFile
261
+ file: relFile,
262
+ degraded: true, // P3: regex-only detection, no semantic context
263
+ quickScan: true
255
264
  });
256
265
  }
257
266
  }
@@ -89,6 +89,15 @@ function handleCallExpression(node, ctx) {
89
89
  // Check if variable was reassignment-tracked to a dangerous module
90
90
  const DANGEROUS_MODS_REQ = ['child_process', 'fs', 'net', 'dns', 'http', 'https', 'tls'];
91
91
  const resolvedVal = ctx.stringVarValues?.get(arg.name);
92
+ // v2.10.73 P2: source-aware severity (AST-006 plugin loader FP fix)
93
+ // Distinguishes plugin loaders (LOW) from obfuscation (HIGH) from env exfil (CRITICAL).
94
+ // See src/scanner/ast-detectors/handle-variable-declarator.js ctx.varSource tracking.
95
+ const varSource = ctx.varSource?.get(arg.name) || null;
96
+ const isStaticSource =
97
+ varSource === 'string_literal' || varSource === 'array_literal' ||
98
+ varSource === 'object_literal' || varSource === 'fs_readdir' ||
99
+ varSource === 'require_json';
100
+ const isCriticalSource = varSource === 'env_var';
92
101
  if (resolvedVal) {
93
102
  const norm = resolvedVal.startsWith('node:') ? resolvedVal.slice(5) : resolvedVal;
94
103
  if (DANGEROUS_MODS_REQ.includes(norm)) {
@@ -98,28 +107,46 @@ function handleCallExpression(node, ctx) {
98
107
  file: ctx.relFile
99
108
  });
100
109
  } else {
101
- // If the variable was assigned from a static value (string literal,
102
- // array of strings, object with string values), it's a plugin loader pattern
103
- const severity = ctx.staticAssignments.has(arg.name) ? 'LOW' : 'HIGH';
110
+ // Plugin loader qualification:
111
+ // - string_literal/array_literal/object_literal/fs_readdir/require_json → LOW (legit plugin loader)
112
+ // - env_var → CRITICAL (require(process.env.X) = credential/path exfil vector)
113
+ // - fallback to staticAssignments for legacy coverage
114
+ // - else → HIGH (real obfuscation candidate)
115
+ let severity, message;
116
+ if (isCriticalSource) {
117
+ severity = 'CRITICAL';
118
+ message = `Dynamic require() with variable "${arg.name}" sourced from process.env — environment-driven module loading (credential or path exfil vector).`;
119
+ } else if (isStaticSource || ctx.staticAssignments.has(arg.name)) {
120
+ severity = 'LOW';
121
+ message = `Dynamic require() with statically-assigned variable "${arg.name}" (plugin loader pattern, source: ${varSource || 'static-value'}).`;
122
+ } else {
123
+ severity = 'HIGH';
124
+ message = 'Dynamic require() with variable argument (module name obfuscation).';
125
+ }
104
126
  ctx.threats.push({
105
127
  type: 'dynamic_require',
106
128
  severity,
107
- message: severity === 'LOW'
108
- ? `Dynamic require() with statically-assigned variable "${arg.name}" (plugin loader pattern).`
109
- : 'Dynamic require() with variable argument (module name obfuscation).',
129
+ message,
110
130
  file: ctx.relFile
111
131
  });
112
132
  }
113
133
  } else {
114
- // If the variable was assigned from a static value (string literal,
115
- // array of strings, object with string values), it's a plugin loader pattern
116
- const severity = ctx.staticAssignments.has(arg.name) ? 'LOW' : 'HIGH';
134
+ // Same qualification flow without resolvedVal context
135
+ let severity, message;
136
+ if (isCriticalSource) {
137
+ severity = 'CRITICAL';
138
+ message = `Dynamic require() with variable "${arg.name}" sourced from process.env — environment-driven module loading (credential or path exfil vector).`;
139
+ } else if (isStaticSource || ctx.staticAssignments.has(arg.name)) {
140
+ severity = 'LOW';
141
+ message = `Dynamic require() with statically-assigned variable "${arg.name}" (plugin loader pattern, source: ${varSource || 'static-value'}).`;
142
+ } else {
143
+ severity = 'HIGH';
144
+ message = 'Dynamic require() with variable argument (module name obfuscation).';
145
+ }
117
146
  ctx.threats.push({
118
147
  type: 'dynamic_require',
119
148
  severity,
120
- message: severity === 'LOW'
121
- ? `Dynamic require() with statically-assigned variable "${arg.name}" (plugin loader pattern).`
122
- : 'Dynamic require() with variable argument (module name obfuscation).',
149
+ message,
123
150
  file: ctx.relFile
124
151
  });
125
152
  }
@@ -24,6 +24,60 @@ function handleVariableDeclarator(node, ctx) {
24
24
  ctx.staticAssignments.add(node.id.name);
25
25
  }
26
26
 
27
+ // v2.10.73 P2: Track WHERE the variable's value originated — used by AST-006
28
+ // to distinguish plugin loaders (LOW) from real obfuscation (HIGH) from
29
+ // credential exfil vectors (CRITICAL). See src/scanner/ast-detectors/handle-call-expression.js
30
+ // around line 103 for consumption.
31
+ if (ctx.varSource && node.init) {
32
+ const init = node.init;
33
+ let source = null;
34
+ if (init.type === 'Literal' && typeof init.value === 'string') {
35
+ source = 'string_literal';
36
+ } else if (init.type === 'TemplateLiteral' && (init.expressions?.length || 0) === 0) {
37
+ source = 'string_literal'; // template with no interpolations is effectively a literal
38
+ } else if (init.type === 'ArrayExpression') {
39
+ source = 'array_literal';
40
+ } else if (init.type === 'ObjectExpression') {
41
+ source = 'object_literal';
42
+ } else if (init.type === 'MemberExpression' &&
43
+ init.object?.type === 'MemberExpression' &&
44
+ init.object.object?.type === 'Identifier' &&
45
+ init.object.object.name === 'process' &&
46
+ init.object.property?.type === 'Identifier' &&
47
+ init.object.property.name === 'env') {
48
+ source = 'env_var'; // const m = process.env.MODULE_NAME
49
+ } else if (init.type === 'CallExpression') {
50
+ const callee = init.callee;
51
+ // fs.readdirSync / fs.readdir / fs.promises.readdir — directory listings
52
+ // are not attacker-controllable unless the dir itself is, which is rare.
53
+ if (callee?.type === 'MemberExpression') {
54
+ const propName = callee.property?.type === 'Identifier' ? callee.property.name : null;
55
+ const objName = callee.object?.type === 'Identifier' ? callee.object.name : null;
56
+ const objPropName = callee.object?.type === 'MemberExpression' &&
57
+ callee.object.property?.type === 'Identifier'
58
+ ? callee.object.property.name : null;
59
+ if (objName === 'fs' && propName && /^readdir/.test(propName)) {
60
+ source = 'fs_readdir';
61
+ } else if (objPropName === 'promises' && propName === 'readdir') {
62
+ source = 'fs_readdir'; // fs.promises.readdir
63
+ }
64
+ }
65
+ // require('./config.json') or require('./cfg.json') — loading a local JSON
66
+ // config is a legit plugin loader pattern (consumer-owned JSON file).
67
+ if (!source &&
68
+ callee?.type === 'Identifier' && callee.name === 'require' &&
69
+ init.arguments?.[0]?.type === 'Literal' &&
70
+ typeof init.arguments[0].value === 'string' &&
71
+ /\.json$/.test(init.arguments[0].value)) {
72
+ source = 'require_json';
73
+ }
74
+ if (!source) source = 'function_call';
75
+ } else {
76
+ source = 'computed_expression';
77
+ }
78
+ ctx.varSource.set(node.id.name, source);
79
+ }
80
+
27
81
  // Track dynamic require vars + module aliases
28
82
  if (node.init?.type === 'CallExpression') {
29
83
  const initCallName = getCallName(node.init);
@@ -110,6 +110,11 @@ function analyzeFile(content, filePath, basePath) {
110
110
  relFile: path.relative(basePath, filePath),
111
111
  dynamicRequireVars: new Set(),
112
112
  staticAssignments: new Set(),
113
+ // v2.10.73 P2: AST-006 source qualification — tracks WHERE a variable's value came from.
114
+ // Used by dynamic_require to distinguish plugin loaders (LOW: string_literal/array_literal/
115
+ // object_literal/fs_readdir/require_json) from real obfuscation (HIGH: function_call/
116
+ // computed_expression) or credential theft vectors (CRITICAL: env_var).
117
+ varSource: new Map(),
113
118
  dangerousCmdVars: new Map(),
114
119
  workflowPathVars: new Set(),
115
120
  execPathVars: new Map(),
@@ -1,19 +1,61 @@
1
1
  const fs = require('fs');
2
2
  const path = require('path');
3
- const { findFiles, forEachSafeFile } = require('../utils.js');
3
+ const { findFiles, forEachSafeFile, debugLog } = require('../utils.js');
4
4
 
5
5
  // node_modules NOT excluded: detect obfuscated code in dependencies.
6
6
  // dist/build/out/output excluded: bundled output is always flagged as isPackageOutput (LOW)
7
7
  // and costs significant processing time on large SDKs.
8
8
  const OBF_EXCLUDED_DIRS = ['.git', '.muaddib-cache', 'dist', 'build', 'out', 'output'];
9
9
 
10
+ // v2.10.73 P4: WASM/Emscripten artifact detection
11
+ // These files are high-entropy by construction (compiled WebAssembly, asm.js bytecode
12
+ // tables, Emscripten output). They produced 52+ ENTROPY/obfuscation FP fires in the
13
+ // v2.10.72 audit (e.g. node_modules/mpg123-decoder/src/EmscriptenWasm.js inside
14
+ // @leoqlin/openclaw-qqbot's bundled deps). Skipped from obfuscation detection only —
15
+ // other scanners (AST, dataflow, hash, IOC) still analyze them, so actual malware
16
+ // hidden in a WASM file can still be caught through those channels.
17
+ const WASM_BASENAME_RE = /(?:wasm|emscripten|dcmtk|ffmpeg-wasm|opus-decoder|mpg123-decoder|wasm-audio-decoders)/i;
18
+ const WASM_CONTENT_MARKERS = [
19
+ 'Module["asm"]',
20
+ 'Module.asm',
21
+ 'WebAssembly.instantiate',
22
+ 'WebAssembly.compile',
23
+ '_emscripten_',
24
+ 'asmLibraryArg',
25
+ 'wasmMemory',
26
+ 'wasmTable',
27
+ 'HEAPU8',
28
+ 'HEAP32',
29
+ 'AGFzbQ' // base64 of WASM magic bytes \x00asm — highly specific marker
30
+ ];
31
+
32
+ function isWasmEmscriptenArtifact(filePath, content) {
33
+ const basename = path.basename(filePath);
34
+ if (WASM_BASENAME_RE.test(basename)) return true;
35
+ // Sample first 64KB to avoid scanning huge files fully (WASM blobs are often >1MB)
36
+ const sample = content.length > 65536 ? content.slice(0, 65536) : content;
37
+ for (const marker of WASM_CONTENT_MARKERS) {
38
+ if (sample.indexOf(marker) !== -1) return true;
39
+ }
40
+ return false;
41
+ }
42
+
10
43
  function detectObfuscation(targetPath) {
11
44
  const threats = [];
45
+ let wasmSkipped = 0;
12
46
  const files = findFiles(targetPath, { extensions: ['.js', '.mjs', '.cjs'], excludedDirs: OBF_EXCLUDED_DIRS });
13
47
 
14
48
  forEachSafeFile(files, (file, content) => {
15
49
  const relativePath = path.relative(targetPath, file);
16
50
 
51
+ // v2.10.73 P4: Skip WASM/Emscripten artifacts — high-entropy by construction,
52
+ // produced 52+ FP fires in v2.10.72 audit (mpg123-decoder in @leoqlin/openclaw-qqbot).
53
+ // Other scanners still analyze these files — this only filters obfuscation heuristics.
54
+ if (isWasmEmscriptenArtifact(file, content)) {
55
+ wasmSkipped++;
56
+ return;
57
+ }
58
+
17
59
  const signals = [];
18
60
  let score = 0;
19
61
  const basename = path.basename(file);
@@ -103,6 +145,10 @@ function detectObfuscation(targetPath) {
103
145
  }
104
146
  });
105
147
 
148
+ if (wasmSkipped > 0) {
149
+ debugLog(`[obfuscation] skipped ${wasmSkipped} WASM/Emscripten artifact(s) — high-entropy by construction`);
150
+ }
151
+
106
152
  return threats;
107
153
  }
108
154
 
package/src/scoring.js CHANGED
@@ -1,5 +1,7 @@
1
1
  const { getRule } = require('./rules/index.js');
2
2
  const { HIGH_CONFIDENCE_MALICE_TYPES } = require('./monitor/classify.js');
3
+ // v2.10.73 P1: bundle detection helpers — extended bundle path regex + veto check
4
+ const { BUNDLE_PATH_RE, hasBundleVetoSignal } = require('./shared/bundle-detect.js');
3
5
 
4
6
  // ============================================
5
7
  // SCORING CONSTANTS
@@ -258,8 +260,13 @@ const DIST_EXEMPT_TYPES = new Set([
258
260
  // fetch_decrypt_exec (fetch+decrypt+eval triple) remains exempt — never coincidental.
259
261
  ]);
260
262
 
261
- // Regex matching dist/build/out/output/minified/bundled file paths
263
+ // Regex matching dist/build/out/output/minified/bundled file paths.
262
264
  // P7: added out/ and output/ — common build output directories (esbuild, custom build scripts)
265
+ // v2.10.73 P1: DIST_FILE_RE is kept as the narrow legacy regex for backwards compat
266
+ // with existing call sites (other rules reference it). The EXTENDED bundle match is
267
+ // done via BUNDLE_PATH_RE from src/shared/bundle-detect.js — used in the new gate below.
268
+ // BUNDLE_PATH_RE covers: .umd.js, .esm.js, .es.js, .common.js, .max.js, hash chunks,
269
+ // fesm*/, browser/, assets/, chunks/, _app/, lib/bundled/.
263
270
  const DIST_FILE_RE = /(?:^|[/\\])(?:dist|build|out|output)[/\\]|\.min\.js$|\.bundle\.js$/i;
264
271
 
265
272
  // Bundler artifact types: get two-notch downgrade in dist/ files (CRITICAL→MEDIUM, HIGH→LOW).
@@ -287,6 +294,15 @@ const DIST_BUNDLER_ARTIFACT_TYPES = new Set([
287
294
  // Audit v3 B3: staged_payload (fetch+eval) in dist/ is code splitting / lazy loading,
288
295
  // not malicious payload staging. fetch_decrypt_exec remains exempt (triple signal).
289
296
  'staged_payload'
297
+ // v2.10.73 P1: credential_regex_harvest, suspicious_dataflow, string_mutation_obfuscation
298
+ // are NOT added here (kept in the one-notch path) — existing scoring-hardening tests
299
+ // (FP-P7 etc.) require these to receive a single-notch downgrade to stay visible as
300
+ // MEDIUM in bundles. The real benefit for these types comes from the extended
301
+ // BUNDLE_PATH_RE (src/shared/bundle-detect.js) which now matches .umd/.esm/.es/.common/
302
+ // .max suffixes, fesm*/, browser/, assets/, chunks/, hash-suffixed chunks — paths
303
+ // where the old narrow DIST_FILE_RE missed the bundle files entirely. One-notch
304
+ // downgrade on a broader set of bundle paths is enough to bring FP clusters under
305
+ // the webhook threshold without compromising true positive detection.
290
306
  ]);
291
307
 
292
308
  // Types exempt from reachability downgrade — IOC matches, lifecycle, and package-level types.
@@ -644,8 +660,29 @@ function applyFPReductions(threats, reachableFiles, packageName, packageDeps) {
644
660
  // Bundler artifact types (eval, dynamic_require, obfuscation) get two-notch downgrade
645
661
  // (CRITICAL→MEDIUM, HIGH→LOW) since bundlers routinely produce these patterns.
646
662
  // Other non-exempt types keep one-notch downgrade.
647
- if (t.file && !DIST_EXEMPT_TYPES.has(t.type) && DIST_FILE_RE.test(t.file)) {
648
- if (DIST_BUNDLER_ARTIFACT_TYPES.has(t.type)) {
663
+ //
664
+ // v2.10.73 P1: two changes to this gate:
665
+ // (a) Match either the narrow legacy DIST_FILE_RE OR the extended BUNDLE_PATH_RE
666
+ // from src/shared/bundle-detect.js (which adds .umd.js/.esm.js/.common.js/
667
+ // hash-chunks/fesm*/browser/assets/chunks/_app). Rationale: the narrow regex
668
+ // missed babylonjs/electron/@testim/@vanwei-wcs/etc. bundle files.
669
+ // (b) Before applying the downgrade, call hasBundleVetoSignal() — if the same
670
+ // file has a threat of type {staged_binary_payload, fetch_decrypt_exec,
671
+ // reverse_shell, node_modules_write, ...} OR an env_access on a sensitive env
672
+ // var (NPM_TOKEN, AWS_*, SSH_*, ...), BLOCK the downgrade. This preserves
673
+ // detection of event-stream / flatmap-stream style injections where malware
674
+ // is packed inside a legitimate-looking bundle.
675
+ const isBundleFile = t.file && (DIST_FILE_RE.test(t.file) || BUNDLE_PATH_RE.test(t.file));
676
+ if (isBundleFile && !DIST_EXEMPT_TYPES.has(t.type)) {
677
+ // Veto check: don't downgrade if the bundle is suspected of injection
678
+ if (hasBundleVetoSignal(threats, t.file)) {
679
+ // Leave the threat at its original severity — the bundle contains a
680
+ // suspicious co-occurring signal (staged payload, credential env read,
681
+ // reverse shell, etc.) so all threats on this file stay un-downgraded.
682
+ // Record it in reductions for audit trail.
683
+ if (!t.reductions) t.reductions = [];
684
+ t.reductions.push({ rule: 'bundle_veto_preserved', from: t.severity, to: t.severity });
685
+ } else if (DIST_BUNDLER_ARTIFACT_TYPES.has(t.type)) {
649
686
  // Two-notch downgrade for bundler artifacts
650
687
  const fromSev = t.severity;
651
688
  if (t.severity === 'CRITICAL') t.severity = 'MEDIUM';
@@ -789,8 +826,15 @@ function calculateRiskScore(deduped, intentResult) {
789
826
  // 1. Separate deduped threats into package-level and file-level
790
827
  const packageLevelThreats = [];
791
828
  const fileLevelThreats = [];
829
+ // v2.10.73 P3: Degraded quick-scan threats get a separate bucket so they
830
+ // contribute a bounded amount to the package score but never inflate max_file_score.
831
+ // Exception: CRITICAL degraded threats (Module._load pattern) pass through normal
832
+ // file-level processing — they are rare and nearly always malicious.
833
+ const degradedNonCriticalThreats = [];
792
834
  for (const t of deduped) {
793
- if (isPackageLevelThreat(t)) {
835
+ if (t.degraded === true && t.severity !== 'CRITICAL') {
836
+ degradedNonCriticalThreats.push(t);
837
+ } else if (isPackageLevelThreat(t)) {
794
838
  packageLevelThreats.push(t);
795
839
  } else {
796
840
  fileLevelThreats.push(t);
@@ -873,8 +917,21 @@ function calculateRiskScore(deduped, intentResult) {
873
917
  intentBonus = Math.min(intentResult.intentScore, 30);
874
918
  }
875
919
 
876
- // 7. Final score = max file score + cross-file bonus + intent bonus + package-level score + lifecycle boost, capped at 100
877
- let riskScore = Math.min(MAX_RISK_SCORE, maxFileScore + crossFileBonus + intentBonus + packageScore + lifecycleBoost);
920
+ // 6b. v2.10.73 P3: Degraded (quick-scan) non-CRITICAL threats contribute a
921
+ // bounded bonus to the final score: they are visible in the report but never
922
+ // inflate max_file_score. Cap at 15 (= 5 MEDIUM threats OR 1 HIGH + small).
923
+ // Rationale: quick-scan is regex-only, cannot distinguish top-level from
924
+ // exported function scope, so detections are low-confidence by construction.
925
+ let degradedScore = 0;
926
+ if (degradedNonCriticalThreats.length > 0) {
927
+ for (const t of degradedNonCriticalThreats) {
928
+ degradedScore += _severityWeights[t.severity] || 0;
929
+ }
930
+ degradedScore = Math.min(15, degradedScore);
931
+ }
932
+
933
+ // 7. Final score = max file score + cross-file bonus + intent bonus + package-level score + lifecycle boost + degraded bucket, capped at 100
934
+ let riskScore = Math.min(MAX_RISK_SCORE, maxFileScore + crossFileBonus + intentBonus + packageScore + lifecycleBoost + degradedScore);
878
935
 
879
936
  // 7b. MT-1: Score ceiling for packages without lifecycle scripts.
880
937
  // 56% of real malware uses install scripts. Packages without lifecycle that score high
@@ -0,0 +1,176 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Bundle file detection helpers — v2.10.73 P1 (FP cluster fix).
5
+ *
6
+ * Audit forensique v2.10.72 (2026-04-11, 78 packages deep-reviewed) a révélé
7
+ * que les 14 packages babylonjs/electron/@kitware/vtk.js/@stencil/core/playwright/
8
+ * @testim/testim-cli/@vanwei-wcs/video-player-v2/@bookolosystem/engine/@epie/bi-crud/etc.
9
+ * scoraient ≥50 parce que les rules AST/dataflow/obfuscation tiraient sur des
10
+ * helpers bundler standards (__webpack_require__, Function("return this")(),
11
+ * var __copyProps, .replace chains, prototype pollution for framework reactivity).
12
+ *
13
+ * Fix :
14
+ * 1. Regex étendue `BUNDLE_PATH_RE` couvrant les patterns manquants :
15
+ * .umd.js, .esm.js, .es.js, .common.js, .max.js, hash-suffixed chunks,
16
+ * fesm*, browser/, assets/, chunks/.
17
+ * 2. Liste de veto `VETO_TYPES` — types qui indiquent une injection malveillante
18
+ * dans un bundle (reverse_shell, node_modules_write, ioc_match, etc.). Si un
19
+ * threat veto est présent dans le même fichier, le bundle downgrade est
20
+ * annulé — bundle suspecté d'injection (event-stream style).
21
+ * 3. Liste `SENSITIVE_ENV_RE` — noms d'env vars sensibles. Un env_access sur
22
+ * un de ces noms dans un bundle annule aussi le downgrade (credential theft).
23
+ *
24
+ * Architecture : pas de lecture de contenu fichier ni de cache — la détection
25
+ * se fait purement sur le path et sur les types de threats co-occurring dans le
26
+ * même fichier. Pour la v2.10.74, un `isStructuralBundle()` avec lecture de
27
+ * signatures (`__webpack_require__`, `sourceMappingURL=`) pourrait être ajouté
28
+ * si les tests FPR montrent qu'il reste des FPs sur des bundles non-nommés.
29
+ */
30
+
31
+ // Extended bundle path/basename regex (replaces the narrow DIST_FILE_RE).
32
+ // Covers the audit findings: babylonjs, electron, @kitware/vtk.js, dprint,
33
+ // @jetbrains/junie, @zuplo/core, @stencil/core, playwright, @equinor/*,
34
+ // @alipay/*, @testim/testim-cli, @vanwei-wcs/video-player-v2, @bookolosystem/engine,
35
+ // @epie/bi-crud, @fairyhunter13/opentui-core, rsshub.
36
+ //
37
+ // Pattern groups:
38
+ // - Directory prefixes (dist/, build/, out/, output/, lib/bundled/, browser/,
39
+ // fesm*/, esm/, esm5/, esm2015/, esm2020/, bundles/, assets/, chunks/, _app/)
40
+ // - Basename suffixes (.min.js, .bundle.js, .umd.js, .esm.js, .es.js,
41
+ // .common.js, .max.js, .prod.js, .production.js, + .cjs / .mjs variants)
42
+ // - Double-extension bundler outputs (index.cjs.js, index.esm.js, index.umd.js
43
+ // at package root — common pattern for @equinor/*, tsdx/rollup bundled libs)
44
+ // - Hash-suffixed chunks (esbuild/vite/rollup/webpack convention):
45
+ // `basename-[a-f0-9]{6,16}.js|mjs|cjs`
46
+ // - Tool-specific subdirectories that contain vendored bundles (v2.10.75):
47
+ // * `lib/[name]Bundle*/` — Playwright-style `lib/utilsBundleImpl/`
48
+ // * `.yarn/releases/` — vendored yarn/pnpm releases shipped in template packages
49
+ // * `sys/(node|browser|deno)/` — Stencil-style platform-specific bundle
50
+ // * `compiled/` — SWC/Stencil compiled output
51
+ // * `typings/` — NOT covered: the regex below has no `typings` alternative
52
// Case-insensitive matcher for paths that look like bundler output.
// The pattern is assembled from independent alternatives joined with `|`;
// the resulting source is identical to a single hand-concatenated pattern.
const BUNDLE_PATH_RE = (() => {
  // A path separator in either POSIX or Windows form.
  const sep = String.raw`[/\\]`;
  // Start of a path segment: beginning of the string or just after a separator.
  const segStart = String.raw`(?:^|${sep})`;
  // Extensions accepted for JavaScript output files.
  const jsExt = String.raw`\.(?:m?js|cjs)$`;

  const alternatives = [
    // Directories that almost always contain bundled output.
    String.raw`${segStart}(?:dist|build|out|output|browser|bundles|assets|chunks|_app|compiled|lib${sep}bundled|fesm\d*|esm|esm5|esm2015|esm2020)${sep}`,
    // Playwright-style lib/.../xxxBundle*/ directory, or flat lib/.../xxxBundle*.js
    // file, at any depth under lib/.
    String.raw`${segStart}lib${sep}[^\n]*[Bb]undle[\w-]*(?:${sep}|${jsExt})`,
    // Vendored yarn / pnpm releases.
    String.raw`${segStart}\.yarn${sep}releases${sep}`,
    String.raw`${segStart}\.pnpm${sep}(?:releases|dist)${sep}`,
    // Stencil-style sys/(node|browser|deno)/ platform bundles.
    String.raw`${segStart}sys${sep}(?:node|browser|deno)${sep}`,
    // Basename suffixes such as .min.js, .bundle.mjs, .umd.cjs.
    String.raw`\.(?:min|bundle|umd|esm|es|cjs|common|max|prod|production|iife)${jsExt}`,
    // Double-extension bundler outputs: index.cjs.js, index.esm.js, ...
    String.raw`${segStart}[\w-]+\.(?:cjs|esm|umd|es|iife|min)\.js$`,
    // Hash-suffixed chunks: name-<6..16 hex chars>.js / .mjs / .cjs
    String.raw`${segStart}[\w-]+[-.][a-f0-9]{6,16}${jsExt}`
  ];

  return new RegExp(alternatives.join('|'), 'i');
})();
77
+
78
+ // Threat types that, when present on the same file as a bundle downgrade
79
+ // candidate, VETO the downgrade entirely — the bundle is suspected of
80
+ // malicious injection or active C2/persistence.
81
+ //
82
+ // IMPORTANT: types that feed existing compound rules are INTENTIONALLY NOT listed
83
+ // here. The scoring pipeline already has a mechanism to recover downgraded signals
84
+ // via `applyCompoundBoosts` + `originalSeverity` gates (see src/scoring.js:462 and
85
+ // compound gate at line 410). Types like `staged_binary_payload`, `crypto_decipher`,
86
+ // `fetch_decrypt_exec`, `zlib_inflate_eval` ARE downgraded in bundles but their
87
+ // `originalSeverity` is preserved so compound rules (crypto_staged_payload, etc.)
88
+ // can still fire. Adding them to VETO_TYPES would break the existing v2.9.6 test
89
+ // suite (compound-scoring.test.js:305 and similar) without adding value.
90
+ //
91
+ // This VETO list is limited to patterns that :
92
+ // 1. Have no compound fallback (rare patterns not yet wired into a compound)
93
+ // 2. Indicate active C2, persistence, or worm propagation (structurally unique to
94
+ // malware — a legit bundler never produces `reverse_shell` or `node_modules_write`)
95
+ // 3. Are IOC hits (highest confidence, never downgraded regardless of context)
96
// Threat types that cancel a bundle downgrade when found on the same file.
// Grouped by category; the Set keeps the insertion order shown here.
const VETO_TYPES = (() => {
  // Active C2 / backdoor / worm propagation / persistence.
  const activeCompromise = [
    'reverse_shell',
    'node_modules_write', // worm propagation (Shai-Hulud style)
    'npm_publish_worm',
    'npm_token_steal',
    'systemd_persistence'
  ];
  // Unicode steganography (GlassWorm).
  const steganography = ['unicode_invisible_injection'];
  // IOC hits.
  const iocHits = ['ioc_match', 'known_malicious_package', 'shai_hulud_marker'];

  return new Set([...activeCompromise, ...steganography, ...iocHits]);
})();
110
+
111
+ // Sensitive environment variable patterns. An `env_access` threat whose
112
+ // `message` contains any of these, present on the same file as a bundle
113
+ // downgrade candidate, VETOs the downgrade — the bundle reads credentials.
114
+ // NODE_ENV, NODE_OPTIONS, PATH, HOME, SHELL, CI, DEBUG etc. are NOT included
115
+ // (they are read by bundler output for legit reasons like runtime detection).
116
// Case-sensitive, word-bounded matcher for credential-like environment
// variable names. Alternatives are listed in match order and joined with `|`.
const SENSITIVE_ENV_RE = (() => {
  const alternatives = [
    // npm
    'NPM_TOKEN', 'NPM_CONFIG_AUTHTOKEN', 'NPMRC',
    // AWS
    'AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'AWS_SESSION_TOKEN',
    // SSH
    'SSH_PRIVATE_KEY', 'SSH_KEY', 'SSH_AUTH_SOCK',
    // Git hosting
    'GITHUB_TOKEN', 'GH_TOKEN', 'GITLAB_TOKEN',
    // Google Cloud
    'GCP_[A-Z_]+', 'GOOGLE_APPLICATION_CREDENTIALS',
    // Azure
    'AZURE_[A-Z_]+', 'AZURE_CLIENT_SECRET',
    // Stripe
    'STRIPE_SECRET_KEY', 'STRIPE_LIVE',
    // Catch-all suffix patterns
    '[A-Z][A-Z0-9_]*_SECRET', '[A-Z][A-Z0-9_]*_PRIVATE_KEY',
    '[A-Z][A-Z0-9_]*_API_KEY', '[A-Z][A-Z0-9_]*_AUTH_TOKEN'
  ];

  return new RegExp(`\\b(${alternatives.join('|')})\\b`);
})();
130
+
131
/**
 * Tell whether a file path looks like bundler output according to
 * `BUNDLE_PATH_RE`.
 *
 * @param {string} filePath - relative or absolute file path
 * @returns {boolean} `true` when the path matches the bundle heuristics;
 *   `false` for empty or non-string input.
 */
function isBundlePath(filePath) {
  const isUsablePath = typeof filePath === 'string' && filePath.length > 0;
  return isUsablePath ? BUNDLE_PATH_RE.test(filePath) : false;
}
140
+
141
/**
 * Decide whether the bundle-downgrade gate must be cancelled for `targetFile`.
 *
 * A veto is raised when a threat reported on exactly `targetFile` (strict
 * equality on `threat.file`) and whose severity is not `'LOW'` either:
 *   - has a `type` listed in `VETO_TYPES`, or
 *   - is an `env_access` threat whose `message` matches `SENSITIVE_ENV_RE`.
 *
 * @param {Array} threats - full threats array (all scanners combined);
 *   `null` / `undefined` entries are skipped
 * @param {string} targetFile - the file path being evaluated for downgrade
 * @returns {boolean} `true` if a veto signal is found; `false` otherwise,
 *   including when `threats` is not an array or `targetFile` is empty
 */
function hasBundleVetoSignal(threats, targetFile) {
  if (!Array.isArray(threats) || !targetFile) return false;

  return threats.some((threat) => {
    // Skip missing entries instead of throwing a TypeError on `.file`, so one
    // malformed scanner result cannot abort the whole veto check.
    if (threat == null) return false;
    if (threat.file !== targetFile) return false;

    // A LOW severity threat never blocks the bundle downgrade of unrelated
    // co-occurring threats (e.g. a locale file whose
    // `unicode_invisible_injection` was already downgraded to LOW).
    if (threat.severity === 'LOW') return false;

    if (VETO_TYPES.has(threat.type)) return true;

    return (
      threat.type === 'env_access' &&
      Boolean(threat.message) &&
      SENSITIVE_ENV_RE.test(threat.message)
    );
  });
}
169
+
170
+ module.exports = {
171
+ BUNDLE_PATH_RE,
172
+ VETO_TYPES,
173
+ SENSITIVE_ENV_RE,
174
+ isBundlePath,
175
+ hasBundleVetoSignal
176
+ };