npm - muaddib-scanner - Versions diffs - 2.10.72 → 2.10.77 - Mend

muaddib-scanner 2.10.72 → 2.10.77

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/README.md +3 -3
package/package.json +1 -1
package/src/monitor/classify.js +14 -4
package/src/monitor/deferred-sandbox.js +16 -6
package/src/pipeline/executor.js +14 -5
package/src/scanner/ast-detectors/handle-call-expression.js +39 -12
package/src/scanner/ast-detectors/handle-variable-declarator.js +54 -0
package/src/scanner/ast.js +5 -0
package/src/scanner/obfuscation.js +47 -1
package/src/scoring.js +63 -6
package/src/shared/bundle-detect.js +176 -0

package/README.md CHANGED Viewed

@@ -292,7 +292,7 @@ repos:
 | **FPR** (Benign random) | **7.5%** (15/200) | 200 random npm packages, stratified sampling |
 | **ADR** (Adversarial + Holdout) | **96.3%** (103/107) | 67 adversarial + 40 holdout (107 available on disk), global threshold=20 |
-**3068 tests** across 66 files. **200 rules** (195 RULES + 5 PARANOID).
+**3134 tests** across 66 files. **200 rules** (195 RULES + 5 PARANOID).
 > **ML retrain methodology (v2.10.51):**
 > - Ground truth: 377 confirmed_malicious via auto-labeler (OSSF malicious-packages, GitHub Advisory Database, npm registry takedown correlation)
@@ -340,10 +340,10 @@ npm test
 ### Testing
-- **3068 tests** across 66 modular test files
+- **3134 tests** across 66 modular test files
 - **56 fuzz tests** - Malformed inputs, ReDoS, unicode, binary
 - **Datadog 17K benchmark** - 14,587 confirmed malware samples (in-scope)
-- **Ground truth validation** - 66 real-world attacks (93.75% TPR@3, 85.9% TPR@20)
+- **Ground truth validation** - 67 real-world attacks (93.75% TPR@3, 85.9% TPR@20)
 - **False positive validation** - 14.0% FPR rules, 8.3% after ML on 532 curated npm packages, 7.5% on 200 random
 ---

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "muaddib-scanner",
-  "version": "2.10.72",
+  "version": "2.10.77",
   "description": "Supply-chain threat detection & response for npm & PyPI/Python",
   "main": "src/index.js",
   "bin": {

package/src/monitor/classify.js CHANGED Viewed

@@ -186,11 +186,21 @@ function isSuspectClassification(result) {
   // sandbox queue, starving legitimate T1b/T2 candidates of the dedicated
   // deferred slot.
   //
-  // A sandbox slot is only justified when there is real signal. Require at
-  // least one non-LOW finding to reach tier 2 via this fallback — otherwise
-  // downgrade to tier 3 (log only, no sandbox consumption).
+  // Threat model for this downgrade: an adversary reading the open-source
+  // rules can intentionally tune their malware to fire only LOW-severity
+  // patterns + 2 distinct non-T3 types to land in this fallback. If we
+  // downgrade ALL such cases to tier 3, a weak TIER1_TYPES match (e.g.,
+  // staged_payload at LOW, sandbox_evasion at LOW) would bypass sandbox
+  // verification entirely — TIER1_TYPES are "quasi-never legitimate" and
+  // weak matches still warrant dynamic inspection.
+  //
+  // Therefore: preserve tier 2 when EITHER (a) any finding is non-LOW
+  // severity OR (b) any finding is in TIER1_TYPES even at LOW severity.
+  // Downgrade to tier 3 only for packages with 2+ distinct LOW findings
+  // where NONE are in the quasi-never-legit TIER1 zone.
   const hasNonLowFinding = result.threats.some(t => t.severity !== 'LOW');
-  if (hasNonLowFinding) {
+  const hasTier1Signal = result.threats.some(t => TIER1_TYPES.has(t.type));
+  if (hasNonLowFinding || hasTier1Signal) {
     return { suspect: true, tier: 2 };
   }
   return { suspect: true, tier: 3 };

package/src/monitor/deferred-sandbox.js CHANGED Viewed

@@ -15,7 +15,7 @@
 const fs = require('fs');
 const path = require('path');
 const { runSandbox } = require('../sandbox/index.js');
-const { isCanaryEnabled } = require('./classify.js');
+const { isCanaryEnabled, TIER1_TYPES } = require('./classify.js');
 const { getWebhookUrl, alertedPackageRules, persistAlert, buildAlertData } = require('./webhook.js');
 const { sendWebhook } = require('../webhook.js');
 const { atomicWriteFileSync } = require('./state.js');
@@ -59,11 +59,21 @@ function enqueueDeferred(item) {
   // Defense-in-depth: block low-score items regardless of tier. With the
   // classify.js:183 fallback fix in place, no legitimate enqueue should
-  // reach this function with score < DEFERRED_MIN_SCORE. Logging with
-  // console.error makes a future regression (new classification path that
-  // leaks low-score items) loud in operator logs.
-  if ((item.riskScore || 0) < DEFERRED_MIN_SCORE) {
-    console.error(`[DEFERRED] REJECTED: ${item.name}@${item.version} — score=${item.riskScore || 0} below minimum ${DEFERRED_MIN_SCORE} (possible classification regression)`);
+  // reach this function with score < DEFERRED_MIN_SCORE unless it carries
+  // a TIER1_TYPES signal. Logging with console.error makes a future
+  // regression (new classification path that leaks low-score items) loud
+  // in operator logs.
+  //
+  // Threat-model exception: packages containing any TIER1_TYPES finding
+  // (even at LOW severity) must bypass this min-score guard. TIER1_TYPES
+  // are "quasi-never legitimate in benign packages" and weak matches
+  // still warrant sandbox verification — an adversary could otherwise
+  // tune their malware to fire only LOW-severity TIER1 patterns to
+  // bypass sandbox entirely.
+  const itemThreats = (item.staticResult && item.staticResult.threats) || [];
+  const hasTier1Signal = itemThreats.some(t => TIER1_TYPES.has(t.type));
+  if ((item.riskScore || 0) < DEFERRED_MIN_SCORE && !hasTier1Signal) {
+    console.error(`[DEFERRED] REJECTED: ${item.name}@${item.version} — score=${item.riskScore || 0} below minimum ${DEFERRED_MIN_SCORE}, no TIER1 signal (possible classification regression)`);
     return false;
   }

package/src/pipeline/executor.js CHANGED Viewed

@@ -232,11 +232,18 @@ async function execute(targetPath, options, pythonDeps, warnings) {
   if (wasFilesCapped()) {
     warnings.push('File count cap reached (500 files) — overflow files scanned in quick-scan mode (lifecycle + child_process only).');
     const overflowFiles = getOverflowFiles();
+    // v2.10.73 P3: Quick-scan is a DEGRADED regex-based pass — no AST, no scope
+    // tracking. It cannot distinguish exec() at module top-level (CRITICAL) from
+    // exec() inside an exported route handler (LOW runtime). Forensic audit of v2.10.72:
+    // 18+ AST-007 fires on rsshub/dist-lib/*.mjs, where spawn() lives inside exported
+    // route handlers. Default severity is now MEDIUM (downgraded from HIGH). Module._load
+    // remains CRITICAL — very rare outside of malware. Threats are flagged `degraded:true`
+    // so scoring.js excludes them from max_file_score (see applyFPReductions).
     const QUICK_SCAN_PATTERNS = [
-      { re: /\brequire\s*\(\s*['"]child_process['"]\s*\)/, type: 'dangerous_exec', severity: 'HIGH', label: 'require("child_process")' },
-      { re: /\brequire\s*\(\s*['"]node:child_process['"]\s*\)/, type: 'dangerous_exec', severity: 'HIGH', label: 'require("node:child_process")' },
-      { re: /\b(?:exec|execSync|spawn|spawnSync)\s*\(/, type: 'dangerous_exec', severity: 'HIGH', label: 'exec/spawn call' },
-      { re: /\bprocess\.mainModule\b/, type: 'dynamic_require', severity: 'HIGH', label: 'process.mainModule' },
+      { re: /\brequire\s*\(\s*['"]child_process['"]\s*\)/, type: 'dangerous_exec', severity: 'MEDIUM', label: 'require("child_process")' },
+      { re: /\brequire\s*\(\s*['"]node:child_process['"]\s*\)/, type: 'dangerous_exec', severity: 'MEDIUM', label: 'require("node:child_process")' },
+      { re: /\b(?:exec|execSync|spawn|spawnSync)\s*\(/, type: 'dangerous_exec', severity: 'MEDIUM', label: 'exec/spawn call' },
+      { re: /\bprocess\.mainModule\b/, type: 'dynamic_require', severity: 'MEDIUM', label: 'process.mainModule' },
       { re: /\bModule\._load\b/, type: 'module_load_bypass', severity: 'CRITICAL', label: 'Module._load' }
     ];
     for (const filePath of overflowFiles) {
@@ -251,7 +258,9 @@ async function execute(targetPath, options, pythonDeps, warnings) {
               type: pat.type,
               severity: pat.severity,
               message: `[quick-scan] ${pat.label} detected in overflow file.`,
-              file: relFile
+              file: relFile,
+              degraded: true,  // P3: regex-only detection, no semantic context
+              quickScan: true
             });
           }
         }

package/src/scanner/ast-detectors/handle-call-expression.js CHANGED Viewed

@@ -89,6 +89,15 @@ function handleCallExpression(node, ctx) {
       // Check if variable was reassignment-tracked to a dangerous module
       const DANGEROUS_MODS_REQ = ['child_process', 'fs', 'net', 'dns', 'http', 'https', 'tls'];
       const resolvedVal = ctx.stringVarValues?.get(arg.name);
+      // v2.10.73 P2: source-aware severity (AST-006 plugin loader FP fix)
+      // Distinguishes plugin loaders (LOW) from obfuscation (HIGH) from env exfil (CRITICAL).
+      // See src/scanner/ast-detectors/handle-variable-declarator.js ctx.varSource tracking.
+      const varSource = ctx.varSource?.get(arg.name) || null;
+      const isStaticSource =
+        varSource === 'string_literal' || varSource === 'array_literal' ||
+        varSource === 'object_literal' || varSource === 'fs_readdir' ||
+        varSource === 'require_json';
+      const isCriticalSource = varSource === 'env_var';
       if (resolvedVal) {
         const norm = resolvedVal.startsWith('node:') ? resolvedVal.slice(5) : resolvedVal;
         if (DANGEROUS_MODS_REQ.includes(norm)) {
@@ -98,28 +107,46 @@ function handleCallExpression(node, ctx) {
             file: ctx.relFile
           });
         } else {
-          // If the variable was assigned from a static value (string literal,
-          // array of strings, object with string values), it's a plugin loader pattern
-          const severity = ctx.staticAssignments.has(arg.name) ? 'LOW' : 'HIGH';
+          // Plugin loader qualification:
+          //  - string_literal/array_literal/object_literal/fs_readdir/require_json → LOW (legit plugin loader)
+          //  - env_var → CRITICAL (require(process.env.X) = credential/path exfil vector)
+          //  - fallback to staticAssignments for legacy coverage
+          //  - else → HIGH (real obfuscation candidate)
+          let severity, message;
+          if (isCriticalSource) {
+            severity = 'CRITICAL';
+            message = `Dynamic require() with variable "${arg.name}" sourced from process.env — environment-driven module loading (credential or path exfil vector).`;
+          } else if (isStaticSource || ctx.staticAssignments.has(arg.name)) {
+            severity = 'LOW';
+            message = `Dynamic require() with statically-assigned variable "${arg.name}" (plugin loader pattern, source: ${varSource || 'static-value'}).`;
+          } else {
+            severity = 'HIGH';
+            message = 'Dynamic require() with variable argument (module name obfuscation).';
+          }
           ctx.threats.push({
             type: 'dynamic_require',
             severity,
-            message: severity === 'LOW'
-              ? `Dynamic require() with statically-assigned variable "${arg.name}" (plugin loader pattern).`
-              : 'Dynamic require() with variable argument (module name obfuscation).',
+            message,
             file: ctx.relFile
           });
         }
       } else {
-        // If the variable was assigned from a static value (string literal,
-        // array of strings, object with string values), it's a plugin loader pattern
-        const severity = ctx.staticAssignments.has(arg.name) ? 'LOW' : 'HIGH';
+        // Same qualification flow without resolvedVal context
+        let severity, message;
+        if (isCriticalSource) {
+          severity = 'CRITICAL';
+          message = `Dynamic require() with variable "${arg.name}" sourced from process.env — environment-driven module loading (credential or path exfil vector).`;
+        } else if (isStaticSource || ctx.staticAssignments.has(arg.name)) {
+          severity = 'LOW';
+          message = `Dynamic require() with statically-assigned variable "${arg.name}" (plugin loader pattern, source: ${varSource || 'static-value'}).`;
+        } else {
+          severity = 'HIGH';
+          message = 'Dynamic require() with variable argument (module name obfuscation).';
+        }
         ctx.threats.push({
           type: 'dynamic_require',
           severity,
-          message: severity === 'LOW'
-            ? `Dynamic require() with statically-assigned variable "${arg.name}" (plugin loader pattern).`
-            : 'Dynamic require() with variable argument (module name obfuscation).',
+          message,
           file: ctx.relFile
         });
       }

package/src/scanner/ast-detectors/handle-variable-declarator.js CHANGED Viewed

@@ -24,6 +24,60 @@ function handleVariableDeclarator(node, ctx) {
       ctx.staticAssignments.add(node.id.name);
     }
+    // v2.10.73 P2: Track WHERE the variable's value originated — used by AST-006
+    // to distinguish plugin loaders (LOW) from real obfuscation (HIGH) from
+    // credential exfil vectors (CRITICAL). See src/scanner/ast-detectors/handle-call-expression.js
+    // around line 103 for consumption.
+    if (ctx.varSource && node.init) {
+      const init = node.init;
+      let source = null;
+      if (init.type === 'Literal' && typeof init.value === 'string') {
+        source = 'string_literal';
+      } else if (init.type === 'TemplateLiteral' && (init.expressions?.length || 0) === 0) {
+        source = 'string_literal'; // template with no interpolations is effectively a literal
+      } else if (init.type === 'ArrayExpression') {
+        source = 'array_literal';
+      } else if (init.type === 'ObjectExpression') {
+        source = 'object_literal';
+      } else if (init.type === 'MemberExpression' &&
+                 init.object?.type === 'MemberExpression' &&
+                 init.object.object?.type === 'Identifier' &&
+                 init.object.object.name === 'process' &&
+                 init.object.property?.type === 'Identifier' &&
+                 init.object.property.name === 'env') {
+        source = 'env_var'; // const m = process.env.MODULE_NAME
+      } else if (init.type === 'CallExpression') {
+        const callee = init.callee;
+        // fs.readdirSync / fs.readdir / fs.promises.readdir — directory listings
+        // are not attacker-controllable unless the dir itself is, which is rare.
+        if (callee?.type === 'MemberExpression') {
+          const propName = callee.property?.type === 'Identifier' ? callee.property.name : null;
+          const objName = callee.object?.type === 'Identifier' ? callee.object.name : null;
+          const objPropName = callee.object?.type === 'MemberExpression' &&
+                              callee.object.property?.type === 'Identifier'
+                              ? callee.object.property.name : null;
+          if (objName === 'fs' && propName && /^readdir/.test(propName)) {
+            source = 'fs_readdir';
+          } else if (objPropName === 'promises' && propName === 'readdir') {
+            source = 'fs_readdir'; // fs.promises.readdir
+          }
+        }
+        // require('./config.json') or require('./cfg.json') — loading a local JSON
+        // config is a legit plugin loader pattern (consumer-owned JSON file).
+        if (!source &&
+            callee?.type === 'Identifier' && callee.name === 'require' &&
+            init.arguments?.[0]?.type === 'Literal' &&
+            typeof init.arguments[0].value === 'string' &&
+            /\.json$/.test(init.arguments[0].value)) {
+          source = 'require_json';
+        }
+        if (!source) source = 'function_call';
+      } else {
+        source = 'computed_expression';
+      }
+      ctx.varSource.set(node.id.name, source);
+    }
     // Track dynamic require vars + module aliases
     if (node.init?.type === 'CallExpression') {
       const initCallName = getCallName(node.init);

package/src/scanner/ast.js CHANGED Viewed

@@ -110,6 +110,11 @@ function analyzeFile(content, filePath, basePath) {
     relFile: path.relative(basePath, filePath),
     dynamicRequireVars: new Set(),
     staticAssignments: new Set(),
+    // v2.10.73 P2: AST-006 source qualification — tracks WHERE a variable's value came from.
+    // Used by dynamic_require to distinguish plugin loaders (LOW: string_literal/array_literal/
+    // object_literal/fs_readdir/require_json) from real obfuscation (HIGH: function_call/
+    // computed_expression) or credential theft vectors (CRITICAL: env_var).
+    varSource: new Map(),
     dangerousCmdVars: new Map(),
     workflowPathVars: new Set(),
     execPathVars: new Map(),

package/src/scanner/obfuscation.js CHANGED Viewed

@@ -1,19 +1,61 @@
 const fs = require('fs');
 const path = require('path');
-const { findFiles, forEachSafeFile } = require('../utils.js');
+const { findFiles, forEachSafeFile, debugLog } = require('../utils.js');
 // node_modules NOT excluded: detect obfuscated code in dependencies.
 // dist/build/out/output excluded: bundled output is always flagged as isPackageOutput (LOW)
 // and costs significant processing time on large SDKs.
 const OBF_EXCLUDED_DIRS = ['.git', '.muaddib-cache', 'dist', 'build', 'out', 'output'];
+// v2.10.73 P4: WASM/Emscripten artifact detection
+// These files are high-entropy by construction (compiled WebAssembly, asm.js bytecode
+// tables, Emscripten output). They produced 52+ ENTROPY/obfuscation FP fires in the
+// v2.10.72 audit (e.g. node_modules/mpg123-decoder/src/EmscriptenWasm.js inside
+// @leoqlin/openclaw-qqbot's bundled deps). Skipped from obfuscation detection only —
+// other scanners (AST, dataflow, hash, IOC) still analyze them, so actual malware
+// hidden in a WASM file can still be caught through those channels.
+const WASM_BASENAME_RE = /(?:wasm|emscripten|dcmtk|ffmpeg-wasm|opus-decoder|mpg123-decoder|wasm-audio-decoders)/i;
+const WASM_CONTENT_MARKERS = [
+  'Module["asm"]',
+  'Module.asm',
+  'WebAssembly.instantiate',
+  'WebAssembly.compile',
+  '_emscripten_',
+  'asmLibraryArg',
+  'wasmMemory',
+  'wasmTable',
+  'HEAPU8',
+  'HEAP32',
+  'AGFzbQ' // base64 of WASM magic bytes \x00asm — very specific marker
+];
+function isWasmEmscriptenArtifact(filePath, content) {
+  const basename = path.basename(filePath);
+  if (WASM_BASENAME_RE.test(basename)) return true;
+  // Sample first 64KB to avoid scanning huge files fully (WASM blobs are often >1MB)
+  const sample = content.length > 65536 ? content.slice(0, 65536) : content;
+  for (const marker of WASM_CONTENT_MARKERS) {
+    if (sample.indexOf(marker) !== -1) return true;
+  }
+  return false;
+}
 function detectObfuscation(targetPath) {
   const threats = [];
+  let wasmSkipped = 0;
   const files = findFiles(targetPath, { extensions: ['.js', '.mjs', '.cjs'], excludedDirs: OBF_EXCLUDED_DIRS });
   forEachSafeFile(files, (file, content) => {
     const relativePath = path.relative(targetPath, file);
+    // v2.10.73 P4: Skip WASM/Emscripten artifacts — high-entropy by construction,
+    // produced 52+ FP fires in v2.10.72 audit (mpg123-decoder in @leoqlin/openclaw-qqbot).
+    // Other scanners still analyze these files — this only filters obfuscation heuristics.
+    if (isWasmEmscriptenArtifact(file, content)) {
+      wasmSkipped++;
+      return;
+    }
     const signals = [];
     let score = 0;
     const basename = path.basename(file);
@@ -103,6 +145,10 @@ function detectObfuscation(targetPath) {
     }
   });
+  if (wasmSkipped > 0) {
+    debugLog(`[obfuscation] skipped ${wasmSkipped} WASM/Emscripten artifact(s) — high-entropy by construction`);
+  }
   return threats;
 }

package/src/scoring.js CHANGED Viewed

@@ -1,5 +1,7 @@
 const { getRule } = require('./rules/index.js');
 const { HIGH_CONFIDENCE_MALICE_TYPES } = require('./monitor/classify.js');
+// v2.10.73 P1: bundle detection helpers — extended bundle path regex + veto check
+const { BUNDLE_PATH_RE, hasBundleVetoSignal } = require('./shared/bundle-detect.js');
 // ============================================
 // SCORING CONSTANTS
@@ -258,8 +260,13 @@ const DIST_EXEMPT_TYPES = new Set([
   // fetch_decrypt_exec (fetch+decrypt+eval triple) remains exempt — never coincidental.
 ]);
-// Regex matching dist/build/out/output/minified/bundled file paths
+// Regex matching dist/build/out/output/minified/bundled file paths.
 // P7: added out/ and output/ — common build output directories (esbuild, custom build scripts)
+// v2.10.73 P1: DIST_FILE_RE is kept as the narrow legacy regex for backwards compat
+// with existing call sites (other rules reference it). The EXTENDED bundle match is
+// done via BUNDLE_PATH_RE from src/shared/bundle-detect.js — used in the new gate below.
+// BUNDLE_PATH_RE covers: .umd.js, .esm.js, .es.js, .common.js, .max.js, hash chunks,
+// fesm*/, browser/, assets/, chunks/, _app/, lib/bundled/.
 const DIST_FILE_RE = /(?:^|[/\\])(?:dist|build|out|output)[/\\]|\.min\.js$|\.bundle\.js$/i;
 // Bundler artifact types: get two-notch downgrade in dist/ files (CRITICAL→MEDIUM, HIGH→LOW).
@@ -287,6 +294,15 @@ const DIST_BUNDLER_ARTIFACT_TYPES = new Set([
   // Audit v3 B3: staged_payload (fetch+eval) in dist/ is code splitting / lazy loading,
   // not malicious payload staging. fetch_decrypt_exec remains exempt (triple signal).
   'staged_payload'
+  // v2.10.73 P1: credential_regex_harvest, suspicious_dataflow, string_mutation_obfuscation
+  // are NOT added here (kept in the one-notch path) — existing scoring-hardening tests
+  // (FP-P7 etc.) require these to receive a single-notch downgrade to stay visible as
+  // MEDIUM in bundles. The real benefit for these types comes from the extended
+  // BUNDLE_PATH_RE (src/shared/bundle-detect.js) which now matches .umd/.esm/.es/.common/
+  // .max suffixes, fesm*/, browser/, assets/, chunks/, hash-suffixed chunks — paths
+  // where the old narrow DIST_FILE_RE missed the bundle files entirely. One-notch
+  // downgrade on a broader set of bundle paths is enough to bring FP clusters under
+  // the webhook threshold without compromising true positive detection.
 ]);
 // Types exempt from reachability downgrade — IOC matches, lifecycle, and package-level types.
@@ -644,8 +660,29 @@ function applyFPReductions(threats, reachableFiles, packageName, packageDeps) {
     // Bundler artifact types (eval, dynamic_require, obfuscation) get two-notch downgrade
     // (CRITICAL→MEDIUM, HIGH→LOW) since bundlers routinely produce these patterns.
     // Other non-exempt types keep one-notch downgrade.
-    if (t.file && !DIST_EXEMPT_TYPES.has(t.type) && DIST_FILE_RE.test(t.file)) {
-      if (DIST_BUNDLER_ARTIFACT_TYPES.has(t.type)) {
+    //
+    // v2.10.73 P1: two changes to this gate:
+    //  (a) Match either the narrow legacy DIST_FILE_RE OR the extended BUNDLE_PATH_RE
+    //      from src/shared/bundle-detect.js (which adds .umd.js/.esm.js/.common.js/
+    //      hash-chunks/fesm*/browser/assets/chunks/_app). Rationale: the narrow regex
+    //      missed babylonjs/electron/@testim/@vanwei-wcs/etc. bundle files.
+    //  (b) Before applying the downgrade, call hasBundleVetoSignal() — if the same
+    //      file has a threat of type {staged_binary_payload, fetch_decrypt_exec,
+    //      reverse_shell, node_modules_write, ...} OR an env_access on a sensitive env
+    //      var (NPM_TOKEN, AWS_*, SSH_*, ...), BLOCK the downgrade. This preserves
+    //      detection of event-stream / flatmap-stream style injections where malware
+    //      is packed inside a legitimate-looking bundle.
+    const isBundleFile = t.file && (DIST_FILE_RE.test(t.file) || BUNDLE_PATH_RE.test(t.file));
+    if (isBundleFile && !DIST_EXEMPT_TYPES.has(t.type)) {
+      // Veto check: don't downgrade if the bundle is suspected of injection
+      if (hasBundleVetoSignal(threats, t.file)) {
+        // Leave the threat at its original severity — the bundle contains a
+        // suspicious co-occurring signal (staged payload, credential env read,
+        // reverse shell, etc.) so all threats on this file stay un-downgraded.
+        // Record it in reductions for audit trail.
+        if (!t.reductions) t.reductions = [];
+        t.reductions.push({ rule: 'bundle_veto_preserved', from: t.severity, to: t.severity });
+      } else if (DIST_BUNDLER_ARTIFACT_TYPES.has(t.type)) {
         // Two-notch downgrade for bundler artifacts
         const fromSev = t.severity;
         if (t.severity === 'CRITICAL') t.severity = 'MEDIUM';
@@ -789,8 +826,15 @@ function calculateRiskScore(deduped, intentResult) {
   // 1. Separate deduped threats into package-level and file-level
   const packageLevelThreats = [];
   const fileLevelThreats = [];
+  // v2.10.73 P3: Degraded quick-scan threats get a separate bucket so they
+  // contribute a bounded amount to the package score but never inflate max_file_score.
+  // Exception: CRITICAL degraded threats (Module._load pattern) pass through normal
+  // file-level processing — they are rare and nearly always malicious.
+  const degradedNonCriticalThreats = [];
   for (const t of deduped) {
-    if (isPackageLevelThreat(t)) {
+    if (t.degraded === true && t.severity !== 'CRITICAL') {
+      degradedNonCriticalThreats.push(t);
+    } else if (isPackageLevelThreat(t)) {
       packageLevelThreats.push(t);
     } else {
       fileLevelThreats.push(t);
@@ -873,8 +917,21 @@ function calculateRiskScore(deduped, intentResult) {
     intentBonus = Math.min(intentResult.intentScore, 30);
   }
-  // 7. Final score = max file score + cross-file bonus + intent bonus + package-level score + lifecycle boost, capped at 100
-  let riskScore = Math.min(MAX_RISK_SCORE, maxFileScore + crossFileBonus + intentBonus + packageScore + lifecycleBoost);
+  // 6b. v2.10.73 P3: Degraded (quick-scan) non-CRITICAL threats contribute a
+  // bounded bonus to the final score — they are visible in the report but never
+  // inflate max_file_score. Cap at 15 (= 5 MEDIUM threats OR 1 HIGH + small).
+  // Rationale: quick-scan is regex-only, cannot distinguish top-level from
+  // exported function scope, so detections are low-confidence by construction.
+  let degradedScore = 0;
+  if (degradedNonCriticalThreats.length > 0) {
+    for (const t of degradedNonCriticalThreats) {
+      degradedScore += _severityWeights[t.severity] || 0;
+    }
+    degradedScore = Math.min(15, degradedScore);
+  }
+  // 7. Final score = max file score + cross-file bonus + intent bonus + package-level score + lifecycle boost + degraded bucket, capped at 100
+  let riskScore = Math.min(MAX_RISK_SCORE, maxFileScore + crossFileBonus + intentBonus + packageScore + lifecycleBoost + degradedScore);
   // 7b. MT-1: Score ceiling for packages without lifecycle scripts.
   // 56% of real malware uses install scripts. Packages without lifecycle that score high

package/src/shared/bundle-detect.js ADDED Viewed

@@ -0,0 +1,176 @@
+'use strict';
+/**
+ * Bundle file detection helpers — v2.10.73 P1 (FP cluster fix).
+ *
+ * The v2.10.72 forensic audit (2026-04-11, 78 packages deep-reviewed) revealed
+ * that the 14 packages babylonjs/electron/@kitware/vtk.js/@stencil/core/playwright/
+ * @testim/testim-cli/@vanwei-wcs/video-player-v2/@bookolosystem/engine/@epie/bi-crud/etc.
+ * scored ≥50 because the AST/dataflow/obfuscation rules fired on standard
+ * bundler helpers (__webpack_require__, Function("return this")(),
+ * var __copyProps, .replace chains, prototype pollution for framework reactivity).
+ *
+ * Fix:
+ * 1. Extended regex `BUNDLE_PATH_RE` covering the previously missing patterns:
+ *    .umd.js, .esm.js, .es.js, .common.js, .max.js, hash-suffixed chunks,
+ *    fesm*, browser/, assets/, chunks/.
+ * 2. Veto list `VETO_TYPES` — threat types that indicate a malicious injection
+ *    into a bundle (reverse_shell, node_modules_write, ioc_match, etc.). If a
+ *    veto threat is present in the same file, the bundle downgrade is
+ *    cancelled — the bundle is suspected of injection (event-stream style).
+ * 3. List `SENSITIVE_ENV_RE` — names of sensitive env vars. An env_access on
+ *    one of these names in a bundle also cancels the downgrade (credential theft).
+ *
+ * Architecture: no file-content reads and no cache — detection is based
+ * purely on the path and on the types of threats co-occurring in the
+ * same file. For v2.10.74, an `isStructuralBundle()` that reads
+ * signatures (`__webpack_require__`, `sourceMappingURL=`) could be added
+ * if the FPR tests show that FPs remain on bundles without a telltale name.
+ */
+// Extended bundle path/basename regex (replaces the narrow DIST_FILE_RE).
+// Covers the audit findings: babylonjs, electron, @kitware/vtk.js, dprint,
+// @jetbrains/junie, @zuplo/core, @stencil/core, playwright, @equinor/*,
+// @alipay/*, @testim/testim-cli, @vanwei-wcs/video-player-v2, @bookolosystem/engine,
+// @epie/bi-crud, @fairyhunter13/opentui-core, rsshub.
+//
+// Pattern groups:
+//  - Directory prefixes (dist/, build/, out/, output/, lib/bundled/, browser/,
+//    fesm*/, esm/, esm5/, esm2015/, esm2020/, bundles/, assets/, chunks/, _app/)
+//  - Basename suffixes (.min.js, .bundle.js, .umd.js, .esm.js, .es.js, .cjs.js,
+//    .common.js, .max.js, .prod.js, .production.js, .iife.js, + .cjs / .mjs variants)
+//  - Double-extension bundler outputs (index.cjs.js, index.esm.js, index.umd.js,
+//    matched on the basename at any depth — common for @equinor/*, tsdx/rollup libs)
+//  - Hash-suffixed chunks (esbuild/vite/rollup/webpack convention):
+//    `basename[-.][a-f0-9]{6,16}.(js|mjs|cjs)` (hex match is case-insensitive)
+//  - Tool-specific subdirectories that contain vendored bundles (v2.10.75):
+//    * `lib/[name]Bundle*/` — Playwright-style `lib/utilsBundleImpl/`
+//    * `.yarn/releases/`, `.pnpm/(releases|dist)/` — vendored yarn/pnpm releases
+//    * `sys/(node|browser|deno)/` — Stencil-style platform-specific bundle
+//    * `compiled/` — SWC/Stencil compiled output
+//    * `typings/` — NOTE(review): listed here, but the regex below has no `typings` alternative
+const BUNDLE_PATH_RE = new RegExp(
+  // Path prefix group (directories that almost always contain bundled output)
+  '(?:^|[/\\\\])' +
+  '(?:dist|build|out|output|browser|bundles|assets|chunks|_app|compiled|' +
+  'lib[/\\\\]bundled|fesm\\d*|esm|esm5|esm2015|esm2020)' +
+  '[/\\\\]' +
+  // OR Playwright-style lib/xxxBundle*/ (e.g. lib/utilsBundleImpl/, lib/mcpBundleImpl/,
+  // lib/transform/babelBundleImpl.js) — matches the directory form
+  // `lib/.../xxxBundleImpl/index.js` and the flat form `lib/.../xxxBundleImpl.js`
+  // at any depth under lib/.
+  '|(?:^|[/\\\\])lib[/\\\\][^\\n]*[Bb]undle[\\w-]*(?:[/\\\\]|\\.(?:m?js|cjs)$)' +
+  // OR vendored yarn/pnpm releases (@backstage/create-app templates etc.)
+  '|(?:^|[/\\\\])\\.yarn[/\\\\]releases[/\\\\]' +
+  '|(?:^|[/\\\\])\\.pnpm[/\\\\](?:releases|dist)[/\\\\]' +
+  // OR Stencil-style sys/(node|browser|deno) containing compiled platform bundles
+  '|(?:^|[/\\\\])sys[/\\\\](?:node|browser|deno)[/\\\\]' +
+  // OR basename suffix group (single extension)
+  '|\\.(?:min|bundle|umd|esm|es|cjs|common|max|prod|production|iife)\\.(?:m?js|cjs)$' +
+  // OR double-extension bundler outputs (any directory depth): index.cjs.js, index.esm.js, etc.
+  // Anchored by `^` or path separator + basename with exactly the double extension.
+  '|(?:^|[/\\\\])[\\w-]+\\.(?:cjs|esm|umd|es|iife|min)\\.js$' +
+  // OR hash-suffixed chunk
+  '|(?:^|[/\\\\])[\\w-]+[-.][a-f0-9]{6,16}\\.(?:m?js|cjs)$',
+  'i'
+);
+// Threat types that, when present on the same file as a bundle downgrade
+// candidate, VETO the downgrade entirely — the bundle is suspected of
+// malicious injection or active C2/persistence.
+//
+// IMPORTANT: types that feed existing compound rules are INTENTIONALLY NOT listed
+// here. The scoring pipeline already has a mechanism to recover downgraded signals
+// via `applyCompoundBoosts` + `originalSeverity` gates (see src/scoring.js:462 and
+// compound gate at line 410). Types like `staged_binary_payload`, `crypto_decipher`,
+// `fetch_decrypt_exec`, `zlib_inflate_eval` ARE downgraded in bundles but their
+// `originalSeverity` is preserved so compound rules (crypto_staged_payload, etc.)
+// can still fire. Adding them to VETO_TYPES would break the existing v2.9.6 test
+// suite (compound-scoring.test.js:305 and similar) without adding value.
+//
+// This VETO list is limited to patterns that meet at least one of these criteria:
+//  1. Have no compound fallback (rare patterns not yet wired into a compound)
+//  2. Indicate active C2, persistence, or worm propagation (structurally unique to
+//     malware — a legit bundler never produces `reverse_shell` or `node_modules_write`)
+//  3. Are IOC hits (highest confidence, never downgraded regardless of context)
+const VETO_TYPES = new Set([
+  // Active C2 / backdoor — structurally unique to malware, no legit bundler path
+  'reverse_shell',
+  'node_modules_write',        // worm propagation (Shai-Hulud style)
+  'npm_publish_worm',
+  'npm_token_steal',
+  'systemd_persistence',
+  // Unicode steganography (GlassWorm) — bundlers never produce invisible unicode
+  'unicode_invisible_injection',
+  // IOC hits (never downgraded regardless of context)
+  'ioc_match',
+  'known_malicious_package',
+  'shai_hulud_marker'
+]);
+// Sensitive environment variable name patterns (case-sensitive: no regex flags;
+// a name must be delimited by `\b` on both sides). An `env_access` threat whose
+// `message` matches, on the same file as a bundle downgrade candidate, VETOs the
+// downgrade (the bundle reads credentials). NODE_ENV, NODE_OPTIONS, PATH, HOME, SHELL,
+// CI, DEBUG etc. are NOT included (bundler output reads them for runtime detection).
+const SENSITIVE_ENV_RE = new RegExp(
+  '\\b(' +
+    'NPM_TOKEN|NPM_CONFIG_AUTHTOKEN|NPMRC|' +
+    'AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN|' +
+    'SSH_PRIVATE_KEY|SSH_KEY|SSH_AUTH_SOCK|' +
+    'GITHUB_TOKEN|GH_TOKEN|GITLAB_TOKEN|' +
+    'GCP_[A-Z_]+|GOOGLE_APPLICATION_CREDENTIALS|' +
+    'AZURE_[A-Z_]+|AZURE_CLIENT_SECRET|' +
+    'STRIPE_SECRET_KEY|STRIPE_LIVE|' +
+    // Catch-all suffix patterns
+    '[A-Z][A-Z0-9_]*_SECRET|[A-Z][A-Z0-9_]*_PRIVATE_KEY|' +
+    '[A-Z][A-Z0-9_]*_API_KEY|[A-Z][A-Z0-9_]*_AUTH_TOKEN' +
+  ')\\b'
+);
+/**
+ * Check if a file path matches bundle heuristics.
+ *
+ * Pure path test against `BUNDLE_PATH_RE`; file contents are never read here.
+ *
+ * @param {string} filePath - relative or absolute file path
+ * @returns {boolean} `true` when the path matches `BUNDLE_PATH_RE`; `false` for a
+ *   non-matching path, an empty string, or any non-string / falsy value
+ */
+function isBundlePath(filePath) {
+  if (!filePath || typeof filePath !== 'string') return false;
+  return BUNDLE_PATH_RE.test(filePath);
+}
+/**
+ * Check if any threat in `threats` on the same file as `targetFile` is a
+ * veto signal (VETO_TYPES OR env_access on sensitive env var). If so, the
+ * bundle-downgrade gate should NOT downgrade — the bundle is suspected of
+ * malicious injection (event-stream / flatmap-stream style) or credential theft.
+ *
+ * Matching rules, as implemented below:
+ *  - Only threats whose `file` is strictly equal (`===`) to `targetFile` are
+ *    considered; no path normalization is performed.
+ *  - Threats with `severity === 'LOW'` are skipped and can never veto.
+ *  - A threat vetoes when its `type` is in `VETO_TYPES`, or when it is an
+ *    `env_access` with a truthy `message` that matches `SENSITIVE_ENV_RE`.
+ *
+ * @param {Array} threats - full threats array (all scanners combined)
+ * @param {string} targetFile - the file path being evaluated for downgrade
+ * @returns {boolean} - true if a veto signal is found; false otherwise, including
+ *   when `threats` is not an array or `targetFile` is falsy
+ */
+function hasBundleVetoSignal(threats, targetFile) {
+  if (!Array.isArray(threats) || !targetFile) return false;
+  for (const t of threats) {
+    if (t.file !== targetFile) continue;
+    // v2.10.75 fix: a LOW severity threat should never block the bundle downgrade
+    // of unrelated co-occurring threats. Typical regression case: a locale file
+    // (locales/fa-IR/*.js) contains `unicode_invisible_injection` at LOW (already
+    // downgraded by `isLocaleFile` in obfuscation.js) but also contains bundler
+    // helpers. Before this fix, the LOW unicode signal vetoed the bundle downgrade
+    // of the other threats, so the package scored higher than pre-v2.10.74.
+    if (t.severity === 'LOW') continue;
+    if (VETO_TYPES.has(t.type)) return true;
+    if (t.type === 'env_access' && t.message && SENSITIVE_ENV_RE.test(t.message)) {
+      return true;
+    }
+  }
+  return false;
+}
+module.exports = {
+  BUNDLE_PATH_RE,
+  VETO_TYPES,
+  SENSITIVE_ENV_RE,
+  isBundlePath,
+  hasBundleVetoSignal
+};