npm - muaddib-scanner - Versions diffs - 2.10.82 → 2.10.85 - Mend

muaddib-scanner 2.10.82 → 2.10.85

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/package.json +1 -1
package/src/monitor/adaptive-concurrency.js +22 -10
package/src/monitor/ingestion.js +5 -1
package/src/monitor/queue.js +49 -7
package/src/sandbox/index.js +6 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "muaddib-scanner",
-  "version": "2.10.82",
+  "version": "2.10.85",
   "description": "Supply-chain threat detection & response for npm & PyPI/Python",
   "main": "src/index.js",
   "bin": {

package/src/monitor/adaptive-concurrency.js CHANGED Viewed

@@ -37,11 +37,14 @@ const TIMEOUT_RATE_MIN_SAMPLES = 20;
 let _prevScanned = 0;
 let _prevTimeouts = 0;
-// Throughput plateau detection: if we scaled up but throughput didn't increase,
-// we've hit I/O saturation (npm registry rate limiting, disk contention).
-// More workers would make it worse — scale back instead.
+// Throughput plateau detection: if we scaled up but throughput didn't increase
+// over MULTIPLE consecutive windows, we've hit I/O saturation.
+// Requires 2 consecutive flat windows to trigger — a single 30s window has too
+// much variance from sandbox timeouts (90-270s) to be reliable.
 let _prevThroughput = 0;
 let _lastScaleDirection = 0; // +1 = scaled up, -1 = scaled down, 0 = stable
+let _plateauStreak = 0;      // consecutive windows where throughput didn't improve after scale-up
+const PLATEAU_STREAK_REQUIRED = 2; // must see flat throughput N times before triggering
 /**
  * Compute new target concurrency from system signals.
@@ -85,16 +88,24 @@ function computeTarget(current, queueDepth, stats) {
     return { target, reason: `high_timeout_rate (${(timeoutRate * 100).toFixed(0)}%, ${timeoutDelta}/${scannedDelta})` };
   }
-  // Priority 3: Throughput plateau — scaled up last tick but throughput flat/down.
-  // This catches I/O saturation: more workers = more concurrent HTTP to npm registry
-  // = rate limiting + contention = scan times 10s→90s = throughput drops.
-  // Scale back instead of continuing to add workers.
+  // Priority 3: Throughput plateau — scaled up recently but throughput flat/down.
+  // Requires PLATEAU_STREAK_REQUIRED consecutive flat windows to trigger.
+  // A single bad window (sandbox timeout finishing in wrong 30s slot) is noise, not saturation.
   if (_lastScaleDirection > 0 && _prevThroughput > 0 && scannedDelta > 0 && scannedDelta <= _prevThroughput) {
-    const prevTp = _prevThroughput;
+    _plateauStreak++;
+    if (_plateauStreak >= PLATEAU_STREAK_REQUIRED) {
+      const prevTp = _prevThroughput;
+      _prevThroughput = scannedDelta;
+      _lastScaleDirection = -1;
+      _plateauStreak = 0;
+      return { target: clamp(current - 2), reason: `throughput_plateau (${prevTp}→${scannedDelta} scans/30s × ${PLATEAU_STREAK_REQUIRED} windows)` };
+    }
+    // Not enough consecutive flat windows yet — keep current level, don't scale up further
     _prevThroughput = scannedDelta;
-    _lastScaleDirection = -1;
-    return { target: clamp(current - 2), reason: `throughput_plateau (${prevTp}→${scannedDelta} scans/30s, more workers didn't help)` };
+    return { target: current, reason: `plateau_warning (${_plateauStreak}/${PLATEAU_STREAK_REQUIRED}, ${scannedDelta} scans/30s)` };
   }
+  // Throughput improved or no scale-up context — reset streak
+  _plateauStreak = 0;
   // Priority 4: Queue depth — scale up for backlog, down toward base when idle
   if (queueDepth > QUEUE_BACKLOG_THRESHOLD) {
@@ -128,6 +139,7 @@ function resetDeltas() {
   _prevTimeouts = 0;
   _prevThroughput = 0;
   _lastScaleDirection = 0;
+  _plateauStreak = 0;
 }
 module.exports = {

package/src/monitor/ingestion.js CHANGED Viewed

@@ -442,6 +442,10 @@ async function pollNpmChanges(state, scanQueue, stats) {
       // Layer 3: Evaluate if this package should be cached
       const cacheTrigger = evaluateCacheTrigger(name, docMeta, change.doc || null);
+      // Layer 2: Extract tarball URL from CouchDB doc (eliminates lazy resolution 404 race)
+      // NOTE: fastTrack flag is computed in resolveTarballAndScan() AFTER metadata
+      // resolution via getNpmLatestTarball(). It cannot be computed here because
+      // post-May 2025, include_docs is deprecated and change.doc is always null.
       scanQueue.push({
         name,
         version: docMeta ? docMeta.version : '',
@@ -643,7 +647,7 @@ async function pollPyPI(state, scanQueue) {
  * @param {Array} scanQueue - Mutable scan queue array
  * @param {Object} stats - Mutable stats object
  */
-const SOFT_BACKPRESSURE_THRESHOLD = 10_000;
+const SOFT_BACKPRESSURE_THRESHOLD = 30_000;
 async function poll(state, scanQueue, stats) {
   // Soft backpressure: skip poll when queue is very deep.

package/src/monitor/queue.js CHANGED Viewed

@@ -336,7 +336,7 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
     let alreadyExtracted = false;
     let extractedDir = null;
-    if (unpackedSize > LARGE_PACKAGE_SIZE) {
+    if (unpackedSize > LARGE_PACKAGE_SIZE || meta.fastTrack) {
       // Exception 1: IOC match — always full scan
       let isKnownIOC = false;
       try {
@@ -678,7 +678,10 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
         stats.suspect++;
         // Fire-and-forget tarball archiving — never blocks the pipeline
-        archiveSuspectTarball(name, version, tarballUrl, {
+        // Skip for fast-track packages (large boring enterprise packages — not worth archiving)
+        if (meta.fastTrack) {
+          console.log(`[MONITOR] FAST-TRACK SKIP: ${name}@${version} — skipping archive + LLM (static-only)`);
+        } else archiveSuspectTarball(name, version, tarballUrl, {
           score: riskScore,
           priority: tierLabel,
           rulesTriggered: (result.threats || []).map(t => t.ruleId || t.type).filter(Boolean),
@@ -687,13 +690,35 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
           console.warn(`[Archive] Failed for ${name}@${version}: ${err.message}`);
         });
-        // Sandbox decision based on tier
+        // Sandbox decision based on tier + smart skip for large low-signal packages.
+        // Large packages (>15MB or >80 deps) with only MEDIUM/LOW findings time out
+        // systematically (90s × 3 = INCONCLUSIVE = 0 detection). Skipping frees slots
+        // for real suspects. Guardrails: any HIGH/CRITICAL, temporal anomaly, maintainer
+        // change, or dormant spike → sandbox runs regardless of size.
+        const SANDBOX_SIZE_SKIP_BYTES = 15 * 1024 * 1024; // 15MB
+        const SANDBOX_DEPS_SKIP = 80;
+        const isLargePackage = (meta.unpackedSize || 0) > SANDBOX_SIZE_SKIP_BYTES ||
+          (meta.dependencyCount || 0) > SANDBOX_DEPS_SKIP;
+        const hasHighOrCriticalFinding = (result.summary.critical || 0) > 0 || (result.summary.high || 0) > 0;
+        const hasTemporalSignal = (result.threats || []).some(t =>
+          t.type === 'postinstall_added' || t.type === 'preinstall_added' ||
+          t.type === 'install_added' || t.type === 'maintainer_change' ||
+          t.type === 'dormant_spike' || t.type === 'publish_anomaly'
+        );
+        const skipSandboxLargePackage = (isLargePackage || meta.fastTrack) && !hasHighOrCriticalFinding && !hasTemporalSignal;
+        if (skipSandboxLargePackage && meta.fastTrack) {
+          console.log(`[MONITOR] FAST-TRACK: ${name}@${version} — large package static-only (${((meta.unpackedSize || 0) / 1024 / 1024).toFixed(1)}MB, no lifecycle scripts)`);
+        } else if (skipSandboxLargePackage) {
+          console.log(`[MONITOR] SANDBOX SKIP (large low-signal): ${name}@${version} (${((meta.unpackedSize || 0) / 1024 / 1024).toFixed(1)}MB, deps=${meta.dependencyCount || '?'}, no HIGH/CRIT, no temporal)`);
+        }
         // T1a: mandatory sandbox (HC malice types, TIER1_TYPES non-LOW, lifecycle + intent compound)
         // T1b: conditional sandbox (HIGH/CRITICAL without HC type — bundler FP zone)
         //       → sandbox only if score >= 25 (significant risk) or queue pressure is low
         // T2: sandbox if queue < 50 (as before)
         let sandboxResult = null;
-        const shouldSandbox = isSandboxEnabled() && sandboxAvailable && (
+        const shouldSandbox = !skipSandboxLargePackage && isSandboxEnabled() && sandboxAvailable && (
           tier === '1a' ||
           (tier === '1b' && (riskScore >= 25 || scanQueue.length < 20)) ||
           (tier === 2 && scanQueue.length < 50)
@@ -845,8 +870,9 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
         // Record daily alert with post-reputation score for top suspects ranking
         dailyAlerts.push({ name, version, ecosystem, findingsCount: result.summary.total, score: adjustedResult.summary.riskScore || 0, tier });
         // LLM Detective: AI-powered analysis for T1a/T1b suspects
+        // Skip for fast-track (large boring packages — LLM analysis adds 10-30s for no value)
         let llmResult = null;
-        if ((tier === '1a' || tier === '1b') && (adjustedResult.summary.riskScore || 0) >= 25) {
+        if (!meta.fastTrack && (tier === '1a' || tier === '1b') && (adjustedResult.summary.riskScore || 0) >= 25) {
           try {
             const { investigatePackage, isLlmEnabled, getLlmMode } = require('../ml/llm-detective.js');
             if (isLlmEnabled()) {
@@ -1057,6 +1083,19 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
       if (npmInfo.version) item.version = npmInfo.version;
       if (npmInfo.unpackedSize) item.unpackedSize = npmInfo.unpackedSize;
       if (npmInfo.scripts) item.registryScripts = npmInfo.scripts;
+      // Fast-track decision: large packages (>15MB) with no lifecycle scripts and no IOC match.
+      // Computed HERE (after metadata resolution), not at ingestion time — post-May 2025
+      // CouchDB changes feed has no docs, so metadata is only available after lazy fetch.
+      // Fast-track packages get: quick static scan (package.json + shell only), no AST,
+      // no temporal checks, no LLM, no archiving, and no sandbox unless a HIGH/CRITICAL
+      // or temporal-signal finding is present. Exits in ~2-3s instead of 30-300s.
+      const FAST_TRACK_SIZE_BYTES = 15 * 1024 * 1024;
+      if (!item.isIOCMatch && (item.unpackedSize || 0) > FAST_TRACK_SIZE_BYTES) {
+        const scripts = item.registryScripts || {};
+        if (!scripts.preinstall && !scripts.postinstall && !scripts.install) {
+          item.fastTrack = true;
+        }
+      }
     } catch (err) {
       console.error(`[MONITOR] ERROR resolving npm tarball for ${item.name}: ${err.message}`);
       recordError(err, stats);
@@ -1114,9 +1153,11 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
   let publishResult = null;
   let maintainerResult = null;
-  if (item.ecosystem === 'npm') {
+  if (item.ecosystem === 'npm' && !item.fastTrack) {
     // Run all 4 temporal checks in parallel — each is independent.
     // With metadata cache (temporal-analysis.js), the 4 modules share 1 HTTP request.
+    // Skipped for fast-track packages (large boring packages — temporal checks make
+    // 4 HTTP requests to npm registry per package, pointless for 50MB enterprise packages).
     const [tempRes, astRes, pubRes, maintRes] = await Promise.allSettled([
       runTemporalCheck(item.name, dailyAlerts),
       runTemporalAstCheck(item.name, dailyAlerts),
@@ -1135,7 +1176,8 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
   const scanResult = await scanPackage(item.name, item.version, item.ecosystem, item.tarballUrl, {
     unpackedSize: item.unpackedSize || 0,
     registryScripts: item.registryScripts || null,
-    _cacheTrigger: item._cacheTrigger || null
+    _cacheTrigger: item._cacheTrigger || null,
+    fastTrack: item.fastTrack || false
   }, stats, dailyAlerts, recentlyScanned, downloadsCache, scanQueue, sandboxAvailable);
   const sandboxResult = scanResult && scanResult.sandboxResult;
   const staticClean = scanResult && scanResult.staticClean;

package/src/sandbox/index.js CHANGED Viewed

@@ -224,7 +224,12 @@ async function runSingleSandbox(packageName, options = {}) {
       '--cap-drop=ALL'
     ];
-    // gVisor runtime: use runsc instead of default runc
+    // gVisor runtime: use runsc instead of default runc.
+    // Performance: configure --directfs and --overlay2=all:memory in daemon.json:
+    //   "runsc": { "path": "/usr/bin/runsc", "runtimeArgs": ["--directfs", "--overlay2=all:memory"] }
+    // --directfs: bypass gofer process for direct filesystem access (fewer RPCs, faster I/O)
+    // --overlay2=all:memory: sandbox writes go to tmpfs instead of host (faster, isolated)
+    // These flags require gVisor >= 2023-06-01.
     if (gvisorMode) {
       dockerArgs.push('--runtime=runsc');
       dockerArgs.push('-e', 'MUADDIB_GVISOR=1');