npm - muaddib-scanner - Versions diffs - 2.10.84 → 2.10.86 - Mend

muaddib-scanner 2.10.84 → 2.10.86

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/package.json +1 -1
package/src/monitor/adaptive-concurrency.js +22 -10
package/src/monitor/queue.js +22 -5
package/src/sandbox/index.js +5 -2
package/src/shared/http-limiter.js +1 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "muaddib-scanner",
-  "version": "2.10.84",
+  "version": "2.10.86",
   "description": "Supply-chain threat detection & response for npm & PyPI/Python",
   "main": "src/index.js",
   "bin": {

package/src/monitor/adaptive-concurrency.js CHANGED Viewed

@@ -37,11 +37,14 @@ const TIMEOUT_RATE_MIN_SAMPLES = 20;
 let _prevScanned = 0;
 let _prevTimeouts = 0;
-// Throughput plateau detection: if we scaled up but throughput didn't increase,
-// we've hit I/O saturation (npm registry rate limiting, disk contention).
-// More workers would make it worse — scale back instead.
+// Throughput plateau detection: if we scaled up but throughput didn't increase
+// over MULTIPLE consecutive windows, we've hit I/O saturation.
+// Requires 2 consecutive flat windows to trigger — a single 30s window has too
+// much variance from sandbox timeouts (90-270s) to be reliable.
 let _prevThroughput = 0;
 let _lastScaleDirection = 0; // +1 = scaled up, -1 = scaled down, 0 = stable
+let _plateauStreak = 0;      // consecutive windows where throughput didn't improve after scale-up
+const PLATEAU_STREAK_REQUIRED = 2; // must see flat throughput N times before triggering
 /**
  * Compute new target concurrency from system signals.
@@ -85,16 +88,24 @@ function computeTarget(current, queueDepth, stats) {
     return { target, reason: `high_timeout_rate (${(timeoutRate * 100).toFixed(0)}%, ${timeoutDelta}/${scannedDelta})` };
   }
-  // Priority 3: Throughput plateau — scaled up last tick but throughput flat/down.
-  // This catches I/O saturation: more workers = more concurrent HTTP to npm registry
-  // = rate limiting + contention = scan times 10s→90s = throughput drops.
-  // Scale back instead of continuing to add workers.
+  // Priority 3: Throughput plateau — scaled up recently but throughput flat/down.
+  // Requires PLATEAU_STREAK_REQUIRED consecutive flat windows to trigger.
+  // A single bad window (sandbox timeout finishing in wrong 30s slot) is noise, not saturation.
   if (_lastScaleDirection > 0 && _prevThroughput > 0 && scannedDelta > 0 && scannedDelta <= _prevThroughput) {
-    const prevTp = _prevThroughput;
+    _plateauStreak++;
+    if (_plateauStreak >= PLATEAU_STREAK_REQUIRED) {
+      const prevTp = _prevThroughput;
+      _prevThroughput = scannedDelta;
+      _lastScaleDirection = -1;
+      _plateauStreak = 0;
+      return { target: clamp(current - 2), reason: `throughput_plateau (${prevTp}→${scannedDelta} scans/30s × ${PLATEAU_STREAK_REQUIRED} windows)` };
+    }
+    // Not enough consecutive flat windows yet — keep current level, don't scale up further
     _prevThroughput = scannedDelta;
-    _lastScaleDirection = -1;
-    return { target: clamp(current - 2), reason: `throughput_plateau (${prevTp}→${scannedDelta} scans/30s, more workers didn't help)` };
+    return { target: current, reason: `plateau_warning (${_plateauStreak}/${PLATEAU_STREAK_REQUIRED}, ${scannedDelta} scans/30s)` };
   }
+  // Throughput improved or no scale-up context — reset streak
+  _plateauStreak = 0;
   // Priority 4: Queue depth — scale up for backlog, down toward base when idle
   if (queueDepth > QUEUE_BACKLOG_THRESHOLD) {
@@ -128,6 +139,7 @@ function resetDeltas() {
   _prevTimeouts = 0;
   _prevThroughput = 0;
   _lastScaleDirection = 0;
+  _plateauStreak = 0;
 }
 module.exports = {

package/src/monitor/queue.js CHANGED Viewed

@@ -285,6 +285,16 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
   const cacheTrigger = meta._cacheTrigger || null;
   try {
+    // Pre-download size check: reject packages known to exceed MAX_TARBALL_SIZE
+    // from registry metadata, without wasting a download + 300s timeout.
+    // unpackedSize is available from getNpmLatestTarball() after lazy resolution.
+    const metaSize = meta.unpackedSize || 0;
+    if (metaSize > MAX_TARBALL_SIZE) {
+      console.log(`[MONITOR] SIZE_REJECT: ${name}@${version} — metadata size ${(metaSize / 1024 / 1024).toFixed(1)}MB exceeds ${(MAX_TARBALL_SIZE / 1024 / 1024).toFixed(0)}MB limit (skipped without download)`);
+      stats.scanned++;
+      return;
+    }
     const tgzPath = path.join(tmpDir, 'package.tar.gz');
     // Layer 3: Check tarball cache before downloading
@@ -729,7 +739,10 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
             const canary = isCanaryEnabled();
             const reason = tier === 2 ? ' (T2, queue low)' : tier === '1b' ? ' (T1b, conditional)' : '';
             console.log(`[MONITOR] SANDBOX${reason}: launching for ${name}@${version}${canary ? ' (canary: on)' : ''}...`);
-            sandboxResult = await runSandbox(name, { canary });
+            // T1a: 3 runs (time bomb detection via libfaketime — mandatory for high-confidence threats)
+            // T1b/T2: 1 run (270s→90s — time bombs are rare, throughput matters more under load)
+            const maxRuns = tier === '1a' ? undefined : 1;
+            sandboxResult = await runSandbox(name, { canary, maxRuns });
             console.log(`[MONITOR] SANDBOX: ${name}@${version} → score: ${sandboxResult.score}, severity: ${sandboxResult.severity}`);
             // Check for canary exfiltration findings and send dedicated alert
@@ -1153,11 +1166,13 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
   let publishResult = null;
   let maintainerResult = null;
-  if (item.ecosystem === 'npm' && !item.fastTrack) {
+  const TEMPORAL_LOAD_SHED_THRESHOLD = 2000;
+  const skipTemporal = item.fastTrack || scanQueue.length > TEMPORAL_LOAD_SHED_THRESHOLD;
+  if (item.ecosystem === 'npm' && !skipTemporal) {
     // Run all 4 temporal checks in parallel — each is independent.
-    // With metadata cache (temporal-analysis.js), the 4 modules share 1 HTTP request.
-    // Skipped for fast-track packages (large boring packages — temporal checks make
-    // 4 HTTP requests to npm registry per package, pointless for 50MB enterprise packages).
+    // AST diff alone consumes 5 HTTP semaphore slots per package (2 tarball downloads + 3 metadata).
+    // With 16 workers that's 80 slot requests for 10 slots → workers blocked 80% of the time.
+    // Load-shed when queue > 2000: temporal analysis is a luxury during catch-up.
     const [tempRes, astRes, pubRes, maintRes] = await Promise.allSettled([
       runTemporalCheck(item.name, dailyAlerts),
       runTemporalAstCheck(item.name, dailyAlerts),
@@ -1168,6 +1183,8 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
     astResult = astRes.status === 'fulfilled' ? astRes.value : null;
     publishResult = pubRes.status === 'fulfilled' ? pubRes.value : null;
     maintainerResult = maintRes.status === 'fulfilled' ? maintRes.value : null;
+  } else if (skipTemporal && item.ecosystem === 'npm' && !item.fastTrack) {
+    console.log(`[MONITOR] TEMPORAL LOAD-SHED: ${item.name}@${item.version} (queue=${scanQueue.length} > ${TEMPORAL_LOAD_SHED_THRESHOLD})`);
   }
   // Abort check: if timeout fired during temporal checks, skip the expensive scan

package/src/sandbox/index.js CHANGED Viewed

@@ -641,12 +641,15 @@ async function runSandbox(packageName, options = {}) {
   try {
     const runtimeLabel = useGvisor ? 'gvisor' : 'docker';
     const slotInfo = skipSem ? 'deferred-slot' : `${_sandboxSemaphore.active}/${SANDBOX_CONCURRENCY_MAX}`;
-    console.log(`[SANDBOX] Analyzing "${displayName}" in isolated container (mode: ${mode}, runtime: ${runtimeLabel}${canaryEnabled ? ', canary: on' : ''}${local ? ', local' : ''}, runs: ${TIME_OFFSETS.length}, slots: ${slotInfo})...`);
+    // maxRuns: cap number of time-offset runs. Default: all TIME_OFFSETS (3 runs).
+    // T1b/T2 use maxRuns=1 to reduce 270s→90s — time bomb detection is a luxury under load.
+    const effectiveRuns = Math.min(options.maxRuns || TIME_OFFSETS.length, TIME_OFFSETS.length);
+    console.log(`[SANDBOX] Analyzing "${displayName}" in isolated container (mode: ${mode}, runtime: ${runtimeLabel}${canaryEnabled ? ', canary: on' : ''}${local ? ', local' : ''}, runs: ${effectiveRuns}, slots: ${slotInfo})...`);
     const allRuns = [];
     let bestResult = cleanResult;
-    for (let i = 0; i < TIME_OFFSETS.length; i++) {
+    for (let i = 0; i < effectiveRuns; i++) {
       const { offset, label } = TIME_OFFSETS[i];
       console.log(`[SANDBOX] Run ${i + 1}/${TIME_OFFSETS.length} (${label})...`);

package/src/shared/http-limiter.js CHANGED Viewed

@@ -14,7 +14,7 @@
  * NOT covered: api.npmjs.org (different server), replicate.npmjs.com (CouchDB changes stream).
  */
-const REGISTRY_SEMAPHORE_MAX = 10;
+const REGISTRY_SEMAPHORE_MAX = 20;
 const RATE_LIMIT_PER_SEC = 30;
 // --- Concurrency semaphore ---