muaddib-scanner 2.10.81 → 2.10.84

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.10.81",
3
+ "version": "2.10.84",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -17,7 +17,7 @@ const os = require('os');
17
17
 
18
18
  const MIN_CONCURRENCY = 4;
19
19
  const BASE_CONCURRENCY = Math.max(MIN_CONCURRENCY, parseInt(process.env.MUADDIB_SCAN_CONCURRENCY, 10) || 8);
20
- const MAX_CONCURRENCY = Math.max(BASE_CONCURRENCY, parseInt(process.env.MUADDIB_MAX_CONCURRENCY, 10) || 32);
20
+ const MAX_CONCURRENCY = Math.max(BASE_CONCURRENCY, parseInt(process.env.MUADDIB_MAX_CONCURRENCY, 10) || 16);
21
21
  const ADJUST_INTERVAL_MS = 30_000;
22
22
 
23
23
  // Queue depth thresholds
@@ -37,6 +37,12 @@ const TIMEOUT_RATE_MIN_SAMPLES = 20;
37
37
  let _prevScanned = 0;
38
38
  let _prevTimeouts = 0;
39
39
 
40
+ // Throughput plateau detection: if we scaled up but throughput didn't increase,
41
+ // we've hit I/O saturation (npm registry rate limiting, disk contention).
42
+ // More workers would make it worse — scale back instead.
43
+ let _prevThroughput = 0;
44
+ let _lastScaleDirection = 0; // +1 = scaled up, -1 = scaled down, 0 = stable
45
+
40
46
  /**
41
47
  * Compute new target concurrency from system signals.
42
48
  * Uses stats deltas (not cumulative) for timeout rate — avoids stale data.
@@ -74,21 +80,39 @@ function computeTarget(current, queueDepth, stats) {
74
80
  // Priority 2: High timeout rate — system saturated, adding workers makes it worse
75
81
  if (timeoutRate > TIMEOUT_RATE_THRESHOLD) {
76
82
  const target = clamp(current - 2);
83
+ _prevThroughput = scannedDelta;
84
+ _lastScaleDirection = target < current ? -1 : 0;
77
85
  return { target, reason: `high_timeout_rate (${(timeoutRate * 100).toFixed(0)}%, ${timeoutDelta}/${scannedDelta})` };
78
86
  }
79
87
 
80
- // Priority 3: Queue depth — scale up for backlog, down toward base when idle
88
+ // Priority 3: Throughput plateau — scaled up last tick but throughput flat/down.
89
+ // This catches I/O saturation: more workers = more concurrent HTTP to npm registry
90
+ // = rate limiting + contention = scan times 10s→90s = throughput drops.
91
+ // Scale back instead of continuing to add workers.
92
+ if (_lastScaleDirection > 0 && _prevThroughput > 0 && scannedDelta > 0 && scannedDelta <= _prevThroughput) {
93
+ const prevTp = _prevThroughput;
94
+ _prevThroughput = scannedDelta;
95
+ _lastScaleDirection = -1;
96
+ return { target: clamp(current - 2), reason: `throughput_plateau (${prevTp}→${scannedDelta} scans/30s, more workers didn't help)` };
97
+ }
98
+
99
+ // Priority 4: Queue depth — scale up for backlog, down toward base when idle
81
100
  if (queueDepth > QUEUE_BACKLOG_THRESHOLD) {
82
101
  const target = clamp(current + 4);
102
+ // Record throughput at the point of scale-up — next tick compares against this
103
+ _prevThroughput = scannedDelta;
104
+ _lastScaleDirection = target > current ? 1 : 0;
83
105
  return { target, reason: `backlog (queue=${queueDepth})` };
84
106
  }
85
107
 
86
108
  if (queueDepth < QUEUE_IDLE_THRESHOLD) {
87
109
  // Converge toward BASE, not MIN — normal traffic needs BASE capacity
88
110
  const target = Math.max(BASE_CONCURRENCY, clamp(current - 2));
111
+ _lastScaleDirection = target < current ? -1 : 0;
89
112
  return { target, reason: `idle (queue=${queueDepth})` };
90
113
  }
91
114
 
115
+ _lastScaleDirection = 0;
92
116
  return { target: current, reason: 'stable' };
93
117
  }
94
118
 
@@ -102,6 +126,8 @@ function clamp(n) {
102
126
  function resetDeltas() {
103
127
  _prevScanned = 0;
104
128
  _prevTimeouts = 0;
129
+ _prevThroughput = 0;
130
+ _lastScaleDirection = 0;
105
131
  }
106
132
 
107
133
  module.exports = {
@@ -442,6 +442,10 @@ async function pollNpmChanges(state, scanQueue, stats) {
442
442
  // Layer 3: Evaluate if this package should be cached
443
443
  const cacheTrigger = evaluateCacheTrigger(name, docMeta, change.doc || null);
444
444
 
445
+ // Layer 2: Extract tarball URL from CouchDB doc (eliminates lazy resolution 404 race)
446
+ // NOTE: fastTrack flag is computed in resolveTarballAndScan() AFTER metadata
447
+ // resolution via getNpmLatestTarball(). It cannot be computed here because
448
+ // post-May 2025, include_docs is deprecated and change.doc is always null.
445
449
  scanQueue.push({
446
450
  name,
447
451
  version: docMeta ? docMeta.version : '',
@@ -643,13 +647,20 @@ async function pollPyPI(state, scanQueue) {
643
647
  * @param {Array} scanQueue - Mutable scan queue array
644
648
  * @param {Object} stats - Mutable stats object
645
649
  */
650
+ const SOFT_BACKPRESSURE_THRESHOLD = 30_000;
651
+
646
652
  async function poll(state, scanQueue, stats) {
647
- // Backpressure removed: polling ALWAYS runs regardless of queue depth.
648
- // The queue can grow unbounded in memory (entries are ~300 bytes, 100K = 30MB).
649
- // This prevents the data loss scenario where the CouchDB seq advances but
650
- // queued items are not persisted — packages would be permanently invisible.
653
+ // Soft backpressure: skip poll when queue is very deep.
654
+ // Safe because: CouchDB seq is NOT advanced (stays in memory only, persisted
655
+ // by daemon.js AFTER poll returns) — next poll resumes from the same point.
656
+ // Combined with adaptive concurrency: workers scale up → queue drains → poll resumes.
657
+ // This prevents the queue from growing to 30-40K during catch-up (OOM risk).
658
+ if (scanQueue.length >= SOFT_BACKPRESSURE_THRESHOLD) {
659
+ console.log(`[MONITOR] BACKPRESSURE: skipping poll (queue ${scanQueue.length} >= ${SOFT_BACKPRESSURE_THRESHOLD}) — seq not advanced, 0 packages lost`);
660
+ return;
661
+ }
651
662
  if (scanQueue.length > 5_000) {
652
- console.log(`[MONITOR] QUEUE_DEPTH: ${scanQueue.length} items — polling continues (no backpressure skip)`);
663
+ console.log(`[MONITOR] QUEUE_DEPTH: ${scanQueue.length} items — polling continues`);
653
664
  }
654
665
 
655
666
  const timestamp = new Date().toISOString().slice(0, 19).replace('T', ' ');
@@ -336,7 +336,7 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
336
336
  let alreadyExtracted = false;
337
337
  let extractedDir = null;
338
338
 
339
- if (unpackedSize > LARGE_PACKAGE_SIZE) {
339
+ if (unpackedSize > LARGE_PACKAGE_SIZE || meta.fastTrack) {
340
340
  // Exception 1: IOC match — always full scan
341
341
  let isKnownIOC = false;
342
342
  try {
@@ -678,7 +678,10 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
678
678
  stats.suspect++;
679
679
 
680
680
  // Fire-and-forget tarball archiving — never blocks the pipeline
681
- archiveSuspectTarball(name, version, tarballUrl, {
681
+ // Skip for fast-track packages (large boring enterprise packages — not worth archiving)
682
+ if (meta.fastTrack) {
683
+ console.log(`[MONITOR] FAST-TRACK SKIP: ${name}@${version} — skipping archive + LLM (static-only)`);
684
+ } else archiveSuspectTarball(name, version, tarballUrl, {
682
685
  score: riskScore,
683
686
  priority: tierLabel,
684
687
  rulesTriggered: (result.threats || []).map(t => t.ruleId || t.type).filter(Boolean),
@@ -687,13 +690,35 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
687
690
  console.warn(`[Archive] Failed for ${name}@${version}: ${err.message}`);
688
691
  });
689
692
 
690
- // Sandbox decision based on tier
693
+ // Sandbox decision based on tier + smart skip for large low-signal packages.
694
+ // Large packages (>15MB or >80 deps) with only MEDIUM/LOW findings timeout
695
+ // systematically (90s × 3 = INCONCLUSIVE = 0 detection). Skipping frees slots
696
+ // for real suspects. Guardrails: any HIGH/CRITICAL, temporal anomaly, maintainer
697
+ // change, or dormant spike → sandbox runs regardless of size.
698
+ const SANDBOX_SIZE_SKIP_BYTES = 15 * 1024 * 1024; // 15MB
699
+ const SANDBOX_DEPS_SKIP = 80;
700
+ const isLargePackage = (meta.unpackedSize || 0) > SANDBOX_SIZE_SKIP_BYTES ||
701
+ (meta.dependencyCount || 0) > SANDBOX_DEPS_SKIP;
702
+ const hasHighOrCriticalFinding = (result.summary.critical || 0) > 0 || (result.summary.high || 0) > 0;
703
+ const hasTemporalSignal = (result.threats || []).some(t =>
704
+ t.type === 'postinstall_added' || t.type === 'preinstall_added' ||
705
+ t.type === 'install_added' || t.type === 'maintainer_change' ||
706
+ t.type === 'dormant_spike' || t.type === 'publish_anomaly'
707
+ );
708
+ const skipSandboxLargePackage = (isLargePackage || meta.fastTrack) && !hasHighOrCriticalFinding && !hasTemporalSignal;
709
+
710
+ if (skipSandboxLargePackage && meta.fastTrack) {
711
+ console.log(`[MONITOR] FAST-TRACK: ${name}@${version} — large package static-only (${((meta.unpackedSize || 0) / 1024 / 1024).toFixed(1)}MB, no lifecycle scripts)`);
712
+ } else if (skipSandboxLargePackage) {
713
+ console.log(`[MONITOR] SANDBOX SKIP (large low-signal): ${name}@${version} (${((meta.unpackedSize || 0) / 1024 / 1024).toFixed(1)}MB, deps=${meta.dependencyCount || '?'}, no HIGH/CRIT, no temporal)`);
714
+ }
715
+
691
716
  // T1a: mandatory sandbox (HC malice types, TIER1_TYPES non-LOW, lifecycle + intent compound)
692
717
  // T1b: conditional sandbox (HIGH/CRITICAL without HC type — bundler FP zone)
693
718
  // → sandbox only if score >= 25 (significant risk) or queue pressure is low
694
719
  // T2: sandbox if queue < 50 (as before)
695
720
  let sandboxResult = null;
696
- const shouldSandbox = isSandboxEnabled() && sandboxAvailable && (
721
+ const shouldSandbox = !skipSandboxLargePackage && isSandboxEnabled() && sandboxAvailable && (
697
722
  tier === '1a' ||
698
723
  (tier === '1b' && (riskScore >= 25 || scanQueue.length < 20)) ||
699
724
  (tier === 2 && scanQueue.length < 50)
@@ -845,8 +870,9 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
845
870
  // Record daily alert with post-reputation score for top suspects ranking
846
871
  dailyAlerts.push({ name, version, ecosystem, findingsCount: result.summary.total, score: adjustedResult.summary.riskScore || 0, tier });
847
872
  // LLM Detective: AI-powered analysis for T1a/T1b suspects
873
+ // Skip for fast-track (large boring packages — LLM analysis adds 10-30s for no value)
848
874
  let llmResult = null;
849
- if ((tier === '1a' || tier === '1b') && (adjustedResult.summary.riskScore || 0) >= 25) {
875
+ if (!meta.fastTrack && (tier === '1a' || tier === '1b') && (adjustedResult.summary.riskScore || 0) >= 25) {
850
876
  try {
851
877
  const { investigatePackage, isLlmEnabled, getLlmMode } = require('../ml/llm-detective.js');
852
878
  if (isLlmEnabled()) {
@@ -1057,6 +1083,19 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
1057
1083
  if (npmInfo.version) item.version = npmInfo.version;
1058
1084
  if (npmInfo.unpackedSize) item.unpackedSize = npmInfo.unpackedSize;
1059
1085
  if (npmInfo.scripts) item.registryScripts = npmInfo.scripts;
1086
+
1087
+ // Fast-track decision: large packages (>15MB) with no lifecycle scripts and no IOC match.
1088
+ // Computed HERE (after metadata resolution), not at ingestion time — post-May 2025
1089
+ // CouchDB changes feed has no docs, so metadata is only available after lazy fetch.
1090
+ // Fast-track packages get: quick static scan (package.json + shell only), no AST,
1091
+ // no sandbox, no LLM, no archiving. Exits in ~2-3s instead of 30-300s.
1092
+ const FAST_TRACK_SIZE_BYTES = 15 * 1024 * 1024;
1093
+ if (!item.isIOCMatch && (item.unpackedSize || 0) > FAST_TRACK_SIZE_BYTES) {
1094
+ const scripts = item.registryScripts || {};
1095
+ if (!scripts.preinstall && !scripts.postinstall && !scripts.install) {
1096
+ item.fastTrack = true;
1097
+ }
1098
+ }
1060
1099
  } catch (err) {
1061
1100
  console.error(`[MONITOR] ERROR resolving npm tarball for ${item.name}: ${err.message}`);
1062
1101
  recordError(err, stats);
@@ -1114,9 +1153,11 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
1114
1153
  let publishResult = null;
1115
1154
  let maintainerResult = null;
1116
1155
 
1117
- if (item.ecosystem === 'npm') {
1156
+ if (item.ecosystem === 'npm' && !item.fastTrack) {
1118
1157
  // Run all 4 temporal checks in parallel — each is independent.
1119
1158
  // With metadata cache (temporal-analysis.js), the 4 modules share 1 HTTP request.
1159
+ // Skipped for fast-track packages (large boring packages — temporal checks make
1160
+ // 4 HTTP requests to npm registry per package, pointless for 50MB enterprise packages).
1120
1161
  const [tempRes, astRes, pubRes, maintRes] = await Promise.allSettled([
1121
1162
  runTemporalCheck(item.name, dailyAlerts),
1122
1163
  runTemporalAstCheck(item.name, dailyAlerts),
@@ -1135,7 +1176,8 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
1135
1176
  const scanResult = await scanPackage(item.name, item.version, item.ecosystem, item.tarballUrl, {
1136
1177
  unpackedSize: item.unpackedSize || 0,
1137
1178
  registryScripts: item.registryScripts || null,
1138
- _cacheTrigger: item._cacheTrigger || null
1179
+ _cacheTrigger: item._cacheTrigger || null,
1180
+ fastTrack: item.fastTrack || false
1139
1181
  }, stats, dailyAlerts, recentlyScanned, downloadsCache, scanQueue, sandboxAvailable);
1140
1182
  const sandboxResult = scanResult && scanResult.sandboxResult;
1141
1183
  const staticClean = scanResult && scanResult.staticClean;
@@ -224,7 +224,12 @@ async function runSingleSandbox(packageName, options = {}) {
224
224
  '--cap-drop=ALL'
225
225
  ];
226
226
 
227
- // gVisor runtime: use runsc instead of default runc
227
+ // gVisor runtime: use runsc instead of default runc.
228
+ // Performance: configure --directfs and --overlay2=all:memory in daemon.json:
229
+ // "runsc": { "path": "/usr/bin/runsc", "runtimeArgs": ["--directfs", "--overlay2=all:memory"] }
230
+ // --directfs: bypass gofer process for direct filesystem access (fewer RPCs, faster I/O)
231
+ // --overlay2=all:memory: sandbox writes go to tmpfs instead of host (faster, isolated)
232
+ // These flags require gVisor >= 2023-06-01.
228
233
  if (gvisorMode) {
229
234
  dockerArgs.push('--runtime=runsc');
230
235
  dockerArgs.push('-e', 'MUADDIB_GVISOR=1');
@@ -171,6 +171,12 @@ function downloadToFile(url, destPath, timeoutMs = DOWNLOAD_TIMEOUT) {
171
171
  }
172
172
  return doRequest(absoluteLocation, redirectCount + 1);
173
173
  }
174
+ if (res.statusCode === 429) {
175
+ res.resume();
176
+ // Signal rate limiter to back off — drains tokens, forces ~1s pause
177
+ try { require('./http-limiter.js').signal429(); } catch {}
178
+ return reject(new Error(`HTTP 429 rate limited for ${requestUrl}`));
179
+ }
174
180
  if (res.statusCode < 200 || res.statusCode >= 300) {
175
181
  res.resume();
176
182
  return reject(new Error(`HTTP ${res.statusCode} for ${requestUrl}`));
@@ -1,29 +1,35 @@
1
1
  'use strict';
2
2
 
3
3
  /**
4
- * Centralized HTTP concurrency limiter for npm registry requests.
4
+ * Centralized HTTP concurrency + rate limiter for npm registry requests.
5
5
  *
6
- * With 16 monitor workers × 7+ HTTP requests/package, uncapped concurrency
7
- * reaches 112+ simultaneous requests — well above npm's implicit rate limit.
8
- * This module caps ALL registry.npmjs.org requests to a single semaphore
9
- * so that no more than REGISTRY_SEMAPHORE_MAX requests are in-flight at once.
6
+ * Two layers of protection:
7
+ * 1. Concurrency semaphore (REGISTRY_SEMAPHORE_MAX = 10) caps in-flight requests
8
+ * 2. Rate limiter (RATE_LIMIT_PER_SEC = 30) — caps requests/second via token bucket
10
9
  *
11
- * Consumers: temporal-analysis.js, temporal-ast-diff.js, monitor.js (getNpmLatestTarball),
12
- * npm-registry.js (fetchWithRetry to registry.npmjs.org).
10
+ * Without rate limiting, 10 concurrent slots × fast-completing requests = 100+ req/s
11
+ * bursts that trigger npm 429 responses → exponential backoff → scan times 10s→90s.
12
+ *
13
+ * Consumers: queue.js (downloadToFile), temporal-analysis.js, npm-registry.js.
13
14
  * NOT covered: api.npmjs.org (different server), replicate.npmjs.com (CouchDB changes stream).
14
15
  */
15
16
 
16
17
  const REGISTRY_SEMAPHORE_MAX = 10;
18
+ const RATE_LIMIT_PER_SEC = 30;
19
+
20
+ // --- Concurrency semaphore ---
17
21
 
18
22
  const _semaphore = { active: 0, queue: [] };
19
23
 
20
24
  function acquireRegistrySlot() {
21
25
  if (_semaphore.active < REGISTRY_SEMAPHORE_MAX) {
22
26
  _semaphore.active++;
23
- return Promise.resolve();
27
+ return _acquireRateToken();
24
28
  }
25
29
  return new Promise(resolve => {
26
- _semaphore.queue.push(resolve);
30
+ _semaphore.queue.push(() => {
31
+ _acquireRateToken().then(resolve);
32
+ });
27
33
  });
28
34
  }
29
35
 
@@ -36,9 +42,64 @@ function releaseRegistrySlot() {
36
42
  }
37
43
  }
38
44
 
45
+ // --- Token bucket rate limiter ---
46
+ // Refills RATE_LIMIT_PER_SEC tokens per second. Each request consumes 1 token.
47
+ // If no tokens available, waits until the next refill.
48
+
49
+ let _tokens = RATE_LIMIT_PER_SEC;
50
+ let _lastRefill = Date.now();
51
+
52
+ function _refillTokens() {
53
+ const now = Date.now();
54
+ const elapsed = now - _lastRefill;
55
+ if (elapsed >= 1000) {
56
+ _tokens = Math.min(RATE_LIMIT_PER_SEC, _tokens + Math.floor(elapsed / 1000) * RATE_LIMIT_PER_SEC);
57
+ _lastRefill = now;
58
+ }
59
+ }
60
+
61
+ function _acquireRateToken() {
62
+ _refillTokens();
63
+ if (_tokens > 0) {
64
+ _tokens--;
65
+ return Promise.resolve();
66
+ }
67
+ // Wait until next refill
68
+ const waitMs = 1000 - (Date.now() - _lastRefill);
69
+ return new Promise(resolve => {
70
+ setTimeout(() => {
71
+ _refillTokens();
72
+ _tokens = Math.max(0, _tokens - 1);
73
+ resolve();
74
+ }, Math.max(10, waitMs));
75
+ });
76
+ }
77
+
78
+ // --- 429 backoff helper ---
79
+ // Call this when a 429 response is received. Drains all tokens to force
80
+ // a ~1s pause on subsequent requests (token bucket naturally refills).
81
+
82
+ let _backoffCount = 0;
83
+
84
+ function signal429() {
85
+ _tokens = 0;
86
+ _lastRefill = Date.now() + 1000; // Force 1s pause
87
+ _backoffCount++;
88
+ if (_backoffCount % 10 === 1) {
89
+ console.warn(`[HTTP-LIMITER] 429 rate limited by npm registry (total: ${_backoffCount})`);
90
+ }
91
+ }
92
+
93
+ function getBackoffCount() {
94
+ return _backoffCount;
95
+ }
96
+
39
97
  function resetLimiter() {
40
98
  _semaphore.active = 0;
41
99
  _semaphore.queue.length = 0;
100
+ _tokens = RATE_LIMIT_PER_SEC;
101
+ _lastRefill = Date.now();
102
+ _backoffCount = 0;
42
103
  }
43
104
 
44
105
  function getActiveSemaphore() {
@@ -47,8 +108,11 @@ function getActiveSemaphore() {
47
108
 
48
109
  module.exports = {
49
110
  REGISTRY_SEMAPHORE_MAX,
111
+ RATE_LIMIT_PER_SEC,
50
112
  acquireRegistrySlot,
51
113
  releaseRegistrySlot,
114
+ signal429,
115
+ getBackoffCount,
52
116
  resetLimiter,
53
117
  getActiveSemaphore
54
118
  };