muaddib-scanner 2.10.80 → 2.10.82

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.10.80",
3
+ "version": "2.10.82",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -17,7 +17,7 @@ const os = require('os');
17
17
 
18
18
  const MIN_CONCURRENCY = 4;
19
19
  const BASE_CONCURRENCY = Math.max(MIN_CONCURRENCY, parseInt(process.env.MUADDIB_SCAN_CONCURRENCY, 10) || 8);
20
- const MAX_CONCURRENCY = Math.max(BASE_CONCURRENCY, parseInt(process.env.MUADDIB_MAX_CONCURRENCY, 10) || 32);
20
+ const MAX_CONCURRENCY = Math.max(BASE_CONCURRENCY, parseInt(process.env.MUADDIB_MAX_CONCURRENCY, 10) || 16);
21
21
  const ADJUST_INTERVAL_MS = 30_000;
22
22
 
23
23
  // Queue depth thresholds
@@ -37,6 +37,12 @@ const TIMEOUT_RATE_MIN_SAMPLES = 20;
37
37
  let _prevScanned = 0;
38
38
  let _prevTimeouts = 0;
39
39
 
40
+ // Throughput plateau detection: if we scaled up but throughput didn't increase,
41
+ // we've hit I/O saturation (npm registry rate limiting, disk contention).
42
+ // More workers would make it worse — scale back instead.
43
+ let _prevThroughput = 0;
44
+ let _lastScaleDirection = 0; // +1 = scaled up, -1 = scaled down, 0 = stable
45
+
40
46
  /**
41
47
  * Compute new target concurrency from system signals.
42
48
  * Uses stats deltas (not cumulative) for timeout rate — avoids stale data.
@@ -74,21 +80,39 @@ function computeTarget(current, queueDepth, stats) {
74
80
  // Priority 2: High timeout rate — system saturated, adding workers makes it worse
75
81
  if (timeoutRate > TIMEOUT_RATE_THRESHOLD) {
76
82
  const target = clamp(current - 2);
83
+ _prevThroughput = scannedDelta;
84
+ _lastScaleDirection = target < current ? -1 : 0;
77
85
  return { target, reason: `high_timeout_rate (${(timeoutRate * 100).toFixed(0)}%, ${timeoutDelta}/${scannedDelta})` };
78
86
  }
79
87
 
80
- // Priority 3: Queue depth — scale up for backlog, down toward base when idle
88
+ // Priority 3: Throughput plateau — scaled up last tick but throughput flat/down.
89
+ // This catches I/O saturation: more workers = more concurrent HTTP to npm registry
90
+ // = rate limiting + contention = scan times 10s→90s = throughput drops.
91
+ // Scale back instead of continuing to add workers.
92
+ if (_lastScaleDirection > 0 && _prevThroughput > 0 && scannedDelta > 0 && scannedDelta <= _prevThroughput) {
93
+ const prevTp = _prevThroughput;
94
+ _prevThroughput = scannedDelta;
95
+ _lastScaleDirection = -1;
96
+ return { target: clamp(current - 2), reason: `throughput_plateau (${prevTp}→${scannedDelta} scans/30s, more workers didn't help)` };
97
+ }
98
+
99
+ // Priority 4: Queue depth — scale up for backlog, down toward base when idle
81
100
  if (queueDepth > QUEUE_BACKLOG_THRESHOLD) {
82
101
  const target = clamp(current + 4);
102
+ // Record throughput at the point of scale-up — next tick compares against this
103
+ _prevThroughput = scannedDelta;
104
+ _lastScaleDirection = target > current ? 1 : 0;
83
105
  return { target, reason: `backlog (queue=${queueDepth})` };
84
106
  }
85
107
 
86
108
  if (queueDepth < QUEUE_IDLE_THRESHOLD) {
87
109
  // Converge toward BASE, not MIN — normal traffic needs BASE capacity
88
110
  const target = Math.max(BASE_CONCURRENCY, clamp(current - 2));
111
+ _lastScaleDirection = target < current ? -1 : 0;
89
112
  return { target, reason: `idle (queue=${queueDepth})` };
90
113
  }
91
114
 
115
+ _lastScaleDirection = 0;
92
116
  return { target: current, reason: 'stable' };
93
117
  }
94
118
 
@@ -102,6 +126,8 @@ function clamp(n) {
102
126
  function resetDeltas() {
103
127
  _prevScanned = 0;
104
128
  _prevTimeouts = 0;
129
+ _prevThroughput = 0;
130
+ _lastScaleDirection = 0;
105
131
  }
106
132
 
107
133
  module.exports = {
@@ -480,15 +480,37 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
480
480
  console.log('[MONITOR] Deferred sandbox worker started (30s interval, dedicated slot)');
481
481
  }
482
482
 
483
- // Initial poll + scan (sequential for first run)
483
+ // ─── Initial poll ───
484
+ // Fills the queue with pending packages. Processing starts in the main loop
485
+ // via ensureWorkers (non-blocking) — NOT await processQueue (blocking).
486
+ // A blocking processQueue here would prevent adaptive concurrency from
487
+ // firing until the entire initial batch is drained at BASE_CONCURRENCY.
484
488
  await poll(state, scanQueue, stats);
485
489
  // Atomicity fix: persist queue AND seq together after each poll.
486
490
  // Previously, seq was saved inside pollNpmChanges() but queue persisted
487
- // every 60s — crash between the two lost queued items permanently.
491
+ // every 60s — crash between the two lost queued items permanently.
488
492
  persistQueue(scanQueue, state);
489
493
  saveNpmSeq(state.npmLastSeq);
490
494
  saveState(state, stats);
491
- await processQueue(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailableRef.value);
495
+ console.log(`[MONITOR] Initial poll complete ${scanQueue.length} packages queued for processing`);
496
+
497
+ // ─── Adaptive concurrency ───
498
+ // Set up BEFORE the main loop so it fires during the initial batch.
499
+ // Adjusts scan worker count every 30s based on queue depth, memory, timeout rate.
500
+ // Scale-up is aggressive (+4) during backlog, scale-down is gradual (-2) when idle.
501
+ concurrencyAdjustHandle = setInterval(() => {
502
+ if (!running) return;
503
+ const current = getTargetConcurrency();
504
+ const { target, reason } = computeTarget(current, scanQueue.length, stats);
505
+ if (target !== current) {
506
+ console.log(`[MONITOR] ADAPTIVE: concurrency ${current} → ${target} (${reason}, active=${getActiveWorkers()})`);
507
+ setTargetConcurrency(target);
508
+ // Immediately spawn new workers if scaling up (don't wait for next loop tick)
509
+ if (target > current) {
510
+ ensureWorkers(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailableRef.value);
511
+ }
512
+ }
513
+ }, ADJUST_INTERVAL_MS);
492
514
 
493
515
  // ─── Decoupled polling ───
494
516
  // Poll runs on its own interval, independent of processing.
@@ -523,23 +545,6 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
523
545
  persistDeferredQueue(); // Piggyback: persist deferred sandbox queue on same interval
524
546
  }, QUEUE_PERSIST_INTERVAL);
525
547
 
526
- // ─── Adaptive concurrency ───
527
- // Adjusts scan worker count every 30s based on queue depth, memory, timeout rate.
528
- // Scale-up is aggressive (+4) during backlog, scale-down is gradual (-2) when idle.
529
- concurrencyAdjustHandle = setInterval(() => {
530
- if (!running) return;
531
- const current = getTargetConcurrency();
532
- const { target, reason } = computeTarget(current, scanQueue.length, stats);
533
- if (target !== current) {
534
- console.log(`[MONITOR] ADAPTIVE: concurrency ${current} → ${target} (${reason}, active=${getActiveWorkers()})`);
535
- setTargetConcurrency(target);
536
- // Immediately spawn new workers if scaling up (don't wait for next loop tick)
537
- if (target > current) {
538
- ensureWorkers(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailableRef.value);
539
- }
540
- }
541
- }, ADJUST_INTERVAL_MS);
542
-
543
548
  // ─── Continuous processing loop ───
544
549
  // Non-blocking: ensureWorkers spawns fire-and-forget background workers.
545
550
  // This loop tops up workers every 2s AND runs housekeeping (memory, daily report)
@@ -643,13 +643,20 @@ async function pollPyPI(state, scanQueue) {
643
643
  * @param {Array} scanQueue - Mutable scan queue array
644
644
  * @param {Object} stats - Mutable stats object
645
645
  */
646
+ const SOFT_BACKPRESSURE_THRESHOLD = 10_000;
647
+
646
648
  async function poll(state, scanQueue, stats) {
647
- // Backpressure removed: polling ALWAYS runs regardless of queue depth.
648
- // The queue can grow unbounded in memory (entries are ~300 bytes, 100K = 30MB).
649
- // This prevents the data loss scenario where the CouchDB seq advances but
650
- // queued items are not persisted — packages would be permanently invisible.
649
+ // Soft backpressure: skip poll when queue is very deep.
650
+ // Safe because: CouchDB seq is NOT advanced (stays in memory only, persisted
651
+ // by daemon.js AFTER poll returns) next poll resumes from the same point.
652
+ // Combined with adaptive concurrency: workers scale up → queue drains → poll resumes.
653
+ // This prevents the queue from growing to 30-40K during catch-up (OOM risk).
654
+ if (scanQueue.length >= SOFT_BACKPRESSURE_THRESHOLD) {
655
+ console.log(`[MONITOR] BACKPRESSURE: skipping poll (queue ${scanQueue.length} >= ${SOFT_BACKPRESSURE_THRESHOLD}) — seq not advanced, 0 packages lost`);
656
+ return;
657
+ }
651
658
  if (scanQueue.length > 5_000) {
652
- console.log(`[MONITOR] QUEUE_DEPTH: ${scanQueue.length} items — polling continues (no backpressure skip)`);
659
+ console.log(`[MONITOR] QUEUE_DEPTH: ${scanQueue.length} items — polling continues`);
653
660
  }
654
661
 
655
662
  const timestamp = new Date().toISOString().slice(0, 19).replace('T', ' ');
@@ -171,6 +171,12 @@ function downloadToFile(url, destPath, timeoutMs = DOWNLOAD_TIMEOUT) {
171
171
  }
172
172
  return doRequest(absoluteLocation, redirectCount + 1);
173
173
  }
174
+ if (res.statusCode === 429) {
175
+ res.resume();
176
+ // Signal rate limiter to back off — drains tokens, forces ~1s pause
177
+ try { require('./http-limiter.js').signal429(); } catch {}
178
+ return reject(new Error(`HTTP 429 rate limited for ${requestUrl}`));
179
+ }
174
180
  if (res.statusCode < 200 || res.statusCode >= 300) {
175
181
  res.resume();
176
182
  return reject(new Error(`HTTP ${res.statusCode} for ${requestUrl}`));
@@ -1,29 +1,35 @@
1
1
  'use strict';
2
2
 
3
3
  /**
4
- * Centralized HTTP concurrency limiter for npm registry requests.
4
+ * Centralized HTTP concurrency + rate limiter for npm registry requests.
5
5
  *
6
- * With 16 monitor workers × 7+ HTTP requests/package, uncapped concurrency
7
- * reaches 112+ simultaneous requests — well above npm's implicit rate limit.
8
- * This module caps ALL registry.npmjs.org requests to a single semaphore
9
- * so that no more than REGISTRY_SEMAPHORE_MAX requests are in-flight at once.
6
+ * Two layers of protection:
7
+ * 1. Concurrency semaphore (REGISTRY_SEMAPHORE_MAX = 10) caps in-flight requests
8
+ * 2. Rate limiter (RATE_LIMIT_PER_SEC = 30) — caps requests/second via token bucket
10
9
  *
11
- * Consumers: temporal-analysis.js, temporal-ast-diff.js, monitor.js (getNpmLatestTarball),
12
- * npm-registry.js (fetchWithRetry to registry.npmjs.org).
10
+ * Without rate limiting, 10 concurrent slots × fast-completing requests = 100+ req/s
11
+ * bursts that trigger npm 429 responses → exponential backoff → scan times 10s→90s.
12
+ *
13
+ * Consumers: queue.js (downloadToFile), temporal-analysis.js, npm-registry.js.
13
14
  * NOT covered: api.npmjs.org (different server), replicate.npmjs.com (CouchDB changes stream).
14
15
  */
15
16
 
16
17
  const REGISTRY_SEMAPHORE_MAX = 10;
18
+ const RATE_LIMIT_PER_SEC = 30;
19
+
20
+ // --- Concurrency semaphore ---
17
21
 
18
22
  const _semaphore = { active: 0, queue: [] };
19
23
 
20
24
  function acquireRegistrySlot() {
21
25
  if (_semaphore.active < REGISTRY_SEMAPHORE_MAX) {
22
26
  _semaphore.active++;
23
- return Promise.resolve();
27
+ return _acquireRateToken();
24
28
  }
25
29
  return new Promise(resolve => {
26
- _semaphore.queue.push(resolve);
30
+ _semaphore.queue.push(() => {
31
+ _acquireRateToken().then(resolve);
32
+ });
27
33
  });
28
34
  }
29
35
 
@@ -36,9 +42,64 @@ function releaseRegistrySlot() {
36
42
  }
37
43
  }
38
44
 
45
+ // --- Token bucket rate limiter ---
46
+ // Refills RATE_LIMIT_PER_SEC tokens per second. Each request consumes 1 token.
47
+ // If no tokens available, waits until the next refill.
48
+
49
+ let _tokens = RATE_LIMIT_PER_SEC;
50
+ let _lastRefill = Date.now();
51
+
52
+ function _refillTokens() {
53
+ const now = Date.now();
54
+ const elapsed = now - _lastRefill;
55
+ if (elapsed >= 1000) {
56
+ _tokens = Math.min(RATE_LIMIT_PER_SEC, _tokens + Math.floor(elapsed / 1000) * RATE_LIMIT_PER_SEC);
57
+ _lastRefill = now;
58
+ }
59
+ }
60
+
61
+ function _acquireRateToken() {
62
+ _refillTokens();
63
+ if (_tokens > 0) {
64
+ _tokens--;
65
+ return Promise.resolve();
66
+ }
67
+ // Wait until next refill
68
+ const waitMs = 1000 - (Date.now() - _lastRefill);
69
+ return new Promise(resolve => {
70
+ setTimeout(() => {
71
+ _refillTokens();
72
+ _tokens = Math.max(0, _tokens - 1);
73
+ resolve();
74
+ }, Math.max(10, waitMs));
75
+ });
76
+ }
77
+
78
+ // --- 429 backoff helper ---
79
+ // Call this when a 429 response is received. Drains all tokens to force
80
+ // a ~1s pause on subsequent requests (token bucket naturally refills).
81
+
82
+ let _backoffCount = 0;
83
+
84
+ function signal429() {
85
+ _tokens = 0;
86
+ _lastRefill = Date.now() + 1000; // Force 1s pause
87
+ _backoffCount++;
88
+ if (_backoffCount % 10 === 1) {
89
+ console.warn(`[HTTP-LIMITER] 429 rate limited by npm registry (total: ${_backoffCount})`);
90
+ }
91
+ }
92
+
93
+ function getBackoffCount() {
94
+ return _backoffCount;
95
+ }
96
+
39
97
  function resetLimiter() {
40
98
  _semaphore.active = 0;
41
99
  _semaphore.queue.length = 0;
100
+ _tokens = RATE_LIMIT_PER_SEC;
101
+ _lastRefill = Date.now();
102
+ _backoffCount = 0;
42
103
  }
43
104
 
44
105
  function getActiveSemaphore() {
@@ -47,8 +108,11 @@ function getActiveSemaphore() {
47
108
 
48
109
  module.exports = {
49
110
  REGISTRY_SEMAPHORE_MAX,
111
+ RATE_LIMIT_PER_SEC,
50
112
  acquireRegistrySlot,
51
113
  releaseRegistrySlot,
114
+ signal429,
115
+ getBackoffCount,
52
116
  resetLimiter,
53
117
  getActiveSemaphore
54
118
  };