muaddib-scanner 2.10.87 → 2.10.88

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.10.87",
3
+ "version": "2.10.88",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -11,7 +11,8 @@ const { poll } = require('./ingestion.js');
11
11
  const { processQueue, ensureWorkers, drainWorkers, getTargetConcurrency, setTargetConcurrency, getActiveWorkers, SCAN_CONCURRENCY } = require('./queue.js');
12
12
  const { computeTarget, ADJUST_INTERVAL_MS, BASE_CONCURRENCY, resetDeltas } = require('./adaptive-concurrency.js');
13
13
  const { startHealthcheck } = require('./healthcheck.js');
14
- const { startDeferredWorker, stopDeferredWorker, persistDeferredQueue, restoreDeferredQueue } = require('./deferred-sandbox.js');
14
+ const { startDeferredWorker, stopDeferredWorker, persistDeferredQueue, restoreDeferredQueue, clearDeferredQueue } = require('./deferred-sandbox.js');
15
+ const { clearMetadataCache } = require('../scanner/temporal-analysis.js');
15
16
 
16
17
  const POLL_INTERVAL = 60_000;
17
18
  const PROCESS_LOOP_INTERVAL = 2_000; // Queue check interval when empty
@@ -21,9 +22,44 @@ const QUEUE_STATE_FILE = path.join(__dirname, '..', '..', 'data', 'queue-state.j
21
22
  const QUEUE_STATE_MAX_AGE_MS = 24 * 60 * 60 * 1000; // 24h expiry
22
23
  const MAX_QUEUE_PERSIST_SIZE = 200_000; // Don't persist if queue > 200K items (OOM guard)
23
24
  const MAX_RESTORE_QUEUE_SIZE = 100_000; // Cap restored queue at 100K items
24
- // MAX_SCAN_QUEUE removed: backpressure no longer skips polling.
25
- // Queue grows unbounded in memory (entries are ~300B, 100K = 30MB on 12GB VPS).
26
- // Adaptive concurrency adjusts processing speed to match ingestion rate.
25
+
26
// ─── Memory pressure circuit breaker ───
// Graduated response driven by the V8 heapUsed/heapTotal ratio.
//
// Threat model: once GC thrashing begins (>90% heap), scan throughput collapses
// to zero while ingestion keeps adding to the queue. Without a circuit breaker
// the only exit is an OOM kill or a manual restart — either one loses the
// entire in-memory queue.
//
// Levels:
//   NONE      (<75%) — normal operation
//   ELEVATED  (75%)  — log warning, reduce concurrency target
//   HIGH      (85%)  — prune caches, stop spawning new workers
//   CRITICAL  (90%)  — stop ingestion, clear scanner caches, force GC
//   EMERGENCY (95%)  — truncate queue to most recent N items, clear deferred queue
//
// Lesson from the 2026-04-13 incident: an emergency prune at 85% freed only
// ~4MB of auxiliary caches (recentlyScanned, downloadsCache, alertedPackageRules)
// on a 3571MB heap. The real memory was pinned by N concurrent scan workers
// holding AST trees, scan results, and extracted file references. The only
// effective lever is to stop spawning new workers so running scans can finish
// and release that memory.
const MEMORY_PRESSURE_LEVELS = {
  NONE: 0,
  ELEVATED: 1,
  HIGH: 2,
  CRITICAL: 3,
  EMERGENCY: 4
};
const MEMORY_THRESHOLD_ELEVATED = 0.75;
const MEMORY_THRESHOLD_HIGH = 0.85;
const MEMORY_THRESHOLD_CRITICAL = 0.90;
const MEMORY_THRESHOLD_EMERGENCY = 0.95;
// Under EMERGENCY the queue is truncated to the N most recently enqueued items —
// the newest packages, i.e. the ones most likely to still be on npm for re-scan.
const EMERGENCY_QUEUE_KEEP = 500;
// Watchdog log/response interval adapts: 5min under NONE/ELEVATED, 15s at HIGH+.
// (The pressure *check* itself runs every main-loop iteration.) Fast response
// matters: at 50 pkg/min ingestion, a 5min gap means 250 new queue items.
const MEMORY_LOG_INTERVAL_NORMAL = 300_000; // 5 minutes
const MEMORY_LOG_INTERVAL_PRESSURE = 15_000; // 15 seconds
// Current pressure level, updated by computeMemoryPressure() and exposed to
// ingestion.js via getMemoryPressureLevel().
let _memoryPressureLevel = MEMORY_PRESSURE_LEVELS.NONE;
27
63
 
28
64
  function sleep(ms) {
29
65
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -246,6 +282,37 @@ function checkDiskSpace() {
246
282
  const MAX_RECENTLY_SCANNED = 50_000;
247
283
  const MAX_ALERTED_PACKAGES = 5_000;
248
284
 
285
/**
 * Compute the current memory pressure level from V8 heap usage.
 * Updates the module-level _memoryPressureLevel as a side effect.
 * Cheap call (~0.1ms) — safe to run every 2s in the main loop.
 *
 * @returns {{level: number, mem: NodeJS.MemoryUsage, ratio: number}}
 *   level — one of MEMORY_PRESSURE_LEVELS; mem — raw process.memoryUsage();
 *   ratio — heapUsed/heapTotal (0 when heapTotal is 0, guarding divide-by-zero).
 */
function computeMemoryPressure() {
  const mem = process.memoryUsage();
  const ratio = mem.heapTotal > 0 ? mem.heapUsed / mem.heapTotal : 0;

  // Walk the bands from most to least severe; the first threshold the
  // ratio meets determines the level. Below ELEVATED we stay at NONE.
  const bands = [
    [MEMORY_THRESHOLD_EMERGENCY, MEMORY_PRESSURE_LEVELS.EMERGENCY],
    [MEMORY_THRESHOLD_CRITICAL, MEMORY_PRESSURE_LEVELS.CRITICAL],
    [MEMORY_THRESHOLD_HIGH, MEMORY_PRESSURE_LEVELS.HIGH],
    [MEMORY_THRESHOLD_ELEVATED, MEMORY_PRESSURE_LEVELS.ELEVATED]
  ];
  let level = MEMORY_PRESSURE_LEVELS.NONE;
  for (const [threshold, bandLevel] of bands) {
    if (ratio >= threshold) {
      level = bandLevel;
      break;
    }
  }
  _memoryPressureLevel = level;
  return { level: _memoryPressureLevel, mem, ratio };
}

/**
 * Read the most recently computed memory pressure level.
 * Used by ingestion.js to decide whether to skip polling.
 * @returns {number} one of MEMORY_PRESSURE_LEVELS.
 */
function getMemoryPressureLevel() {
  return _memoryPressureLevel;
}
315
+
249
316
  /**
250
317
  * Prune in-memory caches to prevent unbounded growth between daily resets.
251
318
  * Called hourly from the main loop. Targets:
@@ -284,6 +351,76 @@ function pruneMemoryCaches(recentlyScanned, downloadsCache, alertedPackageRules)
284
351
  }
285
352
  }
286
353
 
354
/**
 * Graduated memory pressure response. Called from the main loop when
 * computeMemoryPressure() detects a level >= HIGH.
 *
 * The key principle: clearing caches alone is futile when the real memory is held
 * by N concurrent scan workers retaining AST trees, scan results, and extracted
 * file references. The only effective response is to STOP creating new work and
 * let running scans finish/timeout and release their memory.
 *
 * Level actions (cumulative — higher levels include lower-level actions):
 *   HIGH      (85%): clear auxiliary caches (recentlyScanned, downloadsCache, etc.)
 *   CRITICAL  (90%): clear scanner caches (temporal metadata), force GC, log loudly
 *   EMERGENCY (95%): truncate queue to EMERGENCY_QUEUE_KEEP, clear deferred queue
 *
 * Worker spawning is gated separately in the main loop (ensureWorkers skipped at HIGH+).
 * Ingestion is gated in ingestion.js via getMemoryPressureLevel() (skipped at CRITICAL+).
 *
 * @param {number} level - one of MEMORY_PRESSURE_LEVELS (expected HIGH or above)
 * @param {number} ratio - heapUsed/heapTotal ratio that triggered the response
 * @param {Set} recentlyScanned - scan-dedup cache; cleared at HIGH+
 * @param {Map} downloadsCache - download-count cache; cleared at HIGH+
 * @param {Array} scanQueue - in-memory scan queue; truncated in place at EMERGENCY
 */
function handleMemoryPressure(level, ratio, recentlyScanned, downloadsCache, scanQueue) {
  const pct = (ratio * 100).toFixed(0);

  // HIGH (85%+): clear auxiliary caches — same as old emergency prune
  if (level >= MEMORY_PRESSURE_LEVELS.HIGH) {
    console.error(`[MONITOR] MEMORY PRESSURE HIGH: heap at ${pct}% — pruning caches, stopping new workers`);
    recentlyScanned.clear();
    downloadsCache.clear();
    alertedPackageRules.clear();
  }

  // CRITICAL (90%+): clear scanner caches, force GC
  if (level >= MEMORY_PRESSURE_LEVELS.CRITICAL) {
    console.error(`[MONITOR] MEMORY PRESSURE CRITICAL: heap at ${pct}% — stopping ingestion, clearing scanner caches`);
    // temporal-analysis._metadataCache (200 entries × full npm registry metadata).
    // Best-effort by design: a scanner-cache failure must not abort the response.
    try { clearMetadataCache(); } catch {}
    // pendingGrouped webhook buffers — cancel each timer before dropping the map.
    // Iterate values() directly; the group keys are not needed here.
    for (const group of pendingGrouped.values()) {
      clearTimeout(group.timer);
    }
    pendingGrouped.clear();
    // Force GC if available (requires --expose-gc)
    if (global.gc) {
      global.gc();
      console.log('[MONITOR] Forced garbage collection');
    }
  }

  // EMERGENCY (95%+): queue truncation + deferred queue clear
  if (level >= MEMORY_PRESSURE_LEVELS.EMERGENCY) {
    const queueBefore = scanQueue.length;
    if (queueBefore > EMERGENCY_QUEUE_KEEP) {
      // Keep the LAST N items (most recently added = newest packages).
      // These are the packages most likely to still exist on npm for re-scan later.
      // Dropped items are public packages — they'll appear again on republish or
      // can be re-fetched from the registry if needed.
      const dropped = queueBefore - EMERGENCY_QUEUE_KEEP;
      // splice from the front: older items were pushed first
      scanQueue.splice(0, dropped);
      console.error(`[MONITOR] MEMORY EMERGENCY: heap at ${pct}% — truncated queue ${queueBefore} → ${scanQueue.length} (dropped ${dropped} oldest items)`);
    }
    // Clear deferred sandbox queue (holds full staticResult objects)
    const deferredDropped = clearDeferredQueue();
    if (deferredDropped > 0) {
      console.error(`[MONITOR] MEMORY EMERGENCY: cleared ${deferredDropped} deferred sandbox items`);
    }
    // Second GC pass after freeing queue + deferred references
    if (global.gc) {
      global.gc();
    }
  }
}
423
+
287
424
  function reportStats(stats) {
288
425
  const avg = stats.scanned > 0 ? (stats.totalTimeMs / stats.scanned / 1000).toFixed(1) : '0.0';
289
426
  const { t1, t1a, t1b, t2, t3 } = stats.suspectByTier;
@@ -515,8 +652,8 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
515
652
  // ─── Decoupled polling ───
516
653
  // Poll runs on its own interval, independent of processing.
517
654
  // This ensures new packages are ingested even while a large batch is being scanned.
518
- // Backpressure removed: polling ALWAYS runs. Queue grows unbounded in memory
519
- // (entries ~300B, 100K = 30MB). Adaptive concurrency adjusts scan throughput.
655
+ // Backpressure: poll() skips when queue >= 30K or memory pressure >= CRITICAL (90%).
656
+ // Adaptive concurrency adjusts scan throughput to match ingestion rate.
520
657
  let pollInProgress = false;
521
658
  pollIntervalHandle = setInterval(async () => {
522
659
  if (!running || pollInProgress) return;
@@ -549,33 +686,39 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
549
686
  // Non-blocking: ensureWorkers spawns fire-and-forget background workers.
550
687
  // This loop tops up workers every 2s AND runs housekeeping (memory, daily report)
551
688
  // without being blocked by long-running scans.
552
- const MEMORY_LOG_INTERVAL = 300_000; // 5 minutes
553
- const MEMORY_PRESSURE_THRESHOLD = 0.85; // 85% heap usage triggers emergency prune
554
689
  let lastMemoryLogTime = Date.now();
555
690
 
556
691
  while (running) {
557
- // Top up workers (non-blocking spawns missing workers as background promises)
558
- ensureWorkers(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailableRef.value);
559
-
560
- // ─── Memory watchdog (every 5 min) ───
561
- if (Date.now() - lastMemoryLogTime >= MEMORY_LOG_INTERVAL) {
562
- const mem = process.memoryUsage();
563
- const heapUsedMB = (mem.heapUsed / 1024 / 1024).toFixed(0);
564
- const heapTotalMB = (mem.heapTotal / 1024 / 1024).toFixed(0);
565
- const rssMB = (mem.rss / 1024 / 1024).toFixed(0);
566
- console.log(`[MONITOR] MEMORY: heap=${heapUsedMB}MB/${heapTotalMB}MB, rss=${rssMB}MB, queue=${scanQueue.length}, dedup=${recentlyScanned.size}, downloads=${downloadsCache.size}, alerts=${alertedPackageRules.size}`);
567
-
568
- // Emergency prune under memory pressure
569
- if (mem.heapUsed / mem.heapTotal > MEMORY_PRESSURE_THRESHOLD) {
570
- console.error(`[MONITOR] MEMORY PRESSURE: heap at ${((mem.heapUsed / mem.heapTotal) * 100).toFixed(0)}% — emergency prune`);
571
- recentlyScanned.clear();
572
- downloadsCache.clear();
573
- alertedPackageRules.clear();
574
- // Force GC if available (requires --expose-gc)
575
- if (global.gc) {
576
- global.gc();
577
- console.log('[MONITOR] Forced garbage collection');
578
- }
692
+ // ─── Memory circuit breaker (every iteration) ───
693
+ // computeMemoryPressure() is cheap (~0.1ms). Running every 2s ensures fast
694
+ // reaction to memory spikes — the 2026-04-13 incident showed that checking
695
+ // every 5min is too slow (250 packages ingested between checks).
696
+ const { level: pressureLevel, mem: currentMem, ratio: heapRatio } = computeMemoryPressure();
697
+
698
+ // Top up workers ONLY when memory pressure is below HIGH.
699
+ // At HIGH+, existing workers continue (they'll finish or timeout) but no new
700
+ // ones are spawned. This is the core mechanism: let running scans release their
701
+ // memory (AST trees, scan results, extracted files) before starting new ones.
702
+ if (pressureLevel < MEMORY_PRESSURE_LEVELS.HIGH) {
703
+ ensureWorkers(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailableRef.value);
704
+ }
705
+
706
+ // ─── Memory watchdog (adaptive interval) ───
707
+ // Log every 5min normally, every 15s under pressure.
708
+ const memLogInterval = pressureLevel >= MEMORY_PRESSURE_LEVELS.HIGH
709
+ ? MEMORY_LOG_INTERVAL_PRESSURE
710
+ : MEMORY_LOG_INTERVAL_NORMAL;
711
+
712
+ if (Date.now() - lastMemoryLogTime >= memLogInterval) {
713
+ const heapUsedMB = (currentMem.heapUsed / 1024 / 1024).toFixed(0);
714
+ const heapTotalMB = (currentMem.heapTotal / 1024 / 1024).toFixed(0);
715
+ const rssMB = (currentMem.rss / 1024 / 1024).toFixed(0);
716
+ const levelName = Object.keys(MEMORY_PRESSURE_LEVELS).find(k => MEMORY_PRESSURE_LEVELS[k] === pressureLevel) || 'UNKNOWN';
717
+ console.log(`[MONITOR] MEMORY: heap=${heapUsedMB}MB/${heapTotalMB}MB, rss=${rssMB}MB, queue=${scanQueue.length}, dedup=${recentlyScanned.size}, downloads=${downloadsCache.size}, alerts=${alertedPackageRules.size}, pressure=${levelName}`);
718
+
719
+ // Graduated response at HIGH+
720
+ if (pressureLevel >= MEMORY_PRESSURE_LEVELS.HIGH) {
721
+ handleMemoryPressure(pressureLevel, heapRatio, recentlyScanned, downloadsCache, scanQueue);
579
722
  }
580
723
  lastMemoryLogTime = Date.now();
581
724
  }
@@ -632,5 +775,17 @@ module.exports = {
632
775
  MAX_RESTORE_QUEUE_SIZE,
633
776
  pruneMemoryCaches,
634
777
  MAX_RECENTLY_SCANNED,
635
- MAX_ALERTED_PACKAGES
778
+ MAX_ALERTED_PACKAGES,
779
+ // Memory circuit breaker
780
+ computeMemoryPressure,
781
+ getMemoryPressureLevel,
782
+ handleMemoryPressure,
783
+ MEMORY_PRESSURE_LEVELS,
784
+ MEMORY_THRESHOLD_ELEVATED,
785
+ MEMORY_THRESHOLD_HIGH,
786
+ MEMORY_THRESHOLD_CRITICAL,
787
+ MEMORY_THRESHOLD_EMERGENCY,
788
+ EMERGENCY_QUEUE_KEEP,
789
+ MEMORY_LOG_INTERVAL_NORMAL,
790
+ MEMORY_LOG_INTERVAL_PRESSURE
636
791
  };
@@ -419,6 +419,18 @@ function isDeferredSlotBusy() {
419
419
  return _deferredSlotBusy;
420
420
  }
421
421
 
422
/**
 * Emergency clear: drop every deferred item so their staticResult references
 * become collectible. Invoked by the daemon.js memory circuit breaker at
 * EMERGENCY level. Also resets the dedup set so packages can be deferred again.
 * @returns {number} count of items dropped, for the caller's logging.
 */
function clearDeferredQueue() {
  // splice(0) empties the array in place, preserving the shared reference.
  const dropped = _deferredQueue.splice(0).length;
  _deferredSeen.clear();
  return dropped;
}
433
+
422
434
  module.exports = {
423
435
  enqueueDeferred,
424
436
  getDeferredQueue,
@@ -431,6 +443,7 @@ module.exports = {
431
443
  buildDeferredFollowUpEmbed,
432
444
  pruneExpired,
433
445
  isDeferredSlotBusy,
446
+ clearDeferredQueue,
434
447
  _resetDeferredQueue,
435
448
  DEFERRED_QUEUE_MAX,
436
449
  DEFERRED_TTL_MS,
@@ -650,7 +650,21 @@ async function pollPyPI(state, scanQueue) {
650
650
  const SOFT_BACKPRESSURE_THRESHOLD = 30_000;
651
651
 
652
652
  async function poll(state, scanQueue, stats) {
653
- // Soft backpressure: skip poll when queue is very deep.
653
+ // Memory-based backpressure: skip poll when heap is at CRITICAL+ (90%+).
654
+ // This is the primary defense against the 2026-04-13 death spiral where
655
+ // ingestion continued at 50 pkg/min while processing was at 0 throughput.
656
+ // Safe because: CouchDB seq is NOT advanced — next poll resumes from same point.
657
+ try {
658
+ const { getMemoryPressureLevel } = require('./daemon.js');
659
+ const pressureLevel = getMemoryPressureLevel();
660
+ // CRITICAL=3, EMERGENCY=4
661
+ if (pressureLevel >= 3) {
662
+ console.log(`[MONITOR] MEMORY BACKPRESSURE: skipping poll (pressure level ${pressureLevel} >= CRITICAL) — seq not advanced, 0 packages lost`);
663
+ return;
664
+ }
665
+ } catch { /* daemon.js not loaded yet (initial poll) — proceed normally */ }
666
+
667
+ // Queue-depth backpressure: skip poll when queue is very deep.
654
668
  // Safe because: CouchDB seq is NOT advanced (stays in memory only, persisted
655
669
  // by daemon.js AFTER poll returns) — next poll resumes from the same point.
656
670
  // Combined with adaptive concurrency: workers scale up → queue drains → poll resumes.