muaddib-scanner 2.10.87 → 2.10.88
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/monitor/daemon.js +186 -31
- package/src/monitor/deferred-sandbox.js +13 -0
- package/src/monitor/ingestion.js +15 -1
package/package.json
CHANGED
package/src/monitor/daemon.js
CHANGED
|
@@ -11,7 +11,8 @@ const { poll } = require('./ingestion.js');
|
|
|
11
11
|
const { processQueue, ensureWorkers, drainWorkers, getTargetConcurrency, setTargetConcurrency, getActiveWorkers, SCAN_CONCURRENCY } = require('./queue.js');
|
|
12
12
|
const { computeTarget, ADJUST_INTERVAL_MS, BASE_CONCURRENCY, resetDeltas } = require('./adaptive-concurrency.js');
|
|
13
13
|
const { startHealthcheck } = require('./healthcheck.js');
|
|
14
|
-
const { startDeferredWorker, stopDeferredWorker, persistDeferredQueue, restoreDeferredQueue } = require('./deferred-sandbox.js');
|
|
14
|
+
const { startDeferredWorker, stopDeferredWorker, persistDeferredQueue, restoreDeferredQueue, clearDeferredQueue } = require('./deferred-sandbox.js');
|
|
15
|
+
const { clearMetadataCache } = require('../scanner/temporal-analysis.js');
|
|
15
16
|
|
|
16
17
|
const POLL_INTERVAL = 60_000;
|
|
17
18
|
const PROCESS_LOOP_INTERVAL = 2_000; // Queue check interval when empty
|
|
@@ -21,9 +22,44 @@ const QUEUE_STATE_FILE = path.join(__dirname, '..', '..', 'data', 'queue-state.j
|
|
|
21
22
|
const QUEUE_STATE_MAX_AGE_MS = 24 * 60 * 60 * 1000; // 24h expiry
|
|
22
23
|
const MAX_QUEUE_PERSIST_SIZE = 200_000; // Don't persist if queue > 200K items (OOM guard)
|
|
23
24
|
const MAX_RESTORE_QUEUE_SIZE = 100_000; // Cap restored queue at 100K items
|
|
24
|
-
|
|
25
|
-
//
|
|
26
|
-
//
|
|
25
|
+
|
|
26
|
+
// ─── Memory pressure circuit breaker ───
|
|
27
|
+
// Graduated response based on V8 heap usage ratio.
|
|
28
|
+
// Threat model: when GC thrashing starts (>90% heap), throughput drops to 0 and
|
|
29
|
+
// the queue grows unbounded because ingestion continues. Without a circuit breaker,
|
|
30
|
+
// the only recovery is OOM kill or manual restart — losing the entire in-memory queue.
|
|
31
|
+
//
|
|
32
|
+
// Levels:
|
|
33
|
+
// NONE (<75%) — normal operation
|
|
34
|
+
// ELEVATED (75%) — log warning, reduce concurrency target
|
|
35
|
+
// HIGH (85%) — prune caches, stop spawning new workers
|
|
36
|
+
// CRITICAL (90%) — stop ingestion, clear scanner caches, force GC
|
|
37
|
+
// EMERGENCY (95%) — truncate queue to most recent N items, clear deferred queue
|
|
38
|
+
//
|
|
39
|
+
// The key insight from the 2026-04-13 incident: emergency prune at 85% only cleared
|
|
40
|
+
// ~4MB of auxiliary caches (recentlyScanned, downloadsCache, alertedPackageRules) on a
|
|
41
|
+
// 3571MB heap. The real memory was held by N concurrent scan workers retaining AST trees,
|
|
42
|
+
// scan results, and extracted file references. Stopping worker spawning is the only way
|
|
43
|
+
// to let running scans finish and release their memory.
|
|
44
|
+
// Ordered severity scale: a larger number means more pressure, so levels can be
// compared with >= / <. Frozen because the object is exported and shared.
// Do not renumber: ingestion.js compares against the raw value 3 (CRITICAL).
const MEMORY_PRESSURE_LEVELS = Object.freeze({
  NONE: 0,
  ELEVATED: 1,
  HIGH: 2,
  CRITICAL: 3,
  EMERGENCY: 4
});
// Heap usage ratios (heapUsed / heapTotal) at which each level starts.
const MEMORY_THRESHOLD_ELEVATED = 0.75;
const MEMORY_THRESHOLD_HIGH = 0.85;
const MEMORY_THRESHOLD_CRITICAL = 0.90;
const MEMORY_THRESHOLD_EMERGENCY = 0.95;
// When truncating queue under EMERGENCY, keep the N most recent items.
// These are the newest packages — most likely to still be on npm for re-scan.
const EMERGENCY_QUEUE_KEEP = 500;
// Interval between memory log lines (and the graduated response that runs with
// them): 5min under NONE/ELEVATED, 15s under HIGH+. The pressure level itself is
// recomputed on every main-loop iteration, independent of this interval.
// Fast reaction matters because at 50 pkg/min ingestion, 5min = 250 new items.
const MEMORY_LOG_INTERVAL_NORMAL = 300_000; // 5 minutes
const MEMORY_LOG_INTERVAL_PRESSURE = 15_000; // 15 seconds
// Level recorded by the latest computeMemoryPressure() call; exposed to other
// modules through getMemoryPressureLevel().
let _memoryPressureLevel = MEMORY_PRESSURE_LEVELS.NONE;
|
|
27
63
|
|
|
28
64
|
function sleep(ms) {
|
|
29
65
|
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
@@ -246,6 +282,37 @@ function checkDiskSpace() {
|
|
|
246
282
|
const MAX_RECENTLY_SCANNED = 50_000;
|
|
247
283
|
const MAX_ALERTED_PACKAGES = 5_000;
|
|
248
284
|
|
|
285
|
+
/**
 * Classify the current heap usage into one of MEMORY_PRESSURE_LEVELS and record
 * the result in the module-level _memoryPressureLevel (read back through
 * getMemoryPressureLevel()).
 *
 * The ratio is heapUsed / heapTotal; a non-positive heapTotal yields ratio 0.
 * The main loop calls this on every iteration, so it must stay cheap.
 *
 * NOTE(review): heapTotal is the heap V8 has currently allocated, not the
 * configured heap limit. Confirm this ratio tracks OOM proximity as intended;
 * v8.getHeapStatistics().heap_size_limit would be the alternative denominator.
 *
 * @param {{heapUsed: number, heapTotal: number}} [mem] Memory snapshot to
 *   classify. Defaults to a fresh process.memoryUsage() sample; passing one
 *   explicitly makes the classification deterministic (e.g. in tests).
 * @returns {{level: number, mem: object, ratio: number}} The computed level,
 *   the snapshot it was derived from, and the heap usage ratio.
 */
function computeMemoryPressure(mem = process.memoryUsage()) {
  const ratio = mem.heapTotal > 0 ? mem.heapUsed / mem.heapTotal : 0;

  // Walk the thresholds from most to least severe; first match wins.
  let level = MEMORY_PRESSURE_LEVELS.NONE;
  if (ratio >= MEMORY_THRESHOLD_EMERGENCY) {
    level = MEMORY_PRESSURE_LEVELS.EMERGENCY;
  } else if (ratio >= MEMORY_THRESHOLD_CRITICAL) {
    level = MEMORY_PRESSURE_LEVELS.CRITICAL;
  } else if (ratio >= MEMORY_THRESHOLD_HIGH) {
    level = MEMORY_PRESSURE_LEVELS.HIGH;
  } else if (ratio >= MEMORY_THRESHOLD_ELEVATED) {
    level = MEMORY_PRESSURE_LEVELS.ELEVATED;
  }

  _memoryPressureLevel = level;
  return { level, mem, ratio };
}
|
|
307
|
+
|
|
308
|
+
/**
 * Report the memory pressure level recorded by the most recent
 * computeMemoryPressure() call, without sampling the heap again.
 * ingestion.js reads this to decide whether to skip a poll.
 *
 * @returns {number} One of the MEMORY_PRESSURE_LEVELS values.
 */
function getMemoryPressureLevel() {
  const lastComputedLevel = _memoryPressureLevel;
  return lastComputedLevel;
}
|
|
315
|
+
|
|
249
316
|
/**
|
|
250
317
|
* Prune in-memory caches to prevent unbounded growth between daily resets.
|
|
251
318
|
* Called hourly from the main loop. Targets:
|
|
@@ -284,6 +351,76 @@ function pruneMemoryCaches(recentlyScanned, downloadsCache, alertedPackageRules)
|
|
|
284
351
|
}
|
|
285
352
|
}
|
|
286
353
|
|
|
354
|
+
/**
 * Graduated memory pressure response. The main loop calls this when
 * computeMemoryPressure() reports a level >= HIGH.
 *
 * Actions are cumulative — a higher level also performs every lower-level step:
 *   HIGH:      clear recentlyScanned, downloadsCache and alertedPackageRules
 *   CRITICAL:  clear the temporal metadata cache, cancel and drop pending
 *              grouped webhook buffers, force GC when global.gc is exposed
 *   EMERGENCY: truncate scanQueue to its newest EMERGENCY_QUEUE_KEEP items,
 *              clear the deferred sandbox queue, run a second GC pass
 *
 * Worker spawning and ingestion are NOT gated here; the main loop and
 * ingestion.js handle those based on the current pressure level.
 *
 * Besides its parameters this uses alertedPackageRules, pendingGrouped,
 * clearMetadataCache and clearDeferredQueue from the enclosing module scope.
 *
 * @param {number} level One of MEMORY_PRESSURE_LEVELS.
 * @param {number} ratio Heap usage ratio (0..1); only used for log output.
 * @param {{clear: Function}} recentlyScanned Dedup cache, cleared at HIGH+.
 * @param {{clear: Function}} downloadsCache Downloads cache, cleared at HIGH+.
 * @param {Array} scanQueue Pending scan queue (oldest first), truncated in
 *   place at EMERGENCY.
 * @returns {void}
 */
function handleMemoryPressure(level, ratio, recentlyScanned, downloadsCache, scanQueue) {
  const pct = (ratio * 100).toFixed(0);

  // HIGH (85%+): clear auxiliary caches — same as old emergency prune
  if (level >= MEMORY_PRESSURE_LEVELS.HIGH) {
    console.error(`[MONITOR] MEMORY PRESSURE HIGH: heap at ${pct}% — pruning caches, stopping new workers`);
    recentlyScanned.clear();
    downloadsCache.clear();
    alertedPackageRules.clear();
  }

  // CRITICAL (90%+): clear scanner caches, force GC
  if (level >= MEMORY_PRESSURE_LEVELS.CRITICAL) {
    console.error(`[MONITOR] MEMORY PRESSURE CRITICAL: heap at ${pct}% — stopping ingestion, clearing scanner caches`);
    // Best effort: a failure here must not abort the remaining relief steps,
    // but it is reported instead of being silently swallowed.
    try {
      clearMetadataCache();
    } catch (err) {
      console.error(`[MONITOR] clearMetadataCache failed: ${err?.message ?? err}`);
    }
    // pendingGrouped webhook buffers: cancel each flush timer before dropping
    // the entries so no timer fires for a group that no longer exists.
    for (const [, group] of pendingGrouped) {
      clearTimeout(group.timer);
    }
    pendingGrouped.clear();
    // Force GC if available (requires --expose-gc)
    if (global.gc) {
      global.gc();
      console.log('[MONITOR] Forced garbage collection');
    }
  }

  // EMERGENCY (95%+): queue truncation + deferred queue clear
  if (level >= MEMORY_PRESSURE_LEVELS.EMERGENCY) {
    const queueBefore = scanQueue.length;
    if (queueBefore > EMERGENCY_QUEUE_KEEP) {
      // Keep the LAST N items (most recently pushed = newest packages); the
      // oldest entries at the front are the ones discarded.
      const dropped = queueBefore - EMERGENCY_QUEUE_KEEP;
      // Shift the survivors to the front and truncate in place. Unlike
      // splice(0, dropped), this does not allocate an array holding every
      // dropped item at the very moment the heap is nearly exhausted.
      scanQueue.copyWithin(0, dropped);
      scanQueue.length = EMERGENCY_QUEUE_KEEP;
      console.error(`[MONITOR] MEMORY EMERGENCY: heap at ${pct}% — truncated queue ${queueBefore} → ${scanQueue.length} (dropped ${dropped} oldest items)`);
    }
    // Clear deferred sandbox queue; clearDeferredQueue() returns the drop count.
    const deferredDropped = clearDeferredQueue();
    if (deferredDropped > 0) {
      console.error(`[MONITOR] MEMORY EMERGENCY: cleared ${deferredDropped} deferred sandbox items`);
    }
    // Second GC pass after freeing queue + deferred references
    if (global.gc) {
      global.gc();
    }
  }
}
|
|
423
|
+
|
|
287
424
|
function reportStats(stats) {
|
|
288
425
|
const avg = stats.scanned > 0 ? (stats.totalTimeMs / stats.scanned / 1000).toFixed(1) : '0.0';
|
|
289
426
|
const { t1, t1a, t1b, t2, t3 } = stats.suspectByTier;
|
|
@@ -515,8 +652,8 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
|
|
|
515
652
|
// ─── Decoupled polling ───
|
|
516
653
|
// Poll runs on its own interval, independent of processing.
|
|
517
654
|
// This ensures new packages are ingested even while a large batch is being scanned.
|
|
518
|
-
// Backpressure
|
|
519
|
-
//
|
|
655
|
+
// Backpressure: poll() skips when queue >= 30K or memory pressure >= CRITICAL (90%).
|
|
656
|
+
// Adaptive concurrency adjusts scan throughput to match ingestion rate.
|
|
520
657
|
let pollInProgress = false;
|
|
521
658
|
pollIntervalHandle = setInterval(async () => {
|
|
522
659
|
if (!running || pollInProgress) return;
|
|
@@ -549,33 +686,39 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
|
|
|
549
686
|
// Non-blocking: ensureWorkers spawns fire-and-forget background workers.
|
|
550
687
|
// This loop tops up workers every 2s AND runs housekeeping (memory, daily report)
|
|
551
688
|
// without being blocked by long-running scans.
|
|
552
|
-
const MEMORY_LOG_INTERVAL = 300_000; // 5 minutes
|
|
553
|
-
const MEMORY_PRESSURE_THRESHOLD = 0.85; // 85% heap usage triggers emergency prune
|
|
554
689
|
let lastMemoryLogTime = Date.now();
|
|
555
690
|
|
|
556
691
|
while (running) {
|
|
557
|
-
//
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
//
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
692
|
+
// ─── Memory circuit breaker (every iteration) ───
|
|
693
|
+
// computeMemoryPressure() is cheap (~0.1ms). Running every 2s ensures fast
|
|
694
|
+
// reaction to memory spikes — the 2026-04-13 incident showed that checking
|
|
695
|
+
// every 5min is too slow (250 packages ingested between checks).
|
|
696
|
+
const { level: pressureLevel, mem: currentMem, ratio: heapRatio } = computeMemoryPressure();
|
|
697
|
+
|
|
698
|
+
// Top up workers ONLY when memory pressure is below HIGH.
|
|
699
|
+
// At HIGH+, existing workers continue (they'll finish or timeout) but no new
|
|
700
|
+
// ones are spawned. This is the core mechanism: let running scans release their
|
|
701
|
+
// memory (AST trees, scan results, extracted files) before starting new ones.
|
|
702
|
+
if (pressureLevel < MEMORY_PRESSURE_LEVELS.HIGH) {
|
|
703
|
+
ensureWorkers(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailableRef.value);
|
|
704
|
+
}
|
|
705
|
+
|
|
706
|
+
// ─── Memory watchdog (adaptive interval) ───
|
|
707
|
+
// Log every 5min normally, every 15s under pressure.
|
|
708
|
+
const memLogInterval = pressureLevel >= MEMORY_PRESSURE_LEVELS.HIGH
|
|
709
|
+
? MEMORY_LOG_INTERVAL_PRESSURE
|
|
710
|
+
: MEMORY_LOG_INTERVAL_NORMAL;
|
|
711
|
+
|
|
712
|
+
if (Date.now() - lastMemoryLogTime >= memLogInterval) {
|
|
713
|
+
const heapUsedMB = (currentMem.heapUsed / 1024 / 1024).toFixed(0);
|
|
714
|
+
const heapTotalMB = (currentMem.heapTotal / 1024 / 1024).toFixed(0);
|
|
715
|
+
const rssMB = (currentMem.rss / 1024 / 1024).toFixed(0);
|
|
716
|
+
const levelName = Object.keys(MEMORY_PRESSURE_LEVELS).find(k => MEMORY_PRESSURE_LEVELS[k] === pressureLevel) || 'UNKNOWN';
|
|
717
|
+
console.log(`[MONITOR] MEMORY: heap=${heapUsedMB}MB/${heapTotalMB}MB, rss=${rssMB}MB, queue=${scanQueue.length}, dedup=${recentlyScanned.size}, downloads=${downloadsCache.size}, alerts=${alertedPackageRules.size}, pressure=${levelName}`);
|
|
718
|
+
|
|
719
|
+
// Graduated response at HIGH+
|
|
720
|
+
if (pressureLevel >= MEMORY_PRESSURE_LEVELS.HIGH) {
|
|
721
|
+
handleMemoryPressure(pressureLevel, heapRatio, recentlyScanned, downloadsCache, scanQueue);
|
|
579
722
|
}
|
|
580
723
|
lastMemoryLogTime = Date.now();
|
|
581
724
|
}
|
|
@@ -632,5 +775,17 @@ module.exports = {
|
|
|
632
775
|
MAX_RESTORE_QUEUE_SIZE,
|
|
633
776
|
pruneMemoryCaches,
|
|
634
777
|
MAX_RECENTLY_SCANNED,
|
|
635
|
-
MAX_ALERTED_PACKAGES
|
|
778
|
+
MAX_ALERTED_PACKAGES,
|
|
779
|
+
// Memory circuit breaker
|
|
780
|
+
computeMemoryPressure,
|
|
781
|
+
getMemoryPressureLevel,
|
|
782
|
+
handleMemoryPressure,
|
|
783
|
+
MEMORY_PRESSURE_LEVELS,
|
|
784
|
+
MEMORY_THRESHOLD_ELEVATED,
|
|
785
|
+
MEMORY_THRESHOLD_HIGH,
|
|
786
|
+
MEMORY_THRESHOLD_CRITICAL,
|
|
787
|
+
MEMORY_THRESHOLD_EMERGENCY,
|
|
788
|
+
EMERGENCY_QUEUE_KEEP,
|
|
789
|
+
MEMORY_LOG_INTERVAL_NORMAL,
|
|
790
|
+
MEMORY_LOG_INTERVAL_PRESSURE
|
|
636
791
|
};
|
|
@@ -419,6 +419,18 @@ function isDeferredSlotBusy() {
|
|
|
419
419
|
return _deferredSlotBusy;
|
|
420
420
|
}
|
|
421
421
|
|
|
422
|
+
/**
 * Emergency reset of the deferred sandbox queue: empties _deferredQueue in
 * place and clears the _deferredSeen tracking collection, so the dropped
 * entries are no longer referenced from here.
 * Invoked by the daemon.js memory circuit breaker at EMERGENCY level.
 *
 * @returns {number} How many queued items were discarded (for logging).
 */
function clearDeferredQueue() {
  const droppedCount = _deferredQueue.length;
  _deferredSeen.clear();
  // Truncate rather than reassign so the array identity is preserved.
  _deferredQueue.length = 0;
  return droppedCount;
}
|
|
433
|
+
|
|
422
434
|
module.exports = {
|
|
423
435
|
enqueueDeferred,
|
|
424
436
|
getDeferredQueue,
|
|
@@ -431,6 +443,7 @@ module.exports = {
|
|
|
431
443
|
buildDeferredFollowUpEmbed,
|
|
432
444
|
pruneExpired,
|
|
433
445
|
isDeferredSlotBusy,
|
|
446
|
+
clearDeferredQueue,
|
|
434
447
|
_resetDeferredQueue,
|
|
435
448
|
DEFERRED_QUEUE_MAX,
|
|
436
449
|
DEFERRED_TTL_MS,
|
package/src/monitor/ingestion.js
CHANGED
|
@@ -650,7 +650,21 @@ async function pollPyPI(state, scanQueue) {
|
|
|
650
650
|
const SOFT_BACKPRESSURE_THRESHOLD = 30_000;
|
|
651
651
|
|
|
652
652
|
async function poll(state, scanQueue, stats) {
|
|
653
|
-
//
|
|
653
|
+
// Memory-based backpressure: skip poll when heap is at CRITICAL+ (90%+).
|
|
654
|
+
// This is the primary defense against the 2026-04-13 death spiral where
|
|
655
|
+
// ingestion continued at 50 pkg/min while processing was at 0 throughput.
|
|
656
|
+
// Safe because: CouchDB seq is NOT advanced — next poll resumes from same point.
|
|
657
|
+
try {
|
|
658
|
+
const { getMemoryPressureLevel } = require('./daemon.js');
|
|
659
|
+
const pressureLevel = getMemoryPressureLevel();
|
|
660
|
+
// CRITICAL=3, EMERGENCY=4
|
|
661
|
+
if (pressureLevel >= 3) {
|
|
662
|
+
console.log(`[MONITOR] MEMORY BACKPRESSURE: skipping poll (pressure level ${pressureLevel} >= CRITICAL) — seq not advanced, 0 packages lost`);
|
|
663
|
+
return;
|
|
664
|
+
}
|
|
665
|
+
} catch { /* daemon.js not loaded yet (initial poll) — proceed normally */ }
|
|
666
|
+
|
|
667
|
+
// Queue-depth backpressure: skip poll when queue is very deep.
|
|
654
668
|
// Safe because: CouchDB seq is NOT advanced (stays in memory only, persisted
|
|
655
669
|
// by daemon.js AFTER poll returns) — next poll resumes from the same point.
|
|
656
670
|
// Combined with adaptive concurrency: workers scale up → queue drains → poll resumes.
|