muaddib-scanner 2.10.78 → 2.10.79

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.10.78",
3
+ "version": "2.10.79",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -434,6 +434,7 @@ function createFreshness(source, confidence) {
434
434
  */
435
435
  function extractVersions(affected) {
436
436
  const versions = new Set();
437
+ let hasUnboundedRange = false;
437
438
 
438
439
  if (affected.versions && affected.versions.length > 0) {
439
440
  for (const v of affected.versions) {
@@ -445,7 +446,16 @@ function extractVersions(affected) {
445
446
  for (const range of affected.ranges) {
446
447
  if (range.events) {
447
448
  for (const event of range.events) {
448
- if (event.introduced && event.introduced !== '0') {
449
+ if (event.introduced === '0') {
450
+ // "introduced": "0" with no "fixed" = all versions malicious (wildcard).
451
+ // This is the standard OSV format used by Amazon Inspector bulk imports
452
+ // (tea.xyz campaign, 150K+ packages). Without this, these entries are
453
+ // silently dropped and the IOC database loses ~185K packages.
454
+ const hasFixed = range.events.some(e => e.fixed);
455
+ if (!hasFixed) {
456
+ hasUnboundedRange = true;
457
+ }
458
+ } else if (event.introduced) {
449
459
  versions.add(event.introduced);
450
460
  }
451
461
  }
@@ -453,6 +463,11 @@ function extractVersions(affected) {
453
463
  }
454
464
  }
455
465
 
466
+ // Wildcard: unbounded range (introduced=0, no fixed) and no explicit versions
467
+ if (versions.size === 0 && hasUnboundedRange) {
468
+ return ['*'];
469
+ }
470
+
456
471
  if (versions.size === 0) {
457
472
  _noVersionSkipCount++;
458
473
  return [];
@@ -0,0 +1,106 @@
1
'use strict';

/**
 * Adaptive concurrency controller for the scan worker pool.
 *
 * Every ADJUST_INTERVAL_MS the daemon asks computeTarget() for a new worker
 * count, derived from three signals in strict priority order:
 *   1. Memory pressure — always scale down under heap pressure
 *   2. Timeout rate    — scale down when the system is saturated (I/O contention)
 *   3. Queue depth     — scale up on backlog, drift back toward base when idle
 *
 * Scale-up is aggressive (+4) because backlog means lost coverage; scale-down
 * is gradual (-2) to avoid thrashing. Memory pressure overrides everything,
 * since an OOM kill loses the in-memory queue.
 */

const MIN_CONCURRENCY = 4;
const BASE_CONCURRENCY = Math.max(MIN_CONCURRENCY, parseInt(process.env.MUADDIB_SCAN_CONCURRENCY, 10) || 8);
const MAX_CONCURRENCY = Math.max(BASE_CONCURRENCY, parseInt(process.env.MUADDIB_MAX_CONCURRENCY, 10) || 32);
const ADJUST_INTERVAL_MS = 30_000;

// Queue depth thresholds
const QUEUE_BACKLOG_THRESHOLD = 1000;
const QUEUE_IDLE_THRESHOLD = 100;

// System pressure thresholds
const MEMORY_PRESSURE_THRESHOLD = 0.75;
const TIMEOUT_RATE_THRESHOLD = 0.15;
const TIMEOUT_RATE_MIN_SAMPLES = 20;

// Previous stats snapshot, used to turn cumulative counters into per-window deltas.
let _prevScanned = 0;
let _prevTimeouts = 0;

/**
 * Read the cumulative scanned/timeout counters from a monitor stats object.
 * Missing fields default to 0 so a partially-populated stats object is safe.
 */
function _readCounters(stats) {
  const scanned = stats.scanned || 0;
  const timeouts = (stats.errorsByType && stats.errorsByType.static_timeout) || 0;
  return { scanned, timeouts };
}

/**
 * Compute new target concurrency from system signals.
 * Uses stats deltas (not cumulative totals) for the timeout rate — avoids
 * reacting to stale history from hours ago.
 *
 * @param {number} current - Current target concurrency
 * @param {number} queueDepth - scanQueue.length
 * @param {Object} stats - Monitor stats object (scanned, errorsByType.static_timeout)
 * @returns {{ target: number, reason: string }}
 */
function computeTarget(current, queueDepth, stats) {
  const { heapUsed, heapTotal } = process.memoryUsage();
  const heapRatio = heapUsed / heapTotal;

  // Priority 1: memory pressure — always back off, regardless of other signals.
  // NOTE(review): heapTotal is the currently *allocated* V8 heap, not the heap
  // limit, so this ratio can read high even with real headroom — confirm intent
  // (v8.getHeapStatistics().heap_size_limit would measure true pressure).
  if (heapRatio > MEMORY_PRESSURE_THRESHOLD) {
    // Keep the delta snapshot current so the next window isn't distorted.
    const snap = _readCounters(stats);
    _prevScanned = snap.scanned;
    _prevTimeouts = snap.timeouts;
    return { target: clamp(current - 4), reason: `memory_pressure (${(heapRatio * 100).toFixed(0)}%)` };
  }

  // Timeout rate over the window since the last adjustment (counter deltas).
  const now = _readCounters(stats);
  const scannedDelta = now.scanned - _prevScanned;
  const timeoutDelta = now.timeouts - _prevTimeouts;
  _prevScanned = now.scanned;
  _prevTimeouts = now.timeouts;

  // Too few samples in the window → treat the rate as zero (no signal).
  const timeoutRate = scannedDelta >= TIMEOUT_RATE_MIN_SAMPLES ? timeoutDelta / scannedDelta : 0;

  // Priority 2: saturation — adding workers would only increase contention.
  if (timeoutRate > TIMEOUT_RATE_THRESHOLD) {
    return { target: clamp(current - 2), reason: `high_timeout_rate (${(timeoutRate * 100).toFixed(0)}%, ${timeoutDelta}/${scannedDelta})` };
  }

  // Priority 3: queue depth — scale up for backlog, down toward base when idle.
  if (queueDepth > QUEUE_BACKLOG_THRESHOLD) {
    return { target: clamp(current + 4), reason: `backlog (queue=${queueDepth})` };
  }

  if (queueDepth < QUEUE_IDLE_THRESHOLD) {
    // Drift toward BASE, not MIN — normal traffic still needs base capacity.
    return { target: Math.max(BASE_CONCURRENCY, clamp(current - 2)), reason: `idle (queue=${queueDepth})` };
  }

  return { target: current, reason: 'stable' };
}

/** Clamp a proposed concurrency into [MIN_CONCURRENCY, MAX_CONCURRENCY]. */
function clamp(n) {
  return Math.min(MAX_CONCURRENCY, Math.max(MIN_CONCURRENCY, n));
}

/**
 * Reset delta tracking (e.g. after the daily stats counters are zeroed,
 * which would otherwise produce negative deltas).
 */
function resetDeltas() {
  _prevScanned = 0;
  _prevTimeouts = 0;
}
97
+
98
+ module.exports = {
99
+ MIN_CONCURRENCY,
100
+ BASE_CONCURRENCY,
101
+ MAX_CONCURRENCY,
102
+ ADJUST_INTERVAL_MS,
103
+ computeTarget,
104
+ resetDeltas,
105
+ clamp
106
+ };
@@ -4,11 +4,12 @@ const path = require('path');
4
4
  const os = require('os');
5
5
  const { isDockerAvailable, SANDBOX_CONCURRENCY_MAX } = require('../sandbox/index.js');
6
6
  const { setVerboseMode, isSandboxEnabled, isCanaryEnabled, isLlmDetectiveEnabled, getLlmDetectiveMode, DOWNLOADS_CACHE_TTL } = require('./classify.js');
7
- const { loadState, saveState, loadDailyStats, saveDailyStats, purgeTarballCache, getParisHour, atomicWriteFileSync } = require('./state.js');
7
+ const { loadState, saveState, loadDailyStats, saveDailyStats, purgeTarballCache, getParisHour, atomicWriteFileSync, saveNpmSeq } = require('./state.js');
8
8
  const { isTemporalEnabled, isTemporalAstEnabled, isTemporalPublishEnabled, isTemporalMaintainerEnabled } = require('./temporal.js');
9
9
  const { pendingGrouped, flushScopeGroup, sendDailyReport, DAILY_REPORT_HOUR, alertedPackageRules } = require('./webhook.js');
10
10
  const { poll } = require('./ingestion.js');
11
- const { processQueue, SCAN_CONCURRENCY } = require('./queue.js');
11
+ const { processQueue, ensureWorkers, drainWorkers, getTargetConcurrency, setTargetConcurrency, getActiveWorkers, SCAN_CONCURRENCY } = require('./queue.js');
12
+ const { computeTarget, ADJUST_INTERVAL_MS, BASE_CONCURRENCY, resetDeltas } = require('./adaptive-concurrency.js');
12
13
  const { startHealthcheck } = require('./healthcheck.js');
13
14
  const { startDeferredWorker, stopDeferredWorker, persistDeferredQueue, restoreDeferredQueue } = require('./deferred-sandbox.js');
14
15
 
@@ -18,8 +19,11 @@ const QUEUE_WARNING_THRESHOLD = 5_000; // Warn if queue depth exceeds this
18
19
  const QUEUE_PERSIST_INTERVAL = 60_000; // Persist queue to disk every 60s
19
20
  const QUEUE_STATE_FILE = path.join(__dirname, '..', '..', 'data', 'queue-state.json');
20
21
  const QUEUE_STATE_MAX_AGE_MS = 24 * 60 * 60 * 1000; // 24h expiry
21
- const MAX_QUEUE_PERSIST_SIZE = 100_000; // Don't persist if queue > 100K items
22
- const MAX_SCAN_QUEUE = 10_000; // Backpressure: skip polling when queue exceeds this
22
+ const MAX_QUEUE_PERSIST_SIZE = 200_000; // Don't persist if queue > 200K items (OOM guard)
23
+ const MAX_RESTORE_QUEUE_SIZE = 100_000; // Cap restored queue at 100K items
24
+ // MAX_SCAN_QUEUE removed: backpressure no longer skips polling.
25
+ // Queue grows unbounded in memory (entries are ~300B, 100K = 30MB on 12GB VPS).
26
+ // Adaptive concurrency adjusts processing speed to match ingestion rate.
23
27
 
24
28
  function sleep(ms) {
25
29
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -88,11 +92,11 @@ function restoreQueue(scanQueue) {
88
92
  return 0;
89
93
  }
90
94
 
91
- // Restore items (cap at MAX_SCAN_QUEUE to prevent OOM from stale persisted queues)
95
+ // Restore items (cap at MAX_RESTORE_QUEUE_SIZE to prevent OOM from stale persisted queues)
92
96
  let items = data.items;
93
- if (items.length > MAX_SCAN_QUEUE) {
94
- console.log(`[MONITOR] Truncating restored queue from ${items.length} to ${MAX_SCAN_QUEUE} items`);
95
- items = items.slice(0, MAX_SCAN_QUEUE);
97
+ if (items.length > MAX_RESTORE_QUEUE_SIZE) {
98
+ console.log(`[MONITOR] Truncating restored queue from ${items.length} to ${MAX_RESTORE_QUEUE_SIZE} items`);
99
+ items = items.slice(0, MAX_RESTORE_QUEUE_SIZE);
96
100
  }
97
101
  const count = items.length;
98
102
  if (count === 0) {
@@ -404,13 +408,14 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
404
408
  loadDailyStats(stats, dailyAlerts); // Restore counters from previous run (survives restarts)
405
409
  console.log(`[MONITOR] State loaded — npm last: ${state.npmLastPackage || 'none'}, pypi last: ${state.pypiLastPackage || 'none'}, npm seq: ${state.npmLastSeq || 'none'}`);
406
410
  console.log('[MONITOR] npm changes stream enabled (replicate.npmjs.com) with RSS fallback');
407
- console.log(`[MONITOR] Scan concurrency: ${SCAN_CONCURRENCY} (MUADDIB_SCAN_CONCURRENCY to override)`);
411
+ console.log(`[MONITOR] Scan concurrency: adaptive ${BASE_CONCURRENCY}→${getTargetConcurrency()} (base MUADDIB_SCAN_CONCURRENCY=${BASE_CONCURRENCY}, max MUADDIB_MAX_CONCURRENCY)`);
408
412
  console.log(`[MONITOR] Sandbox concurrency: ${SANDBOX_CONCURRENCY_MAX} (MUADDIB_SANDBOX_CONCURRENCY to override)`);
409
413
  console.log(`[MONITOR] Polling every ${POLL_INTERVAL / 1000}s (decoupled from processing). Ctrl+C to stop.\n`);
410
414
 
411
415
  let running = true;
412
416
  let pollIntervalHandle = null; // Decoupled poll timer — set after initial poll
413
417
  let queuePersistHandle = null; // Queue persistence timer
418
+ let concurrencyAdjustHandle = null; // Adaptive concurrency timer
414
419
 
415
420
  // Restore queue from previous run (if file exists and is < 24h old)
416
421
  const restoredCount = restoreQueue(scanQueue);
@@ -438,6 +443,13 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
438
443
  clearInterval(queuePersistHandle);
439
444
  queuePersistHandle = null;
440
445
  }
446
+ if (concurrencyAdjustHandle) {
447
+ clearInterval(concurrencyAdjustHandle);
448
+ concurrencyAdjustHandle = null;
449
+ }
450
+ // Wait for in-flight scans to complete (soft drain)
451
+ console.log(`[MONITOR] Draining ${getActiveWorkers()} active worker(s)...`);
452
+ await drainWorkers();
441
453
  // Persist remaining queue items so they survive the restart
442
454
  persistQueue(scanQueue, state);
443
455
  // Stop deferred sandbox worker and persist its queue
@@ -470,26 +482,28 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
470
482
 
471
483
  // Initial poll + scan (sequential for first run)
472
484
  await poll(state, scanQueue, stats);
485
+ // Atomicity fix: persist queue AND seq together after each poll.
486
+ // Previously, seq was saved inside pollNpmChanges() but queue persisted
487
+ // every 60s — crash between the two lost queued items permanently.
488
+ persistQueue(scanQueue, state);
489
+ saveNpmSeq(state.npmLastSeq);
473
490
  saveState(state, stats);
474
491
  await processQueue(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailableRef.value);
475
492
 
476
493
  // ─── Decoupled polling ───
477
494
  // Poll runs on its own interval, independent of processing.
478
495
  // This ensures new packages are ingested even while a large batch is being scanned.
479
- // Without this, a 2h processing batch blocks all polling — packages published and
480
- // removed during that window are never seen (e.g. axios/plain-crypto-js 2026-03-30).
496
+ // Backpressure removed: polling ALWAYS runs. Queue grows unbounded in memory
497
+ // (entries ~300B, 100K = 30MB). Adaptive concurrency adjusts scan throughput.
481
498
  let pollInProgress = false;
482
499
  pollIntervalHandle = setInterval(async () => {
483
500
  if (!running || pollInProgress) return;
484
- // Backpressure: skip poll when queue is too deep.
485
- // CouchDB seq is NOT advanced — next poll resumes from the same point. No packages lost.
486
- if (scanQueue.length >= MAX_SCAN_QUEUE) {
487
- console.log(`[MONITOR] BACKPRESSURE: skipping poll (queue ${scanQueue.length} >= ${MAX_SCAN_QUEUE})`);
488
- return;
489
- }
490
501
  pollInProgress = true;
491
502
  try {
492
503
  await poll(state, scanQueue, stats);
504
+ // Atomicity: persist queue + seq together after each poll
505
+ persistQueue(scanQueue, state);
506
+ saveNpmSeq(state.npmLastSeq);
493
507
  saveState(state, stats);
494
508
  if (scanQueue.length > QUEUE_WARNING_THRESHOLD) {
495
509
  console.log(`[MONITOR] WARNING: scan queue depth ${scanQueue.length} — processing may be lagging behind ingestion`);
@@ -502,27 +516,41 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
502
516
  }, POLL_INTERVAL);
503
517
 
504
518
  // ─── Queue persistence ───
505
- // Snapshot queue to disk every 60s so items survive restarts/crashes.
506
- // Without this, the decoupled poll advances the CouchDB seq but queued
507
- // items are lost on restart — they won't be re-polled.
519
+ // Periodic snapshot as safety net (in addition to post-poll persist).
508
520
  queuePersistHandle = setInterval(() => {
509
521
  if (!running) return;
510
522
  persistQueue(scanQueue, state);
511
523
  persistDeferredQueue(); // Piggyback: persist deferred sandbox queue on same interval
512
524
  }, QUEUE_PERSIST_INTERVAL);
513
525
 
526
+ // ─── Adaptive concurrency ───
527
+ // Adjusts scan worker count every 30s based on queue depth, memory, timeout rate.
528
+ // Scale-up is aggressive (+4) during backlog, scale-down is gradual (-2) when idle.
529
+ concurrencyAdjustHandle = setInterval(() => {
530
+ if (!running) return;
531
+ const current = getTargetConcurrency();
532
+ const { target, reason } = computeTarget(current, scanQueue.length, stats);
533
+ if (target !== current) {
534
+ console.log(`[MONITOR] ADAPTIVE: concurrency ${current} → ${target} (${reason}, active=${getActiveWorkers()})`);
535
+ setTargetConcurrency(target);
536
+ // Immediately spawn new workers if scaling up (don't wait for next loop tick)
537
+ if (target > current) {
538
+ ensureWorkers(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailableRef.value);
539
+ }
540
+ }
541
+ }, ADJUST_INTERVAL_MS);
542
+
514
543
  // ─── Continuous processing loop ───
515
- // Consumes scanQueue independently of polling. Workers inside processQueue
516
- // check scanQueue.length > 0 after each item, so items added by a concurrent
517
- // poll are picked up immediately by running workers.
544
+ // Non-blocking: ensureWorkers spawns fire-and-forget background workers.
545
+ // This loop tops up workers every 2s AND runs housekeeping (memory, daily report)
546
+ // without being blocked by long-running scans.
518
547
  const MEMORY_LOG_INTERVAL = 300_000; // 5 minutes
519
548
  const MEMORY_PRESSURE_THRESHOLD = 0.85; // 85% heap usage triggers emergency prune
520
549
  let lastMemoryLogTime = Date.now();
521
550
 
522
551
  while (running) {
523
- if (scanQueue.length > 0) {
524
- await processQueue(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailableRef.value);
525
- }
552
+ // Top up workers (non-blocking spawns missing workers as background promises)
553
+ ensureWorkers(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailableRef.value);
526
554
 
527
555
  // ─── Memory watchdog (every 5 min) ───
528
556
  if (Date.now() - lastMemoryLogTime >= MEMORY_LOG_INTERVAL) {
@@ -596,7 +624,7 @@ module.exports = {
596
624
  QUEUE_STATE_FILE,
597
625
  QUEUE_STATE_MAX_AGE_MS,
598
626
  MAX_QUEUE_PERSIST_SIZE,
599
- MAX_SCAN_QUEUE,
627
+ MAX_RESTORE_QUEUE_SIZE,
600
628
  pruneMemoryCaches,
601
629
  MAX_RECENTLY_SCANNED,
602
630
  MAX_ALERTED_PACKAGES
@@ -455,10 +455,10 @@ async function pollNpmChanges(state, scanQueue, stats) {
455
455
  queued++;
456
456
  }
457
457
 
458
- // Persist new seq
458
+ // Update seq in memory only — disk persistence is handled by daemon.js
459
+ // after both queue and seq are saved atomically (prevents data loss on crash).
459
460
  if (data.last_seq != null) {
460
461
  state.npmLastSeq = data.last_seq;
461
- saveNpmSeq(data.last_seq);
462
462
  }
463
463
 
464
464
  if (queued > 0) {
@@ -644,12 +644,12 @@ async function pollPyPI(state, scanQueue) {
644
644
  * @param {Object} stats - Mutable stats object
645
645
  */
646
646
  async function poll(state, scanQueue, stats) {
647
- // Backpressure: skip ingestion when queue is saturated.
648
- // CouchDB seq and PyPI lastPackage are NOT advanced — next poll resumes from same point.
649
- const MAX_SCAN_QUEUE = 10_000;
650
- if (scanQueue.length >= MAX_SCAN_QUEUE) {
651
- console.log(`[MONITOR] BACKPRESSURE: skipping poll (queue ${scanQueue.length} >= ${MAX_SCAN_QUEUE})`);
652
- return;
647
+ // Backpressure removed: polling ALWAYS runs regardless of queue depth.
648
+ // The queue can grow unbounded in memory (entries are ~300 bytes, 100K = 30MB).
649
+ // This prevents the data loss scenario where the CouchDB seq advances but
650
+ // queued items are not persisted — packages would be permanently invisible.
651
+ if (scanQueue.length > 5_000) {
652
+ console.log(`[MONITOR] QUEUE_DEPTH: ${scanQueue.length} items — polling continues (no backpressure skip)`);
653
653
  }
654
654
 
655
655
  const timestamp = new Date().toISOString().slice(0, 19).replace('T', ' ');
@@ -106,9 +106,19 @@ const { archiveSuspectTarball } = require('./tarball-archive.js');
106
106
  // From ./deferred-sandbox.js
107
107
  const { enqueueDeferred } = require('./deferred-sandbox.js');
108
108
 
109
// --- Adaptive concurrency ---

const { BASE_CONCURRENCY, MIN_CONCURRENCY, MAX_CONCURRENCY } = require('./adaptive-concurrency.js');

// Legacy constant export — a snapshot of the base concurrency kept so existing
// tests and log lines that read SCAN_CONCURRENCY keep working. The live value
// is _targetConcurrency, accessed via the getter/setter below.
const SCAN_CONCURRENCY = BASE_CONCURRENCY;

// Mutable worker-pool state shared by the spawn/ensure/drain helpers.
let _targetConcurrency = BASE_CONCURRENCY;
let _activeWorkers = 0;
const _workerPromises = new Set();

/** Current adaptive concurrency target. */
function getTargetConcurrency() {
  return _targetConcurrency;
}

/** Set the target concurrency, clamped into [MIN_CONCURRENCY, MAX_CONCURRENCY]. */
function setTargetConcurrency(n) {
  const floored = Math.max(MIN_CONCURRENCY, n);
  _targetConcurrency = Math.min(MAX_CONCURRENCY, floored);
}

/** Number of workers currently pulling from the scan queue. */
function getActiveWorkers() {
  return _activeWorkers;
}
112
122
  const SCAN_TIMEOUT_MS = 300_000; // 5 minutes per package (3 sandbox runs × 90s + static scan headroom)
113
123
  const STATIC_SCAN_TIMEOUT_MS = 45_000; // 45s for static analysis only
114
124
  const LARGE_PACKAGE_SIZE = 10 * 1024 * 1024; // 10MB
@@ -967,30 +977,65 @@ async function processQueueItem(item, stats, dailyAlerts, recentlyScanned, downl
967
977
  }
968
978
 
969
979
/**
 * Spawn a single worker that pulls from scanQueue until:
 *  - the queue is empty, OR
 *  - activeWorkers exceeds targetConcurrency (soft drain on scale-down)
 *
 * Workers are fire-and-forget: they run as background promises tracked
 * in _workerPromises. Node.js is single-threaded so scanQueue.shift()
 * is atomic — no race conditions between workers.
 */
async function _spawnWorker(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailable) {
  _activeWorkers++;
  try {
    // Keep pulling while there is work AND this worker is still within the
    // target. When the target drops, surplus workers observe the condition
    // after finishing their current item and exit one at a time.
    for (;;) {
      if (scanQueue.length === 0 || _activeWorkers > _targetConcurrency) break;
      const next = scanQueue.shift();
      if (!next) break;
      await processQueueItem(next, stats, dailyAlerts, recentlyScanned, downloadsCache, scanQueue, sandboxAvailable);
    }
  } finally {
    // Always release the slot, even if processQueueItem throws.
    _activeWorkers--;
  }
}
988
1000
 
989
/**
 * Ensure the target number of workers are running. Non-blocking: spawns
 * missing workers as background promises. Called from the daemon main loop
 * every PROCESS_LOOP_INTERVAL (2s), and after concurrency adjustments.
 */
function ensureWorkers(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailable) {
  if (scanQueue.length === 0) return;

  // Never spawn more workers than missing slots, nor more than queued items.
  const deficit = _targetConcurrency - _activeWorkers;
  const toSpawn = Math.min(deficit, scanQueue.length);
  if (toSpawn <= 0) return;

  console.log(`[MONITOR] Spawning ${toSpawn} worker(s) (active: ${_activeWorkers}, target: ${_targetConcurrency}, queue: ${scanQueue.length})`);
  let remaining = toSpawn;
  while (remaining-- > 0) {
    // Fire-and-forget: errors are logged, and the settled promise removes
    // itself from the tracking set so drainWorkers() only awaits live work.
    const p = _spawnWorker(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailable)
      .catch(err => console.error('[MONITOR] Worker error:', err.message))
      .finally(() => _workerPromises.delete(p));
    _workerPromises.add(p);
  }
}
1019
+
1020
/**
 * Wait for every currently-active worker to finish. Used for:
 *  - graceful shutdown (drain in-flight scans before persisting the queue)
 *  - tests (backward-compatible await)
 */
async function drainWorkers() {
  if (_workerPromises.size > 0) {
    // Worker promises never reject (ensureWorkers attaches .catch), so
    // Promise.all resolves once all in-flight scans settle.
    await Promise.all(_workerPromises);
  }
}
1029
+
1030
/**
 * Backward-compatible processQueue: spin up workers, then block until they
 * all complete. Used by tests and the initial sequential scan at startup.
 * The daemon main loop uses ensureWorkers() directly (non-blocking).
 */
async function processQueue(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailable) {
  if (scanQueue.length === 0) {
    return;
  }
  ensureWorkers(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailable);
  await drainWorkers();
}
995
1040
 
996
1041
  /**
@@ -1195,6 +1240,13 @@ module.exports = {
1195
1240
  TEST_FILE_PATTERN,
1196
1241
  SCAN_WORKER_PATH,
1197
1242
 
1243
+ // Adaptive concurrency
1244
+ getTargetConcurrency,
1245
+ setTargetConcurrency,
1246
+ getActiveWorkers,
1247
+ ensureWorkers,
1248
+ drainWorkers,
1249
+
1198
1250
  // Functions
1199
1251
  isBundledToolingOnly,
1200
1252
  recordTrainingSample,