muaddib-scanner 2.10.63 → 2.10.64

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.10.63",
3
+ "version": "2.10.64",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -44,10 +44,10 @@
44
44
  "node": ">=18.0.0"
45
45
  },
46
46
  "dependencies": {
47
- "@inquirer/prompts": "8.3.2",
47
+ "@inquirer/prompts": "8.4.1",
48
48
  "acorn": "8.16.0",
49
49
  "acorn-walk": "8.3.5",
50
- "adm-zip": "0.5.16",
50
+ "adm-zip": "0.5.17",
51
51
  "js-yaml": "4.1.1"
52
52
  },
53
53
  "overrides": {
@@ -55,7 +55,7 @@
55
55
  },
56
56
  "devDependencies": {
57
57
  "@eslint/js": "10.0.1",
58
- "eslint": "10.1.0",
58
+ "eslint": "10.2.0",
59
59
  "eslint-plugin-security": "^4.0.0",
60
60
  "globals": "17.4.0"
61
61
  }
@@ -3,10 +3,10 @@ const fs = require('fs');
3
3
  const path = require('path');
4
4
  const os = require('os');
5
5
  const { isDockerAvailable, SANDBOX_CONCURRENCY_MAX } = require('../sandbox/index.js');
6
- const { setVerboseMode, isSandboxEnabled, isCanaryEnabled, isLlmDetectiveEnabled, getLlmDetectiveMode } = require('./classify.js');
6
+ const { setVerboseMode, isSandboxEnabled, isCanaryEnabled, isLlmDetectiveEnabled, getLlmDetectiveMode, DOWNLOADS_CACHE_TTL } = require('./classify.js');
7
7
  const { loadState, saveState, loadDailyStats, saveDailyStats, purgeTarballCache, getParisHour, atomicWriteFileSync } = require('./state.js');
8
8
  const { isTemporalEnabled, isTemporalAstEnabled, isTemporalPublishEnabled, isTemporalMaintainerEnabled } = require('./temporal.js');
9
- const { pendingGrouped, flushScopeGroup, sendDailyReport, DAILY_REPORT_HOUR } = require('./webhook.js');
9
+ const { pendingGrouped, flushScopeGroup, sendDailyReport, DAILY_REPORT_HOUR, alertedPackageRules } = require('./webhook.js');
10
10
  const { poll } = require('./ingestion.js');
11
11
  const { processQueue, SCAN_CONCURRENCY } = require('./queue.js');
12
12
  const { startHealthcheck } = require('./healthcheck.js');
@@ -19,6 +19,7 @@ const QUEUE_PERSIST_INTERVAL = 60_000; // Persist queue to disk every 60s
19
19
  const QUEUE_STATE_FILE = path.join(__dirname, '..', '..', 'data', 'queue-state.json');
20
20
  const QUEUE_STATE_MAX_AGE_MS = 24 * 60 * 60 * 1000; // 24h expiry
21
21
  const MAX_QUEUE_PERSIST_SIZE = 100_000; // Don't persist if queue > 100K items
22
+ const MAX_SCAN_QUEUE = 10_000; // Backpressure: skip polling when queue exceeds this
22
23
 
23
24
  function sleep(ms) {
24
25
  return new Promise((resolve) => setTimeout(resolve, ms));
@@ -87,13 +88,18 @@ function restoreQueue(scanQueue) {
87
88
  return 0;
88
89
  }
89
90
 
90
- // Restore items
91
- const count = data.items.length;
91
+ // Restore items (cap at MAX_SCAN_QUEUE to prevent OOM from stale persisted queues)
92
+ let items = data.items;
93
+ if (items.length > MAX_SCAN_QUEUE) {
94
+ console.log(`[MONITOR] Truncating restored queue from ${items.length} to ${MAX_SCAN_QUEUE} items`);
95
+ items = items.slice(0, MAX_SCAN_QUEUE);
96
+ }
97
+ const count = items.length;
92
98
  if (count === 0) {
93
99
  try { fs.unlinkSync(QUEUE_STATE_FILE); } catch {}
94
100
  return 0;
95
101
  }
96
- scanQueue.push(...data.items);
102
+ scanQueue.push(...items);
97
103
  console.log(`[MONITOR] Restored ${count} packages from queue state (saved at ${data.savedAt})`);
98
104
 
99
105
  // Delete after successful restore
@@ -231,6 +237,49 @@ function checkDiskSpace() {
231
237
  }
232
238
  }
233
239
 
240
+ // --- Memory management ---
241
+
242
// Size caps enforced by pruneMemoryCaches().
const MAX_RECENTLY_SCANNED = 50_000;
const MAX_ALERTED_PACKAGES = 5_000;

/**
 * Remove the oldest entries of a Set or Map until it holds at most `maxSize`.
 *
 * Set and Map iterate in insertion order, so the first keys yielded are the
 * ones that were inserted first. Re-adding an existing key does not move it.
 *
 * @param {Set|Map} collection - Collection to trim in place.
 * @param {number} maxSize - Maximum number of entries to keep.
 * @returns {number} Number of entries removed (0 when already within the cap).
 */
function evictOldestEntries(collection, maxSize) {
  const excess = collection.size - maxSize;
  if (excess <= 0) return 0;

  let removed = 0;
  // Deleting the entry currently being visited is safe during Set/Map iteration.
  for (const key of collection.keys()) {
    if (removed >= excess) break;
    collection.delete(key);
    removed++;
  }
  return removed;
}

/**
 * Prune in-memory caches to prevent unbounded growth between daily resets.
 * Called hourly from the main loop. Targets:
 * - recentlyScanned: Set used for 24h dedup (no TTL, only cleared at daily report)
 * - downloadsCache: Map with 24h TTL but no proactive eviction
 * - alertedPackageRules: Map for webhook dedup (only cleared at daily report)
 *
 * The size-capped collections are trimmed oldest-first rather than cleared, so
 * the most recent dedup entries survive a prune and recently handled packages
 * are not re-scanned or re-alerted.
 *
 * @param {Set<string>} recentlyScanned - Dedup keys; trimmed to MAX_RECENTLY_SCANNED.
 * @param {Map<string, {fetchedAt: number}>} downloadsCache - Entries whose
 *   `fetchedAt` is older than DOWNLOADS_CACHE_TTL milliseconds are deleted.
 * @param {Map} alertedPackageRules - Trimmed to MAX_ALERTED_PACKAGES.
 * @returns {void}
 */
function pruneMemoryCaches(recentlyScanned, downloadsCache, alertedPackageRules) {
  // Counts evicted entries only, across all three caches.
  let pruned = 0;

  // 1. recentlyScanned — cap size, dropping the oldest dedup keys first
  const scannedBefore = recentlyScanned.size;
  const scannedEvicted = evictOldestEntries(recentlyScanned, MAX_RECENTLY_SCANNED);
  if (scannedEvicted > 0) {
    console.log(`[MONITOR] PRUNE: recentlyScanned ${scannedBefore} > ${MAX_RECENTLY_SCANNED} — evicted ${scannedEvicted} oldest entries`);
    pruned += scannedEvicted;
  }

  // 2. downloadsCache — evict entries past the TTL
  const now = Date.now();
  for (const [key, entry] of downloadsCache) {
    if (now - entry.fetchedAt > DOWNLOADS_CACHE_TTL) {
      downloadsCache.delete(key);
      pruned++;
    }
  }

  // 3. alertedPackageRules — cap size, dropping the oldest alert records first
  const alertedBefore = alertedPackageRules.size;
  const alertedEvicted = evictOldestEntries(alertedPackageRules, MAX_ALERTED_PACKAGES);
  if (alertedEvicted > 0) {
    console.log(`[MONITOR] PRUNE: alertedPackageRules ${alertedBefore} > ${MAX_ALERTED_PACKAGES} — evicted ${alertedEvicted} oldest entries`);
    pruned += alertedEvicted;
  }

  if (pruned > 0) {
    console.log(`[MONITOR] PRUNE: ${pruned} cache entries pruned`);
  }
}
282
+
234
283
  function reportStats(stats) {
235
284
  const avg = stats.scanned > 0 ? (stats.totalTimeMs / stats.scanned / 1000).toFixed(1) : '0.0';
236
285
  const { t1, t1a, t1b, t2, t3 } = stats.suspectByTier;
@@ -432,6 +481,12 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
432
481
  let pollInProgress = false;
433
482
  pollIntervalHandle = setInterval(async () => {
434
483
  if (!running || pollInProgress) return;
484
+ // Backpressure: skip poll when queue is too deep.
485
+ // CouchDB seq is NOT advanced — next poll resumes from the same point. No packages lost.
486
+ if (scanQueue.length >= MAX_SCAN_QUEUE) {
487
+ console.log(`[MONITOR] BACKPRESSURE: skipping poll (queue ${scanQueue.length} >= ${MAX_SCAN_QUEUE})`);
488
+ return;
489
+ }
435
490
  pollInProgress = true;
436
491
  try {
437
492
  await poll(state, scanQueue, stats);
@@ -460,16 +515,44 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
460
515
  // Consumes scanQueue independently of polling. Workers inside processQueue
461
516
  // check scanQueue.length > 0 after each item, so items added by a concurrent
462
517
  // poll are picked up immediately by running workers.
518
+ const MEMORY_LOG_INTERVAL = 300_000; // 5 minutes
519
+ const MEMORY_PRESSURE_THRESHOLD = 0.85; // 85% heap usage triggers emergency prune
520
+ let lastMemoryLogTime = Date.now();
521
+
463
522
  while (running) {
464
523
  if (scanQueue.length > 0) {
465
524
  await processQueue(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailableRef.value);
466
525
  }
467
526
 
468
- // Hourly stats report + cache purge + runsc cleanup
527
+ // ─── Memory watchdog (every 5 min) ───
528
+ if (Date.now() - lastMemoryLogTime >= MEMORY_LOG_INTERVAL) {
529
+ const mem = process.memoryUsage();
530
+ const heapUsedMB = (mem.heapUsed / 1024 / 1024).toFixed(0);
531
+ const heapTotalMB = (mem.heapTotal / 1024 / 1024).toFixed(0);
532
+ const rssMB = (mem.rss / 1024 / 1024).toFixed(0);
533
+ console.log(`[MONITOR] MEMORY: heap=${heapUsedMB}MB/${heapTotalMB}MB, rss=${rssMB}MB, queue=${scanQueue.length}, dedup=${recentlyScanned.size}, downloads=${downloadsCache.size}, alerts=${alertedPackageRules.size}`);
534
+
535
+ // Emergency prune under memory pressure
536
+ if (mem.heapUsed / mem.heapTotal > MEMORY_PRESSURE_THRESHOLD) {
537
+ console.error(`[MONITOR] MEMORY PRESSURE: heap at ${((mem.heapUsed / mem.heapTotal) * 100).toFixed(0)}% — emergency prune`);
538
+ recentlyScanned.clear();
539
+ downloadsCache.clear();
540
+ alertedPackageRules.clear();
541
+ // Force GC if available (requires --expose-gc)
542
+ if (global.gc) {
543
+ global.gc();
544
+ console.log('[MONITOR] Forced garbage collection');
545
+ }
546
+ }
547
+ lastMemoryLogTime = Date.now();
548
+ }
549
+
550
+ // Hourly stats report + cache purge + runsc cleanup + memory pruning
469
551
  if (Date.now() - stats.lastReportTime >= 3600_000) {
470
552
  reportStats(stats);
471
553
  purgeTarballCache();
472
554
  cleanupRunscOrphans();
555
+ pruneMemoryCaches(recentlyScanned, downloadsCache, alertedPackageRules);
473
556
  }
474
557
 
475
558
  // Daily webhook report at 08:00 Paris time
@@ -499,5 +582,9 @@ module.exports = {
499
582
  QUEUE_PERSIST_INTERVAL,
500
583
  QUEUE_STATE_FILE,
501
584
  QUEUE_STATE_MAX_AGE_MS,
502
- MAX_QUEUE_PERSIST_SIZE
585
+ MAX_QUEUE_PERSIST_SIZE,
586
+ MAX_SCAN_QUEUE,
587
+ pruneMemoryCaches,
588
+ MAX_RECENTLY_SCANNED,
589
+ MAX_ALERTED_PACKAGES
503
590
  };
@@ -644,6 +644,14 @@ async function pollPyPI(state, scanQueue) {
644
644
  * @param {Object} stats - Mutable stats object
645
645
  */
646
646
  async function poll(state, scanQueue, stats) {
647
+ // Backpressure: skip ingestion when queue is saturated.
648
+ // CouchDB seq and PyPI lastPackage are NOT advanced — next poll resumes from same point.
649
+ const MAX_SCAN_QUEUE = 10_000;
650
+ if (scanQueue.length >= MAX_SCAN_QUEUE) {
651
+ console.log(`[MONITOR] BACKPRESSURE: skipping poll (queue ${scanQueue.length} >= ${MAX_SCAN_QUEUE})`);
652
+ return;
653
+ }
654
+
647
655
  const timestamp = new Date().toISOString().slice(0, 19).replace('T', ' ');
648
656
  console.log(`[MONITOR] ${timestamp} — polling registries...`);
649
657
 
@@ -865,10 +865,18 @@ function isDailyReportDue(stats) {
865
865
  * Encapsulates the full per-package flow: scan -> sandbox -> reputation -> webhook.
866
866
  */
867
867
  async function processQueueItem(item, stats, dailyAlerts, recentlyScanned, downloadsCache, scanQueue, sandboxAvailable) {
868
+ // AbortController: signals the scan to stop after timeout.
869
+ // Prevents zombie scans from continuing expensive work (HTTP, sandbox) in the background.
870
+ const controller = new AbortController();
871
+ const timeoutId = setTimeout(() => controller.abort(), SCAN_TIMEOUT_MS);
868
872
  try {
869
873
  await Promise.race([
870
- resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned, downloadsCache, scanQueue, sandboxAvailable),
871
- timeoutPromise(SCAN_TIMEOUT_MS)
874
+ resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned, downloadsCache, scanQueue, sandboxAvailable, controller.signal),
875
+ new Promise((_, reject) => {
876
+ controller.signal.addEventListener('abort', () => {
877
+ reject(new Error(`Scan timeout after ${SCAN_TIMEOUT_MS / 1000}s`));
878
+ }, { once: true });
879
+ })
872
880
  ]);
873
881
  } catch (err) {
874
882
  recordError(err, stats);
@@ -900,6 +908,8 @@ async function processQueueItem(item, stats, dailyAlerts, recentlyScanned, downl
900
908
  console.error(`[MONITOR] IOC fallback webhook failed: ${webhookErr.message}`);
901
909
  }
902
910
  }
911
+ } finally {
912
+ clearTimeout(timeoutId);
903
913
  }
904
914
  maybePersistDailyStats(stats, dailyAlerts);
905
915
 
@@ -942,7 +952,9 @@ async function processQueue(scanQueue, stats, dailyAlerts, recentlyScanned, down
942
952
  * For npm packages, tarballUrl is already set from the registry response.
943
953
  * For PyPI packages, we need to fetch the JSON API to get the tarball URL.
944
954
  */
945
- async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned, downloadsCache, scanQueue, sandboxAvailable) {
955
+ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned, downloadsCache, scanQueue, sandboxAvailable, signal) {
956
+ if (signal && signal.aborted) return;
957
+
946
958
  if (item.ecosystem === 'npm' && !item.tarballUrl) {
947
959
  try {
948
960
  const npmInfo = await getNpmLatestTarball(item.name);
@@ -1001,6 +1013,9 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
1001
1013
  }
1002
1014
  recentlyScanned.add(dedupeKey);
1003
1015
 
1016
+ // Abort check: if timeout fired during URL resolution or dedup, bail out
1017
+ if (signal && signal.aborted) return;
1018
+
1004
1019
  // Temporal analysis: check for sudden lifecycle script changes (npm only)
1005
1020
  // Webhooks are deferred until after sandbox confirms the threat
1006
1021
  let temporalResult = null;
@@ -1023,6 +1038,9 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
1023
1038
  maintainerResult = maintRes.status === 'fulfilled' ? maintRes.value : null;
1024
1039
  }
1025
1040
 
1041
+ // Abort check: if timeout fired during temporal checks, skip the expensive scan
1042
+ if (signal && signal.aborted) return;
1043
+
1026
1044
  const scanResult = await scanPackage(item.name, item.version, item.ecosystem, item.tarballUrl, {
1027
1045
  unpackedSize: item.unpackedSize || 0,
1028
1046
  registryScripts: item.registryScripts || null,