muaddib-scanner 2.10.46 → 2.10.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.10.46",
3
+ "version": "2.10.48",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -58,6 +58,15 @@ function persistQueue(scanQueue, state) {
58
58
  * Skips if file is missing, corrupt, or older than 24h.
59
59
  */
60
60
  function restoreQueue(scanQueue) {
61
+ // Cleanup orphan .tmp from previous crash / disk-full (ENOSPC)
62
+ const tmpFile = QUEUE_STATE_FILE + '.tmp';
63
+ try {
64
+ if (fs.existsSync(tmpFile)) {
65
+ console.log(`[MONITOR] Cleaning up orphan ${path.basename(tmpFile)}`);
66
+ fs.unlinkSync(tmpFile);
67
+ }
68
+ } catch { /* best-effort */ }
69
+
61
70
  try {
62
71
  if (!fs.existsSync(QUEUE_STATE_FILE)) return 0;
63
72
  const raw = fs.readFileSync(QUEUE_STATE_FILE, 'utf8');
@@ -134,6 +143,94 @@ function cleanupOrphanContainers() {
134
143
  }
135
144
  }
136
145
 
146
/**
 * Clean up orphan gVisor runtime entries in the runsc state directory.
 *
 * runsc creates per-container state dirs that are NOT cleaned up when gVisor or
 * Docker crashes. In production this reached 61GB and filled the disk (ENOSPC),
 * cascading into 0-byte .tmp files and total persistence failure.
 *
 * The directory is taken from MUADDIB_GVISOR_LOG_DIR and falls back to
 * /tmp/runsc. Every direct child whose own mtime is older than maxAgeMs is
 * removed recursively.
 *
 * @param {number} [maxAgeMs=3600_000] Minimum age in milliseconds before an
 *   entry is considered orphaned (default: 1h).
 * @returns {number} Number of entries removed; 0 when the directory is missing
 *   or cannot be listed.
 */
function cleanupRunscOrphans(maxAgeMs = 3600_000) {
  const runscDir = process.env.MUADDIB_GVISOR_LOG_DIR || '/tmp/runsc';

  let entries;
  try {
    entries = fs.readdirSync(runscDir);
  } catch {
    // Missing or unreadable directory: nothing to clean.
    return 0;
  }

  const now = Date.now();
  let cleaned = 0;
  for (const entry of entries) {
    const fullPath = path.join(runscDir, entry);
    try {
      // lstat (not stat): age the entry itself. stat follows symlinks, so a
      // dangling link would throw here and never be cleaned up, and a live link
      // would be judged by its target's mtime instead of its own.
      const { mtimeMs } = fs.lstatSync(fullPath);
      if (now - mtimeMs > maxAgeMs) {
        fs.rmSync(fullPath, { recursive: true, force: true });
        cleaned++;
      }
    } catch { /* skip entries that vanished or cannot be inspected/removed */ }
  }

  if (cleaned > 0) {
    try {
      console.log(`[MONITOR] Cleaned up ${cleaned} orphan runsc dir(s) in ${runscDir}`);
    } catch { /* logging is best-effort; never fail the cleanup because of it */ }
  }
  return cleaned;
}
178
+
179
/**
 * Render a kibibyte count in the compact style of `du -h` (e.g. 2048 -> "2.0M").
 *
 * @param {number} kib Size in KiB as reported by `du -k`.
 * @returns {string} Size with one decimal and a K/M/G/T suffix.
 */
function formatDuSize(kib) {
  const units = ['K', 'M', 'G', 'T'];
  let value = kib;
  let unit = 0;
  while (value >= 1024 && unit < units.length - 1) {
    value /= 1024;
    unit++;
  }
  return `${value.toFixed(1)}${units[unit]}`;
}

/**
 * Run `du` one level deep under dir and return its largest children first.
 *
 * @param {string} dir Directory to inspect.
 * @param {number} [limit=5] Maximum number of entries to return.
 * @returns {{size: string, path: string}[]} Largest entries, biggest first;
 *   empty when du is unavailable or printed nothing usable.
 */
function listTopDiskConsumers(dir, limit = 5) {
  let output;
  try {
    // -k (not -sh): GNU du rejects -s combined with --max-depth=1, and plain
    // KiB numbers can be sorted numerically, unlike "9.0K" vs "61G" strings.
    output = execFileSync('du', ['-k', '--max-depth=1', dir], {
      encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 10000
    });
  } catch (err) {
    // du exits non-zero when some subdirectory is unreadable but still prints
    // the sizes it could compute; keep that partial output if present.
    output = typeof err?.stdout === 'string' ? err.stdout : '';
  }

  const rootPath = dir.replace(/\/+$/, '');
  return String(output).split('\n')
    .map((line) => line.match(/^(\d+)\s+(.+)$/))
    .filter(Boolean)
    .map(([, kib, entryPath]) => ({ kib: Number(kib), path: entryPath }))
    // The last du line is the total for dir itself, which is not a "consumer".
    .filter((item) => item.path.replace(/\/+$/, '') !== rootPath)
    .sort((a, b) => b.kib - a.kib)
    .slice(0, limit)
    .map((item) => ({ size: formatDuSize(item.kib), path: item.path }));
}

/**
 * Check disk usage at boot. Warns if root partition is >= 90% full and logs
 * the largest consumers in /tmp/ and data/ to aid diagnosis.
 * Uses df + du — Linux-only, silently skips on other platforms.
 *
 * @returns {void}
 */
function checkDiskSpace() {
  let usagePercent;
  try {
    // df --output=pcent / → "Use%\n 42%\n"
    const dfOutput = execFileSync('df', ['--output=pcent', '/'], {
      encoding: 'utf8', stdio: ['pipe', 'pipe', 'pipe'], timeout: 5000
    });
    const match = dfOutput.match(/(\d+)%/);
    if (!match) return;
    usagePercent = parseInt(match[1], 10);
  } catch {
    // df not available (non-Linux) — skip silently
    return;
  }
  if (usagePercent < 90) return;

  try {
    console.warn(`[MONITOR] WARNING: disk usage at ${usagePercent}% — persistence may fail (ENOSPC)`);

    const targets = [
      { label: '/tmp/', dir: '/tmp/' },
      { label: 'data/', dir: path.join(__dirname, '..', '..', 'data') }
    ];
    for (const { label, dir } of targets) {
      const top = listTopDiskConsumers(dir);
      if (top.length === 0) continue;
      console.warn(`[MONITOR] Top ${label} consumers:`);
      for (const item of top) console.warn(`[MONITOR] ${item.size}\t${item.path}`);
    }
  } catch {
    // Diagnostics are best-effort; a boot-time check must never throw.
  }
}
233
+
137
234
  function reportStats(stats) {
138
235
  const avg = stats.scanned > 0 ? (stats.totalTimeMs / stats.scanned / 1000).toFixed(1) : '0.0';
139
236
  const { t1, t1a, t1b, t2, t3 } = stats.suspectByTier;
@@ -153,7 +250,7 @@ function reportStats(stats) {
153
250
  if (stats.sandboxDeferred || stats.deferredProcessed) {
154
251
  const { getDeferredQueueStats } = require('./deferred-sandbox.js');
155
252
  const dq = getDeferredQueueStats();
156
- console.log(`[MONITOR] Deferred sandbox: ${stats.sandboxDeferred || 0} enqueued, ${stats.deferredProcessed || 0} processed, ${stats.deferredExpired || 0} expired, ${dq.size} pending`);
253
+ console.log(`[MONITOR] Deferred sandbox: ${stats.sandboxDeferred || 0} enqueued, ${stats.deferredProcessed || 0} processed, ${stats.deferredExpired || 0} expired, ${stats.deferredSkipped || 0} skipped, ${dq.size} pending`);
157
254
  }
158
255
  stats.lastReportTime = Date.now();
159
256
  }
@@ -171,10 +268,14 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
171
268
  setVerboseMode(true);
172
269
  }
173
270
 
271
+ // Disk space check — early warning before ENOSPC cascading failure
272
+ checkDiskSpace();
174
273
  // Cleanup temp dirs from previous runs (SIGTERM/crash may leave orphans)
175
274
  cleanupOrphanTmpDirs();
176
275
  // Kill orphan sandbox containers from previous crash (npm-audit-* prefix)
177
276
  cleanupOrphanContainers();
277
+ // Clean up stale gVisor runtime dirs (runsc leak — caused 61GB disk fill in prod)
278
+ cleanupRunscOrphans();
178
279
  // Layer 3: Purge expired cached tarballs on startup
179
280
  purgeTarballCache();
180
281
 
@@ -364,10 +465,11 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
364
465
  await processQueue(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailableRef.value);
365
466
  }
366
467
 
367
- // Hourly stats report + cache purge
468
+ // Hourly stats report + cache purge + runsc cleanup
368
469
  if (Date.now() - stats.lastReportTime >= 3600_000) {
369
470
  reportStats(stats);
370
471
  purgeTarballCache();
472
+ cleanupRunscOrphans();
371
473
  }
372
474
 
373
475
  // Daily webhook report at 08:00 Paris time
@@ -384,6 +486,8 @@ module.exports = {
384
486
  startMonitor,
385
487
  cleanupOrphanTmpDirs,
386
488
  cleanupOrphanContainers,
489
+ cleanupRunscOrphans,
490
+ checkDiskSpace,
387
491
  reportStats,
388
492
  isDailyReportDue,
389
493
  sleep,
@@ -6,11 +6,15 @@
6
6
  * Items are sorted by riskScore DESC (highest-risk first) to defend
7
7
  * against queue-poisoning attacks.
8
8
  *
9
- * The worker reserves 1 sandbox slot for T1a (never uses the last slot).
9
+ * The worker owns a dedicated sandbox slot (_deferredSlotBusy) that is
10
+ * completely independent from the shared semaphore used by T1a/T1b/T2.
11
+ * This guarantees the deferred worker can always process, regardless of
12
+ * how many main-path sandboxes are running. The VPS supports N+1
13
+ * concurrent gVisor containers (3 main + 1 deferred).
10
14
  */
11
15
  const fs = require('fs');
12
16
  const path = require('path');
13
- const { runSandbox, getSandboxSemaphore, SANDBOX_CONCURRENCY_MAX } = require('../sandbox/index.js');
17
+ const { runSandbox } = require('../sandbox/index.js');
14
18
  const { isCanaryEnabled } = require('./classify.js');
15
19
  const { getWebhookUrl, alertedPackageRules, persistAlert, buildAlertData } = require('./webhook.js');
16
20
  const { sendWebhook } = require('../webhook.js');
@@ -28,6 +32,7 @@ const _deferredQueue = [];
28
32
  const _deferredSeen = new Set(); // name@version dedup
29
33
  let _workerHandle = null;
30
34
  let _stats = null; // reference to shared stats object
35
+ let _deferredSlotBusy = false; // Dedicated slot: true while deferred sandbox is running
31
36
 
32
37
  // ── Queue management ──
33
38
 
@@ -126,10 +131,10 @@ async function processDeferredItem(stats) {
126
131
 
127
132
  if (_deferredQueue.length === 0) return null;
128
133
 
129
- // 2. Yield check: reserve 1 slot for T1a
130
- const sem = getSandboxSemaphore();
131
- if (sem.active >= SANDBOX_CONCURRENCY_MAX - 1) {
132
- return null; // All slots busy or only 1 free — keep it for T1a
134
+ // 2. Dedicated slot check completely independent from main semaphore
135
+ if (_deferredSlotBusy) {
136
+ if (stats) stats.deferredSkipped = (stats.deferredSkipped || 0) + 1;
137
+ return null;
133
138
  }
134
139
 
135
140
  // 3. Pick highest-score item
@@ -139,11 +144,12 @@ async function processDeferredItem(stats) {
139
144
 
140
145
  console.log(`[DEFERRED] PROCESSING: ${key} (tier=${item.tier === 2 ? 'T2' : 'T1b'}, score=${item.riskScore}, retries=${item.retries})`);
141
146
 
142
- // 4. Run sandbox
147
+ // 4. Run sandbox on dedicated slot (bypasses shared semaphore)
148
+ _deferredSlotBusy = true;
143
149
  let sandboxResult;
144
150
  try {
145
151
  const canary = isCanaryEnabled();
146
- sandboxResult = await runSandbox(item.name, { canary });
152
+ sandboxResult = await runSandbox(item.name, { canary, skipSemaphore: true });
147
153
  console.log(`[DEFERRED] SANDBOX COMPLETE: ${key} -> score=${sandboxResult.score}, severity=${sandboxResult.severity}`);
148
154
  } catch (err) {
149
155
  console.error(`[DEFERRED] SANDBOX ERROR: ${key} — ${err.message}`);
@@ -158,6 +164,8 @@ async function processDeferredItem(stats) {
158
164
  console.log(`[DEFERRED] RE-ENQUEUED: ${key} for retry (attempt ${item.retries + 1}/${DEFERRED_MAX_RETRIES})`);
159
165
  }
160
166
  return null;
167
+ } finally {
168
+ _deferredSlotBusy = false;
161
169
  }
162
170
 
163
171
  // 5. Follow-up webhook if sandbox found something
@@ -311,6 +319,16 @@ function persistDeferredQueue() {
311
319
  }
312
320
 
313
321
  function restoreDeferredQueue() {
322
+ // Cleanup orphan .tmp from previous crash / disk-full (ENOSPC)
323
+ const tmpFile = DEFERRED_STATE_FILE + '.tmp';
324
+ try {
325
+ if (fs.existsSync(tmpFile)) {
326
+ const stat = fs.statSync(tmpFile);
327
+ console.log(`[DEFERRED] Cleaning up orphan ${path.basename(tmpFile)} (${stat.size} bytes)`);
328
+ fs.unlinkSync(tmpFile);
329
+ }
330
+ } catch { /* best-effort */ }
331
+
314
332
  try {
315
333
  if (!fs.existsSync(DEFERRED_STATE_FILE)) return 0;
316
334
  const raw = fs.readFileSync(DEFERRED_STATE_FILE, 'utf8');
@@ -365,9 +383,14 @@ function _resetDeferredQueue() {
365
383
  _deferredQueue.length = 0;
366
384
  _deferredSeen.clear();
367
385
  _stats = null;
386
+ _deferredSlotBusy = false;
368
387
  stopDeferredWorker();
369
388
  }
370
389
 
390
/**
 * Report whether the deferred worker's dedicated sandbox slot is in use.
 *
 * @returns {boolean} Current value of the module-level `_deferredSlotBusy` flag.
 */
function isDeferredSlotBusy() {
  const busy = _deferredSlotBusy;
  return busy;
}
393
+
371
394
  module.exports = {
372
395
  enqueueDeferred,
373
396
  getDeferredQueue,
@@ -379,6 +402,7 @@ module.exports = {
379
402
  restoreDeferredQueue,
380
403
  buildDeferredFollowUpEmbed,
381
404
  pruneExpired,
405
+ isDeferredSlotBusy,
382
406
  _resetDeferredQueue,
383
407
  DEFERRED_QUEUE_MAX,
384
408
  DEFERRED_TTL_MS,
@@ -116,6 +116,10 @@ function atomicWriteFileSync(filePath, data) {
116
116
  console.warn(`[MONITOR] Cannot create directory ${dir} (${err.code}) — skipping write to ${path.basename(filePath)}`);
117
117
  return;
118
118
  }
119
+ if (err.code === 'ENOSPC') {
120
+ console.warn(`[MONITOR] WARNING: disk full (ENOSPC) — cannot create directory ${dir}. Free space immediately.`);
121
+ return;
122
+ }
119
123
  throw err;
120
124
  }
121
125
  const tmpFile = filePath + '.tmp';
@@ -125,7 +129,11 @@ function atomicWriteFileSync(filePath, data) {
125
129
  } catch (err) {
126
130
  if (err.code === 'EROFS' || err.code === 'EACCES' || err.code === 'EPERM') {
127
131
  console.warn(`[MONITOR] Cannot write ${path.basename(filePath)} (${err.code}) — skipping`);
128
- // Clean up .tmp if it was partially written
132
+ try { fs.unlinkSync(tmpFile); } catch (_) { /* ignore */ }
133
+ return;
134
+ }
135
+ if (err.code === 'ENOSPC') {
136
+ console.warn(`[MONITOR] WARNING: disk full (ENOSPC) — cannot write ${path.basename(filePath)}. Free space in /tmp and data/ immediately.`);
129
137
  try { fs.unlinkSync(tmpFile); } catch (_) { /* ignore */ }
130
138
  return;
131
139
  }
@@ -682,6 +690,7 @@ function loadDailyStats(stats, dailyAlerts) {
682
690
  stats.mlFiltered = data.mlFiltered || 0;
683
691
  stats.llmAnalyzed = data.llmAnalyzed || 0;
684
692
  stats.llmSuppressed = data.llmSuppressed || 0;
693
+ stats.changesStreamPackages = data.changesStreamPackages || 0;
685
694
  if (Array.isArray(data.dailyAlerts)) {
686
695
  dailyAlerts.length = 0;
687
696
  dailyAlerts.push(...data.dailyAlerts);
@@ -708,6 +717,7 @@ function saveDailyStats(stats, dailyAlerts) {
708
717
  mlFiltered: stats.mlFiltered,
709
718
  llmAnalyzed: stats.llmAnalyzed || 0,
710
719
  llmSuppressed: stats.llmSuppressed || 0,
720
+ changesStreamPackages: stats.changesStreamPackages || 0,
711
721
  dailyAlerts: dailyAlerts.slice()
712
722
  };
713
723
  atomicWriteFileSync(DAILY_STATS_FILE, JSON.stringify(data, null, 2));
@@ -23,7 +23,8 @@ const CONTAINER_TIMEOUT = 120000; // 120 seconds
23
23
  const SINGLE_RUN_TIMEOUT = 90000; // 90 seconds per run in multi-run mode (gVisor ~30% I/O overhead)
24
24
 
25
25
  // ── Sandbox concurrency limiter ──
26
- // Prevents Docker container saturation under load (16 workers × 3 runs = 48 containers).
26
+ // Prevents Docker container saturation under load (main-path T1a/T1b/T2).
27
+ // The deferred worker manages its own dedicated slot outside this semaphore.
27
28
  // Pattern: same semaphore as src/shared/http-limiter.js.
28
29
  const SANDBOX_CONCURRENCY_MAX = Math.max(1, parseInt(process.env.MUADDIB_SANDBOX_CONCURRENCY, 10) || 3);
29
30
 
@@ -565,16 +566,21 @@ async function runSandbox(packageName, options = {}) {
565
566
 
566
567
  const mode = strict ? 'strict' : 'permissive';
567
568
 
568
- // Acquire sandbox slot — blocks if SANDBOX_CONCURRENCY_MAX containers already running
569
- const queueLen = _sandboxSemaphore.queue.length;
570
- if (queueLen > 0) {
571
- console.log(`[SANDBOX] Waiting for sandbox slot (${_sandboxSemaphore.active}/${SANDBOX_CONCURRENCY_MAX} active, ${queueLen} queued)...`);
569
+ // Acquire sandbox slot — blocks if SANDBOX_CONCURRENCY_MAX containers already running.
570
+ // skipSemaphore: deferred worker manages its own dedicated slot outside this semaphore.
571
+ const skipSem = options.skipSemaphore === true;
572
+ if (!skipSem) {
573
+ const queueLen = _sandboxSemaphore.queue.length;
574
+ if (queueLen > 0) {
575
+ console.log(`[SANDBOX] Waiting for sandbox slot (${_sandboxSemaphore.active}/${SANDBOX_CONCURRENCY_MAX} active, ${queueLen} queued)...`);
576
+ }
577
+ await acquireSandboxSlot();
572
578
  }
573
- await acquireSandboxSlot();
574
579
 
575
580
  try {
576
581
  const runtimeLabel = useGvisor ? 'gvisor' : 'docker';
577
- console.log(`[SANDBOX] Analyzing "${displayName}" in isolated container (mode: ${mode}, runtime: ${runtimeLabel}${canaryEnabled ? ', canary: on' : ''}${local ? ', local' : ''}, runs: ${TIME_OFFSETS.length}, slots: ${_sandboxSemaphore.active}/${SANDBOX_CONCURRENCY_MAX})...`);
582
+ const slotInfo = skipSem ? 'deferred-slot' : `${_sandboxSemaphore.active}/${SANDBOX_CONCURRENCY_MAX}`;
583
+ console.log(`[SANDBOX] Analyzing "${displayName}" in isolated container (mode: ${mode}, runtime: ${runtimeLabel}${canaryEnabled ? ', canary: on' : ''}${local ? ', local' : ''}, runs: ${TIME_OFFSETS.length}, slots: ${slotInfo})...`);
578
584
 
579
585
  const allRuns = [];
580
586
  let bestResult = cleanResult;
@@ -639,7 +645,7 @@ async function runSandbox(packageName, options = {}) {
639
645
  displayResults(bestResult);
640
646
  return bestResult;
641
647
  } finally {
642
- releaseSandboxSlot();
648
+ if (!skipSem) releaseSandboxSlot();
643
649
  }
644
650
  }
645
651