npm - muaddib-scanner - Versions diffs - 2.11.92 → 2.11.94 - Mend

muaddib-scanner 2.11.92 → 2.11.94

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/package.json +1 -1
package/{self-scan-v2.11.92.json → self-scan-v2.11.94.json} +1 -1
package/src/monitor/daemon.js +60 -5
package/src/monitor/queue.js +39 -2
package/src/monitor/scan-queue.js +52 -23
package/src/monitor/spill.js +246 -0
package/src/monitor/state.js +9 -2
package/src/monitor/webhook.js +22 -1
package/src/scanner/ast-detectors/handle-call-expression.js +42 -2
package/src/scanner/ast-detectors/handle-post-walk.js +13 -0
package/src/scanner/ast-detectors/mcp-write-classifier.js +71 -0
package/src/scanner/ast.js +4 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "muaddib-scanner",
-  "version": "2.11.92",
+  "version": "2.11.94",
   "description": "Supply-chain threat detection & response for npm & PyPI/Python",
   "main": "src/index.js",
   "bin": {

package/{self-scan-v2.11.92.json → self-scan-v2.11.94.json} RENAMED Viewed

@@ -1,6 +1,6 @@
 {
   "target": "node_modules",
-  "timestamp": "2026-06-11T11:05:03.615Z",
+  "timestamp": "2026-06-11T12:30:31.641Z",
   "threats": [
     {
       "type": "string_mutation_obfuscation",

package/src/monitor/daemon.js CHANGED Viewed

@@ -14,7 +14,8 @@ const { ensureWorkers, drainWorkers, getTargetConcurrency, setTargetConcurrency,
 const { computeTarget, ADJUST_INTERVAL_MS, BASE_CONCURRENCY } = require('./adaptive-concurrency.js');
 const { startHealthcheck } = require('./healthcheck.js');
 const { startDeferredWorker, stopDeferredWorker, persistDeferredQueue, restoreDeferredQueue, clearDeferredQueue } = require('./deferred-sandbox.js');
-const { evictFromScanQueueBulk } = require('./scan-queue.js');
+const { evictFromScanQueueBulk, enqueueScan } = require('./scan-queue.js');
+const { isSpillEnabled, shouldDrain, drainBacklog, getBacklogSize } = require('./spill.js');
 const { startGhsaPoller, stopGhsaPoller } = require('../ioc/ghsa-poller.js');
 const { cleanupOldArchives, getRetentionDays, startPeriodicCleanup } = require('./tarball-archive.js');
 const { clearMetadataCache } = require('../scanner/temporal-analysis.js');
@@ -27,6 +28,24 @@ const { clearASTCache } = require('../shared/constants.js');
 const POLL_INTERVAL = 60_000;
 const PROCESS_LOOP_INTERVAL = 2_000;    // Queue check interval when empty
+// ── Spill drain (disk waiting list re-ingestion) ──
+// Drain only when pressure is fully cleared AND the live queue has headroom; the
+// 12 calm hours/day do the catch-up of burst-time evictions. Rate-limited to one
+// batch per interval (the main loop ticks every 2s — unthrottled it would re-spike
+// the queue in seconds). All env-tunable for the staged rollout.
+const SPILL_DRAIN_THRESHOLD = (() => {
+  const v = parseInt(process.env.MUADDIB_SPILL_DRAIN_THRESHOLD, 10);
+  return Number.isFinite(v) && v > 0 ? v : 500;
+})();
+const SPILL_DRAIN_BATCH = (() => {
+  const v = parseInt(process.env.MUADDIB_SPILL_DRAIN_BATCH, 10);
+  return Number.isFinite(v) && v > 0 ? v : 200;
+})();
+const SPILL_DRAIN_INTERVAL_MS = (() => {
+  const v = parseInt(process.env.MUADDIB_SPILL_DRAIN_INTERVAL_MS, 10);
+  return Number.isFinite(v) && v > 0 ? v : 30_000;
+})();
 const QUEUE_WARNING_THRESHOLD = 5_000;  // Warn if queue depth exceeds this
 const QUEUE_PERSIST_INTERVAL = 60_000;  // Persist queue to disk every 60s
 const QUEUE_STATE_FILE = path.join(__dirname, '..', '..', 'data', 'queue-state.json');
@@ -591,14 +610,16 @@ function handleMemoryPressure(level, ratio, rssRatio, recentlyScanned, downloads
       // first (newest survive — most likely to still exist for re-scan), protected only as
       // a last resort, and LEDGERS every drop. Closes the v2.10.88 gap where the raw
       // splice(0,n) silently dropped protected scans (CLAUDE.md "ne jamais perdre de scan").
-      const { dropped, droppedProtected } = evictFromScanQueueBulk(scanQueue, EMERGENCY_QUEUE_KEEP, 'mem_emergency');
+      const { dropped, droppedProtected, spilled } = evictFromScanQueueBulk(scanQueue, EMERGENCY_QUEUE_KEEP, 'mem_emergency');
       summary.queueDropped = dropped;
       summary.queueDroppedProtected = droppedProtected;
+      summary.queueSpilled = spilled || 0;
       if (stats) {
         stats.queueEmergencyDrops = (stats.queueEmergencyDrops || 0) + dropped;
         if (droppedProtected) stats.queueEmergencyProtectedDrops = (stats.queueEmergencyProtectedDrops || 0) + droppedProtected;
+        if (spilled) stats.spilled = (stats.spilled || 0) + spilled;
       }
-      console.error(`[MONITOR] MEMORY EMERGENCY: ${memPctLabel} — truncated queue ${queueBefore} → ${scanQueue.length} (dropped ${dropped} oldest UNPROTECTED${droppedProtected ? ` + ${droppedProtected} protected as last resort` : ''}, all ledgered)`);
+      console.error(`[MONITOR] MEMORY EMERGENCY: ${memPctLabel} — truncated queue ${queueBefore} → ${scanQueue.length} (${spilled ? `SPILLED ${spilled} to disk backlog` : `dropped ${dropped} oldest UNPROTECTED${droppedProtected ? ` + ${droppedProtected} protected as last resort` : ''}`}, all ledgered)`);
     }
     // Clear deferred sandbox queue (holds full staticResult objects)
     const deferredDropped = clearDeferredQueue();
@@ -635,8 +656,12 @@ function reportStats(stats) {
   const avg = stats.scanned > 0 ? (stats.totalTimeMs / stats.scanned / 1000).toFixed(1) : '0.0';
   const { t1, t1a, t1b, t2, t3 } = stats.suspectByTier;
   console.log(`[MONITOR] Stats: ${stats.scanned} scanned, ${stats.clean} clean, ${stats.suspect} suspect (T1a:${t1a} T1b:${t1b} T1:${t1} T2:${t2} T3:${t3}), ${stats.errors} error${stats.errors !== 1 ? 's' : ''}, avg ${avg}s/pkg`);
-  if (stats.temporalLoadShed || stats.queueHardDrops || (stats.restartsToday || 0) > 1) {
-    console.log(`[MONITOR]   Stability: restarts(24h)=${stats.restartsToday || 0}, temporal load-shed=${stats.temporalLoadShed || 0}, queue hard-drops=${stats.queueHardDrops || 0}`);
+  if (stats.temporalLoadShed || stats.queueHardDrops || (stats.restartsToday || 0) > 1 || stats.spilled || stats.workerOom) {
+    // Backlog size read best-effort: the convergence signal for the spill rollout
+    // (must oscillate, not grow monotonically — see plan validation step 4).
+    let backlog = 0;
+    try { if (isSpillEnabled()) backlog = getBacklogSize(); } catch { /* best-effort */ }
+    console.log(`[MONITOR]   Stability: restarts(24h)=${stats.restartsToday || 0}, temporal load-shed=${stats.temporalLoadShed || 0}, queue hard-drops=${stats.queueHardDrops || 0}, spilled=${stats.spilled || 0}, drained=${stats.spillDrained || 0}, backlog=${backlog}, workerOom=${stats.workerOom || 0}`);
   }
   if (stats.changesStreamPackages) {
     console.log(`[MONITOR]   Changes stream packages: ${stats.changesStreamPackages}`);
@@ -1064,6 +1089,7 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
   // This loop tops up workers every 2s AND runs housekeeping (memory, daily report)
   // without being blocked by long-running scans.
   let lastMemoryLogTime = Date.now();
+  let lastSpillDrainTime = 0;
   while (running) {
     // ─── Memory circuit breaker (every iteration) ───
@@ -1080,6 +1106,35 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
       ensureWorkers(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailableRef.value);
     }
+    // ─── Spill drain (MUADDIB_QUEUE_SPILL=1) ───
+    // Re-ingest evicted scans from the disk backlog during calm windows: pressure
+    // fully NONE + queue headroom, one bounded batch per SPILL_DRAIN_INTERVAL_MS.
+    // Protected items (IOC/burst/first-publish/ATO) drain first — a malicious
+    // package is often unpublished quickly, late drains lose the tarball.
+    if (isSpillEnabled() &&
+        Date.now() - lastSpillDrainTime >= SPILL_DRAIN_INTERVAL_MS &&
+        shouldDrain(pressureLevel, scanQueue.length, SPILL_DRAIN_THRESHOLD)) {
+      lastSpillDrainTime = Date.now();
+      try {
+        // Dedup against recentlyScanned (same key format as processQueueItem) AND
+        // the live queue (small here by the shouldDrain threshold).
+        const inQueue = new Set(scanQueue.map(it => `${it.ecosystem}/${it.name}@${it.version}`));
+        const r = drainBacklog(scanQueue, stats, {
+          maxItems: Math.min(SPILL_DRAIN_BATCH, Math.max(1, SPILL_DRAIN_THRESHOLD - scanQueue.length)),
+          enqueueFn: enqueueScan,
+          isDuplicate: (e) => {
+            const key = `${e.ecosystem}/${e.name}@${e.version}`;
+            return recentlyScanned.has(key) || inQueue.has(key);
+          }
+        });
+        if (r.drained > 0 || r.deduped > 0) {
+          console.log(`[MONITOR] SPILL_DRAIN: re-ingested ${r.drained} (${r.deduped} deduped, backlog ${r.remaining} remaining)`);
+        }
+      } catch (err) {
+        console.error(`[MONITOR] SPILL_DRAIN failed: ${err.message}`);
+      }
+    }
     // ─── Memory watchdog (adaptive interval) ───
     // Log every 5min normally, every 15s under pressure.
     const memLogInterval = pressureLevel >= MEMORY_PRESSURE_LEVELS.HIGH

package/src/monitor/queue.js CHANGED Viewed

@@ -400,9 +400,29 @@ function shouldSkipSandbox(ctx) {
  */
 function runScanInWorker(extractedDir, timeoutMs, scanContext = null, signal = null) {
   return new Promise((resolve, reject) => {
-    const worker = new Worker(SCAN_WORKER_PATH, {
+    const workerOpts = {
       workerData: { extractedDir, scanContext: scanContext || {} }
-    });
+    };
+    // Per-worker V8 memory limits (OOM durable fix): the 2026-06 RSS spikes
+    // (8.2-8.8GB with heap ~550MB) are off-heap allocations inside scan workers —
+    // one pathological package could blow the WHOLE process toward the EMERGENCY
+    // breaker (queue purge + worker kills). With a per-worker cap, that package
+    // OOMs ITS worker only: ERR_WORKER_OUT_OF_MEMORY → rejected → ledgered
+    // `worker_oom` (never counted clean) while the daemon and its siblings keep
+    // running. This is also what allows raising MUADDIB_SCAN_CONCURRENCY back
+    // up (it was clamped 12-16 → 8 on 2026-06-08 as the OOM mitigation).
+    // OFF unless MUADDIB_WORKER_MAX_OLD_MB is set (staged rollout; suggested 1024).
+    const maxOldMb = parseInt(globalThis.process.env.MUADDIB_WORKER_MAX_OLD_MB, 10);
+    if (Number.isFinite(maxOldMb) && maxOldMb > 0) {
+      const maxYoungMb = parseInt(globalThis.process.env.MUADDIB_WORKER_MAX_YOUNG_MB, 10);
+      workerOpts.resourceLimits = {
+        maxOldGenerationSizeMb: maxOldMb,
+        maxYoungGenerationSizeMb: Number.isFinite(maxYoungMb) && maxYoungMb > 0 ? maxYoungMb : 128,
+        codeRangeSizeMb: 64,
+        stackSizeMb: 8
+      };
+    }
+    const worker = new Worker(SCAN_WORKER_PATH, workerOpts);
     const _sc = scanContext || {};
     _liveWorkers.set(worker, { name: _sc.name, version: _sc.version, ecosystem: _sc.ecosystem });
@@ -1246,6 +1266,23 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
     recordError(err, stats);
     stats.scanned++;
     stats.totalTimeMs += Date.now() - startTime;
+    // Per-worker resourceLimits breach: the worker died on ITS V8 cap
+    // (ERR_WORKER_OUT_OF_MEMORY) instead of blowing the process RSS. Same
+    // guardrail as static_timeout: a package that OOMs the scanner must NOT
+    // count clean — inconclusive, distinct ledger source, distinct log line
+    // (the live-validation metric for the limits rollout). No retry: an OOM
+    // re-OOMs deterministically.
+    const isWorkerOom = err && (err.code === 'ERR_WORKER_OUT_OF_MEMORY' ||
+      /ERR_WORKER_OUT_OF_MEMORY|reached its memory limit/i.test(err.message || ''));
+    if (isWorkerOom) {
+      console.error(`[MONITOR] WORKER_OOM: ${name}@${version} — scan worker hit its resourceLimits cap (kept INCONCLUSIVE, not clean)`);
+      stats.workerOom = (stats.workerOom || 0) + 1;
+      updateScanStats('sandbox_inconclusive');
+      try {
+        appendScanLedger({ name, version, ecosystem, outcome: 'error', source: 'worker_oom' });
+      } catch { /* ledger is best-effort */ }
+      return { sandboxResult: null, staticClean: false };
+    }
     console.error(`[MONITOR] ERROR scanning ${name}@${version}: ${err.message}`);
     // Ledger the terminal failure so the scan-ledger never over-states coverage (an errored
     // package is NOT clean). Also captures EMERGENCY worker-terminate losses, whose reject

package/src/monitor/scan-queue.js CHANGED Viewed

@@ -59,6 +59,19 @@ function enqueueScan(scanQueue, item, stats, max = MAX_SCAN_QUEUE) {
     const evicted = protectedFallback ? scanQueue.shift() : scanQueue.splice(victimIdx, 1)[0];
     dropped = true;
     if (stats) stats.queueHardDrops = (stats.queueHardDrops || 0) + 1;
+    // Spill-to-disk waiting list (MUADDIB_QUEUE_SPILL=1): the evicted item goes to
+    // data/scan-backlog.jsonl for re-ingestion during calm periods instead of being
+    // lost. Lazy require (same pattern as state.js below) — spill.js requires this
+    // module for isProtected, so a top-level import would be a cycle. On spill
+    // failure (or flag off) the behavior degrades to the pre-spill drop, ledgered.
+    let spilled = false;
+    try {
+      const spillMod = require('./spill.js');
+      if (spillMod.isSpillEnabled() && evicted && evicted.name) {
+        spilled = spillMod.spillItems([evicted]) === 1;
+        if (spilled && stats) stats.spilled = (stats.spilled || 0) + 1;
+      }
+    } catch { /* spill is best-effort — fall through to the drop ledger */ }
     // Phase 0a: record the dropped item so a coverage loss keeps an identity — answers
     // "which versions were never scanned" (e.g. the Miasma 72s/96-version burst). Lazy
     // require avoids any top-level coupling with state.js; best-effort, never throws.
@@ -68,7 +81,8 @@ function enqueueScan(scanQueue, item, stats, max = MAX_SCAN_QUEUE) {
       if (evicted && evicted.name) {
         require('./state.js').appendScanLedger({
           name: evicted.name, version: evicted.version, ecosystem: evicted.ecosystem,
-          outcome: 'dropped', source: protectedFallback ? 'queue_cap_protected' : 'queue_cap',
+          outcome: spilled ? 'spilled' : 'dropped',
+          source: (protectedFallback ? 'queue_cap_protected' : 'queue_cap') + (spilled ? '_spill' : ''),
           // AUDIT-A1 observability (see evictFromScanQueueBulk)
           firstPublish: !!evicted.firstPublish, isBurstExtra: !!evicted.isATOBurstExtra
         });
@@ -127,33 +141,48 @@ function evictFromScanQueueBulk(scanQueue, targetKeep, source = 'bulk_evict', le
     try { appendLedger = require('./state.js').appendScanLedger; } catch { appendLedger = null; }
   }
-  // Compact survivors in place, ledgering each evicted item with an identity-preserving
-  // source (protected drops get a distinct suffix so the rare case stays visible in the rollup).
+  // Compact survivors in place, collecting the evicted items for the spill below.
+  const evictedItems = [];
   let w = 0;
   for (let r = 0; r < before; r++) {
-    if (dropSet.has(r)) {
-      const item = scanQueue[r];
-      if (appendLedger && item && item.name) {
-        try {
-          appendLedger({
-            name: item.name, version: item.version, ecosystem: item.ecosystem,
-            outcome: 'dropped',
-            source: _isProtected(item) ? `${source}_protected` : source,
-            // AUDIT-A1 observability: record whether a DROPPED item was a first-publish
-            // (real coverage loss) vs a burst-extra (version-spam, expected). Lets us
-            // measure if the memory breaker is evicting genuine new packages.
-            firstPublish: !!item.firstPublish,
-            isBurstExtra: !!item.isATOBurstExtra
-          });
-        } catch { /* ledger is best-effort — must never break the breaker */ }
-      }
-    } else {
-      scanQueue[w++] = scanQueue[r];
-    }
+    if (dropSet.has(r)) evictedItems.push(scanQueue[r]);
+    else scanQueue[w++] = scanQueue[r];
   }
   scanQueue.length = w;
-  return { dropped: toDrop, droppedProtected };
+  // Spill-to-disk waiting list (MUADDIB_QUEUE_SPILL=1): ONE batched append for the
+  // whole eviction (an EMERGENCY evicts thousands — per-item appends would thrash).
+  // spillItems is all-or-nothing per call (single buffered write), so `spilled`
+  // cleanly selects the ledger outcome for the batch. Lazy require: spill.js
+  // imports isProtected from this module — a top-level import would be a cycle.
+  // On spill failure (or flag off) the behavior degrades to the pre-spill drop.
+  let spilled = false;
+  try {
+    const spillMod = require('./spill.js');
+    if (spillMod.isSpillEnabled() && evictedItems.length > 0) {
+      spilled = spillMod.spillItems(evictedItems) > 0;
+    }
+  } catch { /* spill is best-effort */ }
+  // Ledger each evicted item with an identity-preserving source (protected drops get
+  // a distinct suffix so the rare case stays visible in the rollup).
+  for (const item of evictedItems) {
+    if (!appendLedger || !item || !item.name) continue;
+    try {
+      appendLedger({
+        name: item.name, version: item.version, ecosystem: item.ecosystem,
+        outcome: spilled ? 'spilled' : 'dropped',
+        source: (_isProtected(item) ? `${source}_protected` : source) + (spilled ? '_spill' : ''),
+        // AUDIT-A1 observability: record whether a DROPPED item was a first-publish
+        // (real coverage loss) vs a burst-extra (version-spam, expected). Lets us
+        // measure if the memory breaker is evicting genuine new packages.
+        firstPublish: !!item.firstPublish,
+        isBurstExtra: !!item.isATOBurstExtra
+      });
+    } catch { /* ledger is best-effort — must never break the breaker */ }
+  }
+  return { dropped: toDrop, droppedProtected, spilled: spilled ? evictedItems.length : 0 };
 }
 // ── AUDIT A2: optional priority dequeue (gated OFF by default) ──────────────

package/src/monitor/spill.js ADDED Viewed

@@ -0,0 +1,246 @@
+'use strict';
+/**
+ * spill.js — disk-backed waiting list for the scan queue.
+ *
+ * Today an EMERGENCY memory purge (and the queue hard-cap) DROPS evicted scans:
+ * ledgered, but lost (91K mem_emergency drops / 64K distinct never-scanned
+ * versions in the 2026-06-11 24h window). The queue entries are tiny metadata —
+ * dropping them frees almost nothing; the memory relief comes from the
+ * container/worker kills the breaker also performs. This module converts those
+ * drops into DEFERRALS: evicted items append to a bounded JSONL backlog and are
+ * re-ingested progressively during calm periods (12h/24 have zero drops — the
+ * baseline flow is fully absorbed; losses are burst-shaped).
+ *
+ * Defensive priority (mirrors scan-queue.js `isProtected`): malicious packages
+ * are often unpublished quickly — draining late can mean the tarball is gone.
+ *   - drain: protected items first (IOC match / burst / first-publish / ATO),
+ *     then FIFO. No LIFO: under repeated spikes the oldest would never drain —
+ *     a disguised loss.
+ *   - cap compaction: evict oldest UNPROTECTED first, protected as last resort
+ *     (the evictFromScanQueueBulk contract), every eviction ledgered. We lose
+ *     noise before we lose signal.
+ *
+ * Bounds & resilience (CLAUDE.md production rules):
+ *   - MUADDIB_SPILL_MAX entries (default 200 000 ≈ 30 MB ≈ ~2 days of worst-case
+ *     spikes). The cap should never be reached if the drain converges — if it
+ *     is, evictions are ledgered (`spill_cap`), never silent.
+ *   - All writes are single buffered appends or tmp+rename rewrites; a crash mid-drain
+ *     at worst re-drains the same items, deduplicated by the caller.
+ *   - Every function is never-throw: a spill failure must degrade to the old
+ *     behavior (drop, ledgered), not break the breaker.
+ *
+ * Env (read at call time): MUADDIB_QUEUE_SPILL=1 (master switch, default OFF),
+ * MUADDIB_SPILL_FILE (override, tests), MUADDIB_SPILL_MAX.
+ */
+const fs = require('fs');
+const path = require('path');
+const { isProtected } = require('./scan-queue.js');
+const DEFAULT_SPILL_FILE = path.join(__dirname, '..', '..', 'data', 'scan-backlog.jsonl');
+const DEFAULT_MAX_ENTRIES = 200_000;
+// Fields persisted per item — everything re-enqueue + protection need, nothing
+// else (bounded line size ≈ 150-250 bytes).
+const SPILL_FIELDS = [
+  'name', 'version', 'ecosystem', 'tarballUrl',
+  'firstPublish', 'isIOCMatch', 'isBurst', 'atoSignal', 'isATOBurstExtra'
+];
+function isSpillEnabled() {
+  return globalThis.process.env.MUADDIB_QUEUE_SPILL === '1';
+}
+function _spillFile() {
+  return globalThis.process.env.MUADDIB_SPILL_FILE || DEFAULT_SPILL_FILE;
+}
+function _maxEntries() {
+  const raw = globalThis.process.env.MUADDIB_SPILL_MAX;
+  const n = raw ? parseInt(raw, 10) : NaN;
+  return (Number.isFinite(n) && n >= 10 && n <= 5_000_000) ? n : DEFAULT_MAX_ENTRIES;
+}
+function _readEntries(file) {
+  let raw;
+  try { raw = fs.readFileSync(file, 'utf8'); } catch { return []; }
+  const out = [];
+  for (const line of raw.split('\n')) {
+    if (!line.trim()) continue;
+    try {
+      const e = JSON.parse(line);
+      if (e && e.name) out.push(e);
+    } catch { /* truncated/corrupt line (crash mid-write) — skip */ }
+  }
+  return out;
+}
+function _writeEntries(file, entries) {
+  const tmp = file + '.tmp';
+  fs.writeFileSync(tmp, entries.length ? entries.map(e => JSON.stringify(e)).join('\n') + '\n' : '', 'utf8');
+  fs.renameSync(tmp, file);
+}
+/**
+ * Append evicted queue items to the backlog. Never throws; on write failure the
+ * caller's fallback is the pre-spill behavior (drop, ledgered).
+ * @param {Array<object>} items evicted scan-queue items
+ * @returns {number} how many items were actually persisted
+ */
+function spillItems(items) {
+  if (!Array.isArray(items) || items.length === 0) return 0;
+  const file = _spillFile();
+  let written = 0;
+  try {
+    const dir = path.dirname(file);
+    if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
+    let buf = '';
+    for (const item of items) {
+      if (!item || !item.name) continue;
+      const rec = { ts: new Date().toISOString() };
+      for (const f of SPILL_FIELDS) {
+        if (item[f] !== undefined && item[f] !== null && item[f] !== false) rec[f] = item[f];
+      }
+      buf += JSON.stringify(rec) + '\n';
+      written++;
+    }
+    if (buf) fs.appendFileSync(file, buf, 'utf8');
+    _compactBacklog(file);
+  } catch {
+    return 0; // degrade to drop-with-ledger at the call site
+  }
+  return written;
+}
+/**
+ * Cap enforcement: evict down to MUADDIB_SPILL_MAX — oldest UNPROTECTED first,
+ * protected last resort. Every eviction is ledgered (`spill_cap` /
+ * `spill_cap_protected`): a backlog overflow is a real loss and must be visible.
+ */
+function _compactBacklog(file, ledgerFn = null) {
+  try {
+    const max = _maxEntries();
+    const entries = _readEntries(file);
+    if (entries.length <= max) return;
+    const toDrop = entries.length - max;
+    const dropSet = new Set();
+    for (let i = 0; i < entries.length && dropSet.size < toDrop; i++) {
+      if (!isProtected(entries[i])) dropSet.add(i);
+    }
+    for (let i = 0; i < entries.length && dropSet.size < toDrop; i++) {
+      if (!dropSet.has(i)) dropSet.add(i); // protected, last resort
+    }
+    let appendLedger = ledgerFn;
+    if (!appendLedger) {
+      try { appendLedger = require('./state.js').appendScanLedger; } catch { appendLedger = null; }
+    }
+    const kept = [];
+    for (let i = 0; i < entries.length; i++) {
+      if (!dropSet.has(i)) { kept.push(entries[i]); continue; }
+      const e = entries[i];
+      if (appendLedger) {
+        try {
+          appendLedger({
+            name: e.name, version: e.version, ecosystem: e.ecosystem,
+            outcome: 'dropped',
+            source: isProtected(e) ? 'spill_cap_protected' : 'spill_cap',
+            firstPublish: !!e.firstPublish, isBurstExtra: !!e.isATOBurstExtra
+          });
+        } catch { /* best-effort */ }
+      }
+    }
+    _writeEntries(file, kept);
+    console.warn(`[MONITOR] SPILL_CAP: backlog over ${max} — evicted ${toDrop} oldest (ledgered). The drain is not keeping up.`);
+  } catch { /* never throw */ }
+}
+/**
+ * Pure drain predicate (exported for tests + the daemon main loop): drain only
+ * when memory pressure is fully cleared AND the live queue has headroom.
+ */
+function shouldDrain(pressureLevel, queueLen, threshold) {
+  return pressureLevel === 0 && queueLen < threshold;
+}
+/**
+ * Re-ingest up to maxItems from the backlog into the live scan queue.
+ * Protected entries drain first (oldest-first within each class), then FIFO.
+ * Remaining entries are rewritten atomically (tmp+rename). Crash-resilient: a
+ * kill between enqueue and rewrite re-drains the same items on the next tick —
+ * the caller's isDuplicate (recentlyScanned + in-queue keys) absorbs replays.
+ *
+ * @param {Array} scanQueue   live queue (enqueued via injected enqueueFn)
+ * @param {object|null} stats monitor stats (spillDrained / spillDeduped counters)
+ * @param {object} opts
+ * @param {number}   opts.maxItems    batch bound (required > 0)
+ * @param {Function} opts.enqueueFn   (scanQueue, item, stats) => void — scan-queue.enqueueScan
+ * @param {Function} [opts.isDuplicate] (entry) => boolean — receives the whole backlog entry; the caller builds its own dedupe key
+ * @returns {{drained:number, deduped:number, remaining:number}}
+ */
+function drainBacklog(scanQueue, stats, opts = {}) {
+  const res = { drained: 0, deduped: 0, remaining: 0 };
+  try {
+    const file = _spillFile();
+    const maxItems = opts.maxItems | 0;
+    if (maxItems <= 0 || typeof opts.enqueueFn !== 'function') return res;
+    let st;
+    try { st = fs.statSync(file); } catch { return res; } // no backlog — cheap exit
+    if (!st.size) return res;
+    const entries = _readEntries(file);
+    if (entries.length === 0) { res.remaining = 0; return res; }
+    // Selection AND enqueue order: protected first (oldest-first within the
+    // class), then FIFO — bounded by maxItems. Order matters: the live queue
+    // is consumed FIFO, so protected items must be enqueued ahead of plain
+    // ones, not merely included in the batch.
+    const takeIdx = new Set();
+    const takeOrder = [];
+    for (let i = 0; i < entries.length && takeOrder.length < maxItems; i++) {
+      if (isProtected(entries[i])) { takeIdx.add(i); takeOrder.push(i); }
+    }
+    for (let i = 0; i < entries.length && takeOrder.length < maxItems; i++) {
+      if (!takeIdx.has(i)) { takeIdx.add(i); takeOrder.push(i); }
+    }
+    for (const i of takeOrder) {
+      const e = entries[i];
+      // The caller owns the dedupe-key format (the monitor uses
+      // `${ecosystem}/${name}@${version}` for recentlyScanned) — pass the
+      // whole entry instead of imposing a key shape here.
+      if (opts.isDuplicate && opts.isDuplicate(e)) {
+        res.deduped++;
+        continue; // already scanned or already queued — discard from backlog
+      }
+      const { ts: _ts, ...item } = e; // strip the spill timestamp, restore the queue item shape
+      opts.enqueueFn(scanQueue, item, stats);
+      res.drained++;
+    }
+    const remaining = entries.filter((_, i) => !takeIdx.has(i));
+    _writeEntries(file, remaining);
+    res.remaining = remaining.length;
+    if (stats) {
+      stats.spillDrained = (stats.spillDrained || 0) + res.drained;
+      stats.spillDeduped = (stats.spillDeduped || 0) + res.deduped;
+    }
+  } catch { /* never throw — worst case the same items drain next tick */ }
+  return res;
+}
+/** Entry count (0 on missing/unreadable file). */
+function getBacklogSize() {
+  return _readEntries(_spillFile()).length;
+}
+module.exports = {
+  isSpillEnabled,
+  spillItems,
+  drainBacklog,
+  shouldDrain,
+  getBacklogSize,
+  // test seams
+  _compactBacklog,
+  SPILL_FIELDS
+};

package/src/monitor/state.js CHANGED Viewed

@@ -972,7 +972,11 @@ let _scanLedgerAppendedSinceCompact = 0;
 const SCAN_LEDGER_OUTCOMES = new Set([
   'clean', 'clean_low_signal', 'clean_tooling', 'suspect', 'ml_clean', 'llm_benign',
   'sandbox_inconclusive', 'sandbox_unconfirmed', 'confirmed',
-  'static_timeout', 'size_skip', 'dropped', 'error'
+  // 'spilled' = evicted to the disk waiting list (data/scan-backlog.jsonl) instead
+  // of dropped — NOT scanned. A later drain + scan writes a normal scan entry; a
+  // spilled item that never drains stays an honest coverage hole (counted with
+  // dropped in the rollup).
+  'static_timeout', 'size_skip', 'dropped', 'spilled', 'error'
 ]);
 // Benign terminal verdicts — the ledger-headline "clean" bucket. Mirrors the
@@ -1162,7 +1166,10 @@ function computeLedgerRollup(sinceTs, opts = {}) {
     const key = `${e.name}@${e.version || ''}`;
     const underCap = exactVanished && (scannedKeys.size + droppedKeys.size) < MAX_ROLLUP_KEYS;
-    if (outcome === 'dropped') {
+    // 'spilled' (disk waiting list, not yet rescanned) counts with 'dropped' on the
+    // non-scanned side — honest coverage: a spilled item only becomes "covered" when
+    // its drained re-scan writes a real verdict entry. byOutcome keeps them distinct.
+    if (outcome === 'dropped' || outcome === 'spilled') {
       dropped++; ecoNode.dropped++;
       if (underCap) { droppedKeys.add(key); allNames.add(e.name); } else exactVanished = false;
     } else {

package/src/monitor/webhook.js CHANGED Viewed

@@ -1149,6 +1149,27 @@ function mcpTriageTag(a) {
   return sigs.length ? ` 🔌 [MCP: ${sigs.join(', ')}]` : ' 🔌 [MCP]';
 }
+/**
+ * Stability field for the daily report. The spill segment (spilled / drained /
+ * backlog size) only appears when the disk waiting list is enabled — backlog
+ * size is THE convergence signal of the spill rollout (must oscillate around
+ * 0 across days; monotonic growth = drain capacity too low, raise concurrency).
+ * Best-effort: a spill read failure must never break the report.
+ */
+function _stabilityFieldValue(stats) {
+  let v = `Restarts (24h): ${stats.restartsToday || 0} | Temporal load-shed: ${stats.temporalLoadShed || 0} | Queue hard-drops: ${stats.queueHardDrops || 0}`;
+  try {
+    const { isSpillEnabled, getBacklogSize } = require('./spill.js');
+    if (isSpillEnabled()) {
+      v += `\nSpill: ${stats.spilled || 0} spilled | ${stats.spillDrained || 0} drained | backlog ${getBacklogSize()}`;
+      if (stats.workerOom) v += ` | worker OOM: ${stats.workerOom}`;
+    } else if (stats.workerOom) {
+      v += ` | worker OOM: ${stats.workerOom}`;
+    }
+  } catch { /* best-effort */ }
+  return v;
+}
 function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
   // Use in-memory stats (accumulated since last reset, restored from disk on restart)
   // instead of disk-based daily entries which can undercount due to UTC/Paris date mismatch
@@ -1307,7 +1328,7 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
         ...((stats.sandboxDeferred || stats.deferredProcessed || stats.deferredExpired)
           ? [{ name: 'Deferred Sandbox', value: `Enqueued: ${stats.sandboxDeferred || 0} | Processed: ${stats.deferredProcessed || 0} | Expired: ${stats.deferredExpired || 0}`, inline: false }]
           : []),
-        { name: 'Stability', value: `Restarts (24h): ${stats.restartsToday || 0} | Temporal load-shed: ${stats.temporalLoadShed || 0} | Queue hard-drops: ${stats.queueHardDrops || 0}`, inline: false },
+        { name: 'Stability', value: _stabilityFieldValue(stats), inline: false },
         ...(ledgerField ? [ledgerField] : []),
         { name: 'System', value: healthText, inline: false }
       ],

package/src/scanner/ast-detectors/handle-call-expression.js CHANGED Viewed

@@ -49,6 +49,31 @@ const {
   containsDecodePattern,
   resolveNumericExpression
 } = require('./helpers.js');
+const { countInvisibleUnicode } = require('../../shared/unicode-invisibles.js');
+const { classifyMcpWrite } = require('./mcp-write-classifier.js');
+const { isShadowEnabled, recordShadowDivergence } = require('../../shared/shadow.js');
+/**
+ * SHADOW 3-tier classification for mcp_config_injection emissions (R5 + R5b).
+ * Computes the candidate class (template / shell_exec / instruction_injection)
+ * and logs a divergence ONLY when the candidate semantics would downgrade the
+ * verdict (template → MEDIUM). Zero effect on the threat emitted by the caller
+ * — the live severity stays CRITICAL until the shadow data adjudicates the flip.
+ * The package identity is not available at AST level; evidence carries the file.
+ */
+function _shadowClassifyMcpWrite(contentStr, checkPath, rule, ctx) {
+  try {
+    if (!isShadowEnabled()) return;
+    const { cls, signals } = classifyMcpWrite(contentStr, checkPath);
+    if (cls !== 'template') return; // shell_exec / instruction_injection keep CRITICAL — no divergence
+    recordShadowDivergence({
+      detector: 'mcp_config_injection_3tier',
+      oldVerdict: 'CRITICAL',
+      newVerdict: 'MEDIUM',
+      evidence: { cls, signals, path: checkPath, rule, file: ctx.relFile }
+    });
+  } catch { /* shadow must never affect the scan */ }
+}
 /**
  * Detect whether an AST node points at a user-level filesystem location:
@@ -756,6 +781,10 @@ function handleCallExpression(node, ctx) {
           ? MCP_CONTENT_PATTERNS.some(p => contentStr.includes(p.replace(/"/g, '')))
           : isSensitiveConfigFile; // dynamic content only suspicious for known config files
         if (hasContentPattern) {
+          // SHADOW 3-tier classification (zero effect on the emitted severity):
+          // template-class writes are the scaffolder FP under adjudication —
+          // log the would-be CRITICAL→MEDIUM divergence for `shadow-report`.
+          _shadowClassifyMcpWrite(contentStr, mcpCheckPath, 'R5', ctx);
           ctx.threats.push({
             type: 'mcp_config_injection',
             severity: 'CRITICAL',
@@ -780,11 +809,20 @@ function handleCallExpression(node, ctx) {
           const contentStr2 = extractStringValue(contentArg2);
           const hasShellContent = !!contentStr2 && /(?:curl|wget)\s+[^\n]*\|\s*(?:sh|bash|zsh)\b|\beval\s*\(|\bsh\s+-c\s+|\bbash\s+-c\s+|\bnode\s+-e\s+/i.test(contentStr2);
           const hasInjectionInstruction = !!contentStr2 && /IMPORTANT[:\s]+(?:before|after|run|execute)|do\s+not\s+(?:display|show|mention)|always\s+run/i.test(contentStr2);
-          if (hasUserLevelPath || hasShellContent || hasInjectionInstruction) {
+          // 3d (additive, v2.11.91): zero-width/bidi Unicode in the written
+          // content — the TrapDoor hidden-instruction encoding (Socket,
+          // 2026-05-25: instructions invisible in an editor, word-broken so
+          // the 3b/3c plain-text regexes can't match). A legitimate generator
+          // never emits invisible codepoints into a rules file. Strictly
+          // additive: can only ADD detections to the 3a/3b/3c OR.
+          const hasInvisibleContent = !!contentStr2 && countInvisibleUnicode(contentStr2) > 0;
+          if (hasUserLevelPath || hasShellContent || hasInjectionInstruction || hasInvisibleContent) {
             const reasons = [];
             if (hasUserLevelPath) reasons.push('user-level destination (homedir/cwd/env.HOME)');
             if (hasShellContent) reasons.push('shell command in content');
             if (hasInjectionInstruction) reasons.push('AI prompt-injection instruction in content');
+            if (hasInvisibleContent) reasons.push('zero-width/bidi Unicode in content (hidden-instruction encoding)');
+            _shadowClassifyMcpWrite(contentStr2, mcpCheckPath, 'R5b', ctx);
             ctx.threats.push({
               type: 'mcp_config_injection',
               severity: 'CRITICAL',
@@ -2047,4 +2085,6 @@ function handleCallExpression(node, ctx) {
 }
-module.exports = { handleCallExpression };
+// _shadowClassifyMcpWrite is shared with handle-post-walk.js (the Wave-4
+// keyword-co-occurrence emitter — the third mcp_config_injection site).
+module.exports = { handleCallExpression, _shadowClassifyMcpWrite };

package/src/scanner/ast-detectors/handle-post-walk.js CHANGED Viewed

@@ -274,6 +274,19 @@ function handlePostWalk(ctx) {
   // Wave 4: MCP content keywords in file with writeFileSync = MCP injection signal
   if (ctx.hasMcpContentKeywords && !ctx.threats.some(t => t.type === 'mcp_config_injection')) {
+    // SHADOW 3-tier classification (zero effect on the emitted severity). The
+    // 2026-06-11 backtest showed this keyword-co-occurrence rule emits ~85% of
+    // historical mcp_config_injection alerts (100/118 packages) — every
+    // legitimate MCP server installer carries mcpServers keywords + writes —
+    // so the adjudication MUST cover this site, not just R5/R5b. The classifier
+    // runs on the FILE source (the written content is not extractable here):
+    // a file whose code carries shell-exec or hidden-instruction markers keeps
+    // CRITICAL silently; an inert config-writer logs the CRITICAL→MEDIUM
+    // candidate divergence, tagged rule:'W4' so the report splits it out.
+    try {
+      const { _shadowClassifyMcpWrite } = require('./handle-call-expression.js');
+      _shadowClassifyMcpWrite(typeof ctx._content === 'string' ? ctx._content : null, '(file-level keyword co-occurrence)', 'W4', ctx);
+    } catch { /* shadow must never affect the scan */ }
     ctx.threats.push({
       type: 'mcp_config_injection',
       severity: 'CRITICAL',

package/src/scanner/ast-detectors/mcp-write-classifier.js ADDED Viewed

@@ -0,0 +1,71 @@
+'use strict';
+/**
+ * mcp-write-classifier.js — pure 3-tier classifier for mcp_config_injection
+ * candidates (SHADOW adjudication + the future severity flip).
+ *
+ * Empirical classes (web research 2026-06-11, calibrated on real campaigns):
+ *   (a) template              — write with inert content: the scaffolder shape
+ *       (ruler, rulesync, cursor-rules, cursor-tools all legitimately write
+ *       .cursorrules/CLAUDE.md/AGENTS.md). Candidate MEDIUM after adjudication.
+ *   (b) shell_exec            — content carries a shell command or an
+ *       agent-hook exec (SafeDep campaign, 2026-05-13: .claude/settings.json
+ *       SessionStart hook → ELF). Stays CRITICAL.
+ *   (c) instruction_injection — content carries hidden instructions: zero-
+ *       width/bidi Unicode (TrapDoor encoding — Socket 2026-05-25; GitHub
+ *       flags the same) or agent-addressed directives ("do not tell the
+ *       user…"). Stays CRITICAL.
+ *
+ * The classifier is PURE (no I/O, no ctx) so it is unit-testable per class and
+ * is exactly what gets promoted when the flip lands. Until then it feeds the
+ * shadow log: oldVerdict CRITICAL vs newVerdict (template→MEDIUM).
+ *
+ * Honest default: content that cannot be extracted statically classifies as
+ * `template` with signal `dynamic_content` — we don't know, so the shadow
+ * numbers must not pretend we do. (The live R5/R5b severity is unaffected
+ * either way — this module emits no threats.)
+ */
+const { countInvisibleUnicode } = require('../../shared/unicode-invisibles.js');
+// (c) — agent-addressed directives. Superset of the live R5b 3c regex
+// (IMPORTANT/do-not-display/always-run) with the additions calibrated on the
+// Rules-File-Backdoor / Mini-Shai-Hulud wording. The pattern is deliberately
+// broad (no \b anchors): benign-looking text such as "important: run tests
+// before committing" DOES match — by design, because that wording addressed
+// to an agent IS the attack shape; the difference is made by the write
+// target, which the caller has already gated on.
+const INJECTION_DIRECTIVE_RE = /IMPORTANT[:\s]+(?:before|after|run|execute)|do\s+not\s+(?:display|show|mention|tell)|never\s+(?:mention|reveal|disclose)|hide\s+this\s+from|always\s+run/i;
+// (b) — shell command in content. Same expression as the live R5b 3b gate.
+const SHELL_CONTENT_RE = /(?:curl|wget)\s+[^\n]*\|\s*(?:sh|bash|zsh)\b|\beval\s*\(|\bsh\s+-c\s+|\bbash\s+-c\s+|\bnode\s+-e\s+/i;
+// (b) — agent-hook exec in JSON content: a "hooks" structure carrying a
+// "command" (the SafeDep .claude/settings.json SessionStart shape). Order-
+// insensitive containment — the content is config the attacker controls, a
+// strict JSON parse would be evadable with trailing garbage.
+const HOOKS_COMMAND_RE = /"hooks"[\s\S]{0,400}"command"|"command"[\s\S]{0,400}"hooks"/;
+/**
+ * @param {string|null|undefined} contentStr statically-extracted write content
+ *        (null/undefined = dynamic, not extractable)
+ * @param {string} [checkPath] lowercased destination path (reserved for future
+ *        signals; not used for class decision today)
+ * @returns {{cls: 'template'|'shell_exec'|'instruction_injection', signals: string[]}}
+ */
+function classifyMcpWrite(contentStr, checkPath) { // eslint-disable-line no-unused-vars
+  if (contentStr === null || contentStr === undefined || typeof contentStr !== 'string') {
+    return { cls: 'template', signals: ['dynamic_content'] };
+  }
+  const signals = [];
+  if (countInvisibleUnicode(contentStr) > 0) signals.push('zero_width_unicode');
+  if (INJECTION_DIRECTIVE_RE.test(contentStr)) signals.push('injection_directive');
+  if (signals.length > 0) return { cls: 'instruction_injection', signals };
+  if (SHELL_CONTENT_RE.test(contentStr)) signals.push('shell_command');
+  if (HOOKS_COMMAND_RE.test(contentStr)) signals.push('hooks_command_json');
+  if (signals.length > 0) return { cls: 'shell_exec', signals };
+  return { cls: 'template', signals: [] };
+}
+module.exports = { classifyMcpWrite, INJECTION_DIRECTIVE_RE, SHELL_CONTENT_RE, HOOKS_COMMAND_RE };

package/src/scanner/ast.js CHANGED Viewed

@@ -111,6 +111,10 @@ function analyzeFile(content, filePath, basePath) {
   const ctx = {
     threats,
     relFile: path.relative(basePath, filePath),
+    // File source reference for the post-walk shadow classifier (Wave-4 MCP
+    // site has no extractable written-content string — it classifies the file).
+    // A reference to the already-held string: no copy, freed with the ctx.
+    _content: content,
     dynamicRequireVars: new Set(),
     staticAssignments: new Set(),
     // v2.10.73 P2: AST-006 source qualification — tracks WHERE a variable's value came from.