muaddib-scanner 2.11.92 → 2.11.94
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/{self-scan-v2.11.92.json → self-scan-v2.11.94.json} +1 -1
- package/src/monitor/daemon.js +60 -5
- package/src/monitor/queue.js +39 -2
- package/src/monitor/scan-queue.js +52 -23
- package/src/monitor/spill.js +246 -0
- package/src/monitor/state.js +9 -2
- package/src/monitor/webhook.js +22 -1
- package/src/scanner/ast-detectors/handle-call-expression.js +42 -2
- package/src/scanner/ast-detectors/handle-post-walk.js +13 -0
- package/src/scanner/ast-detectors/mcp-write-classifier.js +71 -0
- package/src/scanner/ast.js +4 -0
package/package.json
CHANGED
package/src/monitor/daemon.js
CHANGED
|
@@ -14,7 +14,8 @@ const { ensureWorkers, drainWorkers, getTargetConcurrency, setTargetConcurrency,
|
|
|
14
14
|
const { computeTarget, ADJUST_INTERVAL_MS, BASE_CONCURRENCY } = require('./adaptive-concurrency.js');
|
|
15
15
|
const { startHealthcheck } = require('./healthcheck.js');
|
|
16
16
|
const { startDeferredWorker, stopDeferredWorker, persistDeferredQueue, restoreDeferredQueue, clearDeferredQueue } = require('./deferred-sandbox.js');
|
|
17
|
-
const { evictFromScanQueueBulk } = require('./scan-queue.js');
|
|
17
|
+
const { evictFromScanQueueBulk, enqueueScan } = require('./scan-queue.js');
|
|
18
|
+
const { isSpillEnabled, shouldDrain, drainBacklog, getBacklogSize } = require('./spill.js');
|
|
18
19
|
const { startGhsaPoller, stopGhsaPoller } = require('../ioc/ghsa-poller.js');
|
|
19
20
|
const { cleanupOldArchives, getRetentionDays, startPeriodicCleanup } = require('./tarball-archive.js');
|
|
20
21
|
const { clearMetadataCache } = require('../scanner/temporal-analysis.js');
|
|
@@ -27,6 +28,24 @@ const { clearASTCache } = require('../shared/constants.js');
|
|
|
27
28
|
|
|
28
29
|
const POLL_INTERVAL = 60_000;
|
|
29
30
|
const PROCESS_LOOP_INTERVAL = 2_000; // Queue check interval when empty
|
|
31
|
+
|
|
32
|
+
// ── Spill drain tunables (re-ingestion of the on-disk waiting list) ──
// The main loop only drains when memory pressure is fully cleared and the live
// queue is below SPILL_DRAIN_THRESHOLD, taking at most SPILL_DRAIN_BATCH items
// per pass and at most one pass per SPILL_DRAIN_INTERVAL_MS. The interval is
// what keeps the 2s main loop from re-filling the queue within seconds.
// Each value is read once at module load from its MUADDIB_* environment
// variable; anything that does not parse to a positive integer falls back to
// the default listed next to the variable name.
const [SPILL_DRAIN_THRESHOLD, SPILL_DRAIN_BATCH, SPILL_DRAIN_INTERVAL_MS] = [
  ['MUADDIB_SPILL_DRAIN_THRESHOLD', 500],
  ['MUADDIB_SPILL_DRAIN_BATCH', 200],
  ['MUADDIB_SPILL_DRAIN_INTERVAL_MS', 30_000]
].map(([envName, fallback]) => {
  const parsed = parseInt(process.env[envName], 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : fallback;
});
|
|
30
49
|
const QUEUE_WARNING_THRESHOLD = 5_000; // Warn if queue depth exceeds this
|
|
31
50
|
const QUEUE_PERSIST_INTERVAL = 60_000; // Persist queue to disk every 60s
|
|
32
51
|
const QUEUE_STATE_FILE = path.join(__dirname, '..', '..', 'data', 'queue-state.json');
|
|
@@ -591,14 +610,16 @@ function handleMemoryPressure(level, ratio, rssRatio, recentlyScanned, downloads
|
|
|
591
610
|
// first (newest survive — most likely to still exist for re-scan), protected only as
|
|
592
611
|
// a last resort, and LEDGERS every drop. Closes the v2.10.88 gap where the raw
|
|
593
612
|
// splice(0,n) silently dropped protected scans (CLAUDE.md "ne jamais perdre de scan").
|
|
594
|
-
const { dropped, droppedProtected } = evictFromScanQueueBulk(scanQueue, EMERGENCY_QUEUE_KEEP, 'mem_emergency');
|
|
613
|
+
const { dropped, droppedProtected, spilled } = evictFromScanQueueBulk(scanQueue, EMERGENCY_QUEUE_KEEP, 'mem_emergency');
|
|
595
614
|
summary.queueDropped = dropped;
|
|
596
615
|
summary.queueDroppedProtected = droppedProtected;
|
|
616
|
+
summary.queueSpilled = spilled || 0;
|
|
597
617
|
if (stats) {
|
|
598
618
|
stats.queueEmergencyDrops = (stats.queueEmergencyDrops || 0) + dropped;
|
|
599
619
|
if (droppedProtected) stats.queueEmergencyProtectedDrops = (stats.queueEmergencyProtectedDrops || 0) + droppedProtected;
|
|
620
|
+
if (spilled) stats.spilled = (stats.spilled || 0) + spilled;
|
|
600
621
|
}
|
|
601
|
-
console.error(`[MONITOR] MEMORY EMERGENCY: ${memPctLabel} — truncated queue ${queueBefore} → ${scanQueue.length} (dropped ${dropped} oldest UNPROTECTED${droppedProtected ? ` + ${droppedProtected} protected as last resort` : ''}, all ledgered)`);
|
|
622
|
+
console.error(`[MONITOR] MEMORY EMERGENCY: ${memPctLabel} — truncated queue ${queueBefore} → ${scanQueue.length} (${spilled ? `SPILLED ${spilled} to disk backlog` : `dropped ${dropped} oldest UNPROTECTED${droppedProtected ? ` + ${droppedProtected} protected as last resort` : ''}`}, all ledgered)`);
|
|
602
623
|
}
|
|
603
624
|
// Clear deferred sandbox queue (holds full staticResult objects)
|
|
604
625
|
const deferredDropped = clearDeferredQueue();
|
|
@@ -635,8 +656,12 @@ function reportStats(stats) {
|
|
|
635
656
|
const avg = stats.scanned > 0 ? (stats.totalTimeMs / stats.scanned / 1000).toFixed(1) : '0.0';
|
|
636
657
|
const { t1, t1a, t1b, t2, t3 } = stats.suspectByTier;
|
|
637
658
|
console.log(`[MONITOR] Stats: ${stats.scanned} scanned, ${stats.clean} clean, ${stats.suspect} suspect (T1a:${t1a} T1b:${t1b} T1:${t1} T2:${t2} T3:${t3}), ${stats.errors} error${stats.errors !== 1 ? 's' : ''}, avg ${avg}s/pkg`);
|
|
638
|
-
if (stats.temporalLoadShed || stats.queueHardDrops || (stats.restartsToday || 0) > 1) {
|
|
639
|
-
|
|
659
|
+
if (stats.temporalLoadShed || stats.queueHardDrops || (stats.restartsToday || 0) > 1 || stats.spilled || stats.workerOom) {
|
|
660
|
+
// Backlog size read best-effort: the convergence signal for the spill rollout
|
|
661
|
+
// (must oscillate, not grow monotonically — see plan validation step 4).
|
|
662
|
+
let backlog = 0;
|
|
663
|
+
try { if (isSpillEnabled()) backlog = getBacklogSize(); } catch { /* best-effort */ }
|
|
664
|
+
console.log(`[MONITOR] Stability: restarts(24h)=${stats.restartsToday || 0}, temporal load-shed=${stats.temporalLoadShed || 0}, queue hard-drops=${stats.queueHardDrops || 0}, spilled=${stats.spilled || 0}, drained=${stats.spillDrained || 0}, backlog=${backlog}, workerOom=${stats.workerOom || 0}`);
|
|
640
665
|
}
|
|
641
666
|
if (stats.changesStreamPackages) {
|
|
642
667
|
console.log(`[MONITOR] Changes stream packages: ${stats.changesStreamPackages}`);
|
|
@@ -1064,6 +1089,7 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
|
|
|
1064
1089
|
// This loop tops up workers every 2s AND runs housekeeping (memory, daily report)
|
|
1065
1090
|
// without being blocked by long-running scans.
|
|
1066
1091
|
let lastMemoryLogTime = Date.now();
|
|
1092
|
+
let lastSpillDrainTime = 0;
|
|
1067
1093
|
|
|
1068
1094
|
while (running) {
|
|
1069
1095
|
// ─── Memory circuit breaker (every iteration) ───
|
|
@@ -1080,6 +1106,35 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
|
|
|
1080
1106
|
ensureWorkers(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailableRef.value);
|
|
1081
1107
|
}
|
|
1082
1108
|
|
|
1109
|
+
// ─── Spill drain (MUADDIB_QUEUE_SPILL=1) ───
|
|
1110
|
+
// Re-ingest evicted scans from the disk backlog during calm windows: pressure
|
|
1111
|
+
// fully NONE + queue headroom, one bounded batch per SPILL_DRAIN_INTERVAL_MS.
|
|
1112
|
+
// Protected items (IOC/burst/first-publish/ATO) drain first — a malicious
|
|
1113
|
+
// package is often unpublished quickly, late drains lose the tarball.
|
|
1114
|
+
if (isSpillEnabled() &&
|
|
1115
|
+
Date.now() - lastSpillDrainTime >= SPILL_DRAIN_INTERVAL_MS &&
|
|
1116
|
+
shouldDrain(pressureLevel, scanQueue.length, SPILL_DRAIN_THRESHOLD)) {
|
|
1117
|
+
lastSpillDrainTime = Date.now();
|
|
1118
|
+
try {
|
|
1119
|
+
// Dedup against recentlyScanned (same key format as processQueueItem) AND
|
|
1120
|
+
// the live queue (small here by the shouldDrain threshold).
|
|
1121
|
+
const inQueue = new Set(scanQueue.map(it => `${it.ecosystem}/${it.name}@${it.version}`));
|
|
1122
|
+
const r = drainBacklog(scanQueue, stats, {
|
|
1123
|
+
maxItems: Math.min(SPILL_DRAIN_BATCH, Math.max(1, SPILL_DRAIN_THRESHOLD - scanQueue.length)),
|
|
1124
|
+
enqueueFn: enqueueScan,
|
|
1125
|
+
isDuplicate: (e) => {
|
|
1126
|
+
const key = `${e.ecosystem}/${e.name}@${e.version}`;
|
|
1127
|
+
return recentlyScanned.has(key) || inQueue.has(key);
|
|
1128
|
+
}
|
|
1129
|
+
});
|
|
1130
|
+
if (r.drained > 0 || r.deduped > 0) {
|
|
1131
|
+
console.log(`[MONITOR] SPILL_DRAIN: re-ingested ${r.drained} (${r.deduped} deduped, backlog ${r.remaining} remaining)`);
|
|
1132
|
+
}
|
|
1133
|
+
} catch (err) {
|
|
1134
|
+
console.error(`[MONITOR] SPILL_DRAIN failed: ${err.message}`);
|
|
1135
|
+
}
|
|
1136
|
+
}
|
|
1137
|
+
|
|
1083
1138
|
// ─── Memory watchdog (adaptive interval) ───
|
|
1084
1139
|
// Log every 5min normally, every 15s under pressure.
|
|
1085
1140
|
const memLogInterval = pressureLevel >= MEMORY_PRESSURE_LEVELS.HIGH
|
package/src/monitor/queue.js
CHANGED
|
@@ -400,9 +400,29 @@ function shouldSkipSandbox(ctx) {
|
|
|
400
400
|
*/
|
|
401
401
|
function runScanInWorker(extractedDir, timeoutMs, scanContext = null, signal = null) {
|
|
402
402
|
return new Promise((resolve, reject) => {
|
|
403
|
-
const
|
|
403
|
+
const workerOpts = {
|
|
404
404
|
workerData: { extractedDir, scanContext: scanContext || {} }
|
|
405
|
-
}
|
|
405
|
+
};
|
|
406
|
+
// Per-worker V8 memory limits (OOM durable fix): the 2026-06 RSS spikes
|
|
407
|
+
// (8.2-8.8GB with heap ~550MB) are off-heap allocations inside scan workers —
|
|
408
|
+
// one pathological package could blow the WHOLE process toward the EMERGENCY
|
|
409
|
+
// breaker (queue purge + worker kills). With a per-worker cap, that package
|
|
410
|
+
// OOMs ITS worker only: ERR_WORKER_OUT_OF_MEMORY → rejected → ledgered
|
|
411
|
+
// `worker_oom` (never counted clean) while the daemon and its siblings keep
|
|
412
|
+
// running. This is also what allows raising MUADDIB_SCAN_CONCURRENCY back
|
|
413
|
+
// up (it was clamped 12-16 → 8 on 2026-06-08 as the OOM mitigation).
|
|
414
|
+
// OFF unless MUADDIB_WORKER_MAX_OLD_MB is set (staged rollout; suggested 1024).
|
|
415
|
+
const maxOldMb = parseInt(globalThis.process.env.MUADDIB_WORKER_MAX_OLD_MB, 10);
|
|
416
|
+
if (Number.isFinite(maxOldMb) && maxOldMb > 0) {
|
|
417
|
+
const maxYoungMb = parseInt(globalThis.process.env.MUADDIB_WORKER_MAX_YOUNG_MB, 10);
|
|
418
|
+
workerOpts.resourceLimits = {
|
|
419
|
+
maxOldGenerationSizeMb: maxOldMb,
|
|
420
|
+
maxYoungGenerationSizeMb: Number.isFinite(maxYoungMb) && maxYoungMb > 0 ? maxYoungMb : 128,
|
|
421
|
+
codeRangeSizeMb: 64,
|
|
422
|
+
stackSizeMb: 8
|
|
423
|
+
};
|
|
424
|
+
}
|
|
425
|
+
const worker = new Worker(SCAN_WORKER_PATH, workerOpts);
|
|
406
426
|
const _sc = scanContext || {};
|
|
407
427
|
_liveWorkers.set(worker, { name: _sc.name, version: _sc.version, ecosystem: _sc.ecosystem });
|
|
408
428
|
|
|
@@ -1246,6 +1266,23 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
1246
1266
|
recordError(err, stats);
|
|
1247
1267
|
stats.scanned++;
|
|
1248
1268
|
stats.totalTimeMs += Date.now() - startTime;
|
|
1269
|
+
// Per-worker resourceLimits breach: the worker died on ITS V8 cap
|
|
1270
|
+
// (ERR_WORKER_OUT_OF_MEMORY) instead of blowing the process RSS. Same
|
|
1271
|
+
// safeguard as static_timeout: a package that OOMs the scanner must NOT
|
|
1272
|
+
// count clean — inconclusive, distinct ledger source, distinct log line
|
|
1273
|
+
// (the live-validation metric for the limits rollout). No retry: an OOM
|
|
1274
|
+
// re-OOMs deterministically.
|
|
1275
|
+
const isWorkerOom = err && (err.code === 'ERR_WORKER_OUT_OF_MEMORY' ||
|
|
1276
|
+
/ERR_WORKER_OUT_OF_MEMORY|reached its memory limit/i.test(err.message || ''));
|
|
1277
|
+
if (isWorkerOom) {
|
|
1278
|
+
console.error(`[MONITOR] WORKER_OOM: ${name}@${version} — scan worker hit its resourceLimits cap (kept INCONCLUSIVE, not clean)`);
|
|
1279
|
+
stats.workerOom = (stats.workerOom || 0) + 1;
|
|
1280
|
+
updateScanStats('sandbox_inconclusive');
|
|
1281
|
+
try {
|
|
1282
|
+
appendScanLedger({ name, version, ecosystem, outcome: 'error', source: 'worker_oom' });
|
|
1283
|
+
} catch { /* ledger is best-effort */ }
|
|
1284
|
+
return { sandboxResult: null, staticClean: false };
|
|
1285
|
+
}
|
|
1249
1286
|
console.error(`[MONITOR] ERROR scanning ${name}@${version}: ${err.message}`);
|
|
1250
1287
|
// Ledger the terminal failure so the scan-ledger never over-states coverage (an errored
|
|
1251
1288
|
// package is NOT clean). Also captures EMERGENCY worker-terminate losses, whose reject
|
|
@@ -59,6 +59,19 @@ function enqueueScan(scanQueue, item, stats, max = MAX_SCAN_QUEUE) {
|
|
|
59
59
|
const evicted = protectedFallback ? scanQueue.shift() : scanQueue.splice(victimIdx, 1)[0];
|
|
60
60
|
dropped = true;
|
|
61
61
|
if (stats) stats.queueHardDrops = (stats.queueHardDrops || 0) + 1;
|
|
62
|
+
// Spill-to-disk waiting list (MUADDIB_QUEUE_SPILL=1): the evicted item goes to
|
|
63
|
+
// data/scan-backlog.jsonl for re-ingestion during calm periods instead of being
|
|
64
|
+
// lost. Lazy require (same pattern as state.js below) — spill.js requires this
|
|
65
|
+
// module for isProtected, so a top-level import would be a cycle. On spill
|
|
66
|
+
// failure (or flag off) the behavior degrades to the pre-spill drop, ledgered.
|
|
67
|
+
let spilled = false;
|
|
68
|
+
try {
|
|
69
|
+
const spillMod = require('./spill.js');
|
|
70
|
+
if (spillMod.isSpillEnabled() && evicted && evicted.name) {
|
|
71
|
+
spilled = spillMod.spillItems([evicted]) === 1;
|
|
72
|
+
if (spilled && stats) stats.spilled = (stats.spilled || 0) + 1;
|
|
73
|
+
}
|
|
74
|
+
} catch { /* spill is best-effort — fall through to the drop ledger */ }
|
|
62
75
|
// Phase 0a: record the dropped item so a coverage loss keeps an identity — answers
|
|
63
76
|
// "which versions were never scanned" (e.g. the Miasma 72s/96-version burst). Lazy
|
|
64
77
|
// require avoids any top-level coupling with state.js; best-effort, never throws.
|
|
@@ -68,7 +81,8 @@ function enqueueScan(scanQueue, item, stats, max = MAX_SCAN_QUEUE) {
|
|
|
68
81
|
if (evicted && evicted.name) {
|
|
69
82
|
require('./state.js').appendScanLedger({
|
|
70
83
|
name: evicted.name, version: evicted.version, ecosystem: evicted.ecosystem,
|
|
71
|
-
outcome:
|
|
84
|
+
outcome: spilled ? 'spilled' : 'dropped',
|
|
85
|
+
source: (protectedFallback ? 'queue_cap_protected' : 'queue_cap') + (spilled ? '_spill' : ''),
|
|
72
86
|
// AUDIT-A1 observability (see evictFromScanQueueBulk)
|
|
73
87
|
firstPublish: !!evicted.firstPublish, isBurstExtra: !!evicted.isATOBurstExtra
|
|
74
88
|
});
|
|
@@ -127,33 +141,48 @@ function evictFromScanQueueBulk(scanQueue, targetKeep, source = 'bulk_evict', le
|
|
|
127
141
|
try { appendLedger = require('./state.js').appendScanLedger; } catch { appendLedger = null; }
|
|
128
142
|
}
|
|
129
143
|
|
|
130
|
-
// Compact survivors in place,
|
|
131
|
-
|
|
144
|
+
// Compact survivors in place, collecting the evicted items for the spill below.
|
|
145
|
+
const evictedItems = [];
|
|
132
146
|
let w = 0;
|
|
133
147
|
for (let r = 0; r < before; r++) {
|
|
134
|
-
if (dropSet.has(r))
|
|
135
|
-
|
|
136
|
-
if (appendLedger && item && item.name) {
|
|
137
|
-
try {
|
|
138
|
-
appendLedger({
|
|
139
|
-
name: item.name, version: item.version, ecosystem: item.ecosystem,
|
|
140
|
-
outcome: 'dropped',
|
|
141
|
-
source: _isProtected(item) ? `${source}_protected` : source,
|
|
142
|
-
// AUDIT-A1 observability: record whether a DROPPED item was a first-publish
|
|
143
|
-
// (real coverage loss) vs a burst-extra (version-spam, expected). Lets us
|
|
144
|
-
// measure if the memory breaker is evicting genuine new packages.
|
|
145
|
-
firstPublish: !!item.firstPublish,
|
|
146
|
-
isBurstExtra: !!item.isATOBurstExtra
|
|
147
|
-
});
|
|
148
|
-
} catch { /* ledger is best-effort — must never break the breaker */ }
|
|
149
|
-
}
|
|
150
|
-
} else {
|
|
151
|
-
scanQueue[w++] = scanQueue[r];
|
|
152
|
-
}
|
|
148
|
+
if (dropSet.has(r)) evictedItems.push(scanQueue[r]);
|
|
149
|
+
else scanQueue[w++] = scanQueue[r];
|
|
153
150
|
}
|
|
154
151
|
scanQueue.length = w;
|
|
155
152
|
|
|
156
|
-
|
|
153
|
+
// Spill-to-disk waiting list (MUADDIB_QUEUE_SPILL=1): ONE batched append for the
|
|
154
|
+
// whole eviction (an EMERGENCY evicts thousands — per-item appends would thrash).
|
|
155
|
+
// spillItems is all-or-nothing per call (single buffered write), so `spilled`
|
|
156
|
+
// cleanly selects the ledger outcome for the batch. Lazy require: spill.js
|
|
157
|
+
// imports isProtected from this module — a top-level import would be a cycle.
|
|
158
|
+
// On spill failure (or flag off) the behavior degrades to the pre-spill drop.
|
|
159
|
+
let spilled = false;
|
|
160
|
+
try {
|
|
161
|
+
const spillMod = require('./spill.js');
|
|
162
|
+
if (spillMod.isSpillEnabled() && evictedItems.length > 0) {
|
|
163
|
+
spilled = spillMod.spillItems(evictedItems) > 0;
|
|
164
|
+
}
|
|
165
|
+
} catch { /* spill is best-effort */ }
|
|
166
|
+
|
|
167
|
+
// Ledger each evicted item with an identity-preserving source (protected drops get
|
|
168
|
+
// a distinct suffix so the rare case stays visible in the rollup).
|
|
169
|
+
for (const item of evictedItems) {
|
|
170
|
+
if (!appendLedger || !item || !item.name) continue;
|
|
171
|
+
try {
|
|
172
|
+
appendLedger({
|
|
173
|
+
name: item.name, version: item.version, ecosystem: item.ecosystem,
|
|
174
|
+
outcome: spilled ? 'spilled' : 'dropped',
|
|
175
|
+
source: (_isProtected(item) ? `${source}_protected` : source) + (spilled ? '_spill' : ''),
|
|
176
|
+
// AUDIT-A1 observability: record whether a DROPPED item was a first-publish
|
|
177
|
+
// (real coverage loss) vs a burst-extra (version-spam, expected). Lets us
|
|
178
|
+
// measure if the memory breaker is evicting genuine new packages.
|
|
179
|
+
firstPublish: !!item.firstPublish,
|
|
180
|
+
isBurstExtra: !!item.isATOBurstExtra
|
|
181
|
+
});
|
|
182
|
+
} catch { /* ledger is best-effort — must never break the breaker */ }
|
|
183
|
+
}
|
|
184
|
+
|
|
185
|
+
return { dropped: toDrop, droppedProtected, spilled: spilled ? evictedItems.length : 0 };
|
|
157
186
|
}
|
|
158
187
|
|
|
159
188
|
// ── AUDIT A2: optional priority dequeue (gated OFF by default) ──────────────
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* spill.js — disk-backed waiting list for the scan queue.
|
|
5
|
+
*
|
|
6
|
+
* Today an EMERGENCY memory purge (and the queue hard-cap) DROPS evicted scans:
|
|
7
|
+
* ledgered, but lost (91K mem_emergency drops / 64K distinct never-scanned
|
|
8
|
+
* versions in the 2026-06-11 24h window). The queue entries are tiny metadata —
|
|
9
|
+
* dropping them frees almost nothing; the memory relief comes from the
|
|
10
|
+
* container/worker kills the breaker also performs. This module converts those
|
|
11
|
+
* drops into DEFERRALS: evicted items append to a bounded JSONL backlog and are
|
|
12
|
+
* re-ingested progressively during calm periods (12h/24 have zero drops — the
|
|
13
|
+
* baseline flow is fully absorbed; losses are burst-shaped).
|
|
14
|
+
*
|
|
15
|
+
* Defensive priority (mirrors scan-queue.js `isProtected`): malicious packages
|
|
16
|
+
* are often unpublished quickly — draining late can mean the tarball is gone.
|
|
17
|
+
* - drain: protected items first (IOC match / burst / first-publish / ATO),
|
|
18
|
+
* then FIFO. No LIFO: under repeated spikes the oldest would never drain —
|
|
19
|
+
* a disguised loss.
|
|
20
|
+
* - cap compaction: evict oldest UNPROTECTED first, protected as last resort
|
|
21
|
+
* (the evictFromScanQueueBulk contract), every eviction ledgered. We lose
|
|
22
|
+
* noise before we lose signal.
|
|
23
|
+
*
|
|
24
|
+
* Bounds & resilience (CLAUDE.md production rules):
|
|
25
|
+
* - MUADDIB_SPILL_MAX entries (default 200 000 ≈ 30 MB ≈ ~2 days of worst-case
|
|
26
|
+
* spikes). The cap should never be reached if the drain converges — if it
|
|
27
|
+
* is, evictions are ledgered (`spill_cap`), never silent.
|
|
28
|
+
* - All writes are append-one-line or tmp+rename rewrites; a crash mid-drain
|
|
29
|
+
* at worst re-drains the same items, deduplicated by the caller.
|
|
30
|
+
* - Every function is never-throw: a spill failure must degrade to the old
|
|
31
|
+
* behavior (drop, ledgered), not break the breaker.
|
|
32
|
+
*
|
|
33
|
+
* Env (read at call time): MUADDIB_QUEUE_SPILL=1 (master switch, default OFF),
|
|
34
|
+
* MUADDIB_SPILL_FILE (override, tests), MUADDIB_SPILL_MAX.
|
|
35
|
+
*/
|
|
36
|
+
|
|
37
|
+
const fs = require('fs');
|
|
38
|
+
const path = require('path');
|
|
39
|
+
|
|
40
|
+
const { isProtected } = require('./scan-queue.js');
|
|
41
|
+
|
|
42
|
+
const DEFAULT_SPILL_FILE = path.join(__dirname, '..', '..', 'data', 'scan-backlog.jsonl');
|
|
43
|
+
const DEFAULT_MAX_ENTRIES = 200_000;
|
|
44
|
+
|
|
45
|
+
// Fields persisted per item — everything re-enqueue + protection need, nothing
|
|
46
|
+
// else (bounded line size ≈ 150-250 bytes).
|
|
47
|
+
const SPILL_FIELDS = [
|
|
48
|
+
'name', 'version', 'ecosystem', 'tarballUrl',
|
|
49
|
+
'firstPublish', 'isIOCMatch', 'isBurst', 'atoSignal', 'isATOBurstExtra'
|
|
50
|
+
];
|
|
51
|
+
|
|
52
|
+
/**
 * Master switch for the disk waiting list.
 * The environment is consulted on every call, so the flag can change at runtime.
 * @returns {boolean} true only when MUADDIB_QUEUE_SPILL is exactly the string '1'
 */
function isSpillEnabled() {
  const { MUADDIB_QUEUE_SPILL: flag } = globalThis.process.env;
  return flag === '1';
}
|
|
55
|
+
|
|
56
|
+
/**
 * Resolve the backlog file path at call time.
 * @returns {string} MUADDIB_SPILL_FILE when set to a non-empty value, otherwise DEFAULT_SPILL_FILE
 */
function _spillFile() {
  const override = globalThis.process.env.MUADDIB_SPILL_FILE;
  if (override) return override;
  return DEFAULT_SPILL_FILE;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Backlog entry cap, read from MUADDIB_SPILL_MAX at call time.
 * @returns {number} the configured value when it parses to an integer in
 *   [10, 5 000 000]; DEFAULT_MAX_ENTRIES when unset, unparseable or out of range
 */
function _maxEntries() {
  const configured = globalThis.process.env.MUADDIB_SPILL_MAX;
  if (!configured) return DEFAULT_MAX_ENTRIES;
  const limit = parseInt(configured, 10);
  const inRange = Number.isFinite(limit) && limit >= 10 && limit <= 5_000_000;
  return inRange ? limit : DEFAULT_MAX_ENTRIES;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Load the JSONL backlog into memory.
 * Blank lines, lines that fail JSON.parse, and parsed values without a truthy
 * `name` are skipped rather than reported.
 * @param {string} file backlog path
 * @returns {Array<object>} parsed entries in file order; [] when the file cannot be read
 */
function _readEntries(file) {
  let text;
  try {
    text = fs.readFileSync(file, 'utf8');
  } catch {
    return [];
  }
  const parsed = [];
  text.split('\n').forEach((rawLine) => {
    if (rawLine.trim() === '') return;
    let entry;
    try {
      entry = JSON.parse(rawLine);
    } catch {
      return; // truncated/corrupt line (e.g. crash mid-write) — skip it
    }
    if (entry && entry.name) parsed.push(entry);
  });
  return parsed;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Replace the backlog with `entries`: the content is written to `<file>.tmp`
 * and then renamed over `file`. An empty list yields an empty file.
 * Errors from the fs calls propagate to the caller.
 * @param {string} file backlog path
 * @param {Array<object>} entries entries to persist, one JSON document per line
 */
function _writeEntries(file, entries) {
  const staging = file + '.tmp';
  let payload = '';
  if (entries.length) {
    payload = entries.map((entry) => JSON.stringify(entry)).join('\n') + '\n';
  }
  fs.writeFileSync(staging, payload, 'utf8');
  fs.renameSync(staging, file);
}
|
|
85
|
+
|
|
86
|
+
/**
 * Persist evicted scan-queue items to the backlog file.
 * All records are serialised first and appended with a single write, then the
 * backlog cap is enforced. Never throws: any failure returns 0 so the caller
 * can fall back to its drop-and-ledger path.
 * @param {Array<object>} items evicted scan-queue items; entries without a truthy `name` are ignored
 * @returns {number} number of records appended (0 on invalid input or failure)
 */
function spillItems(items) {
  if (!Array.isArray(items) || items.length === 0) return 0;
  const file = _spillFile();
  const lines = [];
  try {
    const parentDir = path.dirname(file);
    if (!fs.existsSync(parentDir)) fs.mkdirSync(parentDir, { recursive: true });
    for (const item of items) {
      if (!item || !item.name) continue;
      const record = { ts: new Date().toISOString() };
      // Copy only the whitelisted fields; undefined/null/false values are omitted.
      for (const field of SPILL_FIELDS) {
        const value = item[field];
        if (value !== undefined && value !== null && value !== false) record[field] = value;
      }
      lines.push(JSON.stringify(record));
    }
    if (lines.length > 0) fs.appendFileSync(file, lines.join('\n') + '\n', 'utf8');
    _compactBacklog(file);
  } catch {
    return 0; // degrade to drop-with-ledger at the call site
  }
  return lines.length;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Enforce the backlog cap returned by _maxEntries().
 * When the file holds more entries than the cap, the surplus is removed:
 * oldest entries that isProtected() rejects go first, and protected entries are
 * taken (oldest first) only if that is not enough. Each removed entry is passed
 * to the ledger function with outcome 'dropped' and source `spill_cap` or
 * `spill_cap_protected`. Never throws.
 * @param {string} file backlog path
 * @param {Function|null} [ledgerFn] ledger appender; defaults to state.js appendScanLedger (lazy require)
 */
function _compactBacklog(file, ledgerFn = null) {
  try {
    const max = _maxEntries();
    const entries = _readEntries(file);
    const toDrop = entries.length - max;
    if (toDrop <= 0) return;

    // Two selection passes over the entries in file (oldest-first) order.
    const victims = new Set();
    const pick = (eligible) => {
      for (let idx = 0; idx < entries.length && victims.size < toDrop; idx++) {
        if (eligible(idx)) victims.add(idx);
      }
    };
    pick((idx) => !isProtected(entries[idx])); // unprotected first
    pick((idx) => !victims.has(idx)); // protected, last resort

    let record = ledgerFn;
    if (!record) {
      try {
        record = require('./state.js').appendScanLedger;
      } catch {
        record = null;
      }
    }

    const survivors = [];
    entries.forEach((entry, idx) => {
      if (!victims.has(idx)) {
        survivors.push(entry);
        return;
      }
      if (!record) return;
      try {
        record({
          name: entry.name, version: entry.version, ecosystem: entry.ecosystem,
          outcome: 'dropped',
          source: isProtected(entry) ? 'spill_cap_protected' : 'spill_cap',
          firstPublish: !!entry.firstPublish, isBurstExtra: !!entry.isATOBurstExtra
        });
      } catch { /* best-effort */ }
    });

    _writeEntries(file, survivors);
    console.warn(`[MONITOR] SPILL_CAP: backlog over ${max} — evicted ${toDrop} oldest (ledgered). The drain is not keeping up.`);
  } catch { /* never throw */ }
}
|
|
158
|
+
|
|
159
|
+
/**
 * Pure drain predicate used by the daemon main loop and by tests.
 * @param {number} pressureLevel current memory pressure level; only exactly 0 allows a drain
 * @param {number} queueLen current live queue length
 * @param {number} threshold queue length below which there is headroom to drain
 * @returns {boolean} true when pressure is 0 and queueLen is strictly below threshold
 */
function shouldDrain(pressureLevel, queueLen, threshold) {
  if (pressureLevel !== 0) return false;
  return queueLen < threshold;
}
|
|
166
|
+
|
|
167
|
+
/**
 * Re-ingest up to maxItems from the backlog into the live scan queue.
 * Protected entries drain first (oldest-first within each class), then FIFO.
 * Remaining entries are rewritten via tmp+rename. Crash-resilient: a kill
 * between enqueue and rewrite re-drains the same items on the next tick — the
 * caller's isDuplicate (recentlyScanned + in-queue keys) absorbs replays.
 *
 * The enqueue callback may itself append to (or compact) the backlog file:
 * scan-queue.enqueueScan spills the item it evicts when the live queue is at
 * its cap. The rewrite therefore never blindly overwrites the file with the
 * snapshot read at the start of the drain; if the file changed during the
 * enqueue loop, the remaining set is rebuilt from the file's current content.
 *
 * Never throws; on any failure the counters accumulated so far are returned.
 *
 * @param {Array} scanQueue live queue (enqueued via injected enqueueFn)
 * @param {object|null} stats monitor stats (spillDrained / spillDeduped counters)
 * @param {object} opts
 * @param {number} opts.maxItems batch bound (required > 0; coerced to a 32-bit integer)
 * @param {Function} opts.enqueueFn (scanQueue, item, stats) => void — scan-queue.enqueueScan
 * @param {Function} [opts.isDuplicate] (entry) => boolean — receives the whole
 *   backlog entry, so the caller owns the dedupe-key format
 * @returns {{drained:number, deduped:number, remaining:number}}
 */
function drainBacklog(scanQueue, stats, opts = {}) {
  const res = { drained: 0, deduped: 0, remaining: 0 };
  try {
    const file = _spillFile();
    const maxItems = opts.maxItems | 0;
    if (maxItems <= 0 || typeof opts.enqueueFn !== 'function') return res;
    let st;
    try { st = fs.statSync(file); } catch { return res; } // no backlog — cheap exit
    if (!st.size) return res;

    const entries = _readEntries(file);
    if (entries.length === 0) return res;

    // Selection AND enqueue order: protected first (oldest-first within the
    // class), then FIFO — bounded by maxItems. Order matters: the live queue
    // is consumed FIFO, so protected items must be enqueued ahead of plain
    // ones, not merely included in the batch.
    const takeIdx = new Set();
    const takeOrder = [];
    for (let i = 0; i < entries.length && takeOrder.length < maxItems; i++) {
      if (isProtected(entries[i])) { takeIdx.add(i); takeOrder.push(i); }
    }
    for (let i = 0; i < entries.length && takeOrder.length < maxItems; i++) {
      if (!takeIdx.has(i)) { takeIdx.add(i); takeOrder.push(i); }
    }

    for (const i of takeOrder) {
      const e = entries[i];
      if (opts.isDuplicate && opts.isDuplicate(e)) {
        res.deduped++;
        continue; // already scanned or already queued — discard from backlog
      }
      const { ts: _ts, ...item } = e; // strip the spill timestamp, restore the queue item shape
      opts.enqueueFn(scanQueue, item, stats);
      res.drained++;
    }

    let remaining = entries.filter((_, i) => !takeIdx.has(i));

    // If a callback touched the backlog while we were enqueuing (size or mtime
    // differs from the pre-read stat), the in-memory snapshot is stale: writing
    // it back would silently discard the freshly spilled lines. Rebuild from
    // the current file, removing one occurrence per taken entry.
    let now = null;
    try { now = fs.statSync(file); } catch { now = null; }
    if (now && (now.size !== st.size || now.mtimeMs !== st.mtimeMs)) {
      const taken = new Map();
      for (const i of takeOrder) {
        const key = JSON.stringify(entries[i]);
        taken.set(key, (taken.get(key) || 0) + 1);
      }
      remaining = _readEntries(file).filter((e) => {
        const key = JSON.stringify(e);
        const left = taken.get(key) || 0;
        if (left === 0) return true;
        taken.set(key, left - 1);
        return false;
      });
    }

    _writeEntries(file, remaining);
    res.remaining = remaining.length;
    if (stats) {
      stats.spillDrained = (stats.spillDrained || 0) + res.drained;
      stats.spillDeduped = (stats.spillDeduped || 0) + res.deduped;
    }
  } catch { /* never throw — worst case the same items drain next tick */ }
  return res;
}
|
|
231
|
+
|
|
232
|
+
/**
 * Count the entries currently in the backlog by reading and parsing the file.
 * @returns {number} number of valid entries; 0 when the file is missing or unreadable
 */
function getBacklogSize() {
  const entries = _readEntries(_spillFile());
  return entries.length;
}
|
|
236
|
+
|
|
237
|
+
module.exports = {
|
|
238
|
+
isSpillEnabled,
|
|
239
|
+
spillItems,
|
|
240
|
+
drainBacklog,
|
|
241
|
+
shouldDrain,
|
|
242
|
+
getBacklogSize,
|
|
243
|
+
// test seams
|
|
244
|
+
_compactBacklog,
|
|
245
|
+
SPILL_FIELDS
|
|
246
|
+
};
|
package/src/monitor/state.js
CHANGED
|
@@ -972,7 +972,11 @@ let _scanLedgerAppendedSinceCompact = 0;
|
|
|
972
972
|
const SCAN_LEDGER_OUTCOMES = new Set([
|
|
973
973
|
'clean', 'clean_low_signal', 'clean_tooling', 'suspect', 'ml_clean', 'llm_benign',
|
|
974
974
|
'sandbox_inconclusive', 'sandbox_unconfirmed', 'confirmed',
|
|
975
|
-
|
|
975
|
+
// 'spilled' = evicted to the disk waiting list (data/scan-backlog.jsonl) instead
|
|
976
|
+
// of dropped — NOT scanned. A later drain + scan writes a normal scan entry; a
|
|
977
|
+
// spilled item that never drains stays an honest coverage hole (counted with
|
|
978
|
+
// dropped in the rollup).
|
|
979
|
+
'static_timeout', 'size_skip', 'dropped', 'spilled', 'error'
|
|
976
980
|
]);
|
|
977
981
|
|
|
978
982
|
// Benign terminal verdicts — the ledger-headline "clean" bucket. Mirrors the
|
|
@@ -1162,7 +1166,10 @@ function computeLedgerRollup(sinceTs, opts = {}) {
|
|
|
1162
1166
|
|
|
1163
1167
|
const key = `${e.name}@${e.version || ''}`;
|
|
1164
1168
|
const underCap = exactVanished && (scannedKeys.size + droppedKeys.size) < MAX_ROLLUP_KEYS;
|
|
1165
|
-
|
|
1169
|
+
// 'spilled' (disk waiting list, not yet rescanned) counts with 'dropped' on the
|
|
1170
|
+
// non-scanned side — honest coverage: a spilled item only becomes "covered" when
|
|
1171
|
+
// its drained re-scan writes a real verdict entry. byOutcome keeps them distinct.
|
|
1172
|
+
if (outcome === 'dropped' || outcome === 'spilled') {
|
|
1166
1173
|
dropped++; ecoNode.dropped++;
|
|
1167
1174
|
if (underCap) { droppedKeys.add(key); allNames.add(e.name); } else exactVanished = false;
|
|
1168
1175
|
} else {
|
package/src/monitor/webhook.js
CHANGED
|
@@ -1149,6 +1149,27 @@ function mcpTriageTag(a) {
|
|
|
1149
1149
|
return sigs.length ? ` 🔌 [MCP: ${sigs.join(', ')}]` : ' 🔌 [MCP]';
|
|
1150
1150
|
}
|
|
1151
1151
|
|
|
1152
|
+
/**
 * Build the text of the "Stability" field of the daily report.
 *
 * The first line always carries restarts, temporal load-shed and queue
 * hard-drop counters. A second "Spill" line (spilled / drained / backlog size)
 * is added only when the disk waiting list is enabled — backlog size is THE
 * convergence signal of the spill rollout (it must oscillate around 0 across
 * days; monotonic growth = drain capacity too low, raise concurrency).
 *
 * The spill lookup is best-effort: if loading or querying `./spill.js` throws,
 * the spill line is omitted and the rest of the field is still produced. The
 * worker-OOM counter does not depend on the spill module, so it is appended
 * outside the guarded section and survives a spill read failure.
 *
 * @param {object} stats in-memory daily counters; missing counters render as 0
 * @returns {string} the field value (one or two lines)
 */
function _stabilityFieldValue(stats) {
  let v = `Restarts (24h): ${stats.restartsToday || 0} | Temporal load-shed: ${stats.temporalLoadShed || 0} | Queue hard-drops: ${stats.queueHardDrops || 0}`;
  try {
    // Lazy require: keeps the spill module optional for the report path.
    const { isSpillEnabled, getBacklogSize } = require('./spill.js');
    if (isSpillEnabled()) {
      v += `\nSpill: ${stats.spilled || 0} spilled | ${stats.spillDrained || 0} drained | backlog ${getBacklogSize()}`;
    }
  } catch { /* best-effort: a spill read failure only drops the spill line */ }
  // Appended to whichever line is last (spill line when present, else the first).
  if (stats.workerOom) v += ` | worker OOM: ${stats.workerOom}`;
  return v;
}
|
|
1172
|
+
|
|
1152
1173
|
function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
|
|
1153
1174
|
// Use in-memory stats (accumulated since last reset, restored from disk on restart)
|
|
1154
1175
|
// instead of disk-based daily entries which can undercount due to UTC/Paris date mismatch
|
|
@@ -1307,7 +1328,7 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
|
|
|
1307
1328
|
...((stats.sandboxDeferred || stats.deferredProcessed || stats.deferredExpired)
|
|
1308
1329
|
? [{ name: 'Deferred Sandbox', value: `Enqueued: ${stats.sandboxDeferred || 0} | Processed: ${stats.deferredProcessed || 0} | Expired: ${stats.deferredExpired || 0}`, inline: false }]
|
|
1309
1330
|
: []),
|
|
1310
|
-
{ name: 'Stability', value:
|
|
1331
|
+
{ name: 'Stability', value: _stabilityFieldValue(stats), inline: false },
|
|
1311
1332
|
...(ledgerField ? [ledgerField] : []),
|
|
1312
1333
|
{ name: 'System', value: healthText, inline: false }
|
|
1313
1334
|
],
|
|
@@ -49,6 +49,31 @@ const {
|
|
|
49
49
|
containsDecodePattern,
|
|
50
50
|
resolveNumericExpression
|
|
51
51
|
} = require('./helpers.js');
|
|
52
|
+
const { countInvisibleUnicode } = require('../../shared/unicode-invisibles.js');
|
|
53
|
+
const { classifyMcpWrite } = require('./mcp-write-classifier.js');
|
|
54
|
+
const { isShadowEnabled, recordShadowDivergence } = require('../../shared/shadow.js');
|
|
55
|
+
|
|
56
|
+
/**
 * Shadow-mode 3-tier classification for mcp_config_injection emissions
 * (call sites tag themselves through `rule`, e.g. 'R5' / 'R5b').
 *
 * When shadow mode is on, the written content is classified with
 * `classifyMcpWrite`. Only the `template` class is logged, as a would-be
 * CRITICAL → MEDIUM divergence; `shell_exec` and `instruction_injection` would
 * keep CRITICAL, so there is nothing to record for them.
 *
 * This function returns nothing and never throws: the threat the caller emits
 * (and its live CRITICAL severity) is untouched. The package identity is not
 * available at AST level, so the evidence carries the file instead.
 *
 * @param {string|null|undefined} contentStr statically extracted write content
 * @param {string} checkPath destination path recorded in the evidence
 * @param {string} rule identifier of the emitting rule, recorded in the evidence
 * @param {object} ctx analysis context; only `ctx.relFile` is read
 * @returns {void}
 */
function _shadowClassifyMcpWrite(contentStr, checkPath, rule, ctx) {
  try {
    if (isShadowEnabled()) {
      const verdict = classifyMcpWrite(contentStr, checkPath);
      // Only a template-class write would change the verdict.
      if (verdict.cls === 'template') {
        const evidence = {
          cls: verdict.cls,
          signals: verdict.signals,
          path: checkPath,
          rule,
          file: ctx.relFile
        };
        recordShadowDivergence({
          detector: 'mcp_config_injection_3tier',
          oldVerdict: 'CRITICAL',
          newVerdict: 'MEDIUM',
          evidence
        });
      }
    }
  } catch { /* shadow must never affect the scan */ }
}
|
|
52
77
|
|
|
53
78
|
/**
|
|
54
79
|
* Detect whether an AST node points at a user-level filesystem location:
|
|
@@ -756,6 +781,10 @@ function handleCallExpression(node, ctx) {
|
|
|
756
781
|
? MCP_CONTENT_PATTERNS.some(p => contentStr.includes(p.replace(/"/g, '')))
|
|
757
782
|
: isSensitiveConfigFile; // dynamic content only suspicious for known config files
|
|
758
783
|
if (hasContentPattern) {
|
|
784
|
+
// SHADOW 3-tier classification (zero effect on the emitted severity):
|
|
785
|
+
// template-class writes are the scaffolder FP under adjudication —
|
|
786
|
+
// log the would-be CRITICAL→MEDIUM divergence for `shadow-report`.
|
|
787
|
+
_shadowClassifyMcpWrite(contentStr, mcpCheckPath, 'R5', ctx);
|
|
759
788
|
ctx.threats.push({
|
|
760
789
|
type: 'mcp_config_injection',
|
|
761
790
|
severity: 'CRITICAL',
|
|
@@ -780,11 +809,20 @@ function handleCallExpression(node, ctx) {
|
|
|
780
809
|
const contentStr2 = extractStringValue(contentArg2);
|
|
781
810
|
const hasShellContent = !!contentStr2 && /(?:curl|wget)\s+[^\n]*\|\s*(?:sh|bash|zsh)\b|\beval\s*\(|\bsh\s+-c\s+|\bbash\s+-c\s+|\bnode\s+-e\s+/i.test(contentStr2);
|
|
782
811
|
const hasInjectionInstruction = !!contentStr2 && /IMPORTANT[:\s]+(?:before|after|run|execute)|do\s+not\s+(?:display|show|mention)|always\s+run/i.test(contentStr2);
|
|
783
|
-
|
|
812
|
+
// 3d (additive, v2.11.91): zero-width/bidi Unicode in the written
|
|
813
|
+
// content — the TrapDoor hidden-instruction encoding (Socket,
|
|
814
|
+
// 2026-05-25: instructions invisible in an editor, word-broken so
|
|
815
|
+
// the 3b/3c plain-text regexes can't match). A legitimate generator
|
|
816
|
+
// never emits invisible codepoints into a rules file. Strictly
|
|
817
|
+
// additive: can only ADD detections to the 3a/3b/3c OR.
|
|
818
|
+
const hasInvisibleContent = !!contentStr2 && countInvisibleUnicode(contentStr2) > 0;
|
|
819
|
+
if (hasUserLevelPath || hasShellContent || hasInjectionInstruction || hasInvisibleContent) {
|
|
784
820
|
const reasons = [];
|
|
785
821
|
if (hasUserLevelPath) reasons.push('user-level destination (homedir/cwd/env.HOME)');
|
|
786
822
|
if (hasShellContent) reasons.push('shell command in content');
|
|
787
823
|
if (hasInjectionInstruction) reasons.push('AI prompt-injection instruction in content');
|
|
824
|
+
if (hasInvisibleContent) reasons.push('zero-width/bidi Unicode in content (hidden-instruction encoding)');
|
|
825
|
+
_shadowClassifyMcpWrite(contentStr2, mcpCheckPath, 'R5b', ctx);
|
|
788
826
|
ctx.threats.push({
|
|
789
827
|
type: 'mcp_config_injection',
|
|
790
828
|
severity: 'CRITICAL',
|
|
@@ -2047,4 +2085,6 @@ function handleCallExpression(node, ctx) {
|
|
|
2047
2085
|
}
|
|
2048
2086
|
|
|
2049
2087
|
|
|
2050
|
-
|
|
2088
|
+
// _shadowClassifyMcpWrite is shared with handle-post-walk.js (the Wave-4
|
|
2089
|
+
// keyword-co-occurrence emitter — the third mcp_config_injection site).
|
|
2090
|
+
module.exports = { handleCallExpression, _shadowClassifyMcpWrite };
|
|
@@ -274,6 +274,19 @@ function handlePostWalk(ctx) {
|
|
|
274
274
|
|
|
275
275
|
// Wave 4: MCP content keywords in file with writeFileSync = MCP injection signal
|
|
276
276
|
if (ctx.hasMcpContentKeywords && !ctx.threats.some(t => t.type === 'mcp_config_injection')) {
|
|
277
|
+
// SHADOW 3-tier classification (zero effect on the emitted severity). The
|
|
278
|
+
// 2026-06-11 backtest showed this keyword-co-occurrence rule emits ~85% of
|
|
279
|
+
// historical mcp_config_injection alerts (100/118 packages) — every
|
|
280
|
+
// legitimate MCP server installer carries mcpServers keywords + writes —
|
|
281
|
+
// so the adjudication MUST cover this site, not just R5/R5b. The classifier
|
|
282
|
+
// runs on the FILE source (the written content is not extractable here):
|
|
283
|
+
// a file whose code carries shell-exec or hidden-instruction markers keeps
|
|
284
|
+
// CRITICAL silently; an inert config-writer logs the CRITICAL→MEDIUM
|
|
285
|
+
// candidate divergence, tagged rule:'W4' so the report splits it out.
|
|
286
|
+
try {
|
|
287
|
+
const { _shadowClassifyMcpWrite } = require('./handle-call-expression.js');
|
|
288
|
+
_shadowClassifyMcpWrite(typeof ctx._content === 'string' ? ctx._content : null, '(file-level keyword co-occurrence)', 'W4', ctx);
|
|
289
|
+
} catch { /* shadow must never affect the scan */ }
|
|
277
290
|
ctx.threats.push({
|
|
278
291
|
type: 'mcp_config_injection',
|
|
279
292
|
severity: 'CRITICAL',
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* mcp-write-classifier.js — pure 3-tier classifier for mcp_config_injection
|
|
5
|
+
* candidates (SHADOW adjudication + the future severity flip).
|
|
6
|
+
*
|
|
7
|
+
* Empirical classes (web research 2026-06-11, calibrated on real campaigns):
|
|
8
|
+
* (a) template — write with inert content: the scaffolder shape
|
|
9
|
+
* (ruler, rulesync, cursor-rules, cursor-tools all legitimately write
|
|
10
|
+
* .cursorrules/CLAUDE.md/AGENTS.md). Candidate MEDIUM after adjudication.
|
|
11
|
+
* (b) shell_exec — content carries a shell command or an
|
|
12
|
+
* agent-hook exec (SafeDep campaign, 2026-05-13: .claude/settings.json
|
|
13
|
+
* SessionStart hook → ELF). Stays CRITICAL.
|
|
14
|
+
* (c) instruction_injection — content carries hidden instructions: zero-
|
|
15
|
+
* width/bidi Unicode (TrapDoor encoding — Socket 2026-05-25; GitHub
|
|
16
|
+
* flags the same) or agent-addressed directives ("do not tell the
|
|
17
|
+
* user…"). Stays CRITICAL.
|
|
18
|
+
*
|
|
19
|
+
* The classifier is PURE (no I/O, no ctx) so it is unit-testable per class and
|
|
20
|
+
* is exactly what gets promoted when the flip lands. Until then it feeds the
|
|
21
|
+
* shadow log: oldVerdict CRITICAL vs newVerdict (template→MEDIUM).
|
|
22
|
+
*
|
|
23
|
+
* Honest default: content that cannot be extracted statically classifies as
|
|
24
|
+
* `template` with signal `dynamic_content` — we don't know, so the shadow
|
|
25
|
+
* numbers must not pretend we do. (The live R5/R5b severity is unaffected
|
|
26
|
+
* either way — this module emits no threats.)
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
const { countInvisibleUnicode } = require('../../shared/unicode-invisibles.js');
|
|
30
|
+
|
|
31
|
+
// (c) — agent-addressed directives. Superset of the live R5b 3c regex
// (IMPORTANT/do-not-display/always-run) with the additions calibrated on the
// Rules-File-Backdoor / Mini-Shai-Hulud wording. Case-insensitive, unanchored
// alternation. Note that "important: run tests before committing" DOES match —
// by design: that wording addressed to an agent IS the attack shape, and the
// benign/malicious distinction is made by the write target, which the caller
// has already gated on.
const INJECTION_DIRECTIVE_RE = /IMPORTANT[:\s]+(?:before|after|run|execute)|do\s+not\s+(?:display|show|mention|tell)|never\s+(?:mention|reveal|disclose)|hide\s+this\s+from|always\s+run/i;

// (b) — shell command in content. Same expression as the live R5b 3b gate.
const SHELL_CONTENT_RE = /(?:curl|wget)\s+[^\n]*\|\s*(?:sh|bash|zsh)\b|\beval\s*\(|\bsh\s+-c\s+|\bbash\s+-c\s+|\bnode\s+-e\s+/i;

// (b) — agent-hook exec in JSON content: a "hooks" key and a "command" key
// within 400 characters of each other, in either order (the SafeDep
// .claude/settings.json SessionStart shape). Textual containment is used
// instead of a strict JSON parse because the content is attacker-controlled
// config: a parse would be evadable with trailing garbage.
const HOOKS_COMMAND_RE = /"hooks"[\s\S]{0,400}"command"|"command"[\s\S]{0,400}"hooks"/;

/**
 * Classify the content of an MCP/agent config write into one of three tiers.
 *
 * Tiers are evaluated in priority order and the first one with any signal wins:
 *   1. instruction_injection — invisible Unicode and/or an agent-addressed directive
 *   2. shell_exec            — a shell command and/or a hooks+command JSON shape
 *   3. template              — nothing suspicious found (empty signal list)
 *
 * Content that is not a string (null/undefined = dynamic, not statically
 * extractable) is reported as `template` with the single signal
 * `dynamic_content`.
 *
 * @param {string|null|undefined} contentStr statically-extracted write content
 * @param {string} [checkPath] lowercased destination path (reserved for future
 *   signals; not used for class decision today)
 * @returns {{cls: 'template'|'shell_exec'|'instruction_injection', signals: string[]}}
 */
function classifyMcpWrite(contentStr, checkPath) { // eslint-disable-line no-unused-vars
  // typeof also covers null and undefined.
  if (typeof contentStr !== 'string') {
    return { cls: 'template', signals: ['dynamic_content'] };
  }

  // Tier (c): hidden or agent-addressed instructions.
  const injectionSignals = [];
  if (countInvisibleUnicode(contentStr) > 0) {
    injectionSignals.push('zero_width_unicode');
  }
  if (INJECTION_DIRECTIVE_RE.test(contentStr)) {
    injectionSignals.push('injection_directive');
  }
  if (injectionSignals.length > 0) {
    return { cls: 'instruction_injection', signals: injectionSignals };
  }

  // Tier (b): something that executes.
  const execSignals = [];
  if (SHELL_CONTENT_RE.test(contentStr)) {
    execSignals.push('shell_command');
  }
  if (HOOKS_COMMAND_RE.test(contentStr)) {
    execSignals.push('hooks_command_json');
  }
  if (execSignals.length > 0) {
    return { cls: 'shell_exec', signals: execSignals };
  }

  // Tier (a): inert content.
  return { cls: 'template', signals: [] };
}
|
|
70
|
+
|
|
71
|
+
module.exports = { classifyMcpWrite, INJECTION_DIRECTIVE_RE, SHELL_CONTENT_RE, HOOKS_COMMAND_RE };
|
package/src/scanner/ast.js
CHANGED
|
@@ -111,6 +111,10 @@ function analyzeFile(content, filePath, basePath) {
|
|
|
111
111
|
const ctx = {
|
|
112
112
|
threats,
|
|
113
113
|
relFile: path.relative(basePath, filePath),
|
|
114
|
+
// File source reference for the post-walk shadow classifier (Wave-4 MCP
|
|
115
|
+
// site has no extractable written-content string — it classifies the file).
|
|
116
|
+
// A reference to the already-held string: no copy, freed with the ctx.
|
|
117
|
+
_content: content,
|
|
114
118
|
dynamicRequireVars: new Set(),
|
|
115
119
|
staticAssignments: new Set(),
|
|
116
120
|
// v2.10.73 P2: AST-006 source qualification — tracks WHERE a variable's value came from.
|