muaddib-scanner 2.11.98 → 2.11.99

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.98",
3
+ "version": "2.11.99",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-06-11T15:36:15.399Z",
3
+ "timestamp": "2026-06-11T16:29:51.081Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -0,0 +1,161 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Heavy-lane semaphore (C2, 2026-06-11) — bound the daemon's RSS by limiting
5
+ * how many MEMORY-heavy static scans run concurrently.
6
+ *
7
+ * Measured (worker-mem.jsonl, n=461 workers): per-worker isolate heap peaks
8
+ * are BIMODAL — p50 = 12MB, but 12.6% of scans jump straight to 0.9-2.1GB
9
+ * (giant minified JS bundles; the AST cache accumulates across every parsable
10
+ * file, executor.js only skips files > getMaxFileSize() individually). With
11
+ * 8 concurrent workers a handful of heavies coincide → process RSS > the
12
+ * 8.5GB breaker → EMERGENCY. The heavies are identifiable BEFORE the worker
13
+ * spawns (total parsable-JS bytes on disk), so instead of killing them (a
14
+ * 768MB worker cap would cost 12% of coverage) we serialize them: at most
15
+ * MUADDIB_HEAVY_SCAN_MAX run at once, lights are NEVER blocked.
16
+ * Worst-case RSS ≈ baseline 2GB + 2×2GB heavies + N×12MB lights ≈ 5-6GB.
17
+ *
18
+ * Same {active, queue[]} semaphore pattern as src/shared/http-limiter.js and
19
+ * the sandbox slots (src/sandbox/index.js), plus two extensions those never
20
+ * needed: an abort-aware acquire and a wait-timeout. Both MUST remove their
21
+ * waiter from the queue on the way out — a release would otherwise hand the
22
+ * slot to a dead waiter and leak it permanently.
23
+ */
24
+
25
+ // Max number of HEAVY_LANE_WAIT_TIMEOUT requeues before an item's final pass
26
+ // runs without the wait bound (abort-aware only, still bounded by the outer
27
+ // SCAN_TIMEOUT_MS). Guarantees an item cannot loop in the queue forever.
28
+ const HEAVY_REQUEUE_MAX = 3;
29
+
30
+ // Env knobs (read at call time so tests can flip them around resetHeavyLane()):
31
+ // - MUADDIB_HEAVY_SCAN_MAX: concurrent heavy scans (default 2, 0 = lane off)
32
+ // - MUADDIB_HEAVY_SCAN_BYTES: heavy threshold on total parsable-JS bytes.
33
+ // Default 3 MiB — the measured distribution has a HOLE between light
34
+ // (≤12MB heap ⇔ <~1MB JS) and heavy (≥512MB heap ⇔ ≥~8MB JS); 3 MiB sits
35
+ // in the hole with ~3× margin both ways. A false-heavy costs a short wait;
36
+ // a false-light risks an EMERGENCY — hence the deliberately low default.
37
+ // - MUADDIB_HEAVY_WAIT_MAX_MS: wait bound before requeue (default 120s —
38
+ // ~2.5 slot services of 45s, leaves >150s of the 300s scan budget).
39
/**
 * Read MUADDIB_HEAVY_SCAN_MAX: how many heavy scans may run concurrently.
 *
 * The value is parsed strictly — the whole string must be a non-negative
 * integer. parseInt() would silently truncate malformed input ("0.5" → 0,
 * which turns the lane off; "4 workers" → 4), so anything that is not a clean
 * integer falls back to the default instead.
 *
 * @returns {number} configured maximum (0 = lane disabled), default 2
 */
function heavyScanMax() {
  const raw = process.env.MUADDIB_HEAVY_SCAN_MAX;
  // Number('') is 0, so an unset/blank variable must be rejected explicitly.
  const parsed = typeof raw === 'string' && raw.trim() !== '' ? Number(raw) : NaN;
  return Number.isInteger(parsed) && parsed >= 0 ? parsed : 2;
}
43
+
44
/**
 * Read MUADDIB_HEAVY_SCAN_BYTES: the total parsable-JS byte count at or above
 * which a scan is classified heavy.
 *
 * The value is parsed strictly as a positive integer. parseInt() would read
 * "3e6" as 3 and "8MB" as 8, which would classify virtually every package as
 * heavy; such values are now either honoured in full ("3e6" → 3000000) or
 * rejected in favour of the default.
 *
 * @returns {number} threshold in bytes, default 3 MiB
 */
function heavyScanBytesThreshold() {
  const raw = process.env.MUADDIB_HEAVY_SCAN_BYTES;
  // Number('') is 0, so an unset/blank variable must be rejected explicitly.
  const parsed = typeof raw === 'string' && raw.trim() !== '' ? Number(raw) : NaN;
  return Number.isInteger(parsed) && parsed > 0 ? parsed : 3 * 1024 * 1024;
}
48
+
49
/**
 * Read MUADDIB_HEAVY_WAIT_MAX_MS: how long a queued heavy scan waits for a
 * slot before the wait is abandoned.
 *
 * The value is parsed strictly as a non-negative integer. parseInt() would
 * read "1e5" as 1 ms and "120s" as 120 ms, producing near-instant wait
 * timeouts; such values are now either honoured in full or rejected in favour
 * of the default.
 *
 * @returns {number} wait bound in milliseconds (0 is accepted), default 120000
 */
function heavyWaitMaxMs() {
  const raw = process.env.MUADDIB_HEAVY_WAIT_MAX_MS;
  // Number('') is 0, so an unset/blank variable must be rejected explicitly.
  const parsed = typeof raw === 'string' && raw.trim() !== '' ? Number(raw) : NaN;
  return Number.isInteger(parsed) && parsed >= 0 ? parsed : 120000;
}
53
+
54
+ const _lane = { active: 0, queue: [] };
55
+
56
/**
 * Decide whether a measured package belongs in the heavy lane.
 *
 * - no measurement at all (null/undefined) → not heavy
 * - measurement flagged `truncated` → heavy, regardless of the byte count
 * - otherwise heavy when totalJsBytes (missing counts as 0) reaches the threshold
 *
 * @param {{totalJsBytes: number, truncated: boolean}|null} weight
 * @param {number} [thresholdBytes] - defaults to heavyScanBytesThreshold()
 * @returns {boolean}
 */
function isHeavyScan(weight, thresholdBytes = heavyScanBytesThreshold()) {
  if (!weight) {
    return false;
  }
  const measuredBytes = weight.totalJsBytes || 0;
  return weight.truncated ? true : measuredBytes >= thresholdBytes;
}
68
+
69
/**
 * Acquire a heavy-lane slot.
 *
 * Resolves `true` once a slot is held — immediately when one is free,
 * otherwise when releaseHeavySlot() hands one over (FIFO). Resolves `false`
 * when the lane is disabled (heavyScanMax() === 0); nothing is held in that
 * case, so there is nothing to release. The signal and the wait bound are only
 * consulted when the call actually has to queue.
 *
 * @param {Object} [opts]
 * @param {AbortSignal} [opts.signal] - aborting while queued rejects with err.code='ABORT_ERR'
 * @param {number} [opts.maxWaitMs] - wait bound in ms; 0/absent/non-finite = unbounded.
 *   On expiry rejects with err.code='HEAVY_LANE_WAIT_TIMEOUT'. Bounds larger
 *   than the biggest delay setTimeout supports are clamped to that maximum.
 * @returns {Promise<boolean>}
 */
function acquireHeavySlot(opts = {}) {
  // Largest delay setTimeout honours; anything above is coerced to 1ms by
  // Node, which would turn a "very long" bound into an immediate timeout.
  const MAX_TIMER_DELAY_MS = 2147483647;

  const max = heavyScanMax();
  if (max === 0) return Promise.resolve(false);
  if (_lane.active < max) {
    _lane.active++;
    return Promise.resolve(true);
  }
  // `opts || {}` keeps an explicit null from throwing during destructuring.
  const { signal, maxWaitMs } = opts || {};
  return new Promise((resolve, reject) => {
    let timer = null;
    const cleanup = () => {
      if (timer) { clearTimeout(timer); timer = null; }
      if (signal) { try { signal.removeEventListener('abort', onAbort); } catch { /* not added */ } }
    };
    // Called by releaseHeavySlot: the slot is transferred, active is unchanged.
    const waiter = () => {
      cleanup();
      resolve(true);
    };
    // Leaving the queue WITHOUT being woken: splice the waiter out, otherwise
    // the next release would hand the slot to this dead waiter and leak it.
    const bail = (err) => {
      const i = _lane.queue.indexOf(waiter);
      if (i === -1) return; // already woken — the release path owns the slot
      _lane.queue.splice(i, 1);
      cleanup();
      reject(err);
    };
    const onAbort = () => {
      const err = new Error('Heavy-lane wait aborted (outer scan timeout)');
      err.code = 'ABORT_ERR';
      bail(err);
    };
    // Push BEFORE wiring abort/timeout: bail() only rejects when it finds the
    // waiter in the queue, so a pre-aborted signal handled before the push
    // would hit the index guard and leave the promise pending forever.
    _lane.queue.push(waiter);
    if (signal) {
      if (signal.aborted) { onAbort(); return; }
      signal.addEventListener('abort', onAbort, { once: true });
    }
    if (Number.isFinite(maxWaitMs) && maxWaitMs > 0) {
      const delay = Math.min(maxWaitMs, MAX_TIMER_DELAY_MS);
      // Deliberately NOT unref'd: a pending acquire is active work and must
      // keep the process alive.
      timer = setTimeout(() => {
        const err = new Error(`Heavy-lane slot not acquired within ${delay}ms`);
        err.code = 'HEAVY_LANE_WAIT_TIMEOUT';
        bail(err);
      }, delay);
    }
  });
}
132
+
133
/**
 * Give back a heavy-lane slot.
 *
 * When someone is queued, the oldest waiter is woken and inherits the slot, so
 * the active count stays as it is. With an empty queue the active count is
 * decremented, never below zero.
 */
function releaseHeavySlot() {
  const { queue } = _lane;
  if (queue.length === 0) {
    if (_lane.active > 0) {
      _lane.active -= 1;
    }
    return;
  }
  const wakeNext = queue.shift();
  wakeNext();
}
141
+
142
/**
 * Snapshot of the lane for logging/metrics.
 *
 * @returns {{active: number, waiting: number, max: number}} slots currently
 *   held, number of queued waiters, and the configured maximum
 */
function getHeavyLaneState() {
  const { active, queue } = _lane;
  const waiting = queue.length;
  return { active, waiting, max: heavyScanMax() };
}
145
+
146
/**
 * Test helper: put the lane back into its initial state — no active slots and
 * an empty queue. The queue array is emptied in place (same array object);
 * queued waiters are dropped without being settled.
 */
function resetHeavyLane() {
  _lane.queue.splice(0, _lane.queue.length);
  _lane.active = 0;
}
151
+
152
+ module.exports = {
153
+ acquireHeavySlot,
154
+ releaseHeavySlot,
155
+ isHeavyScan,
156
+ getHeavyLaneState,
157
+ resetHeavyLane,
158
+ heavyScanBytesThreshold,
159
+ heavyWaitMaxMs,
160
+ HEAVY_REQUEUE_MAX
161
+ };
@@ -13,13 +13,14 @@ const { Worker } = require('worker_threads');
13
13
  const { runSandbox, tryAcquireSandboxSlot } = require('../sandbox/index.js');
14
14
  const { sendWebhook } = require('../webhook.js');
15
15
  const { downloadToFile, extractArchive, sanitizePackageName } = require('../shared/download.js');
16
- const { MAX_TARBALL_SIZE } = require('../shared/constants.js');
16
+ const { MAX_TARBALL_SIZE, getMaxFileSize } = require('../shared/constants.js');
17
17
  const { acquireRegistrySlot, releaseRegistrySlot } = require('../shared/http-limiter.js');
18
18
  const { loadCachedIOCs } = require('../ioc/updater.js');
19
19
  const { scanPackageJson } = require('../scanner/package.js');
20
20
  const { scanShellScripts } = require('../scanner/shell.js');
21
21
  const { buildTrainingRecord } = require('../ml/feature-extractor.js');
22
22
  const { appendWorkerMem } = require('./worker-mem.js');
23
+ const { acquireHeavySlot, releaseHeavySlot, isHeavyScan, getHeavyLaneState, heavyWaitMaxMs, HEAVY_REQUEUE_MAX } = require('./heavy-lane.js');
23
24
  const { appendRecord: appendTrainingRecord, relabelRecords } = require('../ml/jsonl-writer.js');
24
25
 
25
26
  // From ./state.js
@@ -305,6 +306,60 @@ function countPackageFiles(dir) {
305
306
  return { fileCountTotal, hasTests };
306
307
  }
307
308
 
309
+ // C2 heavy-lane measurement bounds. Distinct from countPackageFiles (whose
310
+ // depth cap of 5 is an ML-feature contract — do not touch it).
311
+ const JS_WEIGHT_MAX_DEPTH = 8;
312
+ const JS_WEIGHT_MAX_FILES = 2000;
313
+ const JS_WEIGHT_FILE_PATTERN = /\.(?:[cm]?js|[jt]sx?)$/i;
314
+
315
/**
 * Measure how much parsable JS a package carries — the heavy-lane
 * classification signal.
 *
 * Sums the on-disk size of every file whose name matches
 * JS_WEIGHT_FILE_PATTERN, skipping directories listed in ML_EXCLUDED_DIRS and
 * files larger than getMaxFileSize() (per the design notes, the executor skips
 * those individually, so they never contribute to the AST cache).
 *
 * The walk is bounded. `truncated: true` is returned when the measurement is
 * incomplete, which isHeavyScan classifies heavy by default:
 *   - a directory deeper than JS_WEIGHT_MAX_DEPTH was reached,
 *   - more than JS_WEIGHT_MAX_FILES matching files were seen,
 *   - a directory could not be listed. Previously this was silently ignored
 *     and could report "0 bytes, complete" for a package that was never
 *     measured, i.e. classify an unmeasurable package as light.
 * A single file that cannot be stat'ed is skipped without marking the result
 * truncated.
 *
 * @param {string} dir - extracted package directory
 * @returns {{ totalJsBytes: number, maxJsFileBytes: number, truncated: boolean }}
 */
function measureJsWeight(dir) {
  const perFileCap = getMaxFileSize();
  let totalJsBytes = 0;
  let maxJsFileBytes = 0;
  let seen = 0;
  let truncated = false;

  function walk(current, depth) {
    if (truncated) return;
    if (depth > JS_WEIGHT_MAX_DEPTH) { truncated = true; return; }

    let entries;
    try {
      entries = fs.readdirSync(current, { withFileTypes: true });
    } catch {
      // Unlistable directory: the measurement is incomplete, so say so rather
      // than under-reporting the weight.
      truncated = true;
      return;
    }

    for (const entry of entries) {
      if (truncated) return;

      if (entry.isDirectory()) {
        if (!ML_EXCLUDED_DIRS.has(entry.name)) {
          walk(path.join(current, entry.name), depth + 1);
        }
        continue;
      }
      if (!entry.isFile() || !JS_WEIGHT_FILE_PATTERN.test(entry.name)) continue;

      seen += 1;
      if (seen > JS_WEIGHT_MAX_FILES) { truncated = true; return; }

      let size;
      try {
        size = fs.statSync(path.join(current, entry.name)).size;
      } catch {
        continue; // file vanished or is unreadable — nothing to add
      }
      if (size > perFileCap) continue; // above the per-file cap — excluded from the sum
      totalJsBytes += size;
      if (size > maxJsFileBytes) maxJsFileBytes = size;
    }
  }

  walk(dir, 0);
  return { totalJsBytes, maxJsFileBytes, truncated };
}
362
+
308
363
  /**
309
364
  * Pure classifier: is this a prebuilt native-binary platform shard (the kind that
310
365
  * hangs the sandbox install and always times out INCONCLUSIVE)? No I/O — the parsed
@@ -435,6 +490,7 @@ function runScanInWorker(extractedDir, timeoutMs, scanContext = null, signal = n
435
490
  appendWorkerMem({
436
491
  ev: 'spawn', tid: _wmTid,
437
492
  name: _sc.name, version: _sc.version, ecosystem: _sc.ecosystem,
493
+ lane: _sc._lane, jsBytes: _sc._jsBytes,
438
494
  rss: process.memoryUsage().rss
439
495
  });
440
496
 
@@ -645,6 +701,18 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
645
701
  // ML Phase 2a: Count JS files and detect test presence for enriched features
646
702
  const { fileCountTotal, hasTests } = countPackageFiles(extractedDir);
647
703
 
704
+ // C2 heavy-lane classification (see heavy-lane.js header): measured on
705
+ // disk, after extraction — registry metadata is not trustworthy here.
706
+ // Measurement failure falls back to the compressed tarball size
707
+ // (conservative: never silently far under the real JS weight).
708
+ let jsWeight;
709
+ try {
710
+ jsWeight = measureJsWeight(extractedDir);
711
+ } catch {
712
+ jsWeight = { totalJsBytes: fileSize, maxJsFileBytes: 0, truncated: false };
713
+ }
714
+ const lane = isHeavyScan(jsWeight) ? 'heavy' : 'light';
715
+
648
716
  // Hoisted before the worker spawn (per-worker 429-storm fix): fetch the npm
649
717
  // registry metadata ONCE on the main thread. The shared http-limiter coordinates
650
718
  // it and the temporal cache is warm (npm-registry.js reads it first), so only
@@ -663,6 +731,28 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
663
731
  }
664
732
  }
665
733
 
734
+ // C2 heavy-lane: serialize the memory-heavy scans. Acquired AFTER the
735
+ // registry fetch above (never hold the slot during network I/O); released
736
+ // in the finally right after the static scan — the slot covers ONLY the
737
+ // worker's lifetime (≤ STATIC_SCAN_TIMEOUT_MS), not the sandbox (which
738
+ // has its own semaphore and runs outside the daemon's heap).
739
+ let heavySlotHeld = false;
740
+ if (lane === 'heavy') {
741
+ stats.heavyScans = (stats.heavyScans || 0) + 1;
742
+ const laneState = getHeavyLaneState();
743
+ if (laneState.max > 0 && laneState.active >= laneState.max) {
744
+ stats.heavyLaneWaits = (stats.heavyLaneWaits || 0) + 1;
745
+ console.log(`[MONITOR] HEAVY_LANE: ${name}@${version} waiting for a slot (${(jsWeight.totalJsBytes / 1024 / 1024).toFixed(1)}MB JS, active=${laneState.active}, waiting=${laneState.waiting})`);
746
+ }
747
+ // After HEAVY_REQUEUE_MAX requeues the final pass waits unbounded
748
+ // (abort-aware only, still under the outer SCAN_TIMEOUT_MS) so an item
749
+ // cannot loop in the queue forever.
750
+ const lastPass = (meta._heavyRetries || 0) >= HEAVY_REQUEUE_MAX;
751
+ const waitStart = Date.now();
752
+ heavySlotHeld = await acquireHeavySlot({ signal, maxWaitMs: lastPass ? 0 : heavyWaitMaxMs() });
753
+ stats.heavyLaneWaitMsTotal = (stats.heavyLaneWaitMsTotal || 0) + (Date.now() - waitStart);
754
+ }
755
+
666
756
  let result;
667
757
  try {
668
758
  // scanContext: feeds monitor-side info (name/version/ecosystem) and the
@@ -679,7 +769,12 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
679
769
  // Stage 2: set by processQueueItem when MUADDIB_TRIAGE_MODE=enforce.
680
770
  // Defaults to 'full' so any CLI/test caller that bypasses triage gets
681
771
  // the full 20-scanner pipeline (unchanged behaviour).
682
- scanMode: (meta && meta.scanMode) || 'full'
772
+ scanMode: (meta && meta.scanMode) || 'full',
773
+ // C2 observability: lane + JS weight flow into the worker-mem spawn
774
+ // event (runScanInWorker) so lane×heap-peak cross-checks are possible
775
+ // post-rollout (hard criterion: zero 'light' scans peaking >512MB).
776
+ _lane: lane,
777
+ _jsBytes: jsWeight.totalJsBytes
683
778
  };
684
779
  // Hand the main-thread-fetched metadata to the worker so its processor skips
685
780
  // the per-worker getPackageMetadata fetch (429-storm fix). npm only; the key
@@ -705,6 +800,10 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
705
800
  return { sandboxResult: null, staticClean: false };
706
801
  }
707
802
  throw staticErr;
803
+ } finally {
804
+ // Single release point — success, static timeout, EMERGENCY terminate
805
+ // and abort all funnel through here exactly once (heavySlotHeld guard).
806
+ if (heavySlotHeld) { releaseHeavySlot(); heavySlotHeld = false; }
708
807
  }
709
808
 
710
809
  // Phase 3 signal — agent-supply-chain lens. Pure observability, no scoring impact.
@@ -1285,6 +1384,11 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
1285
1384
  }
1286
1385
  }
1287
1386
  } catch (err) {
1387
+ // C2 heavy-lane: a wait-timeout is NOT a scan failure — processQueueItem
1388
+ // requeues the item (bounded by HEAVY_REQUEUE_MAX). Re-throw BEFORE any
1389
+ // error accounting: this catch otherwise swallows everything into the
1390
+ // 'scan_error' ledger path and the requeue would never happen.
1391
+ if (err && err.code === 'HEAVY_LANE_WAIT_TIMEOUT') throw err;
1288
1392
  recordError(err, stats);
1289
1393
  stats.scanned++;
1290
1394
  stats.totalTimeMs += Date.now() - startTime;
@@ -1376,6 +1480,22 @@ async function processQueueItem(item, stats, dailyAlerts, recentlyScanned, downl
1376
1480
  })
1377
1481
  ]);
1378
1482
  } catch (err) {
1483
+ // C2 heavy-lane: the bounded wait expired while the heavy slots were
1484
+ // saturated (typical under a spill-drain burst). Not a failure — put the
1485
+ // item back at the queue tail (natural backoff) up to HEAVY_REQUEUE_MAX
1486
+ // passes; scanPackage runs the final pass without the wait bound. Note:
1487
+ // _heavyRetries does not survive a spill (spillItems strips non-re-enqueue
1488
+ // fields) — acceptable, the spill drain runs in calm windows anyway.
1489
+ if (err && err.code === 'HEAVY_LANE_WAIT_TIMEOUT') {
1490
+ const decision = computeHeavyRequeue(item);
1491
+ if (decision.requeue) {
1492
+ stats.heavyLaneRequeues = (stats.heavyLaneRequeues || 0) + 1;
1493
+ console.log(`[MONITOR] HEAVY_LANE: requeued ${item.name}@${item.version || '?'} (wait-timeout pass ${decision.retries}/${HEAVY_REQUEUE_MAX})`);
1494
+ enqueueScan(scanQueue, item, stats);
1495
+ return;
1496
+ }
1497
+ // Safety net — should be unreachable (the last pass waits unbounded).
1498
+ }
1379
1499
  recordError(err, stats);
1380
1500
  console.error(`[MONITOR] Queue error for ${item.name}: ${err.message}`);
1381
1501
  // IOC fallback: if scan failed for a known malicious package, send P1 alert.
@@ -1450,6 +1570,18 @@ function computeWorkersToSpawn(targetConcurrency, activeWorkers, queueLength) {
1450
1570
  return Math.max(0, Math.min(targetConcurrency - activeWorkers, queueLength));
1451
1571
  }
1452
1572
 
1573
/**
 * Requeue decision for a heavy-lane wait-timeout.
 *
 * Increments item._heavyRetries in place (a missing counter starts at 0) and
 * reports whether the item should go back on the queue: yes while the new
 * count is at most HEAVY_REQUEUE_MAX, no once it exceeds it.
 *
 * @param {Object} item - queue item; its _heavyRetries field is updated
 * @returns {{requeue: boolean, retries: number}} decision and the updated count
 */
function computeHeavyRequeue(item) {
  const previousRetries = item._heavyRetries || 0;
  const retries = previousRetries + 1;
  item._heavyRetries = retries;
  const requeue = retries <= HEAVY_REQUEUE_MAX;
  return { requeue, retries };
}
1584
+
1453
1585
  // ── RSS-aware worker admission (P1 OOM durable fix) ──
1454
1586
  // The pressure breaker is reactive: it stops spawning at HIGH, but the workers already in
1455
1587
  // flight overshoot RSS by ~2GB (each isolate + gVisor sandbox ~0.55GB, draining up to
@@ -1795,7 +1927,10 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
1795
1927
  registryScripts: item.registryScripts || null,
1796
1928
  _cacheTrigger: item._cacheTrigger || null,
1797
1929
  fastTrack: item.fastTrack || false,
1798
- scanMode: effectiveScanMode
1930
+ scanMode: effectiveScanMode,
1931
+ // C2 heavy-lane: pass count set by computeHeavyRequeue — at
1932
+ // HEAVY_REQUEUE_MAX the final pass waits for its slot unbounded.
1933
+ _heavyRetries: item._heavyRetries || 0
1799
1934
  }, stats, dailyAlerts, recentlyScanned, downloadsCache, scanQueue, sandboxAvailable, signal);
1800
1935
  const sandboxResult = scanResult && scanResult.sandboxResult;
1801
1936
  const staticClean = scanResult && scanResult.staticClean;
@@ -1917,6 +2052,8 @@ module.exports = {
1917
2052
  isBundledToolingOnly,
1918
2053
  recordTrainingSample,
1919
2054
  countPackageFiles,
2055
+ measureJsWeight,
2056
+ computeHeavyRequeue,
1920
2057
  classifyNativeShard,
1921
2058
  shouldSkipSandbox,
1922
2059
  runScanInWorker,