muaddib-scanner 2.11.99 → 2.11.101

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.99",
3
+ "version": "2.11.101",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-06-11T16:29:51.081Z",
3
+ "timestamp": "2026-06-11T18:54:05.745Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -7,7 +7,7 @@ const AdmZip = require('adm-zip');
7
7
  const IOC_FILE = path.join(__dirname, 'data/iocs.json');
8
8
  const COMPACT_IOC_FILE = path.join(__dirname, 'data/iocs-compact.json');
9
9
  const HOME_IOC_FILE = path.join(os.homedir(), '.muaddib', 'data', 'iocs.json');
10
- const { generateCompactIOCs, NEVER_WILDCARD, expandCompactIOCs } = require('./updater.js');
10
+ const { generateCompactIOCs, NEVER_WILDCARD, expandCompactIOCs, writeLeanIOCFile } = require('./updater.js');
11
11
  const { Spinner } = require('../utils.js');
12
12
  const { NPM_PACKAGE_REGEX } = require('../shared/constants.js');
13
13
  const { version: PKG_VERSION } = require('../../package.json');
@@ -1274,6 +1274,11 @@ async function runScraper() {
1274
1274
  fs.writeFileSync(tmpCompactFile, JSON.stringify(compactIOCs));
1275
1275
  fs.renameSync(tmpCompactFile, COMPACT_IOC_FILE);
1276
1276
 
1277
+ // Save the lean projection (~24MB) in lock-step with the full file — what
1278
+ // scan workers load instead of the 223MB full (RSS fix). Built from the
1279
+ // in-memory object, so no extra parse peak. See updater.js:createLeanIOCs.
1280
+ writeLeanIOCFile(existingIOCs);
1281
+
1277
1282
  // Persist to ~/.muaddib/data/ (survives npm update)
1278
1283
  saveSpinner.update('Persisting to home directory...');
1279
1284
  const homeDir = path.dirname(HOME_IOC_FILE);
@@ -6,6 +6,14 @@ const crypto = require('crypto');
6
6
  const HOME_DATA_PATH = path.join(os.homedir(), '.muaddib', 'data');
7
7
  const CACHE_IOC_FILE = path.join(HOME_DATA_PATH, 'iocs.json');
8
8
  const LOCAL_IOC_FILE = path.join(__dirname, 'data/iocs.json');
9
+ // Lean projection of LOCAL_IOC_FILE — only the fields the matcher/alert read
10
+ // ({name,version,severity,source} + hashes/markers/files/stringIocs). The full
11
+ // file is ~223MB → 447MB string during JSON.parse, reloaded by every one-shot
12
+ // worker that touches IOC matching (heap-snapshot-confirmed ~900MB peak). The
13
+ // lean is ~24MB → ~50MB peak. Workers READ this; only the daemon/scraper write
14
+ // it (a worker must never re-read the 223MB full to regenerate — that is the
15
+ // very peak we are removing). See ensureLeanIOCFile + createLeanIOCs below.
16
+ const LOCAL_LEAN_FILE = path.join(__dirname, 'data/iocs-lean.json');
9
17
  const LOCAL_COMPACT_FILE = path.join(__dirname, 'data/iocs-compact.json');
10
18
  const { loadYAMLIOCs } = require('./yaml-loader.js');
11
19
 
@@ -241,7 +249,7 @@ function mergeIOCs(target, source) {
241
249
  // scan/poll) does zero disk I/O.
242
250
  const IOCS_DIR = path.join(__dirname, '..', '..', 'iocs');
243
251
  const IOC_SOURCE_FILES = [
244
- CACHE_IOC_FILE, LOCAL_IOC_FILE, LOCAL_COMPACT_FILE,
252
+ CACHE_IOC_FILE, LOCAL_IOC_FILE, LOCAL_LEAN_FILE, LOCAL_COMPACT_FILE,
245
253
  path.join(IOCS_DIR, 'packages.yaml'), path.join(IOCS_DIR, 'builtin.yaml'),
246
254
  path.join(IOCS_DIR, 'hashes.yaml'), path.join(IOCS_DIR, 'string-iocs.yaml')
247
255
  ];
@@ -279,8 +287,19 @@ function loadCachedIOCs() {
279
287
  stringIocs: Array.isArray(yamlIOCs.stringIocs) ? [...yamlIOCs.stringIocs] : []
280
288
  };
281
289
 
282
- // Priority 2a: Local scraped IOCs (full enriched file)
283
- if (fs.existsSync(LOCAL_IOC_FILE)) {
290
+ // Priority 2a: Local scraped IOCs. Prefer the lean projection (~24MB) — it
291
+ // carries every field the matcher/alert read. Only fall back to the full
292
+ // ~223MB file when the lean is absent (backward-compat / before the daemon
293
+ // has generated it), which costs the ~450MB parse peak. ensureLeanIOCFile()
294
+ // (called at daemon boot + after each scrape) keeps the lean present & fresh.
295
+ if (fs.existsSync(LOCAL_LEAN_FILE)) {
296
+ try {
297
+ const leanIOCs = JSON.parse(fs.readFileSync(LOCAL_LEAN_FILE, 'utf8'));
298
+ mergeIOCs(merged, leanIOCs);
299
+ } catch (e) {
300
+ console.log('[WARN] Failed to load lean IOC database (iocs-lean.json): ' + e.message);
301
+ }
302
+ } else if (fs.existsSync(LOCAL_IOC_FILE)) {
284
303
  try {
285
304
  const localIOCs = JSON.parse(fs.readFileSync(LOCAL_IOC_FILE, 'utf8'));
286
305
  mergeIOCs(merged, localIOCs);
@@ -471,6 +490,67 @@ const NEVER_WILDCARD_PYPI = new Set([
471
490
  'scipy', 'tensorflow', 'torch', 'fastapi', 'uvicorn'
472
491
  ]);
473
492
 
493
+ // Lean projection of a full IOC object: keep only the fields the matcher and
494
+ // the alert message read on package entries ({name,version,severity,source}),
495
+ // drop the enrichment (id/description/references/mitre/published/freshness/
496
+ // sources/confidence — never read after load; profiled). hashes/markers/files/
497
+ // stringIocs are simple values / small (YAML-sourced) and kept verbatim.
498
+ // Pure: no I/O. Used to write LOCAL_LEAN_FILE from an in-memory full object
499
+ // (zero extra parse peak) and by ensureLeanIOCFile.
500
+ function createLeanIOCs(fullIOCs) {
501
+ const leanPkg = p => ({ name: p.name, version: p.version, severity: p.severity, source: p.source });
502
+ return {
503
+ packages: (fullIOCs.packages || []).map(leanPkg),
504
+ pypi_packages: (fullIOCs.pypi_packages || []).map(leanPkg),
505
+ hashes: fullIOCs.hashes || [],
506
+ markers: fullIOCs.markers || [],
507
+ files: fullIOCs.files || [],
508
+ stringIocs: fullIOCs.stringIocs || [],
509
+ updated: fullIOCs.updated,
510
+ sources: fullIOCs.sources
511
+ };
512
+ }
513
+
514
+ // Ensure LOCAL_LEAN_FILE exists and is at least as fresh as LOCAL_IOC_FILE.
515
+ // Reads the 223MB full ONCE (the ~450MB parse peak) — acceptable only in a
516
+ // long-lived process (daemon boot); NEVER call from a one-shot scan worker.
517
+ // Atomic write (.tmp → rename). Returns {generated:boolean, bytes:number}.
518
+ function ensureLeanIOCFile() {
519
+ try {
520
+ if (!fs.existsSync(LOCAL_IOC_FILE)) return { generated: false, bytes: 0 };
521
+ let fresh = false;
522
+ if (fs.existsSync(LOCAL_LEAN_FILE)) {
523
+ try { fresh = fs.statSync(LOCAL_LEAN_FILE).mtimeMs >= fs.statSync(LOCAL_IOC_FILE).mtimeMs; } catch { fresh = false; }
524
+ }
525
+ if (fresh) return { generated: false, bytes: fs.statSync(LOCAL_LEAN_FILE).size };
526
+ const full = JSON.parse(fs.readFileSync(LOCAL_IOC_FILE, 'utf8'));
527
+ const lean = createLeanIOCs(full);
528
+ const tmp = LOCAL_LEAN_FILE + '.tmp';
529
+ const data = JSON.stringify(lean);
530
+ fs.writeFileSync(tmp, data);
531
+ fs.renameSync(tmp, LOCAL_LEAN_FILE);
532
+ return { generated: true, bytes: Buffer.byteLength(data) };
533
+ } catch (e) {
534
+ console.log('[WARN] ensureLeanIOCFile failed: ' + e.message);
535
+ return { generated: false, bytes: 0 };
536
+ }
537
+ }
538
+
539
+ // Write the lean file from an already-in-memory full object (zero extra parse
540
+ // peak). Called by the scraper right after it writes LOCAL_IOC_FILE so the
541
+ // lean stays in lock-step with the full after every deep scrape.
542
+ function writeLeanIOCFile(fullIOCs) {
543
+ try {
544
+ const tmp = LOCAL_LEAN_FILE + '.tmp';
545
+ fs.writeFileSync(tmp, JSON.stringify(createLeanIOCs(fullIOCs)));
546
+ fs.renameSync(tmp, LOCAL_LEAN_FILE);
547
+ return true;
548
+ } catch (e) {
549
+ console.log('[WARN] writeLeanIOCFile failed: ' + e.message);
550
+ return false;
551
+ }
552
+ }
553
+
474
554
  function generateCompactIOCs(fullIOCs) {
475
555
  const wildcards = [];
476
556
  const versioned = Object.create(null);
@@ -693,4 +773,4 @@ function verifyIOCHMAC(data, hmac) {
693
773
  }
694
774
  }
695
775
 
696
- module.exports = { updateIOCs, loadCachedIOCs, invalidateCache, generateCompactIOCs, expandCompactIOCs, mergeIOCs, createOptimizedIOCs, generateIOCHMAC, verifyIOCHMAC, checkIOCStaleness, NEVER_WILDCARD, NEVER_WILDCARD_PYPI };
776
+ module.exports = { updateIOCs, loadCachedIOCs, invalidateCache, generateCompactIOCs, expandCompactIOCs, createLeanIOCs, ensureLeanIOCFile, writeLeanIOCFile, LOCAL_LEAN_FILE, LOCAL_IOC_FILE, mergeIOCs, createOptimizedIOCs, generateIOCHMAC, verifyIOCHMAC, checkIOCStaleness, NEVER_WILDCARD, NEVER_WILDCARD_PYPI };
@@ -813,6 +813,18 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
813
813
  console.warn(`[Archive] Failed to start periodic cleanup: ${err.message}`);
814
814
  }
815
815
 
816
+ // RSS fix (C2): make sure the lean IOC projection exists & is fresh BEFORE any
817
+ // scan worker spawns. Workers load the ~24MB lean instead of the ~223MB full
818
+ // (heap-snapshot-confirmed ~900MB→~50MB per IOC-matching scan). The full read
819
+ // here is paid ONCE by this long-lived daemon (never by a one-shot worker).
820
+ try {
821
+ const { ensureLeanIOCFile } = require('../ioc/updater.js');
822
+ const r = ensureLeanIOCFile();
823
+ if (r.generated) console.log(`[MONITOR] IOC lean projection regenerated (${(r.bytes / 1024 / 1024).toFixed(1)}MB) — workers avoid the 223MB full load`);
824
+ } catch (err) {
825
+ console.warn(`[MONITOR] IOC lean bootstrap failed (workers fall back to full file): ${err.message}`);
826
+ }
827
+
816
828
  console.log('\n' + banner([
817
829
  "MUAD'DIB - Registry Monitor",
818
830
  'Scanning npm + PyPI new packages'
@@ -56,14 +56,21 @@ const _lane = { active: 0, queue: [] };
56
56
  /**
57
57
  * Pure classifier. `truncated` (the bounded measurement walk overflowed its
58
58
  * depth/file caps) classifies heavy by default — defensive: an unmeasurable
59
- * package is exactly the kind that blows a worker.
60
- * @param {{totalJsBytes: number, truncated: boolean}|null} weight
59
+ * package is exactly the kind that blows a worker. Compares weightedJsBytes
60
+ * (plain + ×12 minified — see measureJsWeight in queue.js: raw bytes alone
61
+ * missed the minified explosions, powerlines 517KB → 1151MB heap) and falls
62
+ * back to totalJsBytes for callers that don't weight. `oversize` (any single
63
+ * JS file > getMaxFileSize) also forces heavy — content scanners load such a
64
+ * file whole even though the AST skips it (omnius: a 30MB index.js → 1347MB).
65
+ * @param {{totalJsBytes: number, weightedJsBytes?: number, oversize?: boolean, truncated: boolean}|null} weight
61
66
  * @param {number} [thresholdBytes]
62
67
  */
63
68
  function isHeavyScan(weight, thresholdBytes = heavyScanBytesThreshold()) {
64
69
  if (!weight) return false;
65
70
  if (weight.truncated) return true;
66
- return (weight.totalJsBytes || 0) >= thresholdBytes;
71
+ if (weight.oversize) return true; // a single JS file > getMaxFileSize — content scanners load it whole
72
+ const effective = Number.isFinite(weight.weightedJsBytes) ? weight.weightedJsBytes : (weight.totalJsBytes || 0);
73
+ return effective >= thresholdBytes;
67
74
  }
68
75
 
69
76
  /**
@@ -311,6 +311,42 @@ function countPackageFiles(dir) {
311
311
  const JS_WEIGHT_MAX_DEPTH = 8;
312
312
  const JS_WEIGHT_MAX_FILES = 2000;
313
313
  const JS_WEIGHT_FILE_PATTERN = /\.(?:[cm]?js|[jt]sx?)$/i;
314
+ // Minified JS expands SUPER-linearly in the worker (live counter-examples
315
+ // from the 16:18 rollout, 2026-06-11: powerlines = 517KB JS of which 449KB
316
+ // minified → 1151MB heap, ~2300×; @lethevimlet/sshift = ~1.9MB minified →
317
+ // 1.38GB — both sailed under the raw-bytes threshold as 'light'). Plain
318
+ // source stays roughly linear (the 12MB-heap mode of the bimodal
319
+ // distribution). So minified bytes count ×12 toward the heavy threshold —
320
+ // ≥~256KB of minified JS crosses the 3MiB default. Detection: average line
321
+ // length over the probe window (JS_MINIFIED_PROBE_BYTES); plain code sits at 40-120 chars, minified
322
+ // bundles at 800+ (often a single line). 250 splits cleanly even when a
323
+ // license header pads the probe window.
324
+ const JS_MINIFIED_WEIGHT = 12;
325
+ const JS_MINIFIED_AVG_LINE = 250;
326
+ // 64KB, not 4KB: bike4mind sailed under the 4KB probe (a license/banner header
327
+ // padded the window; the minified body started later) → mis-classified light →
328
+ // 890MB heap. Probe a 64KB window from ~2KB in to skip any header and still
329
+ // never load a 30MB file. Cheap (one readSync) at JS_WEIGHT_MAX_FILES files.
330
+ const JS_MINIFIED_PROBE_OFFSET = 2048;
331
+ const JS_MINIFIED_PROBE_BYTES = 64 * 1024;
332
+
333
+ /** Probe a 64KB window of a file (never loads the rest) for minification. */
334
+ function probeIsMinified(filePath, size) {
335
+ let fd = null;
336
+ try {
337
+ fd = fs.openSync(filePath, 'r');
338
+ const offset = size > JS_MINIFIED_PROBE_OFFSET + JS_MINIFIED_PROBE_BYTES ? JS_MINIFIED_PROBE_OFFSET : 0;
339
+ const buf = Buffer.alloc(JS_MINIFIED_PROBE_BYTES);
340
+ const n = fs.readSync(fd, buf, 0, JS_MINIFIED_PROBE_BYTES, offset);
341
+ if (n <= 0) return false;
342
+ const head = buf.toString('utf8', 0, n);
343
+ return (head.length / head.split('\n').length) > JS_MINIFIED_AVG_LINE;
344
+ } catch {
345
+ return false;
346
+ } finally {
347
+ if (fd !== null) { try { fs.closeSync(fd); } catch { /* best-effort */ } }
348
+ }
349
+ }
314
350
 
315
351
  /**
316
352
  * Measure how much parsable JS a package carries — the heavy-lane
@@ -325,12 +361,24 @@ const JS_WEIGHT_FILE_PATTERN = /\.(?:[cm]?js|[jt]sx?)$/i;
325
361
  * Bounded walk; an overflow (depth/file caps) returns truncated:true, which
326
362
  * isHeavyScan classifies heavy by default.
327
363
  *
364
+ * weightedJsBytes = plain bytes + JS_MINIFIED_WEIGHT × minified bytes — the
365
+ * value isHeavyScan compares against the threshold (raw bytes alone missed
366
+ * the minified explosions, see JS_MINIFIED_WEIGHT above).
367
+ *
368
+ * `oversize` (any single JS file > getMaxFileSize) forces heavy: the AST
369
+ * executor skips such files, but the content scanners (entropy/hash/
370
+ * ioc-strings/deobfuscate) still readFileSync the whole thing — omnius
371
+ * (a 30MB dist/index.js, 39KB of other JS) blew a 'light' worker to 1347MB.
372
+ * So an oversize JS file is the STRONGEST heavy signal, not something to skip.
373
+ *
328
374
  * @param {string} dir - extracted package directory
329
- * @returns {{ totalJsBytes: number, maxJsFileBytes: number, truncated: boolean }}
375
+ * @returns {{ totalJsBytes: number, minifiedJsBytes: number, weightedJsBytes: number, maxJsFileBytes: number, oversize: boolean, truncated: boolean }}
330
376
  */
331
377
  function measureJsWeight(dir) {
332
378
  let totalJsBytes = 0;
379
+ let minifiedJsBytes = 0;
333
380
  let maxJsFileBytes = 0;
381
+ let oversize = false;
334
382
  let seen = 0;
335
383
  let truncated = false;
336
384
  const perFileCap = getMaxFileSize();
@@ -347,17 +395,24 @@ function measureJsWeight(dir) {
347
395
  walk(path.join(current, entry.name), depth + 1);
348
396
  } else if (entry.isFile() && JS_WEIGHT_FILE_PATTERN.test(entry.name)) {
349
397
  if (++seen > JS_WEIGHT_MAX_FILES) { truncated = true; return; }
398
+ const filePath = path.join(current, entry.name);
350
399
  let size;
351
- try { size = fs.statSync(path.join(current, entry.name)).size; } catch { continue; }
352
- if (size > perFileCap) continue; // executor skips these — they never reach the AST
353
- totalJsBytes += size;
400
+ try { size = fs.statSync(filePath).size; } catch { continue; }
354
401
  if (size > maxJsFileBytes) maxJsFileBytes = size;
402
+ if (size > perFileCap) {
403
+ // The AST skips it, but content scanners load it whole → heap blow-up.
404
+ oversize = true;
405
+ continue;
406
+ }
407
+ totalJsBytes += size;
408
+ if (probeIsMinified(filePath, size)) minifiedJsBytes += size;
355
409
  }
356
410
  }
357
411
  }
358
412
 
359
413
  walk(dir, 0);
360
- return { totalJsBytes, maxJsFileBytes, truncated };
414
+ const weightedJsBytes = (totalJsBytes - minifiedJsBytes) + JS_MINIFIED_WEIGHT * minifiedJsBytes;
415
+ return { totalJsBytes, minifiedJsBytes, weightedJsBytes, maxJsFileBytes, oversize, truncated };
361
416
  }
362
417
 
363
418
  /**
@@ -490,7 +545,7 @@ function runScanInWorker(extractedDir, timeoutMs, scanContext = null, signal = n
490
545
  appendWorkerMem({
491
546
  ev: 'spawn', tid: _wmTid,
492
547
  name: _sc.name, version: _sc.version, ecosystem: _sc.ecosystem,
493
- lane: _sc._lane, jsBytes: _sc._jsBytes,
548
+ lane: _sc._lane, jsBytes: _sc._jsBytes, jsMin: _sc._jsMin,
494
549
  rss: process.memoryUsage().rss
495
550
  });
496
551
 
@@ -774,7 +829,8 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
774
829
  // event (runScanInWorker) so lane×heap-peak cross-checks are possible
775
830
  // post-rollout (hard criterion: zero 'light' scans peaking >512MB).
776
831
  _lane: lane,
777
- _jsBytes: jsWeight.totalJsBytes
832
+ _jsBytes: jsWeight.totalJsBytes,
833
+ _jsMin: jsWeight.minifiedJsBytes || 0
778
834
  };
779
835
  // Hand the main-thread-fetched metadata to the worker so its processor skips
780
836
  // the per-worker getPackageMetadata fetch (429-storm fix). npm only; the key
@@ -1398,6 +1454,27 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
1398
1454
  // count clean — inconclusive, distinct ledger source, distinct log line
1399
1455
  // (the live-validation metric for the limits rollout). No retry: an OOM
1400
1456
  // re-OOMs deterministically.
1457
+ // Reactive heap watermark (C2 part B): the worker self-terminated before
1458
+ // blowing the process RSS. Same disposition as a resourceLimits OOM —
1459
+ // inconclusive, NOT clean, no retry (a re-scan re-explodes the same way) —
1460
+ // but a distinct ledger source so the watchdog's catch rate is measurable
1461
+ // separately from the V8 hard-cap OOMs.
1462
+ const isHeapWatermark = err && /WORKER_HEAP_WATERMARK/.test(err.message || '');
1463
+ if (isHeapWatermark) {
1464
+ console.error(`[MONITOR] WORKER_HEAP_WATERMARK: ${name}@${version} — scan worker self-terminated over the heap watermark (kept INCONCLUSIVE, not clean)`);
1465
+ stats.workerHeapWatermark = (stats.workerHeapWatermark || 0) + 1;
1466
+ updateScanStats('sandbox_inconclusive');
1467
+ try {
1468
+ appendScanLedger({ name, version, ecosystem, outcome: 'error', source: 'worker_heap_watermark' });
1469
+ } catch { /* ledger is best-effort */ }
1470
+ return { sandboxResult: null, staticClean: false };
1471
+ }
1472
+ // Per-worker resourceLimits breach: the worker died on ITS V8 cap
1473
+ // (ERR_WORKER_OUT_OF_MEMORY) instead of blowing the process RSS. Same
1474
+ // safeguard as static_timeout: a package that OOMs the scanner must NOT
1475
+ // count clean — inconclusive, distinct ledger source, distinct log line
1476
+ // (the live-validation metric for the limits rollout). No retry: an OOM
1477
+ // re-OOMs deterministically.
1401
1478
  const isWorkerOom = err && (err.code === 'ERR_WORKER_OUT_OF_MEMORY' ||
1402
1479
  /ERR_WORKER_OUT_OF_MEMORY|reached its memory limit/i.test(err.message || ''));
1403
1480
  if (isWorkerOom) {
@@ -22,6 +22,22 @@ if (!parentPort) {
22
22
  const { run } = require('../index.js');
23
23
  const { appendWorkerMem, sampleIntervalMs } = require('../monitor/worker-mem.js');
24
24
 
25
+ // Reactive heap watermark (C2 part B): the static heavy-lane classifier
26
+ // predicts the peak from on-disk bytes and WILL miss cases (omnius: 39KB JS →
27
+ // 1347MB). This is the prediction-free backstop — the worker watches its OWN
28
+ // isolate heap and bails before it contributes to a process-wide RSS spike.
29
+ // CAVEAT: a watchdog timer can only fire when the event loop yields, so it
30
+ // catches PROGRESSIVE (multi-file, async-between-files) growth; a single
31
+ // synchronous 30MB parse never yields and is caught only by the V8 hard cap
32
+ // (MUADDIB_WORKER_MAX_OLD_MB resourceLimits). The two are complementary.
33
+ // Default 2200MB: above the ~1.3GB legitimate scans that finish CLEAN, below
34
+ // the 3072MB resourceLimits cap. 0 disables.
35
+ const HEAP_WATERMARK_MB = (() => {
36
+ const v = parseInt(process.env.MUADDIB_WORKER_HEAP_WATERMARK_MB, 10);
37
+ return Number.isFinite(v) && v >= 0 ? v : 2200;
38
+ })();
39
+ const HEAP_WATERMARK_CHECK_MS = 1000;
40
+
25
41
  (async () => {
26
42
  // Off-heap attribution samples (worker-mem.jsonl): heapUsed/external/
27
43
  // arrayBuffers are isolate-local here, rss is process-wide. The samples MUST
@@ -49,6 +65,34 @@ const { appendWorkerMem, sampleIntervalMs } = require('../monitor/worker-mem.js'
49
65
  sampler = setInterval(sampleNow, everyMs);
50
66
  sampler.unref();
51
67
  }
68
+
69
+ // Heap-watermark watchdog. On breach, post a tagged error and exit — the
70
+ // parent matches the WORKER_HEAP_WATERMARK tag and handles it like a worker OOM
71
+ // (inconclusive, ledgered under its own source, NOT counted clean). NOT unref'd: while the
72
+ // scan is in flight this watchdog must stay live to fire.
73
+ let watchdog = null;
74
+ if (HEAP_WATERMARK_MB > 0) {
75
+ const limitBytes = HEAP_WATERMARK_MB * 1024 * 1024;
76
+ watchdog = setInterval(() => {
77
+ if (process.memoryUsage().heapUsed > limitBytes) {
78
+ clearInterval(watchdog); watchdog = null;
79
+ if (sampler) clearInterval(sampler);
80
+ try {
81
+ parentPort.postMessage({
82
+ type: 'error',
83
+ message: `WORKER_HEAP_WATERMARK: isolate heap exceeded ${HEAP_WATERMARK_MB}MB (${scanContext.name}@${scanContext.version})`
84
+ });
85
+ } catch { /* parent gone */ }
86
+ // Exit NON-ZERO so the parent settles even if the message above is lost
87
+ // in the post/exit race: the worker.on('exit') handler rejects on any
88
+ // non-zero code, and the catch matches WORKER_HEAP_WATERMARK when the
89
+ // message did arrive, or the generic scan_error path when it didn't —
90
+ // never clean, never a hung promise. (exit(0) would let the exit
91
+ // handler no-op and hang the scan until the 300s outer timeout.)
92
+ process.exit(1);
93
+ }
94
+ }, HEAP_WATERMARK_CHECK_MS);
95
+ }
52
96
  try {
53
97
  // scanContext (optional) carries monitor-side info that opt-in scanners need
54
98
  // (e.g. trusted-dep-diff requires package name + version to query the registry).
@@ -60,5 +104,6 @@ const { appendWorkerMem, sampleIntervalMs } = require('../monitor/worker-mem.js'
60
104
  parentPort.postMessage({ type: 'error', message: err.message || String(err) });
61
105
  } finally {
62
106
  if (sampler) clearInterval(sampler);
107
+ if (watchdog) clearInterval(watchdog);
63
108
  }
64
109
  })();