muaddib-scanner 2.11.100 → 2.11.102
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/{self-scan-v2.11.100.json → self-scan-v2.11.102.json} +1 -1
- package/src/ioc/scraper.js +6 -1
- package/src/ioc/updater.js +84 -4
- package/src/monitor/daemon.js +12 -0
- package/src/monitor/heavy-lane.js +5 -2
- package/src/monitor/queue.js +48 -10
- package/src/pipeline/scan-worker.js +45 -0
package/package.json
CHANGED
package/src/ioc/scraper.js
CHANGED
|
@@ -7,7 +7,7 @@ const AdmZip = require('adm-zip');
|
|
|
7
7
|
const IOC_FILE = path.join(__dirname, 'data/iocs.json');
|
|
8
8
|
const COMPACT_IOC_FILE = path.join(__dirname, 'data/iocs-compact.json');
|
|
9
9
|
const HOME_IOC_FILE = path.join(os.homedir(), '.muaddib', 'data', 'iocs.json');
|
|
10
|
-
const { generateCompactIOCs, NEVER_WILDCARD, expandCompactIOCs } = require('./updater.js');
|
|
10
|
+
const { generateCompactIOCs, NEVER_WILDCARD, expandCompactIOCs, writeLeanIOCFile } = require('./updater.js');
|
|
11
11
|
const { Spinner } = require('../utils.js');
|
|
12
12
|
const { NPM_PACKAGE_REGEX } = require('../shared/constants.js');
|
|
13
13
|
const { version: PKG_VERSION } = require('../../package.json');
|
|
@@ -1274,6 +1274,11 @@ async function runScraper() {
|
|
|
1274
1274
|
fs.writeFileSync(tmpCompactFile, JSON.stringify(compactIOCs));
|
|
1275
1275
|
fs.renameSync(tmpCompactFile, COMPACT_IOC_FILE);
|
|
1276
1276
|
|
|
1277
|
+
// Save the lean projection in lock-step with the full file (~24MB) — what
|
|
1278
|
+
// scan workers load instead of the 223MB full (RSS fix). Built from the
|
|
1279
|
+
// in-memory object, so no extra parse peak. See updater.js:createLeanIOCs.
|
|
1280
|
+
writeLeanIOCFile(existingIOCs);
|
|
1281
|
+
|
|
1277
1282
|
// Persist to ~/.muaddib/data/ (survives npm update)
|
|
1278
1283
|
saveSpinner.update('Persisting to home directory...');
|
|
1279
1284
|
const homeDir = path.dirname(HOME_IOC_FILE);
|
package/src/ioc/updater.js
CHANGED
|
@@ -6,6 +6,14 @@ const crypto = require('crypto');
|
|
|
6
6
|
const HOME_DATA_PATH = path.join(os.homedir(), '.muaddib', 'data');
|
|
7
7
|
const CACHE_IOC_FILE = path.join(HOME_DATA_PATH, 'iocs.json');
|
|
8
8
|
const LOCAL_IOC_FILE = path.join(__dirname, 'data/iocs.json');
|
|
9
|
+
// Lean projection of LOCAL_IOC_FILE — only the fields the matcher/alert read
|
|
10
|
+
// ({name,version,severity,source} + hashes/markers/files/stringIocs). The full
|
|
11
|
+
// file is ~223MB → 447MB string during JSON.parse, reloaded by every one-shot
|
|
12
|
+
// worker that touches IOC matching (heap-snapshot-confirmed ~900MB peak). The
|
|
13
|
+
// lean is ~24MB → ~50MB peak. Workers READ this; only the daemon/scraper write
|
|
14
|
+
// it (a worker must never re-read the 223MB full to regenerate — that is the
|
|
15
|
+
// very peak we are removing). See ensureLeanIOCFile + createLeanIOCs below.
|
|
16
|
+
const LOCAL_LEAN_FILE = path.join(__dirname, 'data/iocs-lean.json');
|
|
9
17
|
const LOCAL_COMPACT_FILE = path.join(__dirname, 'data/iocs-compact.json');
|
|
10
18
|
const { loadYAMLIOCs } = require('./yaml-loader.js');
|
|
11
19
|
|
|
@@ -241,7 +249,7 @@ function mergeIOCs(target, source) {
|
|
|
241
249
|
// scan/poll) does zero disk I/O.
|
|
242
250
|
const IOCS_DIR = path.join(__dirname, '..', '..', 'iocs');
|
|
243
251
|
const IOC_SOURCE_FILES = [
|
|
244
|
-
CACHE_IOC_FILE, LOCAL_IOC_FILE, LOCAL_COMPACT_FILE,
|
|
252
|
+
CACHE_IOC_FILE, LOCAL_IOC_FILE, LOCAL_LEAN_FILE, LOCAL_COMPACT_FILE,
|
|
245
253
|
path.join(IOCS_DIR, 'packages.yaml'), path.join(IOCS_DIR, 'builtin.yaml'),
|
|
246
254
|
path.join(IOCS_DIR, 'hashes.yaml'), path.join(IOCS_DIR, 'string-iocs.yaml')
|
|
247
255
|
];
|
|
@@ -279,8 +287,19 @@ function loadCachedIOCs() {
|
|
|
279
287
|
stringIocs: Array.isArray(yamlIOCs.stringIocs) ? [...yamlIOCs.stringIocs] : []
|
|
280
288
|
};
|
|
281
289
|
|
|
282
|
-
// Priority 2a: Local scraped IOCs (
|
|
283
|
-
|
|
290
|
+
// Priority 2a: Local scraped IOCs. Prefer the lean projection (~24MB) — it
|
|
291
|
+
// carries every field the matcher/alert read. Only fall back to the full
|
|
292
|
+
// ~223MB file when the lean is absent (backward-compat / before the daemon
|
|
293
|
+
// has generated it), which costs the ~450MB parse peak. ensureLeanIOCFile()
|
|
294
|
+
// (called at daemon boot + after each scrape) keeps the lean present & fresh.
|
|
295
|
+
if (fs.existsSync(LOCAL_LEAN_FILE)) {
|
|
296
|
+
try {
|
|
297
|
+
const leanIOCs = JSON.parse(fs.readFileSync(LOCAL_LEAN_FILE, 'utf8'));
|
|
298
|
+
mergeIOCs(merged, leanIOCs);
|
|
299
|
+
} catch (e) {
|
|
300
|
+
console.log('[WARN] Failed to load lean IOC database (iocs-lean.json): ' + e.message);
|
|
301
|
+
}
|
|
302
|
+
} else if (fs.existsSync(LOCAL_IOC_FILE)) {
|
|
284
303
|
try {
|
|
285
304
|
const localIOCs = JSON.parse(fs.readFileSync(LOCAL_IOC_FILE, 'utf8'));
|
|
286
305
|
mergeIOCs(merged, localIOCs);
|
|
@@ -471,6 +490,67 @@ const NEVER_WILDCARD_PYPI = new Set([
|
|
|
471
490
|
'scipy', 'tensorflow', 'torch', 'fastapi', 'uvicorn'
|
|
472
491
|
]);
|
|
473
492
|
|
|
493
|
+
// Lean projection of a full IOC object: keep only the fields the matcher and
|
|
494
|
+
// the alert message read on package entries ({name,version,severity,source}),
|
|
495
|
+
// drop the enrichment (id/description/references/mitre/published/freshness/
|
|
496
|
+
// sources/confidence — never read after load; profiled). hashes/markers/files/
|
|
497
|
+
// stringIocs are simple values / small (YAML-sourced) and kept verbatim.
|
|
498
|
+
// Pure: no I/O. Used to write LOCAL_LEAN_FILE from an in-memory full object
|
|
499
|
+
// (zero extra parse peak) and by ensureLeanIOCFile.
|
|
500
|
+
function createLeanIOCs(fullIOCs) {
|
|
501
|
+
const leanPkg = p => ({ name: p.name, version: p.version, severity: p.severity, source: p.source });
|
|
502
|
+
return {
|
|
503
|
+
packages: (fullIOCs.packages || []).map(leanPkg),
|
|
504
|
+
pypi_packages: (fullIOCs.pypi_packages || []).map(leanPkg),
|
|
505
|
+
hashes: fullIOCs.hashes || [],
|
|
506
|
+
markers: fullIOCs.markers || [],
|
|
507
|
+
files: fullIOCs.files || [],
|
|
508
|
+
stringIocs: fullIOCs.stringIocs || [],
|
|
509
|
+
updated: fullIOCs.updated,
|
|
510
|
+
sources: fullIOCs.sources
|
|
511
|
+
};
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
// Ensure LOCAL_LEAN_FILE exists and is at least as fresh as LOCAL_IOC_FILE.
|
|
515
|
+
// Reads the 223MB full ONCE (the ~450MB parse peak) — acceptable only in a
|
|
516
|
+
// long-lived process (daemon boot); NEVER call from a one-shot scan worker.
|
|
517
|
+
// Atomic write (.tmp → rename). Returns {generated:boolean, bytes:number}.
|
|
518
|
+
function ensureLeanIOCFile() {
|
|
519
|
+
try {
|
|
520
|
+
if (!fs.existsSync(LOCAL_IOC_FILE)) return { generated: false, bytes: 0 };
|
|
521
|
+
let fresh = false;
|
|
522
|
+
if (fs.existsSync(LOCAL_LEAN_FILE)) {
|
|
523
|
+
try { fresh = fs.statSync(LOCAL_LEAN_FILE).mtimeMs >= fs.statSync(LOCAL_IOC_FILE).mtimeMs; } catch { fresh = false; }
|
|
524
|
+
}
|
|
525
|
+
if (fresh) return { generated: false, bytes: fs.statSync(LOCAL_LEAN_FILE).size };
|
|
526
|
+
const full = JSON.parse(fs.readFileSync(LOCAL_IOC_FILE, 'utf8'));
|
|
527
|
+
const lean = createLeanIOCs(full);
|
|
528
|
+
const tmp = LOCAL_LEAN_FILE + '.tmp';
|
|
529
|
+
const data = JSON.stringify(lean);
|
|
530
|
+
fs.writeFileSync(tmp, data);
|
|
531
|
+
fs.renameSync(tmp, LOCAL_LEAN_FILE);
|
|
532
|
+
return { generated: true, bytes: Buffer.byteLength(data) };
|
|
533
|
+
} catch (e) {
|
|
534
|
+
console.log('[WARN] ensureLeanIOCFile failed: ' + e.message);
|
|
535
|
+
return { generated: false, bytes: 0 };
|
|
536
|
+
}
|
|
537
|
+
}
|
|
538
|
+
|
|
539
|
+
// Write the lean file from an already-in-memory full object (zero extra parse
|
|
540
|
+
// peak). Called by the scraper right after it writes LOCAL_IOC_FILE so the
|
|
541
|
+
// lean stays in lock-step with the full after every deep scrape.
|
|
542
|
+
function writeLeanIOCFile(fullIOCs) {
|
|
543
|
+
try {
|
|
544
|
+
const tmp = LOCAL_LEAN_FILE + '.tmp';
|
|
545
|
+
fs.writeFileSync(tmp, JSON.stringify(createLeanIOCs(fullIOCs)));
|
|
546
|
+
fs.renameSync(tmp, LOCAL_LEAN_FILE);
|
|
547
|
+
return true;
|
|
548
|
+
} catch (e) {
|
|
549
|
+
console.log('[WARN] writeLeanIOCFile failed: ' + e.message);
|
|
550
|
+
return false;
|
|
551
|
+
}
|
|
552
|
+
}
|
|
553
|
+
|
|
474
554
|
function generateCompactIOCs(fullIOCs) {
|
|
475
555
|
const wildcards = [];
|
|
476
556
|
const versioned = Object.create(null);
|
|
@@ -693,4 +773,4 @@ function verifyIOCHMAC(data, hmac) {
|
|
|
693
773
|
}
|
|
694
774
|
}
|
|
695
775
|
|
|
696
|
-
module.exports = { updateIOCs, loadCachedIOCs, invalidateCache, generateCompactIOCs, expandCompactIOCs, mergeIOCs, createOptimizedIOCs, generateIOCHMAC, verifyIOCHMAC, checkIOCStaleness, NEVER_WILDCARD, NEVER_WILDCARD_PYPI };
|
|
776
|
+
module.exports = { updateIOCs, loadCachedIOCs, invalidateCache, generateCompactIOCs, expandCompactIOCs, createLeanIOCs, ensureLeanIOCFile, writeLeanIOCFile, LOCAL_LEAN_FILE, LOCAL_IOC_FILE, mergeIOCs, createOptimizedIOCs, generateIOCHMAC, verifyIOCHMAC, checkIOCStaleness, NEVER_WILDCARD, NEVER_WILDCARD_PYPI };
|
package/src/monitor/daemon.js
CHANGED
|
@@ -813,6 +813,18 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
|
|
|
813
813
|
console.warn(`[Archive] Failed to start periodic cleanup: ${err.message}`);
|
|
814
814
|
}
|
|
815
815
|
|
|
816
|
+
// RSS fix (C2): make sure the lean IOC projection exists & is fresh BEFORE any
|
|
817
|
+
// scan worker spawns. Workers load the ~24MB lean instead of the ~223MB full
|
|
818
|
+
// (heap-snapshot-confirmed ~900MB→~50MB per IOC-matching scan). The full read
|
|
819
|
+
// here is paid ONCE by this long-lived daemon (never by a one-shot worker).
|
|
820
|
+
try {
|
|
821
|
+
const { ensureLeanIOCFile } = require('../ioc/updater.js');
|
|
822
|
+
const r = ensureLeanIOCFile();
|
|
823
|
+
if (r.generated) console.log(`[MONITOR] IOC lean projection regenerated (${(r.bytes / 1024 / 1024).toFixed(1)}MB) — workers avoid the 223MB full load`);
|
|
824
|
+
} catch (err) {
|
|
825
|
+
console.warn(`[MONITOR] IOC lean bootstrap failed (workers fall back to full file): ${err.message}`);
|
|
826
|
+
}
|
|
827
|
+
|
|
816
828
|
console.log('\n' + banner([
|
|
817
829
|
"MUAD'DIB - Registry Monitor",
|
|
818
830
|
'Scanning npm + PyPI new packages'
|
package/src/monitor/heavy-lane.js
CHANGED
|
@@ -59,13 +59,16 @@ const _lane = { active: 0, queue: [] };
|
|
|
59
59
|
* package is exactly the kind that blows a worker. Compares weightedJsBytes
|
|
60
60
|
* (plain + ×12 minified — see measureJsWeight in queue.js: raw bytes alone
|
|
61
61
|
* missed the minified explosions, powerlines 517KB → 1151MB heap) and falls
|
|
62
|
-
* back to totalJsBytes for callers that don't weight.
|
|
63
|
-
*
|
|
62
|
+
* back to totalJsBytes for callers that don't weight. `oversize` (any single
|
|
63
|
+
* JS file > getMaxFileSize) also forces heavy — content scanners load such a
|
|
64
|
+
* file whole even though the AST skips it (omnius: a 30MB index.js → 1347MB).
|
|
65
|
+
* @param {{totalJsBytes: number, weightedJsBytes?: number, oversize?: boolean, truncated: boolean}|null} weight
|
|
64
66
|
* @param {number} [thresholdBytes]
|
|
65
67
|
*/
|
|
66
68
|
function isHeavyScan(weight, thresholdBytes = heavyScanBytesThreshold()) {
|
|
67
69
|
if (!weight) return false;
|
|
68
70
|
if (weight.truncated) return true;
|
|
71
|
+
if (weight.oversize) return true; // a single JS file > getMaxFileSize — content scanners load it whole
|
|
69
72
|
const effective = Number.isFinite(weight.weightedJsBytes) ? weight.weightedJsBytes : (weight.totalJsBytes || 0);
|
|
70
73
|
return effective >= thresholdBytes;
|
|
71
74
|
}
|
package/src/monitor/queue.js
CHANGED
|
@@ -323,15 +323,21 @@ const JS_WEIGHT_FILE_PATTERN = /\.(?:[cm]?js|[jt]sx?)$/i;
|
|
|
323
323
|
// license header pads the probe window.
|
|
324
324
|
const JS_MINIFIED_WEIGHT = 12;
|
|
325
325
|
const JS_MINIFIED_AVG_LINE = 250;
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
326
|
+
// 64KB, not 4KB: bike4mind sailed under the 4KB probe (a license/banner header
|
|
327
|
+
// padded the window; the minified body started later) → mis-classified light →
|
|
328
|
+
// 890MB heap. Probe a 64KB window from ~2KB in to skip any header and still
|
|
329
|
+
// never load a 30MB file. Cheap (one readSync) at JS_WEIGHT_MAX_FILES files.
|
|
330
|
+
const JS_MINIFIED_PROBE_OFFSET = 2048;
|
|
331
|
+
const JS_MINIFIED_PROBE_BYTES = 64 * 1024;
|
|
332
|
+
|
|
333
|
+
/** Probe a 64KB window of a file (never loads the rest) for minification. */
|
|
334
|
+
function probeIsMinified(filePath, size) {
|
|
330
335
|
let fd = null;
|
|
331
336
|
try {
|
|
332
337
|
fd = fs.openSync(filePath, 'r');
|
|
338
|
+
const offset = size > JS_MINIFIED_PROBE_OFFSET + JS_MINIFIED_PROBE_BYTES ? JS_MINIFIED_PROBE_OFFSET : 0;
|
|
333
339
|
const buf = Buffer.alloc(JS_MINIFIED_PROBE_BYTES);
|
|
334
|
-
const n = fs.readSync(fd, buf, 0, JS_MINIFIED_PROBE_BYTES,
|
|
340
|
+
const n = fs.readSync(fd, buf, 0, JS_MINIFIED_PROBE_BYTES, offset);
|
|
335
341
|
if (n <= 0) return false;
|
|
336
342
|
const head = buf.toString('utf8', 0, n);
|
|
337
343
|
return (head.length / head.split('\n').length) > JS_MINIFIED_AVG_LINE;
|
|
@@ -359,13 +365,20 @@ function probeIsMinified(filePath) {
|
|
|
359
365
|
* value isHeavyScan compares against the threshold (raw bytes alone missed
|
|
360
366
|
* the minified explosions, see JS_MINIFIED_WEIGHT above).
|
|
361
367
|
*
|
|
368
|
+
* `oversize` (any single JS file > getMaxFileSize) forces heavy: the AST
|
|
369
|
+
* executor skips such files, but the content scanners (entropy/hash/
|
|
370
|
+
* ioc-strings/deobfuscate) still readFileSync the whole thing — omnius
|
|
371
|
+
* (a 30MB dist/index.js, 39KB of other JS) blew a 'light' worker to 1347MB.
|
|
372
|
+
* So an oversize JS file is the STRONGEST heavy signal, not something to skip.
|
|
373
|
+
*
|
|
362
374
|
* @param {string} dir - extracted package directory
|
|
363
|
-
* @returns {{ totalJsBytes: number, minifiedJsBytes: number, weightedJsBytes: number, maxJsFileBytes: number, truncated: boolean }}
|
|
375
|
+
* @returns {{ totalJsBytes: number, minifiedJsBytes: number, weightedJsBytes: number, maxJsFileBytes: number, oversize: boolean, truncated: boolean }}
|
|
364
376
|
*/
|
|
365
377
|
function measureJsWeight(dir) {
|
|
366
378
|
let totalJsBytes = 0;
|
|
367
379
|
let minifiedJsBytes = 0;
|
|
368
380
|
let maxJsFileBytes = 0;
|
|
381
|
+
let oversize = false;
|
|
369
382
|
let seen = 0;
|
|
370
383
|
let truncated = false;
|
|
371
384
|
const perFileCap = getMaxFileSize();
|
|
@@ -385,17 +398,21 @@ function measureJsWeight(dir) {
|
|
|
385
398
|
const filePath = path.join(current, entry.name);
|
|
386
399
|
let size;
|
|
387
400
|
try { size = fs.statSync(filePath).size; } catch { continue; }
|
|
388
|
-
if (size > perFileCap) continue; // executor skips these — they never reach the AST
|
|
389
|
-
totalJsBytes += size;
|
|
390
|
-
if (probeIsMinified(filePath)) minifiedJsBytes += size;
|
|
391
401
|
if (size > maxJsFileBytes) maxJsFileBytes = size;
|
|
402
|
+
if (size > perFileCap) {
|
|
403
|
+
// The AST skips it, but content scanners load it whole → heap blow-up.
|
|
404
|
+
oversize = true;
|
|
405
|
+
continue;
|
|
406
|
+
}
|
|
407
|
+
totalJsBytes += size;
|
|
408
|
+
if (probeIsMinified(filePath, size)) minifiedJsBytes += size;
|
|
392
409
|
}
|
|
393
410
|
}
|
|
394
411
|
}
|
|
395
412
|
|
|
396
413
|
walk(dir, 0);
|
|
397
414
|
const weightedJsBytes = (totalJsBytes - minifiedJsBytes) + JS_MINIFIED_WEIGHT * minifiedJsBytes;
|
|
398
|
-
return { totalJsBytes, minifiedJsBytes, weightedJsBytes, maxJsFileBytes, truncated };
|
|
415
|
+
return { totalJsBytes, minifiedJsBytes, weightedJsBytes, maxJsFileBytes, oversize, truncated };
|
|
399
416
|
}
|
|
400
417
|
|
|
401
418
|
/**
|
|
@@ -1437,6 +1454,27 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
1437
1454
|
// count clean — inconclusive, distinct ledger source, distinct log line
|
|
1438
1455
|
// (the live-validation metric for the limits rollout). No retry: an OOM
|
|
1439
1456
|
// re-OOMs deterministically.
|
|
1457
|
+
// Reactive heap watermark (C2 volet B): the worker self-terminated before
|
|
1458
|
+
// blowing the process RSS. Same disposition as a resourceLimits OOM —
|
|
1459
|
+
// inconclusive, NOT clean, no retry (a re-scan re-explodes the same way) —
|
|
1460
|
+
// but a distinct ledger source so the watchdog's catch rate is measurable
|
|
1461
|
+
// separately from the V8 hard-cap OOMs.
|
|
1462
|
+
const isHeapWatermark = err && /WORKER_HEAP_WATERMARK/.test(err.message || '');
|
|
1463
|
+
if (isHeapWatermark) {
|
|
1464
|
+
console.error(`[MONITOR] WORKER_HEAP_WATERMARK: ${name}@${version} — scan worker self-terminated over the heap watermark (kept INCONCLUSIVE, not clean)`);
|
|
1465
|
+
stats.workerHeapWatermark = (stats.workerHeapWatermark || 0) + 1;
|
|
1466
|
+
updateScanStats('sandbox_inconclusive');
|
|
1467
|
+
try {
|
|
1468
|
+
appendScanLedger({ name, version, ecosystem, outcome: 'error', source: 'worker_heap_watermark' });
|
|
1469
|
+
} catch { /* ledger is best-effort */ }
|
|
1470
|
+
return { sandboxResult: null, staticClean: false };
|
|
1471
|
+
}
|
|
1472
|
+
// Per-worker resourceLimits breach: the worker died on ITS V8 cap
|
|
1473
|
+
// (ERR_WORKER_OUT_OF_MEMORY) instead of blowing the process RSS. Same
|
|
1474
|
+
// garde-fou as static_timeout: a package that OOMs the scanner must NOT
|
|
1475
|
+
// count clean — inconclusive, distinct ledger source, distinct log line
|
|
1476
|
+
// (the live-validation metric for the limits rollout). No retry: an OOM
|
|
1477
|
+
// re-OOMs deterministically.
|
|
1440
1478
|
const isWorkerOom = err && (err.code === 'ERR_WORKER_OUT_OF_MEMORY' ||
|
|
1441
1479
|
/ERR_WORKER_OUT_OF_MEMORY|reached its memory limit/i.test(err.message || ''));
|
|
1442
1480
|
if (isWorkerOom) {
|
package/src/pipeline/scan-worker.js
CHANGED
|
@@ -22,6 +22,22 @@ if (!parentPort) {
|
|
|
22
22
|
const { run } = require('../index.js');
|
|
23
23
|
const { appendWorkerMem, sampleIntervalMs } = require('../monitor/worker-mem.js');
|
|
24
24
|
|
|
25
|
+
// Reactive heap watermark (C2 volet B): the static heavy-lane classifier
|
|
26
|
+
// predicts the peak from on-disk bytes and WILL miss cases (omnius: 39KB JS →
|
|
27
|
+
// 1347MB). This is the prediction-free backstop — the worker watches its OWN
|
|
28
|
+
// isolate heap and bails before it contributes to a process-wide RSS spike.
|
|
29
|
+
// CAVEAT: a watchdog timer can only fire when the event loop yields, so it
|
|
30
|
+
// catches PROGRESSIVE (multi-file, async-between-files) growth; a single
|
|
31
|
+
// synchronous 30MB parse never yields and is caught only by the V8 hard cap
|
|
32
|
+
// (MUADDIB_WORKER_MAX_OLD_MB resourceLimits). The two are complementary.
|
|
33
|
+
// Default 2200MB: above the ~1.3GB legitimate scans that finish CLEAN, below
|
|
34
|
+
// the 3072MB resourceLimits cap. 0 disables.
|
|
35
|
+
const HEAP_WATERMARK_MB = (() => {
|
|
36
|
+
const v = parseInt(process.env.MUADDIB_WORKER_HEAP_WATERMARK_MB, 10);
|
|
37
|
+
return Number.isFinite(v) && v >= 0 ? v : 2200;
|
|
38
|
+
})();
|
|
39
|
+
const HEAP_WATERMARK_CHECK_MS = 1000;
|
|
40
|
+
|
|
25
41
|
(async () => {
|
|
26
42
|
// Off-heap attribution samples (worker-mem.jsonl): heapUsed/external/
|
|
27
43
|
// arrayBuffers are isolate-local here, rss is process-wide. The samples MUST
|
|
@@ -49,6 +65,34 @@ const { appendWorkerMem, sampleIntervalMs } = require('../monitor/worker-mem.js'
|
|
|
49
65
|
sampler = setInterval(sampleNow, everyMs);
|
|
50
66
|
sampler.unref();
|
|
51
67
|
}
|
|
68
|
+
|
|
69
|
+
// Heap-watermark watchdog. On breach, post a tagged error and exit — the
|
|
70
|
+
// parent maps the WORKER_HEAP_WATERMARK message onto its existing worker_oom
|
|
71
|
+
// path (inconclusive, ledgered, NOT counted clean). NOT unref'd: while the
|
|
72
|
+
// scan is in flight this watchdog must stay live to fire.
|
|
73
|
+
let watchdog = null;
|
|
74
|
+
if (HEAP_WATERMARK_MB > 0) {
|
|
75
|
+
const limitBytes = HEAP_WATERMARK_MB * 1024 * 1024;
|
|
76
|
+
watchdog = setInterval(() => {
|
|
77
|
+
if (process.memoryUsage().heapUsed > limitBytes) {
|
|
78
|
+
clearInterval(watchdog); watchdog = null;
|
|
79
|
+
if (sampler) clearInterval(sampler);
|
|
80
|
+
try {
|
|
81
|
+
parentPort.postMessage({
|
|
82
|
+
type: 'error',
|
|
83
|
+
message: `WORKER_HEAP_WATERMARK: isolate heap exceeded ${HEAP_WATERMARK_MB}MB (${scanContext.name}@${scanContext.version})`
|
|
84
|
+
});
|
|
85
|
+
} catch { /* parent gone */ }
|
|
86
|
+
// Exit NON-ZERO so the parent settles even if the message above is lost
|
|
87
|
+
// in the post/exit race: the worker.on('exit') handler rejects on any
|
|
88
|
+
// non-zero code, and the catch matches WORKER_HEAP_WATERMARK when the
|
|
89
|
+
// message did arrive, or the generic scan_error path when it didn't —
|
|
90
|
+
// never clean, never a hung promise. (exit(0) would let the exit
|
|
91
|
+
// handler no-op and hang the scan until the 300s outer timeout.)
|
|
92
|
+
process.exit(1);
|
|
93
|
+
}
|
|
94
|
+
}, HEAP_WATERMARK_CHECK_MS);
|
|
95
|
+
}
|
|
52
96
|
try {
|
|
53
97
|
// scanContext (optional) carries monitor-side info that opt-in scanners need
|
|
54
98
|
// (e.g. trusted-dep-diff requires package name + version to query the registry).
|
|
@@ -60,5 +104,6 @@ const { appendWorkerMem, sampleIntervalMs } = require('../monitor/worker-mem.js'
|
|
|
60
104
|
parentPort.postMessage({ type: 'error', message: err.message || String(err) });
|
|
61
105
|
} finally {
|
|
62
106
|
if (sampler) clearInterval(sampler);
|
|
107
|
+
if (watchdog) clearInterval(watchdog);
|
|
63
108
|
}
|
|
64
109
|
})();
|