muaddib-scanner 2.11.99 → 2.11.101
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/{self-scan-v2.11.99.json → self-scan-v2.11.101.json} +1 -1
- package/src/ioc/scraper.js +6 -1
- package/src/ioc/updater.js +84 -4
- package/src/monitor/daemon.js +12 -0
- package/src/monitor/heavy-lane.js +10 -3
- package/src/monitor/queue.js +84 -7
- package/src/pipeline/scan-worker.js +45 -0
package/package.json
CHANGED
package/src/ioc/scraper.js
CHANGED
|
@@ -7,7 +7,7 @@ const AdmZip = require('adm-zip');
|
|
|
7
7
|
const IOC_FILE = path.join(__dirname, 'data/iocs.json');
|
|
8
8
|
const COMPACT_IOC_FILE = path.join(__dirname, 'data/iocs-compact.json');
|
|
9
9
|
const HOME_IOC_FILE = path.join(os.homedir(), '.muaddib', 'data', 'iocs.json');
|
|
10
|
-
const { generateCompactIOCs, NEVER_WILDCARD, expandCompactIOCs } = require('./updater.js');
|
|
10
|
+
const { generateCompactIOCs, NEVER_WILDCARD, expandCompactIOCs, writeLeanIOCFile } = require('./updater.js');
|
|
11
11
|
const { Spinner } = require('../utils.js');
|
|
12
12
|
const { NPM_PACKAGE_REGEX } = require('../shared/constants.js');
|
|
13
13
|
const { version: PKG_VERSION } = require('../../package.json');
|
|
@@ -1274,6 +1274,11 @@ async function runScraper() {
|
|
|
1274
1274
|
fs.writeFileSync(tmpCompactFile, JSON.stringify(compactIOCs));
|
|
1275
1275
|
fs.renameSync(tmpCompactFile, COMPACT_IOC_FILE);
|
|
1276
1276
|
|
|
1277
|
+
// Save the lean projection in lock-step with the full file (~24MB) — what
|
|
1278
|
+
// scan workers load instead of the 223MB full (RSS fix). Built from the
|
|
1279
|
+
// in-memory object, so no extra parse peak. See updater.js:createLeanIOCs.
|
|
1280
|
+
writeLeanIOCFile(existingIOCs);
|
|
1281
|
+
|
|
1277
1282
|
// Persist to ~/.muaddib/data/ (survives npm update)
|
|
1278
1283
|
saveSpinner.update('Persisting to home directory...');
|
|
1279
1284
|
const homeDir = path.dirname(HOME_IOC_FILE);
|
package/src/ioc/updater.js
CHANGED
|
@@ -6,6 +6,14 @@ const crypto = require('crypto');
|
|
|
6
6
|
const HOME_DATA_PATH = path.join(os.homedir(), '.muaddib', 'data');
|
|
7
7
|
const CACHE_IOC_FILE = path.join(HOME_DATA_PATH, 'iocs.json');
|
|
8
8
|
const LOCAL_IOC_FILE = path.join(__dirname, 'data/iocs.json');
|
|
9
|
+
// Lean projection of LOCAL_IOC_FILE — only the fields the matcher/alert read
|
|
10
|
+
// ({name,version,severity,source} + hashes/markers/files/stringIocs). The full
|
|
11
|
+
// file is ~223MB → 447MB string during JSON.parse, reloaded by every one-shot
|
|
12
|
+
// worker that touches IOC matching (heap-snapshot-confirmed ~900MB peak). The
|
|
13
|
+
// lean is ~24MB → ~50MB peak. Workers READ this; only the daemon/scraper write
|
|
14
|
+
// it (a worker must never re-read the 223MB full to regenerate — that is the
|
|
15
|
+
// very peak we are removing). See ensureLeanIOCFile + createLeanIOCs below.
|
|
16
|
+
const LOCAL_LEAN_FILE = path.join(__dirname, 'data/iocs-lean.json');
|
|
9
17
|
const LOCAL_COMPACT_FILE = path.join(__dirname, 'data/iocs-compact.json');
|
|
10
18
|
const { loadYAMLIOCs } = require('./yaml-loader.js');
|
|
11
19
|
|
|
@@ -241,7 +249,7 @@ function mergeIOCs(target, source) {
|
|
|
241
249
|
// scan/poll) does zero disk I/O.
|
|
242
250
|
const IOCS_DIR = path.join(__dirname, '..', '..', 'iocs');
|
|
243
251
|
const IOC_SOURCE_FILES = [
|
|
244
|
-
CACHE_IOC_FILE, LOCAL_IOC_FILE, LOCAL_COMPACT_FILE,
|
|
252
|
+
CACHE_IOC_FILE, LOCAL_IOC_FILE, LOCAL_LEAN_FILE, LOCAL_COMPACT_FILE,
|
|
245
253
|
path.join(IOCS_DIR, 'packages.yaml'), path.join(IOCS_DIR, 'builtin.yaml'),
|
|
246
254
|
path.join(IOCS_DIR, 'hashes.yaml'), path.join(IOCS_DIR, 'string-iocs.yaml')
|
|
247
255
|
];
|
|
@@ -279,8 +287,19 @@ function loadCachedIOCs() {
|
|
|
279
287
|
stringIocs: Array.isArray(yamlIOCs.stringIocs) ? [...yamlIOCs.stringIocs] : []
|
|
280
288
|
};
|
|
281
289
|
|
|
282
|
-
// Priority 2a: Local scraped IOCs (
|
|
283
|
-
|
|
290
|
+
// Priority 2a: Local scraped IOCs. Prefer the lean projection (~24MB) — it
|
|
291
|
+
// carries every field the matcher/alert read. Only fall back to the full
|
|
292
|
+
// ~223MB file when the lean is absent (backward-compat / before the daemon
|
|
293
|
+
// has generated it), which costs the ~450MB parse peak. ensureLeanIOCFile()
|
|
294
|
+
// (called at daemon boot + after each scrape) keeps the lean present & fresh.
|
|
295
|
+
if (fs.existsSync(LOCAL_LEAN_FILE)) {
|
|
296
|
+
try {
|
|
297
|
+
const leanIOCs = JSON.parse(fs.readFileSync(LOCAL_LEAN_FILE, 'utf8'));
|
|
298
|
+
mergeIOCs(merged, leanIOCs);
|
|
299
|
+
} catch (e) {
|
|
300
|
+
console.log('[WARN] Failed to load lean IOC database (iocs-lean.json): ' + e.message);
|
|
301
|
+
}
|
|
302
|
+
} else if (fs.existsSync(LOCAL_IOC_FILE)) {
|
|
284
303
|
try {
|
|
285
304
|
const localIOCs = JSON.parse(fs.readFileSync(LOCAL_IOC_FILE, 'utf8'));
|
|
286
305
|
mergeIOCs(merged, localIOCs);
|
|
@@ -471,6 +490,67 @@ const NEVER_WILDCARD_PYPI = new Set([
|
|
|
471
490
|
'scipy', 'tensorflow', 'torch', 'fastapi', 'uvicorn'
|
|
472
491
|
]);
|
|
473
492
|
|
|
493
|
+
// Lean projection of a full IOC object: keep only the fields the matcher and
|
|
494
|
+
// the alert message read on package entries ({name,version,severity,source}),
|
|
495
|
+
// drop the enrichment (id/description/references/mitre/published/freshness/
|
|
496
|
+
// sources/confidence — never read after load; profiled). hashes/markers/files/
|
|
497
|
+
// stringIocs are simple values / small (YAML-sourced) and kept verbatim.
|
|
498
|
+
// Pure: no I/O. Used to write LOCAL_LEAN_FILE from an in-memory full object
|
|
499
|
+
// (zero extra parse peak) and by ensureLeanIOCFile.
|
|
500
|
+
/**
 * Project a full IOC object onto the lean shape that scan workers load.
 *
 * Package entries are reduced to {name, version, severity, source}; every
 * other per-package field is dropped. hashes/markers/files/stringIocs and
 * updated/sources are passed through by reference (not copied).
 * Pure: performs no I/O and does not mutate its argument.
 *
 * Malformed input is tolerated instead of throwing: a missing or non-object
 * `fullIOCs` yields empty lists, a package list that is not an array is
 * treated as empty, and entries that are not objects are skipped.
 *
 * @param {object|null|undefined} fullIOCs - parsed full IOC database
 * @returns {{packages: object[], pypi_packages: object[], hashes: *, markers: *, files: *, stringIocs: *, updated: *, sources: *}}
 */
function createLeanIOCs(fullIOCs) {
  const src = fullIOCs !== null && typeof fullIOCs === 'object' ? fullIOCs : {};
  const leanPkg = ({ name, version, severity, source }) => ({ name, version, severity, source });
  const leanList = list => (Array.isArray(list) ? list : [])
    .filter(entry => entry !== null && typeof entry === 'object')
    .map(leanPkg);
  return {
    packages: leanList(src.packages),
    pypi_packages: leanList(src.pypi_packages),
    hashes: src.hashes || [],
    markers: src.markers || [],
    files: src.files || [],
    stringIocs: src.stringIocs || [],
    updated: src.updated,
    sources: src.sources
  };
}

/**
 * Serialize `lean` and publish it as LOCAL_LEAN_FILE via write-to-tmp + rename,
 * so a reader never observes a half-written file. If the write or the rename
 * fails, the temporary file is removed (best-effort) before the error is
 * rethrown, so a failed run does not leave a stray .tmp behind.
 *
 * @param {object} lean - lean IOC object (see createLeanIOCs)
 * @returns {number} size in bytes of the serialized JSON
 * @throws whatever fs.writeFileSync / fs.renameSync throw
 */
function writeLeanFileAtomically(lean) {
  const tmp = LOCAL_LEAN_FILE + '.tmp';
  const data = JSON.stringify(lean);
  try {
    fs.writeFileSync(tmp, data);
    fs.renameSync(tmp, LOCAL_LEAN_FILE);
  } catch (e) {
    try { fs.unlinkSync(tmp); } catch { /* tmp was never created or is already gone */ }
    throw e;
  }
  return Buffer.byteLength(data);
}

/**
 * Ensure LOCAL_LEAN_FILE exists and is at least as recent (by mtime) as
 * LOCAL_IOC_FILE, regenerating it from the full file when it is not.
 *
 * Regeneration reads and parses the whole full file, so this is meant for a
 * long-lived process (daemon boot), not for a one-shot scan worker.
 * Never throws: any failure is logged as a [WARN] and reported as
 * {generated: false, bytes: 0}.
 *
 * @returns {{generated: boolean, bytes: number}} `generated` is true only when
 *   the lean file was rewritten; `bytes` is the lean file size (0 when the
 *   full file is absent or on failure)
 */
function ensureLeanIOCFile() {
  try {
    if (!fs.existsSync(LOCAL_IOC_FILE)) return { generated: false, bytes: 0 };

    // One stat of the lean file serves both the freshness check and the size.
    let leanStat = null;
    try { leanStat = fs.statSync(LOCAL_LEAN_FILE); } catch { /* absent → regenerate */ }
    if (leanStat && leanStat.mtimeMs >= fs.statSync(LOCAL_IOC_FILE).mtimeMs) {
      return { generated: false, bytes: leanStat.size };
    }

    const full = JSON.parse(fs.readFileSync(LOCAL_IOC_FILE, 'utf8'));
    const bytes = writeLeanFileAtomically(createLeanIOCs(full));
    return { generated: true, bytes };
  } catch (e) {
    console.log('[WARN] ensureLeanIOCFile failed: ' + e.message);
    return { generated: false, bytes: 0 };
  }
}

/**
 * Write the lean file from a full IOC object that is already in memory, so no
 * additional read/parse of the full file is needed.
 * Never throws: a failure is logged as a [WARN] and reported as false.
 *
 * @param {object} fullIOCs - in-memory full IOC database
 * @returns {boolean} true when the lean file was written, false otherwise
 */
function writeLeanIOCFile(fullIOCs) {
  try {
    writeLeanFileAtomically(createLeanIOCs(fullIOCs));
    return true;
  } catch (e) {
    console.log('[WARN] writeLeanIOCFile failed: ' + e.message);
    return false;
  }
}
|
|
553
|
+
|
|
474
554
|
function generateCompactIOCs(fullIOCs) {
|
|
475
555
|
const wildcards = [];
|
|
476
556
|
const versioned = Object.create(null);
|
|
@@ -693,4 +773,4 @@ function verifyIOCHMAC(data, hmac) {
|
|
|
693
773
|
}
|
|
694
774
|
}
|
|
695
775
|
|
|
696
|
-
module.exports = { updateIOCs, loadCachedIOCs, invalidateCache, generateCompactIOCs, expandCompactIOCs, mergeIOCs, createOptimizedIOCs, generateIOCHMAC, verifyIOCHMAC, checkIOCStaleness, NEVER_WILDCARD, NEVER_WILDCARD_PYPI };
|
|
776
|
+
module.exports = { updateIOCs, loadCachedIOCs, invalidateCache, generateCompactIOCs, expandCompactIOCs, createLeanIOCs, ensureLeanIOCFile, writeLeanIOCFile, LOCAL_LEAN_FILE, LOCAL_IOC_FILE, mergeIOCs, createOptimizedIOCs, generateIOCHMAC, verifyIOCHMAC, checkIOCStaleness, NEVER_WILDCARD, NEVER_WILDCARD_PYPI };
|
package/src/monitor/daemon.js
CHANGED
|
@@ -813,6 +813,18 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
|
|
|
813
813
|
console.warn(`[Archive] Failed to start periodic cleanup: ${err.message}`);
|
|
814
814
|
}
|
|
815
815
|
|
|
816
|
+
// RSS fix (C2): make sure the lean IOC projection exists & is fresh BEFORE any
|
|
817
|
+
// scan worker spawns. Workers load the ~24MB lean instead of the ~223MB full
|
|
818
|
+
// (heap-snapshot-confirmed ~900MB→~50MB per IOC-matching scan). The full read
|
|
819
|
+
// here is paid ONCE by this long-lived daemon (never by a one-shot worker).
|
|
820
|
+
try {
|
|
821
|
+
const { ensureLeanIOCFile } = require('../ioc/updater.js');
|
|
822
|
+
const r = ensureLeanIOCFile();
|
|
823
|
+
if (r.generated) console.log(`[MONITOR] IOC lean projection regenerated (${(r.bytes / 1024 / 1024).toFixed(1)}MB) — workers avoid the 223MB full load`);
|
|
824
|
+
} catch (err) {
|
|
825
|
+
console.warn(`[MONITOR] IOC lean bootstrap failed (workers fall back to full file): ${err.message}`);
|
|
826
|
+
}
|
|
827
|
+
|
|
816
828
|
console.log('\n' + banner([
|
|
817
829
|
"MUAD'DIB - Registry Monitor",
|
|
818
830
|
'Scanning npm + PyPI new packages'
|
|
@@ -56,14 +56,21 @@ const _lane = { active: 0, queue: [] };
|
|
|
56
56
|
/**
|
|
57
57
|
* Pure classifier. `truncated` (the bounded measurement walk overflowed its
|
|
58
58
|
* depth/file caps) classifies heavy by default — defensive: an unmeasurable
|
|
59
|
-
* package is exactly the kind that blows a worker.
|
|
60
|
-
*
|
|
59
|
+
* package is exactly the kind that blows a worker. Compares weightedJsBytes
|
|
60
|
+
* (plain + ×12 minified — see measureJsWeight in queue.js: raw bytes alone
|
|
61
|
+
* missed the minified explosions, powerlines 517KB → 1151MB heap) and falls
|
|
62
|
+
* back to totalJsBytes for callers that don't weight. `oversize` (any single
|
|
63
|
+
* JS file > getMaxFileSize) also forces heavy — content scanners load such a
|
|
64
|
+
* file whole even though the AST skips it (omnius: a 30MB index.js → 1347MB).
|
|
65
|
+
* @param {{totalJsBytes: number, weightedJsBytes?: number, oversize?: boolean, truncated: boolean}|null} weight
|
|
61
66
|
* @param {number} [thresholdBytes]
|
|
62
67
|
*/
|
|
63
68
|
function isHeavyScan(weight, thresholdBytes = heavyScanBytesThreshold()) {
  // No measurement at all: nothing to classify as heavy.
  if (!weight) return false;

  // An overflowed measurement walk, or a single JS file above the per-file
  // cap, forces the heavy lane regardless of the byte totals.
  if (weight.truncated || weight.oversize) return true;

  // Prefer the minification-weighted size; callers that do not supply a
  // finite weightedJsBytes are compared on raw totalJsBytes instead.
  const { weightedJsBytes, totalJsBytes } = weight;
  let effectiveBytes = totalJsBytes || 0;
  if (Number.isFinite(weightedJsBytes)) effectiveBytes = weightedJsBytes;

  return effectiveBytes >= thresholdBytes;
}
|
|
68
75
|
|
|
69
76
|
/**
|
package/src/monitor/queue.js
CHANGED
|
@@ -311,6 +311,42 @@ function countPackageFiles(dir) {
|
|
|
311
311
|
const JS_WEIGHT_MAX_DEPTH = 8;
|
|
312
312
|
const JS_WEIGHT_MAX_FILES = 2000;
|
|
313
313
|
const JS_WEIGHT_FILE_PATTERN = /\.(?:[cm]?js|[jt]sx?)$/i;
|
|
314
|
+
// Minified JS expands SUPER-linearly in the worker (live counter-examples
|
|
315
|
+
// from the 16:18 rollout, 2026-06-11: powerlines = 517KB JS of which 449KB
|
|
316
|
+
// minified → 1151MB heap, ~2300×; @lethevimlet/sshift = ~1.9MB minified →
|
|
317
|
+
// 1.38GB — both sailed under the raw-bytes threshold as 'light'). Plain
|
|
318
|
+
// source stays roughly linear (the 12MB-heap mode of the bimodal
|
|
319
|
+
// distribution). So minified bytes count ×12 toward the heavy threshold —
|
|
320
|
+
// ≥~256KB of minified JS crosses the 3MiB default. Detection: average line
|
|
321
|
+
// length over a 64KB probe window (JS_MINIFIED_PROBE_BYTES); plain code sits at 40-120 chars, minified
|
|
322
|
+
// bundles at 800+ (often a single line). 250 splits cleanly even when a
|
|
323
|
+
// license header pads the probe window.
|
|
324
|
+
// Multiplier applied to minified bytes when the weighted JS size is computed.
const JS_MINIFIED_WEIGHT = 12;
// A probe window whose average line length exceeds this many characters is
// classified as minified.
const JS_MINIFIED_AVG_LINE = 250;
// Probe geometry. The window is 64KB rather than 4KB because bike4mind slipped
// past a 4KB probe: a license/banner header filled the window and the minified
// body only started afterwards, so the package was classified light and
// reached an 890MB heap. For files large enough to hold both, the window
// starts ~2KB in to step over such a header. At most one 64KB read per file,
// so even a 30MB file is never loaded whole.
const JS_MINIFIED_PROBE_OFFSET = 2048;
const JS_MINIFIED_PROBE_BYTES = 64 * 1024;

/**
 * Decide whether a file looks minified by sampling one window of at most
 * JS_MINIFIED_PROBE_BYTES bytes and comparing its average line length with
 * JS_MINIFIED_AVG_LINE.
 *
 * The window starts at JS_MINIFIED_PROBE_OFFSET when `size` is larger than
 * offset + window, otherwise at the start of the file. Any I/O error, or an
 * empty read, yields false. The file descriptor is always closed.
 *
 * @param {string} filePath - file to probe
 * @param {number} size - file size in bytes, as already measured by the caller
 * @returns {boolean} true when the sampled window averages more than
 *   JS_MINIFIED_AVG_LINE characters per line
 */
function probeIsMinified(filePath, size) {
  let handle = null;
  try {
    handle = fs.openSync(filePath, 'r');

    const canSkipHeader = size > JS_MINIFIED_PROBE_OFFSET + JS_MINIFIED_PROBE_BYTES;
    const start = canSkipHeader ? JS_MINIFIED_PROBE_OFFSET : 0;

    const probe = Buffer.alloc(JS_MINIFIED_PROBE_BYTES);
    const bytesRead = fs.readSync(handle, probe, 0, JS_MINIFIED_PROBE_BYTES, start);
    if (bytesRead <= 0) return false;

    const text = probe.toString('utf8', 0, bytesRead);
    // Number of lines = number of '\n' separators + 1.
    let lineCount = 1;
    for (let at = text.indexOf('\n'); at !== -1; at = text.indexOf('\n', at + 1)) {
      lineCount += 1;
    }
    return text.length / lineCount > JS_MINIFIED_AVG_LINE;
  } catch {
    return false;
  } finally {
    if (handle !== null) {
      try { fs.closeSync(handle); } catch { /* best-effort */ }
    }
  }
}
|
|
314
350
|
|
|
315
351
|
/**
|
|
316
352
|
* Measure how much parsable JS a package carries — the heavy-lane
|
|
@@ -325,12 +361,24 @@ const JS_WEIGHT_FILE_PATTERN = /\.(?:[cm]?js|[jt]sx?)$/i;
|
|
|
325
361
|
* Bounded walk; an overflow (depth/file caps) returns truncated:true, which
|
|
326
362
|
* isHeavyScan classifies heavy by default.
|
|
327
363
|
*
|
|
364
|
+
* weightedJsBytes = plain bytes + JS_MINIFIED_WEIGHT × minified bytes — the
|
|
365
|
+
* value isHeavyScan compares against the threshold (raw bytes alone missed
|
|
366
|
+
* the minified explosions, see JS_MINIFIED_WEIGHT above).
|
|
367
|
+
*
|
|
368
|
+
* `oversize` (any single JS file > getMaxFileSize) forces heavy: the AST
|
|
369
|
+
* executor skips such files, but the content scanners (entropy/hash/
|
|
370
|
+
* ioc-strings/deobfuscate) still readFileSync the whole thing — omnius
|
|
371
|
+
* (a 30MB dist/index.js, 39KB of other JS) blew a 'light' worker to 1347MB.
|
|
372
|
+
* So an oversize JS file is the STRONGEST heavy signal, not something to skip.
|
|
373
|
+
*
|
|
328
374
|
* @param {string} dir - extracted package directory
|
|
329
|
-
* @returns {{ totalJsBytes: number, maxJsFileBytes: number, truncated: boolean }}
|
|
375
|
+
* @returns {{ totalJsBytes: number, minifiedJsBytes: number, weightedJsBytes: number, maxJsFileBytes: number, oversize: boolean, truncated: boolean }}
|
|
330
376
|
*/
|
|
331
377
|
function measureJsWeight(dir) {
|
|
332
378
|
let totalJsBytes = 0;
|
|
379
|
+
let minifiedJsBytes = 0;
|
|
333
380
|
let maxJsFileBytes = 0;
|
|
381
|
+
let oversize = false;
|
|
334
382
|
let seen = 0;
|
|
335
383
|
let truncated = false;
|
|
336
384
|
const perFileCap = getMaxFileSize();
|
|
@@ -347,17 +395,24 @@ function measureJsWeight(dir) {
|
|
|
347
395
|
walk(path.join(current, entry.name), depth + 1);
|
|
348
396
|
} else if (entry.isFile() && JS_WEIGHT_FILE_PATTERN.test(entry.name)) {
|
|
349
397
|
if (++seen > JS_WEIGHT_MAX_FILES) { truncated = true; return; }
|
|
398
|
+
const filePath = path.join(current, entry.name);
|
|
350
399
|
let size;
|
|
351
|
-
try { size = fs.statSync(
|
|
352
|
-
if (size > perFileCap) continue; // executor skips these — they never reach the AST
|
|
353
|
-
totalJsBytes += size;
|
|
400
|
+
try { size = fs.statSync(filePath).size; } catch { continue; }
|
|
354
401
|
if (size > maxJsFileBytes) maxJsFileBytes = size;
|
|
402
|
+
if (size > perFileCap) {
|
|
403
|
+
// The AST skips it, but content scanners load it whole → heap blow-up.
|
|
404
|
+
oversize = true;
|
|
405
|
+
continue;
|
|
406
|
+
}
|
|
407
|
+
totalJsBytes += size;
|
|
408
|
+
if (probeIsMinified(filePath, size)) minifiedJsBytes += size;
|
|
355
409
|
}
|
|
356
410
|
}
|
|
357
411
|
}
|
|
358
412
|
|
|
359
413
|
walk(dir, 0);
|
|
360
|
-
|
|
414
|
+
const weightedJsBytes = (totalJsBytes - minifiedJsBytes) + JS_MINIFIED_WEIGHT * minifiedJsBytes;
|
|
415
|
+
return { totalJsBytes, minifiedJsBytes, weightedJsBytes, maxJsFileBytes, oversize, truncated };
|
|
361
416
|
}
|
|
362
417
|
|
|
363
418
|
/**
|
|
@@ -490,7 +545,7 @@ function runScanInWorker(extractedDir, timeoutMs, scanContext = null, signal = n
|
|
|
490
545
|
appendWorkerMem({
|
|
491
546
|
ev: 'spawn', tid: _wmTid,
|
|
492
547
|
name: _sc.name, version: _sc.version, ecosystem: _sc.ecosystem,
|
|
493
|
-
lane: _sc._lane, jsBytes: _sc._jsBytes,
|
|
548
|
+
lane: _sc._lane, jsBytes: _sc._jsBytes, jsMin: _sc._jsMin,
|
|
494
549
|
rss: process.memoryUsage().rss
|
|
495
550
|
});
|
|
496
551
|
|
|
@@ -774,7 +829,8 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
774
829
|
// event (runScanInWorker) so lane×heap-peak cross-checks are possible
|
|
775
830
|
// post-rollout (hard criterion: zero 'light' scans peaking >512MB).
|
|
776
831
|
_lane: lane,
|
|
777
|
-
_jsBytes: jsWeight.totalJsBytes
|
|
832
|
+
_jsBytes: jsWeight.totalJsBytes,
|
|
833
|
+
_jsMin: jsWeight.minifiedJsBytes || 0
|
|
778
834
|
};
|
|
779
835
|
// Hand the main-thread-fetched metadata to the worker so its processor skips
|
|
780
836
|
// the per-worker getPackageMetadata fetch (429-storm fix). npm only; the key
|
|
@@ -1398,6 +1454,27 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
1398
1454
|
// count clean — inconclusive, distinct ledger source, distinct log line
|
|
1399
1455
|
// (the live-validation metric for the limits rollout). No retry: an OOM
|
|
1400
1456
|
// re-OOMs deterministically.
|
|
1457
|
+
// Reactive heap watermark (C2 volet B): the worker self-terminated before
|
|
1458
|
+
// blowing the process RSS. Same disposition as a resourceLimits OOM —
|
|
1459
|
+
// inconclusive, NOT clean, no retry (a re-scan re-explodes the same way) —
|
|
1460
|
+
// but a distinct ledger source so the watchdog's catch rate is measurable
|
|
1461
|
+
// separately from the V8 hard-cap OOMs.
|
|
1462
|
+
const isHeapWatermark = err && /WORKER_HEAP_WATERMARK/.test(err.message || '');
|
|
1463
|
+
if (isHeapWatermark) {
|
|
1464
|
+
console.error(`[MONITOR] WORKER_HEAP_WATERMARK: ${name}@${version} — scan worker self-terminated over the heap watermark (kept INCONCLUSIVE, not clean)`);
|
|
1465
|
+
stats.workerHeapWatermark = (stats.workerHeapWatermark || 0) + 1;
|
|
1466
|
+
updateScanStats('sandbox_inconclusive');
|
|
1467
|
+
try {
|
|
1468
|
+
appendScanLedger({ name, version, ecosystem, outcome: 'error', source: 'worker_heap_watermark' });
|
|
1469
|
+
} catch { /* ledger is best-effort */ }
|
|
1470
|
+
return { sandboxResult: null, staticClean: false };
|
|
1471
|
+
}
|
|
1472
|
+
// Per-worker resourceLimits breach: the worker died on ITS V8 cap
|
|
1473
|
+
// (ERR_WORKER_OUT_OF_MEMORY) instead of blowing the process RSS. Same
|
|
1474
|
+
// garde-fou as static_timeout: a package that OOMs the scanner must NOT
|
|
1475
|
+
// count clean — inconclusive, distinct ledger source, distinct log line
|
|
1476
|
+
// (the live-validation metric for the limits rollout). No retry: an OOM
|
|
1477
|
+
// re-OOMs deterministically.
|
|
1401
1478
|
const isWorkerOom = err && (err.code === 'ERR_WORKER_OUT_OF_MEMORY' ||
|
|
1402
1479
|
/ERR_WORKER_OUT_OF_MEMORY|reached its memory limit/i.test(err.message || ''));
|
|
1403
1480
|
if (isWorkerOom) {
|
|
@@ -22,6 +22,22 @@ if (!parentPort) {
|
|
|
22
22
|
const { run } = require('../index.js');
|
|
23
23
|
const { appendWorkerMem, sampleIntervalMs } = require('../monitor/worker-mem.js');
|
|
24
24
|
|
|
25
|
+
// Reactive heap watermark (C2 volet B): the static heavy-lane classifier
|
|
26
|
+
// predicts the peak from on-disk bytes and WILL miss cases (omnius: 39KB of JS plus one 30MB index.js →
|
|
27
|
+
// 1347MB). This is the prediction-free backstop — the worker watches its OWN
|
|
28
|
+
// isolate heap and bails before it contributes to a process-wide RSS spike.
|
|
29
|
+
// CAVEAT: a watchdog timer can only fire when the event loop yields, so it
|
|
30
|
+
// catches PROGRESSIVE (multi-file, async-between-files) growth; a single
|
|
31
|
+
// synchronous 30MB parse never yields and is caught only by the V8 hard cap
|
|
32
|
+
// (MUADDIB_WORKER_MAX_OLD_MB resourceLimits). The two are complementary.
|
|
33
|
+
// Default 2200MB: above the ~1.3GB legitimate scans that finish CLEAN, below
|
|
34
|
+
// the 3072MB resourceLimits cap. 0 disables.
|
|
35
|
+
/**
 * Turn the raw environment value into the heap watermark in MB.
 * A value that does not parse to a finite, non-negative integer (including an
 * unset variable) falls back to 2200.
 *
 * @param {string|undefined} raw - value of MUADDIB_WORKER_HEAP_WATERMARK_MB
 * @returns {number} watermark in MB; 0 is a valid result
 */
function resolveHeapWatermarkMb(raw) {
  const parsed = Number.parseInt(raw, 10);
  if (!Number.isFinite(parsed) || parsed < 0) return 2200;
  return parsed;
}

const HEAP_WATERMARK_MB = resolveHeapWatermarkMb(process.env.MUADDIB_WORKER_HEAP_WATERMARK_MB);
// Interval between two watchdog heap checks, in milliseconds.
const HEAP_WATERMARK_CHECK_MS = 1000;
|
|
40
|
+
|
|
25
41
|
(async () => {
|
|
26
42
|
// Off-heap attribution samples (worker-mem.jsonl): heapUsed/external/
|
|
27
43
|
// arrayBuffers are isolate-local here, rss is process-wide. The samples MUST
|
|
@@ -49,6 +65,34 @@ const { appendWorkerMem, sampleIntervalMs } = require('../monitor/worker-mem.js'
|
|
|
49
65
|
sampler = setInterval(sampleNow, everyMs);
|
|
50
66
|
sampler.unref();
|
|
51
67
|
}
|
|
68
|
+
|
|
69
|
+
// Heap-watermark watchdog. On breach, post a tagged error and exit. The parent
// matches the WORKER_HEAP_WATERMARK tag in the error message and records the
// scan as inconclusive (never clean), under its own ledger source. NOT
// unref'd: while the scan is in flight this watchdog must stay live to fire.
let watchdog = null;
if (HEAP_WATERMARK_MB > 0) {
  const limitBytes = HEAP_WATERMARK_MB * 1024 * 1024;
  watchdog = setInterval(() => {
    if (process.memoryUsage().heapUsed > limitBytes) {
      clearInterval(watchdog); watchdog = null;
      if (sampler) clearInterval(sampler);
      // scanContext is optional. Build the label defensively: dereferencing a
      // null context inside the try below would throw, be swallowed as
      // "parent gone", and the tagged message would never be posted.
      const target = scanContext && scanContext.name
        ? `${scanContext.name}@${scanContext.version}`
        : 'unknown package';
      try {
        parentPort.postMessage({
          type: 'error',
          message: `WORKER_HEAP_WATERMARK: isolate heap exceeded ${HEAP_WATERMARK_MB}MB (${target})`
        });
      } catch { /* parent gone */ }
      // Exit NON-ZERO so the parent settles even if the message above is lost
      // in the post/exit race: the worker.on('exit') handler rejects on any
      // non-zero code, and the catch matches WORKER_HEAP_WATERMARK when the
      // message did arrive, or the generic scan_error path when it didn't —
      // never clean, never a hung promise. (exit(0) would let the exit
      // handler no-op and hang the scan until the 300s outer timeout.)
      process.exit(1);
    }
  }, HEAP_WATERMARK_CHECK_MS);
}
|
|
52
96
|
try {
|
|
53
97
|
// scanContext (optional) carries monitor-side info that opt-in scanners need
|
|
54
98
|
// (e.g. trusted-dep-diff requires package name + version to query the registry).
|
|
@@ -60,5 +104,6 @@ const { appendWorkerMem, sampleIntervalMs } = require('../monitor/worker-mem.js'
|
|
|
60
104
|
parentPort.postMessage({ type: 'error', message: err.message || String(err) });
|
|
61
105
|
} finally {
|
|
62
106
|
if (sampler) clearInterval(sampler);
|
|
107
|
+
if (watchdog) clearInterval(watchdog);
|
|
63
108
|
}
|
|
64
109
|
})();
|