muaddib-scanner 2.11.100 → 2.11.102

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.100",
3
+ "version": "2.11.102",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-06-11T17:09:48.912Z",
3
+ "timestamp": "2026-06-11T19:20:45.240Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -7,7 +7,7 @@ const AdmZip = require('adm-zip');
7
7
  const IOC_FILE = path.join(__dirname, 'data/iocs.json');
8
8
  const COMPACT_IOC_FILE = path.join(__dirname, 'data/iocs-compact.json');
9
9
  const HOME_IOC_FILE = path.join(os.homedir(), '.muaddib', 'data', 'iocs.json');
10
- const { generateCompactIOCs, NEVER_WILDCARD, expandCompactIOCs } = require('./updater.js');
10
+ const { generateCompactIOCs, NEVER_WILDCARD, expandCompactIOCs, writeLeanIOCFile } = require('./updater.js');
11
11
  const { Spinner } = require('../utils.js');
12
12
  const { NPM_PACKAGE_REGEX } = require('../shared/constants.js');
13
13
  const { version: PKG_VERSION } = require('../../package.json');
@@ -1274,6 +1274,11 @@ async function runScraper() {
1274
1274
  fs.writeFileSync(tmpCompactFile, JSON.stringify(compactIOCs));
1275
1275
  fs.renameSync(tmpCompactFile, COMPACT_IOC_FILE);
1276
1276
 
1277
+ // Save the lean projection in lock-step with the full file (~24MB) — what
1278
+ // scan workers load instead of the 223MB full (RSS fix). Built from the
1279
+ // in-memory object, so no extra parse peak. See updater.js:createLeanIOCs.
1280
+ writeLeanIOCFile(existingIOCs);
1281
+
1277
1282
  // Persist to ~/.muaddib/data/ (survives npm update)
1278
1283
  saveSpinner.update('Persisting to home directory...');
1279
1284
  const homeDir = path.dirname(HOME_IOC_FILE);
@@ -6,6 +6,14 @@ const crypto = require('crypto');
6
6
  const HOME_DATA_PATH = path.join(os.homedir(), '.muaddib', 'data');
7
7
  const CACHE_IOC_FILE = path.join(HOME_DATA_PATH, 'iocs.json');
8
8
  const LOCAL_IOC_FILE = path.join(__dirname, 'data/iocs.json');
9
+ // Lean projection of LOCAL_IOC_FILE — only the fields the matcher/alert read
10
+ // ({name,version,severity,source} + hashes/markers/files/stringIocs). The full
11
+ // file is ~223MB → 447MB string during JSON.parse, reloaded by every one-shot
12
+ // worker that touches IOC matching (heap-snapshot-confirmed ~900MB peak). The
13
+ // lean is ~24MB → ~50MB peak. Workers READ this; only the daemon/scraper write
14
+ // it (a worker must never re-read the 223MB full to regenerate — that is the
15
+ // very peak we are removing). See ensureLeanIOCFile + createLeanIOCs below.
16
+ const LOCAL_LEAN_FILE = path.join(__dirname, 'data/iocs-lean.json');
9
17
  const LOCAL_COMPACT_FILE = path.join(__dirname, 'data/iocs-compact.json');
10
18
  const { loadYAMLIOCs } = require('./yaml-loader.js');
11
19
 
@@ -241,7 +249,7 @@ function mergeIOCs(target, source) {
241
249
  // scan/poll) does zero disk I/O.
242
250
  const IOCS_DIR = path.join(__dirname, '..', '..', 'iocs');
243
251
  const IOC_SOURCE_FILES = [
244
- CACHE_IOC_FILE, LOCAL_IOC_FILE, LOCAL_COMPACT_FILE,
252
+ CACHE_IOC_FILE, LOCAL_IOC_FILE, LOCAL_LEAN_FILE, LOCAL_COMPACT_FILE,
245
253
  path.join(IOCS_DIR, 'packages.yaml'), path.join(IOCS_DIR, 'builtin.yaml'),
246
254
  path.join(IOCS_DIR, 'hashes.yaml'), path.join(IOCS_DIR, 'string-iocs.yaml')
247
255
  ];
@@ -279,8 +287,19 @@ function loadCachedIOCs() {
279
287
  stringIocs: Array.isArray(yamlIOCs.stringIocs) ? [...yamlIOCs.stringIocs] : []
280
288
  };
281
289
 
282
- // Priority 2a: Local scraped IOCs (full enriched file)
283
- if (fs.existsSync(LOCAL_IOC_FILE)) {
290
+ // Priority 2a: Local scraped IOCs. Prefer the lean projection (~24MB): it
291
+ // carries every field the matcher/alert read. Only fall back to the full
292
+ // ~223MB file when the lean is absent (backward-compat / before the daemon
293
+ // has generated it), which costs the ~450MB parse peak. ensureLeanIOCFile()
294
+ // (daemon boot) and writeLeanIOCFile() (after each scrape) keep the lean present & fresh.
295
+ if (fs.existsSync(LOCAL_LEAN_FILE)) {
296
+ try {
297
+ const leanIOCs = JSON.parse(fs.readFileSync(LOCAL_LEAN_FILE, 'utf8'));
298
+ mergeIOCs(merged, leanIOCs);
299
+ } catch (e) {
300
+ console.log('[WARN] Failed to load lean IOC database (iocs-lean.json): ' + e.message);
301
+ }
302
+ } else if (fs.existsSync(LOCAL_IOC_FILE)) {
284
303
  try {
285
304
  const localIOCs = JSON.parse(fs.readFileSync(LOCAL_IOC_FILE, 'utf8'));
286
305
  mergeIOCs(merged, localIOCs);
@@ -471,6 +490,67 @@ const NEVER_WILDCARD_PYPI = new Set([
471
490
  'scipy', 'tensorflow', 'torch', 'fastapi', 'uvicorn'
472
491
  ]);
473
492
 
493
+ // Lean projection of a full IOC object: keep only the fields the matcher and
494
+ // the alert message read on package entries ({name,version,severity,source}),
495
+ // drop the enrichment (id/description/references/mitre/published/freshness/
496
+ // sources/confidence — never read after load; profiled). hashes/markers/files/
497
+ // stringIocs are simple values / small (YAML-sourced) and kept verbatim.
498
+ // Pure: no I/O. Used to write LOCAL_LEAN_FILE from an in-memory full object
499
+ // (zero extra parse peak) and by ensureLeanIOCFile.
500
+ function createLeanIOCs(fullIOCs) {
501
+ const leanPkg = p => ({ name: p.name, version: p.version, severity: p.severity, source: p.source });
502
+ return {
503
+ packages: (fullIOCs.packages || []).map(leanPkg),
504
+ pypi_packages: (fullIOCs.pypi_packages || []).map(leanPkg),
505
+ hashes: fullIOCs.hashes || [],
506
+ markers: fullIOCs.markers || [],
507
+ files: fullIOCs.files || [],
508
+ stringIocs: fullIOCs.stringIocs || [],
509
+ updated: fullIOCs.updated,
510
+ sources: fullIOCs.sources
511
+ };
512
+ }
513
+
514
+ // Ensure LOCAL_LEAN_FILE exists and is at least as fresh as LOCAL_IOC_FILE.
515
+ // Reads the 223MB full ONCE (the ~450MB parse peak) — acceptable only in a
516
+ // long-lived process (daemon boot); NEVER call from a one-shot scan worker.
517
+ // Atomic write (.tmp → rename). Returns {generated:boolean, bytes:number}.
518
+ function ensureLeanIOCFile() {
519
+ try {
520
+ if (!fs.existsSync(LOCAL_IOC_FILE)) return { generated: false, bytes: 0 };
521
+ let fresh = false;
522
+ if (fs.existsSync(LOCAL_LEAN_FILE)) {
523
+ try { fresh = fs.statSync(LOCAL_LEAN_FILE).mtimeMs >= fs.statSync(LOCAL_IOC_FILE).mtimeMs; } catch { fresh = false; }
524
+ }
525
+ if (fresh) return { generated: false, bytes: fs.statSync(LOCAL_LEAN_FILE).size };
526
+ const full = JSON.parse(fs.readFileSync(LOCAL_IOC_FILE, 'utf8'));
527
+ const lean = createLeanIOCs(full);
528
+ const tmp = LOCAL_LEAN_FILE + '.tmp';
529
+ const data = JSON.stringify(lean);
530
+ fs.writeFileSync(tmp, data);
531
+ fs.renameSync(tmp, LOCAL_LEAN_FILE);
532
+ return { generated: true, bytes: Buffer.byteLength(data) };
533
+ } catch (e) {
534
+ console.log('[WARN] ensureLeanIOCFile failed: ' + e.message);
535
+ return { generated: false, bytes: 0 };
536
+ }
537
+ }
538
+
539
+ // Write the lean file from an already-in-memory full object (zero extra parse
540
+ // peak). Called by the scraper right after it writes LOCAL_IOC_FILE so the
541
+ // lean stays in lock-step with the full after every deep scrape.
542
+ function writeLeanIOCFile(fullIOCs) {
543
+ try {
544
+ const tmp = LOCAL_LEAN_FILE + '.tmp';
545
+ fs.writeFileSync(tmp, JSON.stringify(createLeanIOCs(fullIOCs)));
546
+ fs.renameSync(tmp, LOCAL_LEAN_FILE);
547
+ return true;
548
+ } catch (e) {
549
+ console.log('[WARN] writeLeanIOCFile failed: ' + e.message);
550
+ return false;
551
+ }
552
+ }
553
+
474
554
  function generateCompactIOCs(fullIOCs) {
475
555
  const wildcards = [];
476
556
  const versioned = Object.create(null);
@@ -693,4 +773,4 @@ function verifyIOCHMAC(data, hmac) {
693
773
  }
694
774
  }
695
775
 
696
- module.exports = { updateIOCs, loadCachedIOCs, invalidateCache, generateCompactIOCs, expandCompactIOCs, mergeIOCs, createOptimizedIOCs, generateIOCHMAC, verifyIOCHMAC, checkIOCStaleness, NEVER_WILDCARD, NEVER_WILDCARD_PYPI };
776
+ module.exports = { updateIOCs, loadCachedIOCs, invalidateCache, generateCompactIOCs, expandCompactIOCs, createLeanIOCs, ensureLeanIOCFile, writeLeanIOCFile, LOCAL_LEAN_FILE, LOCAL_IOC_FILE, mergeIOCs, createOptimizedIOCs, generateIOCHMAC, verifyIOCHMAC, checkIOCStaleness, NEVER_WILDCARD, NEVER_WILDCARD_PYPI };
@@ -813,6 +813,18 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
813
813
  console.warn(`[Archive] Failed to start periodic cleanup: ${err.message}`);
814
814
  }
815
815
 
816
+ // RSS fix (C2): make sure the lean IOC projection exists & is fresh BEFORE any
817
+ // scan worker spawns. Workers load the ~24MB lean instead of the ~223MB full
818
+ // (heap-snapshot-confirmed ~900MB→~50MB per IOC-matching scan). The full read
819
+ // here is paid ONCE by this long-lived daemon (never by a one-shot worker).
820
+ try {
821
+ const { ensureLeanIOCFile } = require('../ioc/updater.js');
822
+ const r = ensureLeanIOCFile();
823
+ if (r.generated) console.log(`[MONITOR] IOC lean projection regenerated (${(r.bytes / 1024 / 1024).toFixed(1)}MB) — workers avoid the 223MB full load`);
824
+ } catch (err) {
825
+ console.warn(`[MONITOR] IOC lean bootstrap failed (workers fall back to full file): ${err.message}`);
826
+ }
827
+
816
828
  console.log('\n' + banner([
817
829
  "MUAD'DIB - Registry Monitor",
818
830
  'Scanning npm + PyPI new packages'
@@ -59,13 +59,16 @@ const _lane = { active: 0, queue: [] };
59
59
  * package is exactly the kind that blows a worker. Compares weightedJsBytes
60
60
  * (plain + ×12 minified — see measureJsWeight in queue.js: raw bytes alone
61
61
  * missed the minified explosions, powerlines 517KB → 1151MB heap) and falls
62
- * back to totalJsBytes for callers that don't weight.
63
- * @param {{totalJsBytes: number, weightedJsBytes?: number, truncated: boolean}|null} weight
62
+ * back to totalJsBytes for callers that don't weight. `oversize` (any single
63
+ * JS file > getMaxFileSize) also forces heavy — content scanners load such a
64
+ * file whole even though the AST skips it (omnius: a 30MB index.js → 1347MB).
65
+ * @param {{totalJsBytes: number, weightedJsBytes?: number, oversize?: boolean, truncated: boolean}|null} weight
64
66
  * @param {number} [thresholdBytes]
65
67
  */
66
68
  function isHeavyScan(weight, thresholdBytes = heavyScanBytesThreshold()) {
67
69
  if (!weight) return false;
68
70
  if (weight.truncated) return true;
71
+ if (weight.oversize) return true; // a single JS file > getMaxFileSize — content scanners load it whole
69
72
  const effective = Number.isFinite(weight.weightedJsBytes) ? weight.weightedJsBytes : (weight.totalJsBytes || 0);
70
73
  return effective >= thresholdBytes;
71
74
  }
@@ -323,15 +323,21 @@ const JS_WEIGHT_FILE_PATTERN = /\.(?:[cm]?js|[jt]sx?)$/i;
323
323
  // license header pads the probe window.
324
324
  const JS_MINIFIED_WEIGHT = 12;
325
325
  const JS_MINIFIED_AVG_LINE = 250;
326
- const JS_MINIFIED_PROBE_BYTES = 4096;
327
-
328
- /** Probe the first 4KB of a file (never loads the rest) for minification. */
329
- function probeIsMinified(filePath) {
326
+ // 64KB, not 4KB: bike4mind sailed under the 4KB probe (a license/banner header
327
+ // padded the window; the minified body started later) → mis-classified light →
328
+ // 890MB heap. Probe a 64KB window from ~2KB in to skip any header and still
329
+ // never load a 30MB file; files no larger than offset + window are probed from byte 0. Cheap (one readSync) at JS_WEIGHT_MAX_FILES files.
330
+ const JS_MINIFIED_PROBE_OFFSET = 2048;
331
+ const JS_MINIFIED_PROBE_BYTES = 64 * 1024;
332
+
333
+ /** Probe a 64KB window of a file (never loads the rest) for minification. */
334
+ function probeIsMinified(filePath, size) {
330
335
  let fd = null;
331
336
  try {
332
337
  fd = fs.openSync(filePath, 'r');
338
+ const offset = size > JS_MINIFIED_PROBE_OFFSET + JS_MINIFIED_PROBE_BYTES ? JS_MINIFIED_PROBE_OFFSET : 0;
333
339
  const buf = Buffer.alloc(JS_MINIFIED_PROBE_BYTES);
334
- const n = fs.readSync(fd, buf, 0, JS_MINIFIED_PROBE_BYTES, 0);
340
+ const n = fs.readSync(fd, buf, 0, JS_MINIFIED_PROBE_BYTES, offset);
335
341
  if (n <= 0) return false;
336
342
  const head = buf.toString('utf8', 0, n);
337
343
  return (head.length / head.split('\n').length) > JS_MINIFIED_AVG_LINE;
@@ -359,13 +365,20 @@ function probeIsMinified(filePath) {
359
365
  * value isHeavyScan compares against the threshold (raw bytes alone missed
360
366
  * the minified explosions, see JS_MINIFIED_WEIGHT above).
361
367
  *
368
+ * `oversize` (any single JS file > getMaxFileSize) forces heavy: the AST
369
+ * executor skips such files, but the content scanners (entropy/hash/
370
+ * ioc-strings/deobfuscate) still readFileSync the whole thing — omnius
371
+ * (a 30MB dist/index.js, 39KB of other JS) blew a 'light' worker to 1347MB.
372
+ * So an oversize JS file is the STRONGEST heavy signal, not something to skip.
373
+ *
362
374
  * @param {string} dir - extracted package directory
363
- * @returns {{ totalJsBytes: number, minifiedJsBytes: number, weightedJsBytes: number, maxJsFileBytes: number, truncated: boolean }}
375
+ * @returns {{ totalJsBytes: number, minifiedJsBytes: number, weightedJsBytes: number, maxJsFileBytes: number, oversize: boolean, truncated: boolean }}
364
376
  */
365
377
  function measureJsWeight(dir) {
366
378
  let totalJsBytes = 0;
367
379
  let minifiedJsBytes = 0;
368
380
  let maxJsFileBytes = 0;
381
+ let oversize = false;
369
382
  let seen = 0;
370
383
  let truncated = false;
371
384
  const perFileCap = getMaxFileSize();
@@ -385,17 +398,21 @@ function measureJsWeight(dir) {
385
398
  const filePath = path.join(current, entry.name);
386
399
  let size;
387
400
  try { size = fs.statSync(filePath).size; } catch { continue; }
388
- if (size > perFileCap) continue; // executor skips these — they never reach the AST
389
- totalJsBytes += size;
390
- if (probeIsMinified(filePath)) minifiedJsBytes += size;
391
401
  if (size > maxJsFileBytes) maxJsFileBytes = size;
402
+ if (size > perFileCap) {
403
+ // The AST skips it, but content scanners load it whole → heap blow-up.
404
+ oversize = true;
405
+ continue;
406
+ }
407
+ totalJsBytes += size;
408
+ if (probeIsMinified(filePath, size)) minifiedJsBytes += size;
392
409
  }
393
410
  }
394
411
  }
395
412
 
396
413
  walk(dir, 0);
397
414
  const weightedJsBytes = (totalJsBytes - minifiedJsBytes) + JS_MINIFIED_WEIGHT * minifiedJsBytes;
398
- return { totalJsBytes, minifiedJsBytes, weightedJsBytes, maxJsFileBytes, truncated };
415
+ return { totalJsBytes, minifiedJsBytes, weightedJsBytes, maxJsFileBytes, oversize, truncated };
399
416
  }
400
417
 
401
418
  /**
@@ -1437,6 +1454,27 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
1437
1454
  // count clean — inconclusive, distinct ledger source, distinct log line
1438
1455
  // (the live-validation metric for the limits rollout). No retry: an OOM
1439
1456
  // re-OOMs deterministically.
1457
+ // Reactive heap watermark (C2 volet B): the worker self-terminated before
1458
+ // blowing the process RSS. Same disposition as a resourceLimits OOM —
1459
+ // inconclusive, NOT clean, no retry (a re-scan re-explodes the same way) —
1460
+ // but a distinct ledger source so the watchdog's catch rate is measurable
1461
+ // separately from the V8 hard-cap OOMs.
1462
+ const isHeapWatermark = err && /WORKER_HEAP_WATERMARK/.test(err.message || '');
1463
+ if (isHeapWatermark) {
1464
+ console.error(`[MONITOR] WORKER_HEAP_WATERMARK: ${name}@${version} — scan worker self-terminated over the heap watermark (kept INCONCLUSIVE, not clean)`);
1465
+ stats.workerHeapWatermark = (stats.workerHeapWatermark || 0) + 1;
1466
+ updateScanStats('sandbox_inconclusive');
1467
+ try {
1468
+ appendScanLedger({ name, version, ecosystem, outcome: 'error', source: 'worker_heap_watermark' });
1469
+ } catch { /* ledger is best-effort */ }
1470
+ return { sandboxResult: null, staticClean: false };
1471
+ }
1472
+ // Per-worker resourceLimits breach: the worker died on ITS V8 cap
1473
+ // (ERR_WORKER_OUT_OF_MEMORY) instead of blowing the process RSS. Same
1474
+ // safeguard as static_timeout: a package that OOMs the scanner must NOT
1475
+ // count clean — inconclusive, distinct ledger source, distinct log line
1476
+ // (the live-validation metric for the limits rollout). No retry: an OOM
1477
+ // re-OOMs deterministically.
1440
1478
  const isWorkerOom = err && (err.code === 'ERR_WORKER_OUT_OF_MEMORY' ||
1441
1479
  /ERR_WORKER_OUT_OF_MEMORY|reached its memory limit/i.test(err.message || ''));
1442
1480
  if (isWorkerOom) {
@@ -22,6 +22,22 @@ if (!parentPort) {
22
22
  const { run } = require('../index.js');
23
23
  const { appendWorkerMem, sampleIntervalMs } = require('../monitor/worker-mem.js');
24
24
 
25
+ // Reactive heap watermark (C2 volet B): the static heavy-lane classifier
26
+ // predicts the peak from on-disk bytes and WILL miss cases (omnius: 39KB JS →
27
+ // 1347MB). This is the prediction-free backstop — the worker watches its OWN
28
+ // isolate heap and bails before it contributes to a process-wide RSS spike.
29
+ // CAVEAT: a watchdog timer can only fire when the event loop yields, so it
30
+ // catches PROGRESSIVE (multi-file, async-between-files) growth; a single
31
+ // synchronous 30MB parse never yields and is caught only by the V8 hard cap
32
+ // (MUADDIB_WORKER_MAX_OLD_MB resourceLimits). The two are complementary.
33
+ // Default 2200MB: above the ~1.3GB legitimate scans that finish CLEAN, below
34
+ // the 3072MB resourceLimits cap. 0 disables.
35
+ const HEAP_WATERMARK_MB = (() => {
36
+ const v = parseInt(process.env.MUADDIB_WORKER_HEAP_WATERMARK_MB, 10);
37
+ return Number.isFinite(v) && v >= 0 ? v : 2200;
38
+ })();
39
+ const HEAP_WATERMARK_CHECK_MS = 1000;
40
+
25
41
  (async () => {
26
42
  // Off-heap attribution samples (worker-mem.jsonl): heapUsed/external/
27
43
  // arrayBuffers are isolate-local here, rss is process-wide. The samples MUST
@@ -49,6 +65,34 @@ const { appendWorkerMem, sampleIntervalMs } = require('../monitor/worker-mem.js'
49
65
  sampler = setInterval(sampleNow, everyMs);
50
66
  sampler.unref();
51
67
  }
68
+
69
+ // Heap-watermark watchdog. On breach, post a tagged error and exit — the
70
+ // parent gives the WORKER_HEAP_WATERMARK message the same disposition as a
70
+ // worker OOM (inconclusive, ledgered under its own source, NOT counted clean). NOT unref'd: while the
72
+ // scan is in flight this watchdog must stay live to fire.
73
+ let watchdog = null;
74
+ if (HEAP_WATERMARK_MB > 0) {
75
+ const limitBytes = HEAP_WATERMARK_MB * 1024 * 1024;
76
+ watchdog = setInterval(() => {
77
+ if (process.memoryUsage().heapUsed > limitBytes) {
78
+ clearInterval(watchdog); watchdog = null;
79
+ if (sampler) clearInterval(sampler);
80
+ try {
81
+ parentPort.postMessage({
82
+ type: 'error',
83
+ message: `WORKER_HEAP_WATERMARK: isolate heap exceeded ${HEAP_WATERMARK_MB}MB (${scanContext.name}@${scanContext.version})`
84
+ });
85
+ } catch { /* parent gone */ }
86
+ // Exit NON-ZERO so the parent settles even if the message above is lost
87
+ // in the post/exit race: the worker.on('exit') handler rejects on any
88
+ // non-zero code, and the catch matches WORKER_HEAP_WATERMARK when the
89
+ // message did arrive, or the generic scan_error path when it didn't —
90
+ // never clean, never a hung promise. (exit(0) would let the exit
91
+ // handler no-op and hang the scan until the 300s outer timeout.)
92
+ process.exit(1);
93
+ }
94
+ }, HEAP_WATERMARK_CHECK_MS);
95
+ }
52
96
  try {
53
97
  // scanContext (optional) carries monitor-side info that opt-in scanners need
54
98
  // (e.g. trusted-dep-diff requires package name + version to query the registry).
@@ -60,5 +104,6 @@ const { appendWorkerMem, sampleIntervalMs } = require('../monitor/worker-mem.js'
60
104
  parentPort.postMessage({ type: 'error', message: err.message || String(err) });
61
105
  } finally {
62
106
  if (sampler) clearInterval(sampler);
107
+ if (watchdog) clearInterval(watchdog);
63
108
  }
64
109
  })();