muaddib-scanner 2.11.120 → 2.11.123

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.120",
3
+ "version": "2.11.123",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-06-18T19:56:37.339Z",
3
+ "timestamp": "2026-06-19T12:35:51.997Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -12,7 +12,7 @@ const os = require('os');
12
12
  const { Worker } = require('worker_threads');
13
13
  const { runSandbox, tryAcquireSandboxSlot } = require('../sandbox/index.js');
14
14
  const { sendWebhook } = require('../webhook.js');
15
- const { downloadToFile, extractArchive, sanitizePackageName } = require('../shared/download.js');
15
+ const { downloadToFile, extractArchive, extractArchiveOffThread, sanitizePackageName } = require('../shared/download.js');
16
16
  const { MAX_TARBALL_SIZE, getMaxFileSize } = require('../shared/constants.js');
17
17
  const { acquireRegistrySlot, releaseRegistrySlot, awaitRateToken: awaitRateTokenForWorker, signal429: signal429ForWorker } = require('../shared/http-limiter.js');
18
18
  const { loadCachedIOCs } = require('../ioc/updater.js');
@@ -178,6 +178,25 @@ const STATIC_SCAN_TIMEOUT_MS = 45_000; // 45s for static analysis only
178
178
  const LARGE_PACKAGE_SIZE = 10 * 1024 * 1024; // 10MB
179
179
  const RECENTLY_SCANNED_MAX = 50_000; // FIFO cap for the dedup Set (P0c — bounded resource)
180
180
 
// OOM fix (2026-06-19): archives whose COMPRESSED, on-disk size exceeds this
// threshold are extracted in a worker thread instead of on the main thread, so the
// synchronous extractor (adm-zip extractAllTo / execFileSync tar) can no longer wedge
// the event loop and starve the RSS breaker / memory governor / EMERGENCY purge (all
// main-thread timers) -> cgroup OOM. Confirmed culprit: data/loop-stalls.jsonl
// (extract:* up to 148s). Small archives extract inline because a worker spawn costs
// more than their sub-100ms extraction.
// Tunable via MUADDIB_INLINE_EXTRACT_MB (whole megabytes); an unset, non-numeric or
// zero value falls back to 4 MB.
const INLINE_EXTRACT_MAX_BYTES = (parseInt(process.env.MUADDIB_INLINE_EXTRACT_MB, 10) || 4) * 1024 * 1024;

// Extract inline for small archives, off-thread for large ones.
//
// archivePath    - archive to extract
// destDir        - directory to extract into
// compressedSize - on-disk size of the archive in bytes (reliable, unlike the
//                  registry unpackedSize metadata). A value that is not a number
//                  compares false against the threshold and therefore extracts inline.
//
// Always returns a Promise that settles with whatever the chosen extractor produces.
// Declared `async` so that a synchronous throw from the inline extractor surfaces as
// a rejection rather than escaping before any Promise exists; call sites can
// uniformly `await` (or chain `.catch`) on both paths.
async function extractGated(archivePath, destDir, compressedSize) {
  if (compressedSize > INLINE_EXTRACT_MAX_BYTES) {
    return extractArchiveOffThread(archivePath, destDir);
  }
  return extractArchive(archivePath, destDir);
}
199
+
181
200
  // First-publish sandbox: max pending sandbox items before deferring first-publish clean scans
182
201
  // Prevents starving T1a sandbox capacity when many first-publish packages arrive at once
183
202
  const FIRST_PUBLISH_SANDBOX_MAX_QUEUE = parseInt(process.env.MUADDIB_FIRST_PUBLISH_SANDBOX_MAX_QUEUE, 10) || 10;
@@ -766,7 +785,7 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
766
785
  let bypassQuickScan = false;
767
786
  try {
768
787
  const _crumb = beginOp('extract:quickscan', { name, version, unpackedSizeMb: Math.round(unpackedSize / 1024 / 1024) });
769
- try { extractedDir = extractArchive(tgzPath, tmpDir); } finally { endOp(_crumb); }
788
+ try { extractedDir = await extractGated(tgzPath, tmpDir, fileSize); } finally { endOp(_crumb); }
770
789
 
771
790
  const [pkgThreats, shellThreats] = await Promise.all([
772
791
  scanPackageJson(extractedDir),
@@ -816,7 +835,7 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
816
835
 
817
836
  if (!extractedDir) {
818
837
  const _crumb = beginOp('extract:prework', { name, version, unpackedSizeMb: Math.round((meta.unpackedSize || 0) / 1024 / 1024) });
819
- try { extractedDir = extractArchive(tgzPath, tmpDir); } finally { endOp(_crumb); }
838
+ try { extractedDir = await extractGated(tgzPath, tmpDir, fileSize); } finally { endOp(_crumb); }
820
839
  }
821
840
 
822
841
  // ML Phase 2a: Count JS files and detect test presence for enriched features
@@ -807,6 +807,13 @@ const PLAYBOOKS = {
807
807
  'Vecteur classique de dependency confusion: le code s\'execute a l\'installation. ' +
808
808
  'NE PAS installer. Verifier le nom exact du package. Signaler sur npm.',
809
809
 
810
+ lifecycle_version99:
811
+ 'CRITIQUE: Version a major repdigit "win-semver" (99/999/9999) + hook lifecycle = ' +
812
+ 'dependency confusion complete. La version elevee force npm a resoudre vers ce package ' +
813
+ 'public au lieu du package interne prive, et le hook execute le payload a l\'installation. ' +
814
+ 'NE PAS installer. Verifier si un package interne du meme nom existe. Regenerer les secrets ' +
815
+ 'exposes. Signaler sur npm.',
816
+
810
817
  lifecycle_inline_exec:
811
818
  'CRITIQUE: Script lifecycle avec node -e (execution inline). Le code s\'execute automatiquement a npm install. ' +
812
819
  'NE PAS installer. Si deja installe: considerer la machine compromise. ' +
@@ -2699,6 +2699,19 @@ const RULES = {
2699
2699
  ],
2700
2700
  mitre: 'T1195.002'
2701
2701
  },
2702
+ lifecycle_version99: {
2703
+ id: 'MUADDIB-COMPOUND-018',
2704
+ name: 'Lifecycle Hook + Dependency-Confusion Version',
2705
+ severity: 'CRITICAL',
2706
+ confidence: 'high',
2707
+ domain: 'malware',
2708
+ description: 'Version a major repdigit "win-semver" (99/999/9999) AVEC hook lifecycle (preinstall/install/postinstall). Chaine complete de dependency confusion: la version elevee force la resolution npm vers le package public malveillant au lieu du package interne prive, et le hook execute le payload a l\'installation. Compound: version_99_preinstall + lifecycle_script (gate-FPR-test 2026-06-19: 0/3901 FP).',
2709
+ references: [
2710
+ 'https://medium.com/@alex.birsan/dependency-confusion-4a5d60fec610',
2711
+ 'https://attack.mitre.org/techniques/T1195.002/'
2712
+ ],
2713
+ mitre: 'T1195.002'
2714
+ },
2702
2715
  lifecycle_inline_exec: {
2703
2716
  id: 'MUADDIB-COMPOUND-004',
2704
2717
  name: 'Lifecycle Hook + Inline Node Execution',
@@ -165,17 +165,24 @@ async function scanPackageJson(targetPath) {
165
165
  }
166
166
  }
167
167
 
168
- // v2.10.89: Dependency confusion indicator — version >= 99 with install hooks
168
+ // v2.10.89: Dependency confusion indicator — repdigit "win-semver" major with install hooks.
169
169
  // Catches: @corpweb-ui/wmkt-library, @toprank/partner, @adac-fahrzeugplattform/ui
170
+ // v2.11.118 (2026-06-19, gate-FPR-test on the GHSA-2026 miss corpus): tightened from a
171
+ // plain `major >= 99` to the repdigit set {99, 999, 9999}. `>= 99` also fired on calendar
172
+ // versions (2026.x — 51 in the FP corpus) and legit high-version packages (chromedriver@148,
173
+ // taskcluster@100, @jetbrains/junie@1966, salt@3008) — masked only because the lone signal
174
+ // stayed HIGH (<20). Restricting to repdigit majors keeps 27/27 corpus dep-conf MALWARE at
175
+ // ZERO benign hits, and unblocks the lifecycle_version99 compound below (which would
176
+ // otherwise inherit the calendar FPs once escalated to CRITICAL).
170
177
  const versionStr = pkg.version || '';
171
178
  const majorVersion = parseInt(versionStr.split('.')[0], 10);
172
- if (majorVersion >= 99) {
179
+ if ([99, 999, 9999].includes(majorVersion)) {
173
180
  const hasInstallHook = ['preinstall', 'install', 'postinstall'].some(s => scripts[s]);
174
181
  if (hasInstallHook) {
175
182
  threats.push({
176
183
  type: 'version_99_preinstall',
177
184
  severity: 'HIGH',
178
- message: `Version ${versionStr} (major >= 99) with lifecycle hook — dependency confusion attack pattern.`,
185
+ message: `Version ${versionStr} (repdigit win-semver major ${majorVersion}) with lifecycle hook — dependency confusion attack pattern.`,
179
186
  file: 'package.json'
180
187
  });
181
188
  }
@@ -8,7 +8,7 @@ const SHELL_EXCLUDED_DIRS = ['node_modules', '.git', '.muaddib-cache'];
8
8
  const MALICIOUS_PATTERNS = [
9
9
  { pattern: /curl[^\n]{0,5000}\|[^\n]{0,5000}sh/m, name: 'curl_pipe_shell', severity: 'HIGH' },
10
10
  { pattern: /wget[^\n]{0,5000}&&[^\n]{0,5000}chmod[^\n]{0,5000}\+x/m, name: 'wget_chmod_exec', severity: 'HIGH' },
11
- { pattern: /bash\s+-i\s+>&\s+\/dev\/tcp/m, name: 'reverse_shell', severity: 'CRITICAL' },
11
+ { pattern: /(?:ba)?sh\s+-i\s+>&\s*\/dev\/tcp/m, name: 'reverse_shell', severity: 'CRITICAL' },
12
12
  { pattern: /nc\s+-e\s+\/bin\/(ba)?sh/m, name: 'netcat_shell', severity: 'CRITICAL' },
13
13
  { pattern: /rm\s+-rf\s+(~\/|\$HOME|\/home)/m, name: 'home_deletion', severity: 'CRITICAL' },
14
14
  { pattern: /shred.*\$HOME/m, name: 'shred_home', severity: 'CRITICAL' },
@@ -40,13 +40,26 @@ const MALICIOUS_PATTERNS = [
40
40
 
41
41
  const SHEBANG_RE = /^#!.*\b(?:ba)?sh\b/;
42
42
 
43
- function scanFileContent(file, content, targetPath, threats) {
43
+ // Source files (.js/.ts/...) can embed shell reverse-shell commands inside
44
+ // child_process exec/execSync/spawn string args. shell.js historically scanned only
45
+ // .sh/shebang files, so `execSync("bash -i >& /dev/tcp/...")` in index.js was invisible
46
+ // (missed npx-whoami-demo, 2026-06 — a revshell that scored grs 25 with type_reverse_shell=0).
47
+ // Apply ONLY the unambiguous reverse-shell command patterns to source files — NOT the
48
+ // context-dependent ones (curl|sh, systemctl, rm -rf, base64|sh) which would false-positive
49
+ // on JS string literals / build tooling. FPR-gate (2026-06-19): these 4 matched 0 port-check
50
+ // idioms (</dev/tcp, echo >/dev/tcp, nc -z) and 0 node_modules .js files.
51
+ const SOURCE_SCAN_PATTERN_NAMES = new Set([
52
+ 'reverse_shell', 'netcat_shell', 'fifo_reverse_shell', 'fifo_nc_reverse_shell'
53
+ ]);
54
+ const SOURCE_SCAN_EXTENSIONS = ['.js', '.cjs', '.mjs', '.ts', '.jsx', '.tsx'];
55
+
56
+ function scanFileContent(file, content, targetPath, threats, patterns = MALICIOUS_PATTERNS) {
44
57
  // Strip comment lines to avoid false positives on documentation
45
58
  const activeContent = content.split(/\r?\n/)
46
59
  .filter(line => !line.trimStart().startsWith('#'))
47
60
  .join('\n');
48
61
 
49
- for (const { pattern, name, severity } of MALICIOUS_PATTERNS) {
62
+ for (const { pattern, name, severity } of patterns) {
50
63
  if (pattern.test(activeContent)) {
51
64
  threats.push({
52
65
  type: name,
@@ -106,6 +119,14 @@ async function scanShellScripts(targetPath) {
106
119
  } catch (e) { debugLog('[SHELL] readFile error:', e?.message); }
107
120
  }
108
121
 
122
+ // Pass 3: source files (.js/.ts/...) — only the unambiguous reverse-shell command
123
+ // patterns (revshell commands embedded in child_process exec/spawn string args).
124
+ const sourcePatterns = MALICIOUS_PATTERNS.filter(p => SOURCE_SCAN_PATTERN_NAMES.has(p.name));
125
+ const sourceFiles = findFiles(targetPath, { extensions: SOURCE_SCAN_EXTENSIONS, excludedDirs: SHELL_EXCLUDED_DIRS });
126
+ forEachSafeFile(sourceFiles, (file, content) => {
127
+ scanFileContent(file, content, targetPath, threats, sourcePatterns);
128
+ });
129
+
109
130
  return threats;
110
131
  }
111
132
 
package/src/scoring.js CHANGED
@@ -536,6 +536,23 @@ const SCORING_COMPOUNDS = [
536
536
  message: 'Lifecycle hook on typosquat package — dependency confusion attack vector (scoring compound).',
537
537
  fileFrom: 'typosquat_detected'
538
538
  },
539
+ {
540
+ // 2026-06-19 detection-gap (GHSA-2026 misses): a repdigit "win-semver" version
541
+ // (version_99_preinstall: major 99/999/9999) + an install lifecycle hook is the full
542
+ // dependency-confusion RCE chain. version_99_preinstall alone is HIGH (10), below the
543
+ // 20 alert threshold, so these scored ~13 and were missed (e.g. @doaction/* @99.99.99).
544
+ // Gate-FPR-tested on the confirmed corpus: repdigit-major + lifecycle_script = 0/3901
545
+ // benign FP (the 3 repdigit-version FPs have no install hook), 22/42 GT MALWARE.
546
+ // Both signals are package.json-level (no sameFile / excludeIfBundled needed).
547
+ // requireOriginalSeverityHigh anchors on version_99_preinstall (HIGH) so a lone
548
+ // lifecycle_script (MEDIUM, fires on every install hook) can never trip this alone.
549
+ type: 'lifecycle_version99',
550
+ requires: ['version_99_preinstall', 'lifecycle_script'],
551
+ severity: 'CRITICAL',
552
+ message: 'Dependency-confusion version (repdigit major 99/999/9999) + install lifecycle hook — install-time RCE via dependency confusion (scoring compound).',
553
+ fileFrom: 'version_99_preinstall',
554
+ requireOriginalSeverityHigh: true
555
+ },
539
556
  {
540
557
  // RT-C1: Boundary-squat dep declared AND require()d in code → CRITICAL.
541
558
  // Pattern Axios UNC1069 (March 2026): wrapper looks benign, payload is in the dep.
@@ -340,6 +340,52 @@ function extractArchive(archivePath, destDir, options = {}) {
340
340
  throw new Error(`Unsupported archive format for ${path.basename(archivePath)}`);
341
341
  }
342
342
 
// Upper bound, in milliseconds, on a single off-thread extraction (OOM fix). The
// original rationale: large legitimate packages can take 10-30s, so 120s leaves
// headroom while still bounding a pathological extraction. Past this limit the
// worker is asked to terminate so a runaway cannot pin RSS forever.
// Tunable via MUADDIB_EXTRACT_OFFTHREAD_TIMEOUT_MS; an unset, non-numeric or zero
// value falls back to 120000.
const EXTRACT_OFFTHREAD_TIMEOUT_MS = parseInt(process.env.MUADDIB_EXTRACT_OFFTHREAD_TIMEOUT_MS, 10) || 120_000;

/**
 * Runs archive extraction in a worker thread (extract-worker.js, next to this
 * module) so the calling thread's event loop is not blocked while the synchronous
 * extractor runs.
 *
 * The promise settles exactly once, on whichever happens first:
 *  - the worker posts a message: resolves with `msg.dir` when `msg.ok` is truthy,
 *    otherwise rejects with `msg.error` (or a generic failure message);
 *  - the worker emits 'error': rejects with that error;
 *  - the worker exits without having posted anything: rejects with the exit code;
 *  - the timeout elapses: the worker is terminated and the promise rejects.
 *
 * Callers are expected to gate on archive size; small archives are cheaper to
 * extract inline than to hand to a freshly spawned worker.
 *
 * @param {string} archivePath - archive to extract
 * @param {string} destDir - destination directory; must already exist
 * @param {Object} [options]
 * @param {'targz'|'zip'} [options.format] - override auto-detection
 * @param {number} [options.timeoutMs] - hard cap; any finite number replaces the default
 * @returns {Promise<string>} extracted package root
 */
function extractArchiveOffThread(archivePath, destDir, options = {}) {
  const { Worker } = require('worker_threads');

  let timeoutMs = EXTRACT_OFFTHREAD_TIMEOUT_MS;
  if (Number.isFinite(options.timeoutMs)) {
    timeoutMs = options.timeoutMs;
  }

  return new Promise((resolve, reject) => {
    const workerScript = path.join(__dirname, 'extract-worker.js');
    const worker = new Worker(workerScript, {
      workerData: { archivePath, destDir, format: options.format || null }
    });

    let done = false;
    let watchdog = null;

    // Runs `action` for the first outcome only; every later event is ignored.
    function settleOnce(action) {
      if (done) {
        return;
      }
      done = true;
      clearTimeout(watchdog);
      action();
    }

    function onTimeout() {
      settleOnce(() => {
        // The rejection is issued only after terminate() has settled.
        // NOTE(review): confirm terminate() settles promptly when the worker is
        // blocked inside a synchronous child-process call; until it does, the
        // caller keeps waiting past timeoutMs.
        worker.terminate().finally(() => {
          reject(new Error(`extractArchiveOffThread timeout after ${timeoutMs}ms: ${path.basename(archivePath)}`));
        });
      });
    }

    function onMessage(msg) {
      settleOnce(() => {
        worker.terminate();
        if (msg && msg.ok) {
          resolve(msg.dir);
          return;
        }
        reject(new Error((msg && msg.error) || 'extractArchiveOffThread: worker reported failure'));
      });
    }

    function onError(err) {
      settleOnce(() => {
        worker.terminate();
        reject(err);
      });
    }

    function onExit(code) {
      settleOnce(() => {
        reject(new Error(`extractArchiveOffThread: worker exited (${code}) without a result`));
      });
    }

    watchdog = setTimeout(onTimeout, timeoutMs);
    // Do not let the watchdog alone keep the process alive.
    if (watchdog && typeof watchdog.unref === 'function') {
      watchdog.unref();
    }

    worker.once('message', onMessage);
    worker.once('error', onError);
    worker.once('exit', onExit);
  });
}
388
+
343
389
  /**
344
390
  * Backwards-compatible wrapper for the original tar.gz-only extractor.
345
391
  * Kept because src/scanner/temporal-ast-diff.js and existing tests still
@@ -370,6 +416,7 @@ module.exports = {
370
416
  downloadToFile,
371
417
  extractTarGz,
372
418
  extractArchive,
419
+ extractArchiveOffThread,
373
420
  detectArchiveFormat,
374
421
  sanitizePackageName,
375
422
  isAllowedDownloadRedirect,
@@ -0,0 +1,30 @@
'use strict';

/**
 * Worker-thread entry point for off-main-thread archive extraction
 * (OOM fix 2026-06-19).
 *
 * Why a worker: extractArchive() is synchronous (adm-zip `extractAllTo` for
 * .zip/.whl, `execFileSync('tar', ...)` for .tgz). Per the original rationale, on
 * the main thread it blocked the event loop for the whole extraction (measured up
 * to 148s, data/loop-stalls.jsonl), which kept the main-thread `setInterval`
 * defenses in daemon.js (RSS circuit breaker, memory governor feed, EMERGENCY queue
 * purge) from firing until the cgroup MemoryMax was hit. Running the same
 * extractor here blocks only this worker's loop.
 *
 * Contract:
 *  - input:  workerData = { archivePath, destDir, format }
 *  - output: one message to the parent, either { ok: true, dir } on success or
 *            { ok: false, error } (error text only) on failure.
 * Extraction failures are reported through that message rather than thrown. All
 * extraction hardening (zip-slip, zip-bomb uncompressed-size cap) lives in
 * extractArchive and therefore runs here as well.
 */
const { workerData, parentPort } = require('worker_threads');
const { extractArchive } = require('./download.js');

// Reduces a caught value to the plain string sent back to the parent.
function toErrorText(failure) {
  if (failure && failure.message) {
    return failure.message;
  }
  return String(failure);
}

// Builds the extractor options (format override only when one was supplied) and
// runs the synchronous extraction, returning the extracted package root.
function runExtraction() {
  const extractOptions = {};
  if (workerData && workerData.format) {
    extractOptions.format = workerData.format;
  }
  return extractArchive(workerData.archivePath, workerData.destDir, extractOptions);
}

try {
  parentPort.postMessage({ ok: true, dir: runExtraction() });
} catch (failure) {
  parentPort.postMessage({ ok: false, error: toErrorText(failure) });
}