muaddib-scanner 2.11.120 → 2.11.123
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/{self-scan-v2.11.120.json → self-scan-v2.11.123.json} +1 -1
- package/src/monitor/queue.js +22 -3
- package/src/response/playbooks.js +7 -0
- package/src/rules/index.js +13 -0
- package/src/scanner/package.js +10 -3
- package/src/scanner/shell.js +24 -3
- package/src/scoring.js +17 -0
- package/src/shared/download.js +47 -0
- package/src/shared/extract-worker.js +30 -0
package/package.json
CHANGED
package/src/monitor/queue.js
CHANGED
|
@@ -12,7 +12,7 @@ const os = require('os');
|
|
|
12
12
|
const { Worker } = require('worker_threads');
|
|
13
13
|
const { runSandbox, tryAcquireSandboxSlot } = require('../sandbox/index.js');
|
|
14
14
|
const { sendWebhook } = require('../webhook.js');
|
|
15
|
-
const { downloadToFile, extractArchive, sanitizePackageName } = require('../shared/download.js');
|
|
15
|
+
const { downloadToFile, extractArchive, extractArchiveOffThread, sanitizePackageName } = require('../shared/download.js');
|
|
16
16
|
const { MAX_TARBALL_SIZE, getMaxFileSize } = require('../shared/constants.js');
|
|
17
17
|
const { acquireRegistrySlot, releaseRegistrySlot, awaitRateToken: awaitRateTokenForWorker, signal429: signal429ForWorker } = require('../shared/http-limiter.js');
|
|
18
18
|
const { loadCachedIOCs } = require('../ioc/updater.js');
|
|
@@ -178,6 +178,25 @@ const STATIC_SCAN_TIMEOUT_MS = 45_000; // 45s for static analysis only
|
|
|
178
178
|
const LARGE_PACKAGE_SIZE = 10 * 1024 * 1024; // 10MB
|
|
179
179
|
const RECENTLY_SCANNED_MAX = 50_000; // FIFO cap for the dedup Set (P0c — bounded resource)
|
|
180
180
|
|
|
181
|
+
// OOM fix (2026-06-19): archives larger than this (COMPRESSED, on-disk size) are
|
|
182
|
+
// extracted off the main thread in a worker, so the synchronous extractor
|
|
183
|
+
// (adm-zip extractAllTo / execFileSync tar) can no longer wedge the event loop and
|
|
184
|
+
// starve the RSS breaker / memory governor / EMERGENCY purge (all main-thread
|
|
185
|
+
// timers) → cgroup OOM. Confirmed culprit: data/loop-stalls.jsonl (extract:* up to
|
|
186
|
+
// 148s). Small archives extract inline — a worker spawn costs more than their
|
|
187
|
+
// sub-100ms extraction. Env-tunable via MUADDIB_INLINE_EXTRACT_MB.
|
|
188
|
+
const INLINE_EXTRACT_MAX_BYTES = (parseInt(process.env.MUADDIB_INLINE_EXTRACT_MB, 10) || 4) * 1024 * 1024;
|
|
189
|
+
|
|
190
|
+
// Extract inline for small archives, off-thread for large ones. compressedSize is
|
|
191
|
+
// the on-disk tarball size (reliable, unlike the registry unpackedSize metadata).
|
|
192
|
+
// Always returns a Promise so the call sites can uniformly `await`.
|
|
193
|
+
function extractGated(archivePath, destDir, compressedSize) {
|
|
194
|
+
if (compressedSize > INLINE_EXTRACT_MAX_BYTES) {
|
|
195
|
+
return extractArchiveOffThread(archivePath, destDir);
|
|
196
|
+
}
|
|
197
|
+
return Promise.resolve(extractArchive(archivePath, destDir));
|
|
198
|
+
}
|
|
199
|
+
|
|
181
200
|
// First-publish sandbox: max pending sandbox items before deferring first-publish clean scans
|
|
182
201
|
// Prevents starving T1a sandbox capacity when many first-publish packages arrive at once
|
|
183
202
|
const FIRST_PUBLISH_SANDBOX_MAX_QUEUE = parseInt(process.env.MUADDIB_FIRST_PUBLISH_SANDBOX_MAX_QUEUE, 10) || 10;
|
|
@@ -766,7 +785,7 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
766
785
|
let bypassQuickScan = false;
|
|
767
786
|
try {
|
|
768
787
|
const _crumb = beginOp('extract:quickscan', { name, version, unpackedSizeMb: Math.round(unpackedSize / 1024 / 1024) });
|
|
769
|
-
try { extractedDir =
|
|
788
|
+
try { extractedDir = await extractGated(tgzPath, tmpDir, fileSize); } finally { endOp(_crumb); }
|
|
770
789
|
|
|
771
790
|
const [pkgThreats, shellThreats] = await Promise.all([
|
|
772
791
|
scanPackageJson(extractedDir),
|
|
@@ -816,7 +835,7 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
816
835
|
|
|
817
836
|
if (!extractedDir) {
|
|
818
837
|
const _crumb = beginOp('extract:prework', { name, version, unpackedSizeMb: Math.round((meta.unpackedSize || 0) / 1024 / 1024) });
|
|
819
|
-
try { extractedDir =
|
|
838
|
+
try { extractedDir = await extractGated(tgzPath, tmpDir, fileSize); } finally { endOp(_crumb); }
|
|
820
839
|
}
|
|
821
840
|
|
|
822
841
|
// ML Phase 2a: Count JS files and detect test presence for enriched features
|
|
package/src/response/playbooks.js
CHANGED
|
@@ -807,6 +807,13 @@ const PLAYBOOKS = {
|
|
|
807
807
|
'Vecteur classique de dependency confusion: le code s\'execute a l\'installation. ' +
|
|
808
808
|
'NE PAS installer. Verifier le nom exact du package. Signaler sur npm.',
|
|
809
809
|
|
|
810
|
+
lifecycle_version99:
|
|
811
|
+
'CRITIQUE: Version a major repdigit "win-semver" (99/999/9999) + hook lifecycle = ' +
|
|
812
|
+
'dependency confusion complete. La version elevee force npm a resoudre vers ce package ' +
|
|
813
|
+
'public au lieu du package interne prive, et le hook execute le payload a l\'installation. ' +
|
|
814
|
+
'NE PAS installer. Verifier si un package interne du meme nom existe. Regenerer les secrets ' +
|
|
815
|
+
'exposes. Signaler sur npm.',
|
|
816
|
+
|
|
810
817
|
lifecycle_inline_exec:
|
|
811
818
|
'CRITIQUE: Script lifecycle avec node -e (execution inline). Le code s\'execute automatiquement a npm install. ' +
|
|
812
819
|
'NE PAS installer. Si deja installe: considerer la machine compromise. ' +
|
package/src/rules/index.js
CHANGED
|
@@ -2699,6 +2699,19 @@ const RULES = {
|
|
|
2699
2699
|
],
|
|
2700
2700
|
mitre: 'T1195.002'
|
|
2701
2701
|
},
|
|
2702
|
+
lifecycle_version99: {
|
|
2703
|
+
id: 'MUADDIB-COMPOUND-018',
|
|
2704
|
+
name: 'Lifecycle Hook + Dependency-Confusion Version',
|
|
2705
|
+
severity: 'CRITICAL',
|
|
2706
|
+
confidence: 'high',
|
|
2707
|
+
domain: 'malware',
|
|
2708
|
+
description: 'Version a major repdigit "win-semver" (99/999/9999) AVEC hook lifecycle (preinstall/install/postinstall). Chaine complete de dependency confusion: la version elevee force la resolution npm vers le package public malveillant au lieu du package interne prive, et le hook execute le payload a l\'installation. Compound: version_99_preinstall + lifecycle_script (gate-FPR-test 2026-06-19: 0/3901 FP).',
|
|
2709
|
+
references: [
|
|
2710
|
+
'https://medium.com/@alex.birsan/dependency-confusion-4a5d60fec610',
|
|
2711
|
+
'https://attack.mitre.org/techniques/T1195.002/'
|
|
2712
|
+
],
|
|
2713
|
+
mitre: 'T1195.002'
|
|
2714
|
+
},
|
|
2702
2715
|
lifecycle_inline_exec: {
|
|
2703
2716
|
id: 'MUADDIB-COMPOUND-004',
|
|
2704
2717
|
name: 'Lifecycle Hook + Inline Node Execution',
|
package/src/scanner/package.js
CHANGED
|
@@ -165,17 +165,24 @@ async function scanPackageJson(targetPath) {
|
|
|
165
165
|
}
|
|
166
166
|
}
|
|
167
167
|
|
|
168
|
-
// v2.10.89: Dependency confusion indicator —
|
|
168
|
+
// v2.10.89: Dependency confusion indicator — repdigit "win-semver" major with install hooks.
|
|
169
169
|
// Catches: @corpweb-ui/wmkt-library, @toprank/partner, @adac-fahrzeugplattform/ui
|
|
170
|
+
// v2.11.118 (2026-06-19, gate-FPR-test on the GHSA-2026 miss corpus): tightened from a
|
|
171
|
+
// plain `major >= 99` to the repdigit set {99, 999, 9999}. `>= 99` also fired on calendar
|
|
172
|
+
// versions (2026.x — 51 in the FP corpus) and legit high-version packages (chromedriver@148,
|
|
173
|
+
// taskcluster@100, @jetbrains/junie@1966, salt@3008) — masked only because the lone signal
|
|
174
|
+
// stayed HIGH (<20). Restricting to repdigit majors keeps 27/27 corpus dep-conf MALWARE at
|
|
175
|
+
// ZERO benign hits, and unblocks the lifecycle_version99 compound below (which would
|
|
176
|
+
// otherwise inherit the calendar FPs once escalated to CRITICAL).
|
|
170
177
|
const versionStr = pkg.version || '';
|
|
171
178
|
const majorVersion = parseInt(versionStr.split('.')[0], 10);
|
|
172
|
-
if (majorVersion >= 99) {
|
|
179
|
+
if ([99, 999, 9999].includes(majorVersion)) {
|
|
173
180
|
const hasInstallHook = ['preinstall', 'install', 'postinstall'].some(s => scripts[s]);
|
|
174
181
|
if (hasInstallHook) {
|
|
175
182
|
threats.push({
|
|
176
183
|
type: 'version_99_preinstall',
|
|
177
184
|
severity: 'HIGH',
|
|
178
|
-
message: `Version ${versionStr} (major
|
|
185
|
+
message: `Version ${versionStr} (repdigit win-semver major ${majorVersion}) with lifecycle hook — dependency confusion attack pattern.`,
|
|
179
186
|
file: 'package.json'
|
|
180
187
|
});
|
|
181
188
|
}
|
package/src/scanner/shell.js
CHANGED
|
@@ -8,7 +8,7 @@ const SHELL_EXCLUDED_DIRS = ['node_modules', '.git', '.muaddib-cache'];
|
|
|
8
8
|
const MALICIOUS_PATTERNS = [
|
|
9
9
|
{ pattern: /curl[^\n]{0,5000}\|[^\n]{0,5000}sh/m, name: 'curl_pipe_shell', severity: 'HIGH' },
|
|
10
10
|
{ pattern: /wget[^\n]{0,5000}&&[^\n]{0,5000}chmod[^\n]{0,5000}\+x/m, name: 'wget_chmod_exec', severity: 'HIGH' },
|
|
11
|
-
{ pattern: /
|
|
11
|
+
{ pattern: /(?:ba)?sh\s+-i\s+>&\s*\/dev\/tcp/m, name: 'reverse_shell', severity: 'CRITICAL' },
|
|
12
12
|
{ pattern: /nc\s+-e\s+\/bin\/(ba)?sh/m, name: 'netcat_shell', severity: 'CRITICAL' },
|
|
13
13
|
{ pattern: /rm\s+-rf\s+(~\/|\$HOME|\/home)/m, name: 'home_deletion', severity: 'CRITICAL' },
|
|
14
14
|
{ pattern: /shred.*\$HOME/m, name: 'shred_home', severity: 'CRITICAL' },
|
|
@@ -40,13 +40,26 @@ const MALICIOUS_PATTERNS = [
|
|
|
40
40
|
|
|
41
41
|
const SHEBANG_RE = /^#!.*\b(?:ba)?sh\b/;
|
|
42
42
|
|
|
43
|
-
|
|
43
|
+
// Source files (.js/.ts/...) can embed shell reverse-shell commands inside
|
|
44
|
+
// child_process exec/execSync/spawn string args. shell.js historically scanned only
|
|
45
|
+
// .sh/shebang files, so `execSync("bash -i >& /dev/tcp/...")` in index.js was invisible
|
|
46
|
+
// (missed npx-whoami-demo, 2026-06 — a revshell that scored grs 25 with type_reverse_shell=0).
|
|
47
|
+
// Apply ONLY the unambiguous reverse-shell command patterns to source files — NOT the
|
|
48
|
+
// context-dependent ones (curl|sh, systemctl, rm -rf, base64|sh) which would false-positive
|
|
49
|
+
// on JS string literals / build tooling. FPR-gate (2026-06-19): these 4 matched 0 port-check
|
|
50
|
+
// idioms (</dev/tcp, echo >/dev/tcp, nc -z) and 0 node_modules .js files.
|
|
51
|
+
const SOURCE_SCAN_PATTERN_NAMES = new Set([
|
|
52
|
+
'reverse_shell', 'netcat_shell', 'fifo_reverse_shell', 'fifo_nc_reverse_shell'
|
|
53
|
+
]);
|
|
54
|
+
const SOURCE_SCAN_EXTENSIONS = ['.js', '.cjs', '.mjs', '.ts', '.jsx', '.tsx'];
|
|
55
|
+
|
|
56
|
+
function scanFileContent(file, content, targetPath, threats, patterns = MALICIOUS_PATTERNS) {
|
|
44
57
|
// Strip comment lines to avoid false positives on documentation
|
|
45
58
|
const activeContent = content.split(/\r?\n/)
|
|
46
59
|
.filter(line => !line.trimStart().startsWith('#'))
|
|
47
60
|
.join('\n');
|
|
48
61
|
|
|
49
|
-
for (const { pattern, name, severity } of MALICIOUS_PATTERNS) {
|
|
62
|
+
for (const { pattern, name, severity } of patterns) {
|
|
50
63
|
if (pattern.test(activeContent)) {
|
|
51
64
|
threats.push({
|
|
52
65
|
type: name,
|
|
@@ -106,6 +119,14 @@ async function scanShellScripts(targetPath) {
|
|
|
106
119
|
} catch (e) { debugLog('[SHELL] readFile error:', e?.message); }
|
|
107
120
|
}
|
|
108
121
|
|
|
122
|
+
// Pass 3: source files (.js/.ts/...) — only the unambiguous reverse-shell command
|
|
123
|
+
// patterns (revshell commands embedded in child_process exec/spawn string args).
|
|
124
|
+
const sourcePatterns = MALICIOUS_PATTERNS.filter(p => SOURCE_SCAN_PATTERN_NAMES.has(p.name));
|
|
125
|
+
const sourceFiles = findFiles(targetPath, { extensions: SOURCE_SCAN_EXTENSIONS, excludedDirs: SHELL_EXCLUDED_DIRS });
|
|
126
|
+
forEachSafeFile(sourceFiles, (file, content) => {
|
|
127
|
+
scanFileContent(file, content, targetPath, threats, sourcePatterns);
|
|
128
|
+
});
|
|
129
|
+
|
|
109
130
|
return threats;
|
|
110
131
|
}
|
|
111
132
|
|
package/src/scoring.js
CHANGED
|
@@ -536,6 +536,23 @@ const SCORING_COMPOUNDS = [
|
|
|
536
536
|
message: 'Lifecycle hook on typosquat package — dependency confusion attack vector (scoring compound).',
|
|
537
537
|
fileFrom: 'typosquat_detected'
|
|
538
538
|
},
|
|
539
|
+
{
|
|
540
|
+
// 2026-06-19 detection-gap (GHSA-2026 misses): a repdigit "win-semver" version
|
|
541
|
+
// (version_99_preinstall: major 99/999/9999) + an install lifecycle hook is the full
|
|
542
|
+
// dependency-confusion RCE chain. version_99_preinstall alone is HIGH (10), below the
|
|
543
|
+
// 20 alert threshold, so these scored ~13 and were missed (e.g. @doaction/* @99.99.99).
|
|
544
|
+
// Gate-FPR-tested on the confirmed corpus: repdigit-major + lifecycle_script = 0/3901
|
|
545
|
+
// benign FP (the 3 repdigit-version FPs have no install hook), 22/42 GT MALWARE.
|
|
546
|
+
// Both signals are package.json-level (no sameFile / excludeIfBundled needed).
|
|
547
|
+
// requireOriginalSeverityHigh anchors on version_99_preinstall (HIGH) so a lone
|
|
548
|
+
// lifecycle_script (MEDIUM, fires on every install hook) can never trip this alone.
|
|
549
|
+
type: 'lifecycle_version99',
|
|
550
|
+
requires: ['version_99_preinstall', 'lifecycle_script'],
|
|
551
|
+
severity: 'CRITICAL',
|
|
552
|
+
message: 'Dependency-confusion version (repdigit major 99/999/9999) + install lifecycle hook — install-time RCE via dependency confusion (scoring compound).',
|
|
553
|
+
fileFrom: 'version_99_preinstall',
|
|
554
|
+
requireOriginalSeverityHigh: true
|
|
555
|
+
},
|
|
539
556
|
{
|
|
540
557
|
// RT-C1: Boundary-squat dep declared AND require()d in code → CRITICAL.
|
|
541
558
|
// Pattern Axios UNC1069 (March 2026): wrapper looks benign, payload is in the dep.
|
package/src/shared/download.js
CHANGED
|
@@ -340,6 +340,52 @@ function extractArchive(archivePath, destDir, options = {}) {
|
|
|
340
340
|
throw new Error(`Unsupported archive format for ${path.basename(archivePath)}`);
|
|
341
341
|
}
|
|
342
342
|
|
|
343
|
+
// Hard cap for off-thread extraction (OOM fix). Large legit packages can take
|
|
344
|
+
// 10-30s; 120s leaves headroom while still bounding a pathological extraction
|
|
345
|
+
// (the worker is terminated past this so a runaway cannot pin RSS forever).
|
|
346
|
+
const EXTRACT_OFFTHREAD_TIMEOUT_MS = parseInt(process.env.MUADDIB_EXTRACT_OFFTHREAD_TIMEOUT_MS, 10) || 120_000;
|
|
347
|
+
|
|
348
|
+
/**
|
|
349
|
+
* Off-main-thread variant of extractArchive: runs the SAME synchronous extractor
|
|
350
|
+
* in a worker thread (src/shared/extract-worker.js) so the caller's event loop
|
|
351
|
+
* stays responsive during extraction. See extract-worker.js header for why this
|
|
352
|
+
* is the OOM fix. Same return contract as extractArchive (resolves to the
|
|
353
|
+
* extracted package root); rejects on extraction error, worker crash, or timeout.
|
|
354
|
+
* Callers gate on archive size — small archives extract inline (cheaper than a
|
|
355
|
+
* worker spawn), large ones offload here.
|
|
356
|
+
*
|
|
357
|
+
* @param {string} archivePath
|
|
358
|
+
* @param {string} destDir - must already exist
|
|
359
|
+
* @param {Object} [options]
|
|
360
|
+
* @param {'targz'|'zip'} [options.format] - override auto-detection
|
|
361
|
+
* @param {number} [options.timeoutMs] - hard cap; worker terminated past it
|
|
362
|
+
* @returns {Promise<string>} extracted package root
|
|
363
|
+
*/
|
|
364
|
+
function extractArchiveOffThread(archivePath, destDir, options = {}) {
|
|
365
|
+
const { Worker } = require('worker_threads');
|
|
366
|
+
const timeoutMs = Number.isFinite(options.timeoutMs) ? options.timeoutMs : EXTRACT_OFFTHREAD_TIMEOUT_MS;
|
|
367
|
+
return new Promise((resolve, reject) => {
|
|
368
|
+
let settled = false;
|
|
369
|
+
const worker = new Worker(path.join(__dirname, 'extract-worker.js'), {
|
|
370
|
+
workerData: { archivePath, destDir, format: options.format || null }
|
|
371
|
+
});
|
|
372
|
+
const finish = (fn) => { if (settled) return; settled = true; clearTimeout(timer); fn(); };
|
|
373
|
+
const timer = setTimeout(() => finish(() => {
|
|
374
|
+
worker.terminate().finally(() =>
|
|
375
|
+
reject(new Error(`extractArchiveOffThread timeout after ${timeoutMs}ms: ${path.basename(archivePath)}`)));
|
|
376
|
+
}), timeoutMs);
|
|
377
|
+
if (timer && typeof timer.unref === 'function') timer.unref();
|
|
378
|
+
worker.once('message', (msg) => finish(() => {
|
|
379
|
+
worker.terminate();
|
|
380
|
+
if (msg && msg.ok) resolve(msg.dir);
|
|
381
|
+
else reject(new Error((msg && msg.error) || 'extractArchiveOffThread: worker reported failure'));
|
|
382
|
+
}));
|
|
383
|
+
worker.once('error', (err) => finish(() => { worker.terminate(); reject(err); }));
|
|
384
|
+
worker.once('exit', (code) => finish(() =>
|
|
385
|
+
reject(new Error(`extractArchiveOffThread: worker exited (${code}) without a result`))));
|
|
386
|
+
});
|
|
387
|
+
}
|
|
388
|
+
|
|
343
389
|
/**
|
|
344
390
|
* Backwards-compatible wrapper for the original tar.gz-only extractor.
|
|
345
391
|
* Kept because src/scanner/temporal-ast-diff.js and existing tests still
|
|
@@ -370,6 +416,7 @@ module.exports = {
|
|
|
370
416
|
downloadToFile,
|
|
371
417
|
extractTarGz,
|
|
372
418
|
extractArchive,
|
|
419
|
+
extractArchiveOffThread,
|
|
373
420
|
detectArchiveFormat,
|
|
374
421
|
sanitizePackageName,
|
|
375
422
|
isAllowedDownloadRedirect,
|
|
package/src/shared/extract-worker.js
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Worker-thread entry for off-main-thread archive extraction (OOM fix 2026-06-19).
|
|
5
|
+
*
|
|
6
|
+
* extractArchive() runs a SYNCHRONOUS extractor — adm-zip `extractAllTo` (.zip/
|
|
7
|
+
* .whl) or `execFileSync('tar', …)` (.tgz). On the main thread that wedges the
|
|
8
|
+
* event loop for the whole extraction (measured: up to 148s on large packages,
|
|
9
|
+
* data/loop-stalls.jsonl). With the loop wedged the RSS circuit breaker, the
|
|
10
|
+
* memory governor's RSS feed, and the EMERGENCY queue purge — all main-thread
|
|
11
|
+
* `setInterval` timers (daemon.js) — never fire, so RSS climbs to the cgroup
|
|
12
|
+
* MemoryMax unchecked → kernel SIGKILL. Running the same sync extractor here
|
|
13
|
+
* blocks only the WORKER loop; the parent's loop stays live so those defenses run.
|
|
14
|
+
*
|
|
15
|
+
* Contract: workerData = { archivePath, destDir, format }. Posts exactly one
|
|
16
|
+
* message — { ok: true, dir } on success, { ok: false, error } on failure — and
|
|
17
|
+
* never throws to the thread (the parent also handles a worker 'error', but an
|
|
18
|
+
* explicit message keeps the failure path uniform). All extraction hardening
|
|
19
|
+
* (zip-slip, zip-bomb uncompressed-size cap) lives in extractArchive and runs here.
|
|
20
|
+
*/
|
|
21
|
+
const { workerData, parentPort } = require('worker_threads');
|
|
22
|
+
const { extractArchive } = require('./download.js');
|
|
23
|
+
|
|
24
|
+
try {
|
|
25
|
+
const opts = workerData && workerData.format ? { format: workerData.format } : {};
|
|
26
|
+
const dir = extractArchive(workerData.archivePath, workerData.destDir, opts);
|
|
27
|
+
parentPort.postMessage({ ok: true, dir });
|
|
28
|
+
} catch (err) {
|
|
29
|
+
parentPort.postMessage({ ok: false, error: err && err.message ? err.message : String(err) });
|
|
30
|
+
}
|