muaddib-scanner 2.11.29 → 2.11.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -296,7 +296,7 @@ repos:
|
|
|
296
296
|
| **FPR** (Benign random, v2.10.95 measure) | **7.0%** (14/200) | 200 random npm packages, stratified sampling |
|
|
297
297
|
| **ADR** (Adversarial + Holdout) | **96.3%** (103/107) | 67 adversarial + 40 holdout (107 available on disk), global threshold=20 |
|
|
298
298
|
|
|
299
|
-
**
|
|
299
|
+
**3664 tests** across 93 files. **234 rules** (229 RULES + 5 PARANOID).
|
|
300
300
|
|
|
301
301
|
> **ML retrain methodology (v2.10.51):**
|
|
302
302
|
> - Ground truth: 377 confirmed_malicious via auto-labeler (OSSF malicious-packages, GitHub Advisory Database, npm registry takedown correlation)
|
|
@@ -344,7 +344,7 @@ npm test
|
|
|
344
344
|
|
|
345
345
|
### Testing
|
|
346
346
|
|
|
347
|
-
- **
|
|
347
|
+
- **3664 tests** across 93 modular test files
|
|
348
348
|
- **56 fuzz tests** - Malformed inputs, ReDoS, unicode, binary
|
|
349
349
|
- **Datadog 17K benchmark** - 14,587 confirmed malware samples (in-scope)
|
|
350
350
|
- **Ground truth validation** - 67 real-world attacks (93.85% TPR@3, 86.2% TPR@20 — v2.10.95 measure)
|
package/package.json
CHANGED
|
@@ -604,15 +604,35 @@ const F9_INFRA_KEYS = new Set([
|
|
|
604
604
|
// Appearance in any threat message disqualifies F9.
|
|
605
605
|
const F9_CREDENTIAL_FILE_RE = /\.npmrc\b|\.aws[\/\\](?:credentials|config)\b|\bid_rsa\b|\bid_ed25519\b|\.ssh[\/\\]|\.kube[\/\\]config\b|\.docker[\/\\]config\b|\.netrc\b|\.git-credentials\b|wallet\.dat\b|\bsecret_token\b/i;
|
|
606
606
|
|
|
607
|
-
//
|
|
608
|
-
//
|
|
609
|
-
//
|
|
610
|
-
|
|
607
|
+
// v2.11.31 F14: split exfil types into HARD (real malware signals) vs
|
|
608
|
+
// SOFT (compound/intent threats that legitimately fire on AI proxies +
|
|
609
|
+
// MCP installers + vendor CLIs).
|
|
610
|
+
//
|
|
611
|
+
// Rescan of 107 high-score FPs against v2.11.30 (data/rescan/REPORT.md)
|
|
612
|
+
// showed C5 disqualifying 41/42 not-capped packages. Of those, 25 had
|
|
613
|
+
// ONLY soft signals — packages doing `process.env.ANTHROPIC_API_KEY` →
|
|
614
|
+
// POST `api.anthropic.com`. The intent_*/detached_credential_exfil/
|
|
615
|
+
// suspicious_dataflow threats fire on that combo even though the network
|
|
616
|
+
// destination is the legit first-party AI provider.
|
|
617
|
+
//
|
|
618
|
+
// HARD signals always indicate adversary capability: a network host that
|
|
619
|
+
// is NOT first-party (suspicious_domain), a binary fetch+exec
|
|
620
|
+
// (binary_dropper, download_exec_binary, fetch_decrypt_exec, remote_code_load),
|
|
621
|
+
// a non-npm dep (external_tarball_dep, dependency_url_suspicious), a
|
|
622
|
+
// shell-out channel (reverse_shell, curl_env_exfil, curl_exec), or a
|
|
623
|
+
// covert egress (blockchain_c2_resolution, dns_exfil). Shai-Hulud 2.0/3.0,
|
|
624
|
+
// postmark-mcp, and dep-confusion samples all emit ≥1 HARD signal.
|
|
625
|
+
//
|
|
626
|
+
// SOFT signals are co-occurrence intents — env_read + network_call in the
|
|
627
|
+
// same intent or file. Legit on AI proxies; relied on by the malware
|
|
628
|
+
// detection only when combined with a HARD signal.
|
|
629
|
+
//
|
|
630
|
+
// `F9_EXFIL_TYPES` is kept as the union minus `detached_credential_exfil` for back-compat (no external
|
|
631
|
+
// consumers as of v2.11.30 but the symbol is referenced by older audit
|
|
632
|
+
// scripts).
|
|
633
|
+
const HARD_EXFIL_TYPES = new Set([
|
|
611
634
|
'suspicious_domain',
|
|
612
|
-
'suspicious_dataflow',
|
|
613
635
|
'remote_code_load',
|
|
614
|
-
'intent_credential_exfil',
|
|
615
|
-
'intent_command_exfil',
|
|
616
636
|
'fetch_decrypt_exec',
|
|
617
637
|
'reverse_shell',
|
|
618
638
|
'binary_dropper',
|
|
@@ -625,6 +645,22 @@ const F9_EXFIL_TYPES = new Set([
|
|
|
625
645
|
'dns_exfil'
|
|
626
646
|
]);
|
|
627
647
|
|
|
648
|
+
const SOFT_EXFIL_TYPES = new Set([
|
|
649
|
+
'suspicious_dataflow',
|
|
650
|
+
'intent_credential_exfil',
|
|
651
|
+
'intent_command_exfil',
|
|
652
|
+
'detached_credential_exfil'
|
|
653
|
+
]);
|
|
654
|
+
|
|
655
|
+
// Back-compat union (HARD ∪ SOFT minus detached_credential_exfil which
|
|
656
|
+
// was never in F9_EXFIL_TYPES historically; preserve original membership).
|
|
657
|
+
const F9_EXFIL_TYPES = new Set([
|
|
658
|
+
...HARD_EXFIL_TYPES,
|
|
659
|
+
'suspicious_dataflow',
|
|
660
|
+
'intent_credential_exfil',
|
|
661
|
+
'intent_command_exfil'
|
|
662
|
+
]);
|
|
663
|
+
|
|
628
664
|
// MCP identity signals — package SELF-identifies as an MCP installer/server.
|
|
629
665
|
const MCP_NAME_RE = /(?:^|[/_-])mcp(?:[_-]|$)|claude[_-]plugin[_-]mcp|mcp[_-](?:server|init|bridge|installer|memory|plugin|core|router|host|client|gateway|relay|stdio|transport|orchestrator)/i;
|
|
630
666
|
const MCP_DESC_RE = /\bmodel context protocol\b|\bmcp[ -](?:server|installer|bridge|plugin|memory|core|gateway|relay|orchestrator|transport)\b|\b(?:claude|cursor|windsurf)[ -]mcp\b/i;
|
|
@@ -705,9 +741,12 @@ function mcpServerEnvAccess(result, meta) {
|
|
|
705
741
|
return false;
|
|
706
742
|
}
|
|
707
743
|
}
|
|
708
|
-
// C5 — no third-party exfil capability
|
|
744
|
+
// C5 — no HARD third-party exfil capability (v2.11.31 F14: SOFT compound
|
|
745
|
+
// intent threats are intrinsic to MCP installer behaviour — env_read +
|
|
746
|
+
// POST first-party endpoint — and no longer disqualify here. HARD signals
|
|
747
|
+
// — suspicious_domain, binary_dropper, remote_code_load, etc. — still do.)
|
|
709
748
|
for (const t of threats) {
|
|
710
|
-
if (
|
|
749
|
+
if (HARD_EXFIL_TYPES.has(t.type)) return false;
|
|
711
750
|
}
|
|
712
751
|
return true;
|
|
713
752
|
}
|
|
@@ -791,9 +830,14 @@ function vendorCliSdk(result, meta) {
|
|
|
791
830
|
if (threats.some(t => t.type === 'mcp_config_injection')) return false;
|
|
792
831
|
// C4 — no install lifecycle hook
|
|
793
832
|
if (hasLifecycleScripts(meta)) return false;
|
|
794
|
-
// C5 + C6 — scan threats for exfil signal and credential-file
|
|
833
|
+
// C5 + C6 — scan threats for HARD exfil signal and credential-file
|
|
834
|
+
// mentions. v2.11.31 F14: SOFT compound intent threats (suspicious_dataflow,
|
|
835
|
+
// intent_*, detached_credential_exfil) no longer disqualify C5 — a legit
|
|
836
|
+
// vendor CLI does env_read + POST own API endpoint, which trips those
|
|
837
|
+
// compounds without being malicious. HARD signals (suspicious_domain,
|
|
838
|
+
// binary_dropper, remote_code_load, external_tarball_dep, etc.) remain.
|
|
795
839
|
for (const t of threats) {
|
|
796
|
-
if (
|
|
840
|
+
if (HARD_EXFIL_TYPES.has(t.type)) return false; // C5
|
|
797
841
|
if (F9_CREDENTIAL_FILE_RE.test(String(t.message || ''))) return false; // C6
|
|
798
842
|
}
|
|
799
843
|
// C7 — vendor identity
|
|
@@ -927,12 +971,19 @@ function aiAgentBot(result, meta) {
|
|
|
927
971
|
if (threats.length === 0) return false;
|
|
928
972
|
// C2 — no install lifecycle hook
|
|
929
973
|
if (hasLifecycleScripts(meta)) return false;
|
|
930
|
-
// C3
|
|
974
|
+
// C3 — no mcp_config_injection (F9 priority)
|
|
975
|
+
for (const t of threats) {
|
|
976
|
+
if (t.type === 'mcp_config_injection') return false;
|
|
977
|
+
}
|
|
978
|
+
// C4 + C7 — v2.11.31 F14: unify hard-exfil veto across F9/F10/F11.
|
|
979
|
+
// Pre-F14 F11 only blocked on suspicious_domain / binary_dropper /
|
|
980
|
+
// download_exec_binary; now also blocks on remote_code_load (slopsquat
|
|
981
|
+
// staging), external_tarball_dep (non-npm dep), dependency_url_suspicious
|
|
982
|
+
// (attacker-controlled dep URL), curl_*/reverse_shell (shell exfil),
|
|
983
|
+
// dns_exfil + blockchain_c2_resolution (covert egress), fetch_decrypt_exec
|
|
984
|
+
// (multistage). Soft compound intents still don't disqualify here.
|
|
931
985
|
for (const t of threats) {
|
|
932
|
-
if (t.type
|
|
933
|
-
if (t.type === 'suspicious_domain') return false; // C4
|
|
934
|
-
if (t.type === 'binary_dropper') return false; // C7
|
|
935
|
-
if (t.type === 'download_exec_binary') return false; // C7
|
|
986
|
+
if (HARD_EXFIL_TYPES.has(t.type)) return false;
|
|
936
987
|
}
|
|
937
988
|
// C5 — no credential file path in any message
|
|
938
989
|
for (const t of threats) {
|
|
@@ -1379,5 +1430,10 @@ module.exports = {
|
|
|
1379
1430
|
aiAgentBot,
|
|
1380
1431
|
vendorMinifiedBundle,
|
|
1381
1432
|
typosquatBenignLifecycle,
|
|
1382
|
-
isBenignLifecycleScript
|
|
1433
|
+
isBenignLifecycleScript,
|
|
1434
|
+
// v2.11.31 F14: exposed so audit scripts can introspect the HARD/SOFT
|
|
1435
|
+
// classification when triaging cluster FPs.
|
|
1436
|
+
HARD_EXFIL_TYPES,
|
|
1437
|
+
SOFT_EXFIL_TYPES,
|
|
1438
|
+
F9_EXFIL_TYPES
|
|
1383
1439
|
};
|
package/src/monitor/daemon.js
CHANGED
|
@@ -13,7 +13,7 @@ const { processQueue, ensureWorkers, drainWorkers, getTargetConcurrency, setTarg
|
|
|
13
13
|
const { computeTarget, ADJUST_INTERVAL_MS, BASE_CONCURRENCY, resetDeltas } = require('./adaptive-concurrency.js');
|
|
14
14
|
const { startHealthcheck } = require('./healthcheck.js');
|
|
15
15
|
const { startDeferredWorker, stopDeferredWorker, persistDeferredQueue, restoreDeferredQueue, clearDeferredQueue } = require('./deferred-sandbox.js');
|
|
16
|
-
const { cleanupOldArchives, getRetentionDays } = require('./tarball-archive.js');
|
|
16
|
+
const { cleanupOldArchives, getRetentionDays, startPeriodicCleanup } = require('./tarball-archive.js');
|
|
17
17
|
const { clearMetadataCache } = require('../scanner/temporal-analysis.js');
|
|
18
18
|
// Caches not previously cleared by handleMemoryPressure (OOM fix). These live
|
|
19
19
|
// in the main thread and are populated by temporal-ast-diff and the typosquat
|
|
@@ -499,11 +499,17 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
|
|
|
499
499
|
cleanupRunscOrphans();
|
|
500
500
|
// Layer 3: Purge expired cached tarballs on startup
|
|
501
501
|
purgeTarballCache();
|
|
502
|
-
// Purge archived tarballs older than MUADDIB_ARCHIVE_RETENTION_DAYS (default
|
|
503
|
-
// Runs in-process at startup so no external cron is required.
|
|
502
|
+
// Purge archived tarballs older than MUADDIB_ARCHIVE_RETENTION_DAYS (default 7).
|
|
503
|
+
// Runs in-process at startup AND every 6h via setInterval so no external cron is required.
|
|
504
|
+
// Required to prevent the disk-fill cascade observed on 2026-05-24 (96GB filled,
|
|
505
|
+
// .claude.json corrupted, +89K monitor errors): startup-only cleanup never ran on a
|
|
506
|
+
// long-uptime service, and 30-day default + 4.5GB/day average exceeded the 96GB disk.
|
|
504
507
|
try { cleanupOldArchives(getRetentionDays()); } catch (err) {
|
|
505
508
|
console.warn(`[Archive] Startup cleanup failed: ${err.message}`);
|
|
506
509
|
}
|
|
510
|
+
try { startPeriodicCleanup(); } catch (err) {
|
|
511
|
+
console.warn(`[Archive] Failed to start periodic cleanup: ${err.message}`);
|
|
512
|
+
}
|
|
507
513
|
|
|
508
514
|
console.log(`
|
|
509
515
|
╔════════════════════════════════════════════╗
|
package/src/monitor/tarball-archive.js
CHANGED
|
@@ -19,9 +19,10 @@ const { downloadToFile } = require('../shared/download.js');
|
|
|
19
19
|
const ARCHIVE_DIR = process.env.MUADDIB_ARCHIVE_DIR || '/opt/muaddib/archive';
|
|
20
20
|
const ARCHIVE_TIMEOUT_MS = 10_000;
|
|
21
21
|
|
|
22
|
-
// Retention window for archived tarballs.
|
|
23
|
-
// Bounded to [1, 365] days; non-numeric or out-of-range values fall back to
|
|
24
|
-
|
|
22
|
+
// Retention window for archived tarballs. Purged at startup and every 6h thereafter.
|
|
23
|
+
// Bounded to [1, 365] days; non-numeric or out-of-range values fall back to 7.
|
|
24
|
+
// Math: ~4.5GB/day average → 7d ≈ 31GB, fits in 96GB disk with safe margin.
|
|
25
|
+
const DEFAULT_RETENTION_DAYS = 7;
|
|
25
26
|
function getRetentionDays() {
|
|
26
27
|
const raw = process.env.MUADDIB_ARCHIVE_RETENTION_DAYS;
|
|
27
28
|
if (raw === undefined || raw === '') return DEFAULT_RETENTION_DAYS;
|
|
@@ -30,6 +31,31 @@ function getRetentionDays() {
|
|
|
30
31
|
return n;
|
|
31
32
|
}
|
|
32
33
|
|
|
34
|
+
// Disk-space safety gate: archiving is skipped once free space drops under this
// threshold, so a burst of suspects can't fill the volume between periodic cleanups.
// Configured in whole GB through MUADDIB_ARCHIVE_MIN_FREE_GB; values that are
// non-numeric or outside [1, 100] fall back to the 5GB default.
const DEFAULT_MIN_FREE_GB = 5;

/**
 * Resolve the minimum free space that must remain before a tarball is archived.
 *
 * @returns {number} Threshold in bytes.
 */
function getMinFreeBytes() {
  const BYTES_PER_GB = 1024 * 1024 * 1024;
  const configured = process.env.MUADDIB_ARCHIVE_MIN_FREE_GB;
  if (configured === undefined || configured === '') {
    return DEFAULT_MIN_FREE_GB * BYTES_PER_GB;
  }
  const parsed = parseInt(configured, 10);
  const inRange = Number.isFinite(parsed) && parsed >= 1 && parsed <= 100;
  return (inRange ? parsed : DEFAULT_MIN_FREE_GB) * BYTES_PER_GB;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Report whether the volume that will hold `targetDir` has more free space than
 * the configured minimum (getMinFreeBytes()).
 *
 * The check is deliberately fail-open: when statfs is unavailable or throws,
 * archiving is allowed rather than blocked.
 *
 * @param {string} targetDir - Directory archives are (or will be) written to.
 * @returns {boolean} false only when free space was measured and is at or
 *   below the threshold; true otherwise.
 */
function hasEnoughSpace(targetDir) {
  try {
    if (typeof fs.statfsSync !== 'function') return true; // Node <18.15 — fail-open
    // targetDir may not exist yet (it is created lazily, possibly several levels
    // deep). Stat the nearest existing ancestor instead of only the immediate
    // parent, so a missing parent does not silently disable the gate.
    let dirForStat = targetDir;
    while (!fs.existsSync(dirForStat)) {
      const parent = path.dirname(dirForStat);
      if (parent === dirForStat) break; // reached the filesystem root
      dirForStat = parent;
    }
    const s = fs.statfsSync(dirForStat);
    return s.bavail * s.bsize > getMinFreeBytes();
  } catch {
    return true; // never block archiving on a stat error
  }
}
|
|
58
|
+
|
|
33
59
|
/**
|
|
34
60
|
* Get the date string in YYYY-MM-DD format (Paris timezone, consistent with monitor).
|
|
35
61
|
* Falls back to UTC if Intl is unavailable.
|
|
@@ -103,6 +129,14 @@ async function archiveSuspectTarball(packageName, version, tarballUrl, scanResul
|
|
|
103
129
|
return false;
|
|
104
130
|
}
|
|
105
131
|
|
|
132
|
+
// Defense layer 3: skip if disk is nearly full, even if retention is well-configured.
|
|
133
|
+
// Prevents a burst of malicious campaigns from blowing past the 7-day budget
|
|
134
|
+
// before the 6h periodic cleanup tick can catch up.
|
|
135
|
+
if (!hasEnoughSpace(ARCHIVE_DIR)) {
|
|
136
|
+
// Report the effective threshold: MUADDIB_ARCHIVE_MIN_FREE_GB may override the default.
console.warn(`[Archive] Skip ${packageName}@${version}: free space below ${getMinFreeBytes() / (1024 ** 3)}GB threshold`);
|
|
137
|
+
return false;
|
|
138
|
+
}
|
|
139
|
+
|
|
106
140
|
// Ensure day directory exists
|
|
107
141
|
fs.mkdirSync(dayDir, { recursive: true });
|
|
108
142
|
|
|
@@ -208,14 +242,35 @@ function cleanupOldArchives(retentionDays = getRetentionDays()) {
|
|
|
208
242
|
return stats;
|
|
209
243
|
}
|
|
210
244
|
|
|
245
|
+
// Default cadence of the periodic archive cleanup: every 6 hours.
const DEFAULT_PERIODIC_INTERVAL_MS = 6 * 60 * 60 * 1000;

/**
 * Periodically re-run cleanupOldArchives so a long-running daemon (no restarts for
 * weeks) can't accumulate archives past the retention window.
 * The timer is .unref()'d so it never keeps the event loop alive on shutdown.
 * A failing cleanup run is logged and does not stop the schedule.
 *
 * @param {number} [intervalMs=DEFAULT_PERIODIC_INTERVAL_MS] - Delay between runs in
 *   milliseconds. Values that are non-numeric, below 1 or above 2^31-1 fall back to
 *   the default.
 * @returns {object} The timer returned by setInterval (pass it to clearInterval to stop).
 */
function startPeriodicCleanup(intervalMs = DEFAULT_PERIODIC_INTERVAL_MS) {
  // Node coerces a delay that is NaN, < 1 or > 2^31-1 to 1ms, which would turn the
  // cleanup into a busy loop. Use the default cadence for such values instead.
  const MAX_TIMER_DELAY_MS = 2 ** 31 - 1;
  const requested = Number(intervalMs);
  const delay = requested >= 1 && requested <= MAX_TIMER_DELAY_MS
    ? requested
    : DEFAULT_PERIODIC_INTERVAL_MS;

  const timer = setInterval(() => {
    try {
      cleanupOldArchives();
    } catch (err) {
      console.warn(`[Archive] Periodic cleanup failed: ${err.message}`);
    }
  }, delay);
  timer.unref();
  return timer;
}
|
|
262
|
+
|
|
211
263
|
module.exports = {
|
|
212
264
|
archiveSuspectTarball,
|
|
213
265
|
cleanupOldArchives,
|
|
266
|
+
startPeriodicCleanup,
|
|
267
|
+
hasEnoughSpace,
|
|
214
268
|
ARCHIVE_DIR,
|
|
215
269
|
// Exported for testing
|
|
216
270
|
sanitizeForFilename,
|
|
217
271
|
sha256File,
|
|
218
272
|
getArchiveDateString,
|
|
219
273
|
getRetentionDays,
|
|
274
|
+
getMinFreeBytes,
|
|
220
275
|
parseArchiveDayDir
|
|
221
276
|
};
|