muaddib-scanner 2.11.29 → 2.11.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -296,7 +296,7 @@ repos:
296
296
  | **FPR** (Benign random, v2.10.95 measure) | **7.0%** (14/200) | 200 random npm packages, stratified sampling |
297
297
  | **ADR** (Adversarial + Holdout) | **96.3%** (103/107) | 67 adversarial + 40 holdout (107 available on disk), global threshold=20 |
298
298
 
299
- **3602 tests** across 93 files. **234 rules** (229 RULES + 5 PARANOID).
299
+ **3664 tests** across 93 files. **234 rules** (229 RULES + 5 PARANOID).
300
300
 
301
301
  > **ML retrain methodology (v2.10.51):**
302
302
  > - Ground truth: 377 confirmed_malicious via auto-labeler (OSSF malicious-packages, GitHub Advisory Database, npm registry takedown correlation)
@@ -344,7 +344,7 @@ npm test
344
344
 
345
345
  ### Testing
346
346
 
347
- - **3602 tests** across 93 modular test files
347
+ - **3664 tests** across 93 modular test files
348
348
  - **56 fuzz tests** - Malformed inputs, ReDoS, unicode, binary
349
349
  - **Datadog 17K benchmark** - 14,587 confirmed malware samples (in-scope)
350
350
  - **Ground truth validation** - 67 real-world attacks (93.85% TPR@3, 86.2% TPR@20 — v2.10.95 measure)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.29",
3
+ "version": "2.11.31",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-05-24T21:46:43.561Z",
3
+ "timestamp": "2026-05-24T21:47:04.731Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -604,15 +604,35 @@ const F9_INFRA_KEYS = new Set([
604
604
  // Appearance in any threat message disqualifies F9.
605
605
  const F9_CREDENTIAL_FILE_RE = /\.npmrc\b|\.aws[\/\\](?:credentials|config)\b|\bid_rsa\b|\bid_ed25519\b|\.ssh[\/\\]|\.kube[\/\\]config\b|\.docker[\/\\]config\b|\.netrc\b|\.git-credentials\b|wallet\.dat\b|\bsecret_token\b/i;
606
606
 
607
- // Threat types that signal third-party network egress. F9 disqualifies on
608
- // any of these a legit MCP installer writes .mcp.json and reads env, it
609
- // does NOT download payloads or call back to attacker hosts.
610
- const F9_EXFIL_TYPES = new Set([
607
+ // v2.11.31 F14: split exfil types into HARD (real malware signals) vs
608
+ // SOFT (compound/intent threats that legitimately fire on AI proxies +
609
+ // MCP installers + vendor CLIs).
610
+ //
611
+ // Rescan of 107 high-score FPs against v2.11.30 (data/rescan/REPORT.md)
612
+ // showed C5 disqualifying 41/42 not-capped packages. Of those, 25 had
613
+ // ONLY soft signals — packages doing `process.env.ANTHROPIC_API_KEY` →
614
+ // POST `api.anthropic.com`. The intent_*/detached_credential_exfil/
615
+ // suspicious_dataflow threats fire on that combo even though the network
616
+ // destination is the legit first-party AI provider.
617
+ //
618
+ // HARD signals always indicate adversary capability: a network host that
619
+ // is NOT first-party (suspicious_domain), a binary fetch+exec
620
+ // (binary_dropper, download_exec_binary, fetch_decrypt_exec, remote_code_load),
621
+ // a non-npm dep (external_tarball_dep, dependency_url_suspicious), a
622
+ // shell-out channel (reverse_shell, curl_env_exfil, curl_exec), or a
623
+ // covert egress (blockchain_c2_resolution, dns_exfil). Shai-Hulud 2.0/3.0,
624
+ // postmark-mcp, and dep-confusion samples all emit ≥1 HARD signal.
625
+ //
626
+ // SOFT signals are co-occurrence intents — env_read + network_call in the
627
+ // same intent or file. Legit on AI proxies; relied on by the malware
628
+ // detection only when combined with a HARD signal.
629
+ //
630
+ // `F9_EXFIL_TYPES` is kept for back-compat with its original membership:
631
+ // HARD plus the three historical SOFT types (detached_credential_exfil was
632
+ // never a member). No external consumers as of v2.11.30; older audit scripts reference it.
633
+ const HARD_EXFIL_TYPES = new Set([
611
634
  'suspicious_domain',
612
- 'suspicious_dataflow',
613
635
  'remote_code_load',
614
- 'intent_credential_exfil',
615
- 'intent_command_exfil',
616
636
  'fetch_decrypt_exec',
617
637
  'reverse_shell',
618
638
  'binary_dropper',
@@ -625,6 +645,22 @@ const F9_EXFIL_TYPES = new Set([
625
645
  'dns_exfil'
626
646
  ]);
627
647
 
648
+ const SOFT_EXFIL_TYPES = new Set([
649
+ 'suspicious_dataflow',
650
+ 'intent_credential_exfil',
651
+ 'intent_command_exfil',
652
+ 'detached_credential_exfil'
653
+ ]);
654
+
655
+ // Back-compat union (HARD ∪ SOFT minus detached_credential_exfil which
656
+ // was never in F9_EXFIL_TYPES historically; preserve original membership).
657
+ const F9_EXFIL_TYPES = new Set([
658
+ ...HARD_EXFIL_TYPES,
659
+ 'suspicious_dataflow',
660
+ 'intent_credential_exfil',
661
+ 'intent_command_exfil'
662
+ ]);
663
+
628
664
  // MCP identity signals — package SELF-identifies as an MCP installer/server.
629
665
  const MCP_NAME_RE = /(?:^|[/_-])mcp(?:[_-]|$)|claude[_-]plugin[_-]mcp|mcp[_-](?:server|init|bridge|installer|memory|plugin|core|router|host|client|gateway|relay|stdio|transport|orchestrator)/i;
630
666
  const MCP_DESC_RE = /\bmodel context protocol\b|\bmcp[ -](?:server|installer|bridge|plugin|memory|core|gateway|relay|orchestrator|transport)\b|\b(?:claude|cursor|windsurf)[ -]mcp\b/i;
@@ -705,9 +741,12 @@ function mcpServerEnvAccess(result, meta) {
705
741
  return false;
706
742
  }
707
743
  }
708
- // C5 — no third-party exfil capability
744
+ // C5 — no HARD third-party exfil capability (v2.11.31 F14: SOFT compound
745
+ // intent threats are intrinsic to MCP installer behaviour — env_read +
746
+ // POST first-party endpoint — and no longer disqualify here. HARD signals
747
+ // — suspicious_domain, binary_dropper, remote_code_load, etc. — still do.)
709
748
  for (const t of threats) {
710
- if (F9_EXFIL_TYPES.has(t.type)) return false;
749
+ if (HARD_EXFIL_TYPES.has(t.type)) return false;
711
750
  }
712
751
  return true;
713
752
  }
@@ -791,9 +830,14 @@ function vendorCliSdk(result, meta) {
791
830
  if (threats.some(t => t.type === 'mcp_config_injection')) return false;
792
831
  // C4 — no install lifecycle hook
793
832
  if (hasLifecycleScripts(meta)) return false;
794
- // C5 + C6 — scan threats for exfil signal and credential-file mentions
833
+ // C5 + C6 — scan threats for HARD exfil signal and credential-file
834
+ // mentions. v2.11.31 F14: SOFT compound intent threats (suspicious_dataflow,
835
+ // intent_*, detached_credential_exfil) no longer disqualify C5 — a legit
836
+ // vendor CLI does env_read + POST own API endpoint, which trips those
837
+ // compounds without being malicious. HARD signals (suspicious_domain,
838
+ // binary_dropper, remote_code_load, external_tarball_dep, etc.) remain.
795
839
  for (const t of threats) {
796
- if (F9_EXFIL_TYPES.has(t.type)) return false; // C5
840
+ if (HARD_EXFIL_TYPES.has(t.type)) return false; // C5
797
841
  if (F9_CREDENTIAL_FILE_RE.test(String(t.message || ''))) return false; // C6
798
842
  }
799
843
  // C7 — vendor identity
@@ -927,12 +971,19 @@ function aiAgentBot(result, meta) {
927
971
  if (threats.length === 0) return false;
928
972
  // C2 — no install lifecycle hook
929
973
  if (hasLifecycleScripts(meta)) return false;
930
- // C3, C4, C7 fast threat-type checks
974
+ // C3 no mcp_config_injection (F9 priority)
975
+ for (const t of threats) {
976
+ if (t.type === 'mcp_config_injection') return false;
977
+ }
978
+ // C4 + C7 — v2.11.31 F14: unify hard-exfil veto across F9/F10/F11.
979
+ // Pre-F14 F11 only blocked on suspicious_domain / binary_dropper /
980
+ // download_exec_binary; now also blocks on remote_code_load (slopsquat
981
+ // staging), external_tarball_dep (non-npm dep), dependency_url_suspicious
982
+ // (attacker-controlled dep URL), curl_*/reverse_shell (shell exfil),
983
+ // dns_exfil + blockchain_c2_resolution (covert egress), fetch_decrypt_exec
984
+ // (multistage). Soft compound intents still don't disqualify here.
931
985
  for (const t of threats) {
932
- if (t.type === 'mcp_config_injection') return false; // C3
933
- if (t.type === 'suspicious_domain') return false; // C4
934
- if (t.type === 'binary_dropper') return false; // C7
935
- if (t.type === 'download_exec_binary') return false; // C7
986
+ if (HARD_EXFIL_TYPES.has(t.type)) return false;
936
987
  }
937
988
  // C5 — no credential file path in any message
938
989
  for (const t of threats) {
@@ -1379,5 +1430,10 @@ module.exports = {
1379
1430
  aiAgentBot,
1380
1431
  vendorMinifiedBundle,
1381
1432
  typosquatBenignLifecycle,
1382
- isBenignLifecycleScript
1433
+ isBenignLifecycleScript,
1434
+ // v2.11.31 F14: exposed so audit scripts can introspect the HARD/SOFT
1435
+ // classification when triaging cluster FPs.
1436
+ HARD_EXFIL_TYPES,
1437
+ SOFT_EXFIL_TYPES,
1438
+ F9_EXFIL_TYPES
1383
1439
  };
@@ -13,7 +13,7 @@ const { processQueue, ensureWorkers, drainWorkers, getTargetConcurrency, setTarg
13
13
  const { computeTarget, ADJUST_INTERVAL_MS, BASE_CONCURRENCY, resetDeltas } = require('./adaptive-concurrency.js');
14
14
  const { startHealthcheck } = require('./healthcheck.js');
15
15
  const { startDeferredWorker, stopDeferredWorker, persistDeferredQueue, restoreDeferredQueue, clearDeferredQueue } = require('./deferred-sandbox.js');
16
- const { cleanupOldArchives, getRetentionDays } = require('./tarball-archive.js');
16
+ const { cleanupOldArchives, getRetentionDays, startPeriodicCleanup } = require('./tarball-archive.js');
17
17
  const { clearMetadataCache } = require('../scanner/temporal-analysis.js');
18
18
  // Caches not previously cleared by handleMemoryPressure (OOM fix). These live
19
19
  // in the main thread and are populated by temporal-ast-diff and the typosquat
@@ -499,11 +499,17 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
499
499
  cleanupRunscOrphans();
500
500
  // Layer 3: Purge expired cached tarballs on startup
501
501
  purgeTarballCache();
502
- // Purge archived tarballs older than MUADDIB_ARCHIVE_RETENTION_DAYS (default 30).
503
- // Runs in-process at startup so no external cron is required.
502
+ // Purge archived tarballs older than MUADDIB_ARCHIVE_RETENTION_DAYS (default 7).
503
+ // Runs in-process at startup AND every 6h via setInterval so no external cron is required.
504
+ // Required to prevent the disk-fill cascade observed on 2026-05-24 (96GB filled,
505
+ // .claude.json corrupted, +89K monitor errors): startup-only cleanup never ran on a
506
+ // long-uptime service, and 30-day default + 4.5GB/day average exceeded the 96GB disk.
504
507
  try { cleanupOldArchives(getRetentionDays()); } catch (err) {
505
508
  console.warn(`[Archive] Startup cleanup failed: ${err.message}`);
506
509
  }
510
+ try { startPeriodicCleanup(); } catch (err) {
511
+ console.warn(`[Archive] Failed to start periodic cleanup: ${err.message}`);
512
+ }
507
513
 
508
514
  console.log(`
509
515
  ╔════════════════════════════════════════════╗
@@ -19,9 +19,10 @@ const { downloadToFile } = require('../shared/download.js');
19
19
  const ARCHIVE_DIR = process.env.MUADDIB_ARCHIVE_DIR || '/opt/muaddib/archive';
20
20
  const ARCHIVE_TIMEOUT_MS = 10_000;
21
21
 
22
- // Retention window for archived tarballs. Anything older is purged on startup.
23
- // Bounded to [1, 365] days; non-numeric or out-of-range values fall back to 30.
24
- const DEFAULT_RETENTION_DAYS = 30;
22
+ // Retention window for archived tarballs. Purged at startup and every 6h thereafter.
23
+ // Bounded to [1, 365] days; non-numeric or out-of-range values fall back to 7.
24
+ // Math: ~4.5GB/day average → 7d ≈ 31GB, fits in 96GB disk with safe margin.
25
+ const DEFAULT_RETENTION_DAYS = 7;
25
26
  function getRetentionDays() {
26
27
  const raw = process.env.MUADDIB_ARCHIVE_RETENTION_DAYS;
27
28
  if (raw === undefined || raw === '') return DEFAULT_RETENTION_DAYS;
@@ -30,6 +31,31 @@ function getRetentionDays() {
30
31
  return n;
31
32
  }
32
33
 
34
+ // Defensive disk-space gate. Skip archiving when free space falls below threshold,
35
+ // so a burst of suspects can't run the volume to 100% between periodic cleanups.
36
+ // Bounded to [1, 100] GB, default 5GB.
37
// Fallback free-space floor, in GB, used when the env override is absent or invalid.
const DEFAULT_MIN_FREE_GB = 5;

/**
 * Resolve the minimum free disk space required before archiving, in bytes.
 *
 * Reads MUADDIB_ARCHIVE_MIN_FREE_GB from the environment. The value is parsed
 * with parseInt (base 10) and accepted only when it lands in [1, 100]; an
 * unset, empty, non-numeric, or out-of-range value yields DEFAULT_MIN_FREE_GB.
 *
 * NOTE(review): the effective threshold can differ from DEFAULT_MIN_FREE_GB
 * when the env override is set, so log messages reporting the threshold
 * should derive it from this function rather than from the constant.
 *
 * @returns {number} Threshold in bytes (GB value × 1024³).
 */
function getMinFreeBytes() {
  const BYTES_PER_GB = 1024 * 1024 * 1024;
  const configured = process.env.MUADDIB_ARCHIVE_MIN_FREE_GB;

  // Unset or empty override: use the default without attempting to parse.
  if (configured === undefined || configured === '') {
    return DEFAULT_MIN_FREE_GB * BYTES_PER_GB;
  }

  const parsed = parseInt(configured, 10);
  const withinBounds = Number.isFinite(parsed) && parsed >= 1 && parsed <= 100;
  return (withinBounds ? parsed : DEFAULT_MIN_FREE_GB) * BYTES_PER_GB;
}
47
+
48
/**
 * Report whether the volume holding `targetDir` has more free space than the
 * required floor.
 *
 * The check is deliberately fail-open: if `fs.statfsSync` is unavailable
 * (Node <18.15) or any step throws, the function returns true so that a stat
 * problem never blocks archiving.
 *
 * When `targetDir` does not exist yet, the probe walks up to the nearest
 * existing ancestor. Checking only the immediate parent made the gate fail
 * open whenever two or more trailing path components were still missing.
 *
 * @param {string} targetDir - Directory that is about to receive archive files.
 * @param {number} [minFreeBytes] - Free-space floor in bytes. Defaults to
 *   getMinFreeBytes() when omitted.
 * @returns {boolean} true when available bytes (bavail × bsize) exceed the
 *   floor, or when the check could not be performed.
 */
function hasEnoughSpace(targetDir, minFreeBytes) {
  try {
    if (typeof fs.statfsSync !== 'function') return true; // Node <18.15 — fail-open
    const threshold = minFreeBytes ?? getMinFreeBytes();

    // Climb until something exists; path.dirname() of a filesystem root
    // returns the root itself, which terminates the loop.
    let probeDir = path.resolve(targetDir);
    while (!fs.existsSync(probeDir)) {
      const parentDir = path.dirname(probeDir);
      if (parentDir === probeDir) break;
      probeDir = parentDir;
    }

    const { bavail, bsize } = fs.statfsSync(probeDir);
    return bavail * bsize > threshold;
  } catch {
    return true; // never block archiving on a stat error
  }
}
58
+
33
59
  /**
34
60
  * Get the date string in YYYY-MM-DD format (Paris timezone, consistent with monitor).
35
61
  * Falls back to UTC if Intl is unavailable.
@@ -103,6 +129,14 @@ async function archiveSuspectTarball(packageName, version, tarballUrl, scanResul
103
129
  return false;
104
130
  }
105
131
 
132
+ // Defense layer 3: skip if disk is nearly full, even if retention is well-configured.
133
+ // Prevents a burst of malicious campaigns from blowing past the 7-day budget
134
+ // before the 6h periodic cleanup tick can catch up.
135
+ if (!hasEnoughSpace(ARCHIVE_DIR)) {
136
+ console.warn(`[Archive] Skip ${packageName}@${version}: free space below ${DEFAULT_MIN_FREE_GB}GB threshold`);
137
+ return false;
138
+ }
139
+
106
140
  // Ensure day directory exists
107
141
  fs.mkdirSync(dayDir, { recursive: true });
108
142
 
@@ -208,14 +242,35 @@ function cleanupOldArchives(retentionDays = getRetentionDays()) {
208
242
  return stats;
209
243
  }
210
244
 
245
// Default cadence for the background archive purge: every 6 hours.
const DEFAULT_PERIODIC_INTERVAL_MS = 6 * 60 * 60 * 1000;

/**
 * Periodically re-run cleanupOldArchives so a long-running daemon (no restarts
 * for weeks) can't accumulate archives past the retention window.
 *
 * The interval is validated before it reaches setInterval: Node coerces a
 * delay that is NaN, below 1, or above 2^31-1 to 1 ms, which would run the
 * purge in a hot loop. Such values fall back to the 6h default with a warning.
 *
 * The timer is unref()'d so it never keeps the event loop alive on shutdown.
 * Failures inside a tick are logged and swallowed so one bad run does not
 * stop later ticks.
 *
 * @param {number} [intervalMs=DEFAULT_PERIODIC_INTERVAL_MS] - Delay between
 *   cleanup runs in milliseconds; must be within [1, 2^31-1].
 * @returns {NodeJS.Timeout} The interval handle, usable with clearInterval().
 */
function startPeriodicCleanup(intervalMs = DEFAULT_PERIODIC_INTERVAL_MS) {
  const MAX_TIMER_DELAY_MS = 2 ** 31 - 1;

  // Number() keeps numeric strings working the way setInterval's own coercion did.
  const requestedMs = Number(intervalMs);
  let effectiveMs = requestedMs;
  if (!(requestedMs >= 1 && requestedMs <= MAX_TIMER_DELAY_MS)) {
    console.warn(`[Archive] Invalid periodic cleanup interval ${String(intervalMs)}; using ${DEFAULT_PERIODIC_INTERVAL_MS}ms`);
    effectiveMs = DEFAULT_PERIODIC_INTERVAL_MS;
  }

  const timer = setInterval(() => {
    try {
      cleanupOldArchives();
    } catch (err) {
      console.warn(`[Archive] Periodic cleanup failed: ${err.message}`);
    }
  }, effectiveMs);
  timer.unref();
  return timer;
}
262
+
211
263
  module.exports = {
212
264
  archiveSuspectTarball,
213
265
  cleanupOldArchives,
266
+ startPeriodicCleanup,
267
+ hasEnoughSpace,
214
268
  ARCHIVE_DIR,
215
269
  // Exported for testing
216
270
  sanitizeForFilename,
217
271
  sha256File,
218
272
  getArchiveDateString,
219
273
  getRetentionDays,
274
+ getMinFreeBytes,
220
275
  parseArchiveDayDir
221
276
  };