npm - muaddib-scanner - Versions diffs - 2.11.29 → 2.11.31 - Mend

muaddib-scanner 2.11.29 → 2.11.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/README.md +2 -2
package/package.json +1 -1
package/{self-scan-v2.11.29.json → self-scan-v2.11.31.json} +1 -1
package/src/ml/feature-extractor.js +73 -17
package/src/monitor/daemon.js +9 -3
package/src/monitor/tarball-archive.js +58 -3

package/README.md CHANGED Viewed

@@ -296,7 +296,7 @@ repos:
 | **FPR** (Benign random, v2.10.95 measure) | **7.0%** (14/200) | 200 random npm packages, stratified sampling |
 | **ADR** (Adversarial + Holdout) | **96.3%** (103/107) | 67 adversarial + 40 holdout (107 available on disk), global threshold=20 |
-**3602 tests** across 93 files. **234 rules** (229 RULES + 5 PARANOID).
+**3664 tests** across 93 files. **234 rules** (229 RULES + 5 PARANOID).
 > **ML retrain methodology (v2.10.51):**
 > - Ground truth: 377 confirmed_malicious via auto-labeler (OSSF malicious-packages, GitHub Advisory Database, npm registry takedown correlation)
@@ -344,7 +344,7 @@ npm test
 ### Testing
-- **3602 tests** across 93 modular test files
+- **3664 tests** across 93 modular test files
 - **56 fuzz tests** - Malformed inputs, ReDoS, unicode, binary
 - **Datadog 17K benchmark** - 14,587 confirmed malware samples (in-scope)
 - **Ground truth validation** - 67 real-world attacks (93.85% TPR@3, 86.2% TPR@20 — v2.10.95 measure)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "muaddib-scanner",
-  "version": "2.11.29",
+  "version": "2.11.31",
   "description": "Supply-chain threat detection & response for npm & PyPI/Python",
   "main": "src/index.js",
   "bin": {

package/{self-scan-v2.11.29.json → self-scan-v2.11.31.json} RENAMED Viewed

@@ -1,6 +1,6 @@
 {
   "target": "node_modules",
-  "timestamp": "2026-05-24T21:46:43.561Z",
+  "timestamp": "2026-05-24T21:47:04.731Z",
   "threats": [
     {
       "type": "string_mutation_obfuscation",

package/src/ml/feature-extractor.js CHANGED Viewed

@@ -604,15 +604,35 @@ const F9_INFRA_KEYS = new Set([
 // Appearance in any threat message disqualifies F9.
 const F9_CREDENTIAL_FILE_RE = /\.npmrc\b|\.aws[\/\\](?:credentials|config)\b|\bid_rsa\b|\bid_ed25519\b|\.ssh[\/\\]|\.kube[\/\\]config\b|\.docker[\/\\]config\b|\.netrc\b|\.git-credentials\b|wallet\.dat\b|\bsecret_token\b/i;
-// Threat types that signal third-party network egress. F9 disqualifies on
-// any of these — a legit MCP installer writes .mcp.json and reads env, it
-// does NOT download payloads or call back to attacker hosts.
-const F9_EXFIL_TYPES = new Set([
+// v2.11.31 F14: split exfil types into HARD (real malware signals) vs
+// SOFT (compound/intent threats that legitimately fire on AI proxies +
+// MCP installers + vendor CLIs).
+//
+// Rescan of 107 high-score FPs against v2.11.30 (data/rescan/REPORT.md)
+// showed C5 disqualifying 41/42 not-capped packages. Of those, 25 had
+// ONLY soft signals — packages doing `process.env.ANTHROPIC_API_KEY` →
+// POST `api.anthropic.com`. The intent_*/detached_credential_exfil/
+// suspicious_dataflow threats fire on that combo even though the network
+// destination is the legit first-party AI provider.
+//
+// HARD signals always indicate adversary capability: a network host that
+// is NOT first-party (suspicious_domain), a binary fetch+exec
+// (binary_dropper, download_exec_binary, fetch_decrypt_exec, remote_code_load),
+// a non-npm dep (external_tarball_dep, dependency_url_suspicious), a
+// shell-out channel (reverse_shell, curl_env_exfil, curl_exec), or a
+// covert egress (blockchain_c2_resolution, dns_exfil). Shai-Hulud 2.0/3.0,
+// postmark-mcp, and dep-confusion samples all emit ≥1 HARD signal.
+//
+// SOFT signals are co-occurrence intents — env_read + network_call in the
+// same intent or file. Legit on AI proxies; relied on by the malware
+// detection only when combined with a HARD signal.
+//
+// `F9_EXFIL_TYPES` is kept for back-compat with its original membership
+// (HARD plus three of the four SOFT types, not the full union): no external
+// consumers as of v2.11.30, though older audit scripts still reference it.
+const HARD_EXFIL_TYPES = new Set([
   'suspicious_domain',
-  'suspicious_dataflow',
   'remote_code_load',
-  'intent_credential_exfil',
-  'intent_command_exfil',
   'fetch_decrypt_exec',
   'reverse_shell',
   'binary_dropper',
@@ -625,6 +645,22 @@ const F9_EXFIL_TYPES = new Set([
   'dns_exfil'
 ]);
+const SOFT_EXFIL_TYPES = new Set([ // SOFT exfil threat types; the C5 vetoes visible in this diff test HARD_EXFIL_TYPES only, so these no longer disqualify
+  'suspicious_dataflow',
+  'intent_credential_exfil',
+  'intent_command_exfil',
+  'detached_credential_exfil' // the one SOFT member that is absent from the back-compat F9_EXFIL_TYPES set
+]);
+// Back-compat set that preserves the original F9_EXFIL_TYPES membership: HARD plus three SOFT types.
+// detached_credential_exfil is omitted on purpose: it was never in F9_EXFIL_TYPES historically.
+const F9_EXFIL_TYPES = new Set([
+  ...HARD_EXFIL_TYPES, // spread copies the HARD members once, when this Set is constructed
+  'suspicious_dataflow', // the three literals below are the SOFT types that were in the pre-split set
+  'intent_credential_exfil',
+  'intent_command_exfil'
+]);
 // MCP identity signals — package SELF-identifies as an MCP installer/server.
 const MCP_NAME_RE = /(?:^|[/_-])mcp(?:[_-]|$)|claude[_-]plugin[_-]mcp|mcp[_-](?:server|init|bridge|installer|memory|plugin|core|router|host|client|gateway|relay|stdio|transport|orchestrator)/i;
 const MCP_DESC_RE = /\bmodel context protocol\b|\bmcp[ -](?:server|installer|bridge|plugin|memory|core|gateway|relay|orchestrator|transport)\b|\b(?:claude|cursor|windsurf)[ -]mcp\b/i;
@@ -705,9 +741,12 @@ function mcpServerEnvAccess(result, meta) {
       return false;
     }
   }
-  // C5 — no third-party exfil capability
+  // C5 — no HARD third-party exfil capability (v2.11.31 F14: SOFT compound
+  // intent threats are intrinsic to MCP installer behaviour — env_read +
+  // POST first-party endpoint — and no longer disqualify here. HARD signals
+  // — suspicious_domain, binary_dropper, remote_code_load, etc. — still do.)
   for (const t of threats) {
-    if (F9_EXFIL_TYPES.has(t.type)) return false;
+    if (HARD_EXFIL_TYPES.has(t.type)) return false;
   }
   return true;
 }
@@ -791,9 +830,14 @@ function vendorCliSdk(result, meta) {
   if (threats.some(t => t.type === 'mcp_config_injection')) return false;
   // C4 — no install lifecycle hook
   if (hasLifecycleScripts(meta)) return false;
-  // C5 + C6 — scan threats for exfil signal and credential-file mentions
+  // C5 + C6 — scan threats for HARD exfil signal and credential-file
+  // mentions. v2.11.31 F14: SOFT compound intent threats (suspicious_dataflow,
+  // intent_*, detached_credential_exfil) no longer disqualify C5 — a legit
+  // vendor CLI does env_read + POST own API endpoint, which trips those
+  // compounds without being malicious. HARD signals (suspicious_domain,
+  // binary_dropper, remote_code_load, external_tarball_dep, etc.) remain.
   for (const t of threats) {
-    if (F9_EXFIL_TYPES.has(t.type)) return false;       // C5
+    if (HARD_EXFIL_TYPES.has(t.type)) return false;       // C5
     if (F9_CREDENTIAL_FILE_RE.test(String(t.message || ''))) return false;  // C6
   }
   // C7 — vendor identity
@@ -927,12 +971,19 @@ function aiAgentBot(result, meta) {
   if (threats.length === 0) return false;
   // C2 — no install lifecycle hook
   if (hasLifecycleScripts(meta)) return false;
-  // C3, C4, C7 — fast threat-type checks
+  // C3 — no mcp_config_injection (F9 priority)
+  for (const t of threats) {
+    if (t.type === 'mcp_config_injection') return false;
+  }
+  // C4 + C7 — v2.11.31 F14: unify hard-exfil veto across F9/F10/F11.
+  // Pre-F14 F11 only blocked on suspicious_domain / binary_dropper /
+  // download_exec_binary; now also blocks on remote_code_load (slopsquat
+  // staging), external_tarball_dep (non-npm dep), dependency_url_suspicious
+  // (attacker-controlled dep URL), curl_*/reverse_shell (shell exfil),
+  // dns_exfil + blockchain_c2_resolution (covert egress), fetch_decrypt_exec
+  // (multistage). Soft compound intents still don't disqualify here.
   for (const t of threats) {
-    if (t.type === 'mcp_config_injection') return false;   // C3
-    if (t.type === 'suspicious_domain') return false;      // C4
-    if (t.type === 'binary_dropper') return false;         // C7
-    if (t.type === 'download_exec_binary') return false;   // C7
+    if (HARD_EXFIL_TYPES.has(t.type)) return false;
   }
   // C5 — no credential file path in any message
   for (const t of threats) {
@@ -1379,5 +1430,10 @@ module.exports = {
   aiAgentBot,
   vendorMinifiedBundle,
   typosquatBenignLifecycle,
-  isBenignLifecycleScript
+  isBenignLifecycleScript,
+  // v2.11.31 F14: exposed so audit scripts can introspect the HARD/SOFT
+  // classification when triaging cluster FPs.
+  HARD_EXFIL_TYPES,
+  SOFT_EXFIL_TYPES,
+  F9_EXFIL_TYPES
 };

package/src/monitor/daemon.js CHANGED Viewed

@@ -13,7 +13,7 @@ const { processQueue, ensureWorkers, drainWorkers, getTargetConcurrency, setTarg
 const { computeTarget, ADJUST_INTERVAL_MS, BASE_CONCURRENCY, resetDeltas } = require('./adaptive-concurrency.js');
 const { startHealthcheck } = require('./healthcheck.js');
 const { startDeferredWorker, stopDeferredWorker, persistDeferredQueue, restoreDeferredQueue, clearDeferredQueue } = require('./deferred-sandbox.js');
-const { cleanupOldArchives, getRetentionDays } = require('./tarball-archive.js');
+const { cleanupOldArchives, getRetentionDays, startPeriodicCleanup } = require('./tarball-archive.js');
 const { clearMetadataCache } = require('../scanner/temporal-analysis.js');
 // Caches not previously cleared by handleMemoryPressure (OOM fix). These live
 // in the main thread and are populated by temporal-ast-diff and the typosquat
@@ -499,11 +499,17 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
   cleanupRunscOrphans();
   // Layer 3: Purge expired cached tarballs on startup
   purgeTarballCache();
-  // Purge archived tarballs older than MUADDIB_ARCHIVE_RETENTION_DAYS (default 30).
-  // Runs in-process at startup so no external cron is required.
+  // Purge archived tarballs older than MUADDIB_ARCHIVE_RETENTION_DAYS (default 7).
+  // Runs in-process at startup AND every 6h via setInterval so no external cron is required.
+  // Required to prevent the disk-fill cascade observed on 2026-05-24 (96GB filled,
+  // .claude.json corrupted, +89K monitor errors): startup-only cleanup never ran on a
+  // long-uptime service, and 30-day default + 4.5GB/day average exceeded the 96GB disk.
   try { cleanupOldArchives(getRetentionDays()); } catch (err) {
     console.warn(`[Archive] Startup cleanup failed: ${err.message}`);
   }
+  try { startPeriodicCleanup(); } catch (err) {
+    console.warn(`[Archive] Failed to start periodic cleanup: ${err.message}`);
+  }
   console.log(`
 ╔════════════════════════════════════════════╗

package/src/monitor/tarball-archive.js CHANGED Viewed

@@ -19,9 +19,10 @@ const { downloadToFile } = require('../shared/download.js');
 const ARCHIVE_DIR = process.env.MUADDIB_ARCHIVE_DIR || '/opt/muaddib/archive';
 const ARCHIVE_TIMEOUT_MS = 10_000;
-// Retention window for archived tarballs. Anything older is purged on startup.
-// Bounded to [1, 365] days; non-numeric or out-of-range values fall back to 30.
-const DEFAULT_RETENTION_DAYS = 30;
+// Retention window for archived tarballs. Purged at startup and every 6h thereafter.
+// Bounded to [1, 365] days; non-numeric or out-of-range values fall back to 7.
+// Math: ~4.5GB/day average → 7d ≈ 31GB, fits in 96GB disk with safe margin.
+const DEFAULT_RETENTION_DAYS = 7;
 function getRetentionDays() {
   const raw = process.env.MUADDIB_ARCHIVE_RETENTION_DAYS;
   if (raw === undefined || raw === '') return DEFAULT_RETENTION_DAYS;
@@ -30,6 +31,31 @@ function getRetentionDays() {
   return n;
 }
+// Defensive disk-space gate. Archiving is skipped when free space is at or below the threshold,
+// so a burst of suspects can't run the volume to 100% between periodic cleanups.
+// MUADDIB_ARCHIVE_MIN_FREE_GB is read with parseInt; unset, non-numeric, or values outside [1, 100] fall back to 5GB.
+const DEFAULT_MIN_FREE_GB = 5; // NOTE(review): the skip warning in archiveSuspectTarball logs this constant, not the resolved threshold, so the message can be wrong when the env var is set
+function getMinFreeBytes() { // returns the free-space threshold in bytes
+  const raw = process.env.MUADDIB_ARCHIVE_MIN_FREE_GB;
+  let gb = DEFAULT_MIN_FREE_GB;
+  if (raw !== undefined && raw !== '') { // unset or empty keeps the default
+    const n = parseInt(raw, 10); // lenient parse: leading digits are accepted, so "7.9" and "7abc" both yield 7
+    if (Number.isFinite(n) && n >= 1 && n <= 100) gb = n; // NaN or out-of-range input is ignored rather than clamped
+  }
+  return gb * 1024 * 1024 * 1024; // GiB to bytes
+}
+function hasEnoughSpace(targetDir) { // true when the filesystem holding targetDir reports more free bytes than getMinFreeBytes(), or when that cannot be determined
+  try {
+    if (typeof fs.statfsSync !== 'function') return true; // fs.statfsSync does not exist on Node <18.15 — fail open
+    const dirForStat = fs.existsSync(targetDir) ? targetDir : path.dirname(targetDir); // stat the parent when targetDir has not been created yet (one level up only)
+    const s = fs.statfsSync(dirForStat);
+    return s.bavail * s.bsize > getMinFreeBytes(); // bavail * bsize = bytes available to unprivileged users; strict ">" means exactly-at-threshold counts as insufficient
+  } catch {
+    return true; // any error in the check itself fails open, so archiving is never blocked by a stat failure
+  }
+}
 /**
  * Get the date string in YYYY-MM-DD format (Paris timezone, consistent with monitor).
  * Falls back to UTC if Intl is unavailable.
@@ -103,6 +129,14 @@ async function archiveSuspectTarball(packageName, version, tarballUrl, scanResul
     return false;
   }
+  // Defense layer 3: skip if disk is nearly full, even if retention is well-configured.
+  // Prevents a burst of malicious campaigns from blowing past the 7-day budget
+  // before the 6h periodic cleanup tick can catch up.
+  if (!hasEnoughSpace(ARCHIVE_DIR)) {
+    console.warn(`[Archive] Skip ${packageName}@${version}: free space below ${DEFAULT_MIN_FREE_GB}GB threshold`);
+    return false;
+  }
   // Ensure day directory exists
   fs.mkdirSync(dayDir, { recursive: true });
@@ -208,14 +242,35 @@ function cleanupOldArchives(retentionDays = getRetentionDays()) {
   return stats;
 }
+/**
+ * Periodically re-run cleanupOldArchives so a long-running daemon (no restarts for
+ * weeks) can't accumulate archives past the retention window. Defaults to every 6h.
+ * .unref()'d so the timer never keeps the event loop alive on shutdown.
+ *
+ * Each tick calls cleanupOldArchives() with no argument, so the retention value comes
+ * from that function's default parameter on every run. An error thrown by a tick is
+ * logged with console.warn and does not stop the interval. The first run happens one
+ * interval after this call, not immediately.
+ *
+ * @param {number} [intervalMs=DEFAULT_PERIODIC_INTERVAL_MS] Delay between runs in
+ *   milliseconds. Passed to setInterval without validation.
+ * @returns {object} The handle returned by setInterval; pass it to clearInterval to stop.
+ *   Every call creates a new timer; no guard against starting twice is present here.
+ */
+const DEFAULT_PERIODIC_INTERVAL_MS = 6 * 60 * 60 * 1000; // 6 hours in milliseconds
+function startPeriodicCleanup(intervalMs = DEFAULT_PERIODIC_INTERVAL_MS) {
+  const timer = setInterval(() => {
+    try {
+      cleanupOldArchives(); // no argument: retention falls back to the callee's default parameter
+    } catch (err) {
+      console.warn(`[Archive] Periodic cleanup failed: ${err.message}`); // log and continue; the interval keeps running
+    }
+  }, intervalMs);
+  timer.unref(); // do not let this timer alone keep the process alive
+  return timer;
+}
 module.exports = {
   archiveSuspectTarball,
   cleanupOldArchives,
+  startPeriodicCleanup, // imported by daemon.js, which calls it once after the startup cleanup
+  hasEnoughSpace,
   ARCHIVE_DIR,
   // Exported for testing
   sanitizeForFilename,
   sha256File,
   getArchiveDateString,
   getRetentionDays, // also imported by daemon.js for the startup cleanup, so not test-only
+  getMinFreeBytes,
   parseArchiveDayDir
 };