npm - muaddib-scanner - Versions diffs - 2.11.81 → 2.11.83 - Mend

muaddib-scanner 2.11.81 → 2.11.83

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

package/package.json +1 -1
package/{self-scan-v2.11.81.json → self-scan-v2.11.83.json} +1 -1
package/src/ml/feature-extractor.js +70 -0
package/src/monitor/queue.js +19 -5
package/src/monitor/scan-queue.js +47 -3
package/src/monitor/state.js +4 -0
package/src/monitor/webhook.js +20 -1
package/src/scoring.js +8 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "muaddib-scanner",
-  "version": "2.11.81",
+  "version": "2.11.83",
   "description": "Supply-chain threat detection & response for npm & PyPI/Python",
   "main": "src/index.js",
   "bin": {

package/{self-scan-v2.11.81.json → self-scan-v2.11.83.json} RENAMED Viewed

@@ -1,6 +1,6 @@
 {
   "target": "node_modules",
-  "timestamp": "2026-06-10T12:42:10.126Z",
+  "timestamp": "2026-06-10T14:16:30.189Z",
   "threats": [
     {
       "type": "string_mutation_obfuscation",

package/src/ml/feature-extractor.js CHANGED Viewed

@@ -751,6 +751,75 @@ function mcpServerEnvAccess(result, meta) {
   return true;
 }
+// ============================================================================
+// Feature 15 — mcp_server_benign_lifecycle (AUDIT 2, 2026-06)
+// ============================================================================
+//
+// Like F9 (mcpServerEnvAccess) but TOLERATES a benign install lifecycle. F9
+// vetoes on ANY preinstall/install/postinstall (its C3), which makes it
+// inoperative for the ~77% of legitimate MCP installers that ship a build/setup
+// hook (`husky install`, `node build.js`, `tsc`). Those packages stack
+// mcp_config_injection (CRIT) + suspicious_dataflow (CRIT, env→first-party POST)
+// + env_access (HIGH) + lifecycle_script (MEDIUM) and score ~150 on `muaddib
+// scan` — the recurring @recapp/mcp-style false positives in the daily report.
+//
+// F15 instead allows a lifecycle that is only flagged as a plain MEDIUM/LOW
+// `lifecycle_script`, and vetoes the moment the lifecycle does anything
+// malicious. Ground-truth safety (verified by replay before/after):
+//   GT-060 mcp-config-inject  → vetoed by lifecycle_file_exec (malicious postinstall)
+//   GT-088 defi-threat-scanner → vetoed by HARD exfil (suspicious_domain) + cred files
+//   GT-066 ai-agent-exploit    → never emits mcp_config_injection (C2 excludes it)
+//   GT-097 / GT-099            → HARD exfil / not an mcp_config_injection JS package
+// Same cap (30 = MEDIUM) and identity/provider-key machinery as F9.
+const F15_LIFECYCLE_MALICE_TYPES = new Set([
+  'lifecycle_file_exec',        // postinstall executes a file containing HIGH/CRIT threats
+  'lifecycle_dataflow',         // install-time credential read + network send (compound)
+  'lifecycle_shell_pipe',       // curl | sh during install
+  'lifecycle_missing_script',   // phantom install script (payload injected later)
+  'intent_credential_exfil',    // multi-file credential→network intent
+  'intent_command_exfil',
+  'detached_credential_exfil',
+  'staged_payload'
+]);
+/**
+ * F15 predicate: true when an MCP-identified package carries an
+ * mcp_config_injection finding and no finding trips one of the vetoes below.
+ *
+ * @param {{threats?: Array<{type: string, severity?: string, message?: string}>}} result
+ *   scan result; a missing or empty `threats` list yields false
+ * @param {object} meta package metadata, handed to _f9HasMcpIdentity unchanged
+ * @returns {boolean} true only when conditions C1–C5 all hold
+ */
+function mcpServerBenignLifecycle(result, meta) {
+  // C1 — MCP identity, decided by the shared F9 helper.
+  if (!_f9HasMcpIdentity(meta)) return false;
+  const findings = (result && result.threats) || [];
+  if (findings.length === 0) return false;
+  // C2 — at least one mcp_config_injection finding must be present.
+  const hasMcpInjection = findings.some((finding) => finding.type === 'mcp_config_injection');
+  if (!hasMcpInjection) return false;
+  // C3' (relaxed) — a lifecycle may exist, but none of the malicious lifecycle /
+  // intent types may appear, and a plain lifecycle_script must not be HIGH/CRITICAL.
+  for (const finding of findings) {
+    if (F15_LIFECYCLE_MALICE_TYPES.has(finding.type)) return false;
+    const escalatedLifecycle = finding.type === 'lifecycle_script' &&
+      ['HIGH', 'CRITICAL'].includes(finding.severity);
+    if (escalatedLifecycle) return false;
+  }
+  // C4 — env/credential findings may only name recognised provider keys or infra
+  // variables, and their message must not match the credential-file pattern.
+  const envFindingTypes = ['env_access', 'credential_regex_harvest', 'env_charcode_reconstruction'];
+  for (const finding of findings) {
+    if (envFindingTypes.includes(finding.type)) {
+      const text = String(finding.message || '');
+      if (F9_CREDENTIAL_FILE_RE.test(text)) return false;
+      // Every ALL-CAPS token of three or more characters is treated as a variable name.
+      const names = text.match(/\b[A-Z][A-Z0-9_]{2,}\b/g) || [];
+      for (const name of names) {
+        const recognised = KNOWN_PROVIDER_KEYS_LITERAL.has(name) ||
+          PROVIDER_KEY_SUFFIX_RE.test(name) ||
+          F9_INFRA_KEYS.has(name);
+        if (!recognised) return false;
+      }
+    }
+  }
+  // C5 — any finding whose type is in HARD_EXFIL_TYPES vetoes the cap.
+  return findings.every((finding) => !HARD_EXFIL_TYPES.has(finding.type));
+}
 // ============================================================================
 // Feature 10 — vendor_cli_sdk (v2.11.23, audit week3 cluster, 96 FP)
 // ============================================================================
@@ -1426,6 +1495,7 @@ module.exports = {
   placeholderAntiDepConfusion,
   installScriptNoNetworkEgress,
   mcpServerEnvAccess,
+  mcpServerBenignLifecycle,
   vendorCliSdk,
   aiAgentBot,
   vendorMinifiedBundle,

package/src/monitor/queue.js CHANGED Viewed

@@ -80,7 +80,7 @@ const {
 // From ./ingestion.js
 const { getNpmLatestTarball, getPyPITarballUrl } = require('./ingestion.js');
-const { enqueueScan } = require('./scan-queue.js');
+const { enqueueScan, dequeueScan } = require('./scan-queue.js');
 // From ./tarball-archive.js
 const { archiveSuspectTarball } = require('./tarball-archive.js');
@@ -259,7 +259,9 @@ function recordTrainingSample(result, params) {
       maxSeverity: result.summary ? result.summary.riskLevel : null,
       types: [...new Set((result.threats || []).map(t => t.type))],
       sandbox: params.sandboxResult ? 'run' : 'none',
-      source: 'scan'
+      source: 'scan',
+      // AUDIT-A1: stamped on `result` in scanPackage (single source of truth)
+      firstPublish: !!(result && result._firstPublish)
     });
   } catch (err) {
     // Non-fatal: ML export must never crash the monitor
@@ -673,6 +675,12 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
     // First-publish detection: used for sandbox priority below
     const isFirstPublish = cacheTrigger && cacheTrigger.reason === 'first_publish';
+    // AUDIT-A1 observability: stamp once so every recordTrainingSample(result, …) call
+    // below carries firstPublish into the scan-ledger (all ~10 call sites share this
+    // `result`). Pairs with the firstPublish flag on the eviction-drop ledger entries so
+    // first-publish coverage (scanned vs dropped) becomes measurable. The "Phase 2a"
+    // comment below promised this; the threading was missing until now.
+    result._firstPublish = isFirstPublish;
     // npm registry metadata was fetched ONCE before the worker spawn (hoisted above
     // to feed scanContext.npmRegistryMeta) and is reused here for: isFirstPublishHigh-
@@ -1171,9 +1179,14 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
           console.log(`[MONITOR] REPUTATION BYPASS: ${name} has high-confidence threat — using raw score`);
         }
-        // Record daily alert with post-reputation score for top suspects ranking
+        // Record daily alert with post-reputation score for top suspects ranking.
+        // AUDIT-C: carry the distinct CRITICAL/HIGH threat types so the daily report
+        // can annotate MCP suspects with their signals (visual triage, no scoring change).
         if (dailyAlerts.length < MAX_DAILY_ALERTS) {
-          dailyAlerts.push({ name, version, ecosystem, findingsCount: result.summary.total, score: adjustedResult.summary.riskScore || 0, tier });
+          const signals = [...new Set((result.threats || [])
+            .filter(t => t.severity === 'CRITICAL' || t.severity === 'HIGH')
+            .map(t => t.type))].slice(0, 6);
+          dailyAlerts.push({ name, version, ecosystem, findingsCount: result.summary.total, score: adjustedResult.summary.riskScore || 0, tier, signals });
         }
         // LLM Detective: AI-powered analysis for T1a/T1b suspects
         // Skip for fast-track (large boring packages — LLM analysis adds 10-30s for no value)
@@ -1354,7 +1367,8 @@ async function _spawnWorker(scanQueue, stats, dailyAlerts, recentlyScanned, down
   _activeWorkers++;
   try {
     while (scanQueue.length > 0 && _activeWorkers <= _targetConcurrency) {
-      const item = scanQueue.shift();
+      // AUDIT A2: FIFO by default; priority dequeue when MUADDIB_PRIORITY_DEQUEUE=1.
+      const item = dequeueScan(scanQueue);
       if (!item) break;
       await processQueueItem(item, stats, dailyAlerts, recentlyScanned, downloadsCache, scanQueue, sandboxAvailable);
     }

package/src/monitor/scan-queue.js CHANGED Viewed

@@ -68,7 +68,9 @@ function enqueueScan(scanQueue, item, stats, max = MAX_SCAN_QUEUE) {
       if (evicted && evicted.name) {
         require('./state.js').appendScanLedger({
           name: evicted.name, version: evicted.version, ecosystem: evicted.ecosystem,
-          outcome: 'dropped', source: protectedFallback ? 'queue_cap_protected' : 'queue_cap'
+          outcome: 'dropped', source: protectedFallback ? 'queue_cap_protected' : 'queue_cap',
+          // AUDIT-A1 observability (see evictFromScanQueueBulk)
+          firstPublish: !!evicted.firstPublish, isBurstExtra: !!evicted.isATOBurstExtra
         });
       }
     } catch { /* ledger is best-effort */ }
@@ -136,7 +138,12 @@ function evictFromScanQueueBulk(scanQueue, targetKeep, source = 'bulk_evict', le
           appendLedger({
             name: item.name, version: item.version, ecosystem: item.ecosystem,
             outcome: 'dropped',
-            source: _isProtected(item) ? `${source}_protected` : source
+            source: _isProtected(item) ? `${source}_protected` : source,
+            // AUDIT-A1 observability: record whether a DROPPED item was a first-publish
+            // (real coverage loss) vs a burst-extra (version-spam, expected). Lets us
+            // measure if the memory breaker is evicting genuine new packages.
+            firstPublish: !!item.firstPublish,
+            isBurstExtra: !!item.isATOBurstExtra
           });
         } catch { /* ledger is best-effort — must never break the breaker */ }
       }
@@ -149,4 +156,41 @@ function evictFromScanQueueBulk(scanQueue, targetKeep, source = 'bulk_evict', le
   return { dropped: toDrop, droppedProtected };
 }
-module.exports = { enqueueScan, evictFromScanQueueBulk, isProtected: _isProtected, MAX_SCAN_QUEUE };
+// ── AUDIT A2: optional priority dequeue (gated OFF by default) ──────────────
+// Default dequeue is strict FIFO (scanQueue.shift()). When enabled, the worker pulls
+// the OLDEST high-value item (first-publish / known-malicious / burst-MAIN) within a
+// bounded head-window before falling back to FIFO — so a genuine new package never
+// ages out behind a deep version-spam backlog. Gated behind an env flag so deploying
+// the code is INERT until ops flips it on (tune on the AUDIT-A1 first-publish-coverage
+// data first — see brief). Burst EXTRAS (isATOBurstExtra) and regular items stay FIFO.
+// Priority dequeue is on only when MUADDIB_PRIORITY_DEQUEUE is exactly '1' or 'true'.
+const PRIORITY_DEQUEUE = ['1', 'true'].includes(process.env.MUADDIB_PRIORITY_DEQUEUE);
+// Head-window size: a positive integer parsed (base 10) from
+// MUADDIB_PRIORITY_DEQUEUE_WINDOW; anything else falls back to 2048.
+const PRIORITY_DEQUEUE_WINDOW = (() => {
+  const parsed = parseInt(process.env.MUADDIB_PRIORITY_DEQUEUE_WINDOW, 10);
+  if (!Number.isFinite(parsed) || parsed <= 0) return 2048;
+  return parsed;
+})();
+// True for queue items that priority dequeue may pull ahead of the head: items
+// flagged firstPublish or isIOCMatch, or isBurst items not flagged isATOBurstExtra.
+function _isPriority(item) {
+  if (!item) return false;
+  if (item.firstPublish || item.isIOCMatch) return true;
+  return Boolean(item.isBurst) && !item.isATOBurstExtra;
+}
+/**
+ * Remove and return the next queue item to scan.
+ *
+ * With priority off, or with an empty queue, this is a plain `scanQueue.shift()`.
+ * With priority on, the first item inside the head-window for which `_isPriority`
+ * is true is taken out instead; when the window holds none, the head is shifted
+ * exactly as in FIFO mode.
+ *
+ * @param {Array} scanQueue queue to take from; mutated in place
+ * @param {{priority?: boolean, window?: number}} [opts] test overrides for the
+ *   PRIORITY_DEQUEUE flag and the PRIORITY_DEQUEUE_WINDOW size
+ * @returns {*} the removed item, or undefined when the queue is empty
+ */
+function dequeueScan(scanQueue, opts = {}) {
+  const usePriority = opts.priority === undefined ? PRIORITY_DEQUEUE : opts.priority;
+  if (usePriority && scanQueue.length !== 0) {
+    // A falsy window override (0, undefined) falls back to the configured size.
+    const limit = Math.min(scanQueue.length, opts.window || PRIORITY_DEQUEUE_WINDOW);
+    let idx = 0;
+    while (idx < limit && !_isPriority(scanQueue[idx])) idx += 1;
+    // idx === 0 (head is priority) and idx >= limit (nothing found) both reduce to shift().
+    if (idx > 0 && idx < limit) return scanQueue.splice(idx, 1)[0];
+  }
+  return scanQueue.shift();
+}
+module.exports = { enqueueScan, evictFromScanQueueBulk, dequeueScan, isProtected: _isProtected, MAX_SCAN_QUEUE };

package/src/monitor/state.js CHANGED Viewed

@@ -1010,6 +1010,10 @@ function appendScanLedger(e) {
       types: Array.isArray(e.types) ? e.types.slice(0, 12) : [],
       sandbox: e.sandbox || 'none',
       firstPublish: !!e.firstPublish,
+      // AUDIT-A1: version-spam marker on dropped burst-extras — lets the coverage
+      // rollup separate "first-publish lost" from "spam extra dropped (expected)".
+      // Only written when true to keep the 127MB ledger lean.
+      ...(e.isBurstExtra ? { isBurstExtra: true } : {}),
       source: e.source || 'scan'
     };
     fs.appendFileSync(SCAN_LEDGER_FILE, JSON.stringify(entry) + '\n', 'utf8');

package/src/monitor/webhook.js CHANGED Viewed

@@ -1094,6 +1094,22 @@ function formatLedgerField(rollup) {
   return { name: 'Ledger (24h)', value: lines.join('\n'), inline: false };
 }
+// AUDIT-C: MCP self-identity by package name (matches the F9/F15 MCP_NAME_RE family in
+// feature-extractor.js — kept local to avoid importing the ML module into the embed path).
+const _MCP_TRIAGE_NAME_RE = /(?:^|[/_-])mcp(?:[_-]|$)|mcp[_-](?:server|init|bridge|installer|memory|plugin|core|router|host|client|gateway|relay|stdio|transport|orchestrator)/i;
+/**
+ * Build the triage suffix appended to a daily-report top-suspect line.
+ *
+ * @param {{name?: string, package?: string, signals?: string[]}} a alert entry
+ * @returns {string} '' when the package name does not match _MCP_TRIAGE_NAME_RE;
+ *   otherwise ' 🔌 [MCP: sig1, sig2]' listing at most three entries of `a.signals`,
+ *   or ' 🔌 [MCP]' when no signals are recorded. Presentation only.
+ */
+function mcpTriageTag(a) {
+  const pkgName = (a && (a.name || a.package)) || '';
+  if (!_MCP_TRIAGE_NAME_RE.test(pkgName)) return '';
+  const shown = Array.isArray(a.signals) ? a.signals.slice(0, 3) : [];
+  if (shown.length === 0) return ' 🔌 [MCP]';
+  return ` 🔌 [MCP: ${shown.join(', ')}]`;
+}
 function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
   // Use in-memory stats (accumulated since last reset, restored from disk on restart)
   // instead of disk-based daily entries which can undercount due to UTC/Paris date mismatch
@@ -1110,7 +1126,10 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
         const version = a.version || 'N/A';
         const count = a.findingsCount || (a.findings ? a.findings.length : 0);
         const scoreText = a.score != null ? `score ${a.score}, ` : '';
-        return `${i + 1}. **${name}@${version}** — ${scoreText}${count} finding(s)`;
+        // AUDIT-C: annotate MCP suspects (identity + signals) for visual triage — MCP
+        // servers score high but are statically ambiguous vs MCP-malware (see AUDIT 2).
+        // Pure presentation, no scoring change.
+        return `${i + 1}. **${name}@${version}** — ${scoreText}${count} finding(s)${mcpTriageTag(a)}`;
       }).join('\n')
     : 'None';

package/src/scoring.js CHANGED Viewed

@@ -1506,6 +1506,7 @@ const {
   obfuscationWithoutVector,
   placeholderAntiDepConfusion,
   mcpServerEnvAccess,
+  mcpServerBenignLifecycle,
   vendorCliSdk,
   aiAgentBot,
   vendorMinifiedBundle,
@@ -1559,6 +1560,13 @@ function applyContextualFPCaps(result, pkgMeta) {
   if (mcpServerEnvAccess(result, meta)) {
     applied.push({ feature: 'mcp_server_env_access', cap: 30 });
   }
+  // F15: legit MCP installer/server WITH a benign install lifecycle (AUDIT 2) →
+  // MAX 30. Extends F9 to the ~77% of MCP installers that ship a build/setup hook
+  // (husky install, node build.js). Vetoes on malicious lifecycle (lifecycle_file_exec
+  // etc.), HARD exfil, or credential-file access — so GT MCP malware stays uncapped.
+  if (mcpServerBenignLifecycle(result, meta)) {
+    applied.push({ feature: 'mcp_server_benign_lifecycle', cap: 30 });
+  }
   // F2: binary installer from GitHub Releases → MAX 35
   if (installUrlGithubReleases(result)) {
     applied.push({ feature: 'install_url_github_releases', cap: 35 });