muaddib-scanner 2.11.81 → 2.11.83

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.81",
3
+ "version": "2.11.83",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-06-10T12:42:10.126Z",
3
+ "timestamp": "2026-06-10T14:16:30.189Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -751,6 +751,75 @@ function mcpServerEnvAccess(result, meta) {
751
751
  return true;
752
752
  }
753
753
 
754
+ // ============================================================================
755
+ // Feature 15 — mcp_server_benign_lifecycle (AUDIT 2, 2026-06)
756
+ // ============================================================================
757
+ //
758
+ // Like F9 (mcpServerEnvAccess) but TOLERATES a benign install lifecycle. F9
759
+ // vetoes on ANY preinstall/install/postinstall (its C3), which makes it
760
+ // inoperative for the ~77% of legitimate MCP installers that ship a build/setup
761
+ // hook (`husky install`, `node build.js`, `tsc`). Those packages stack
762
+ // mcp_config_injection (CRIT) + suspicious_dataflow (CRIT, env→first-party POST)
763
+ // + env_access (HIGH) + lifecycle_script (MEDIUM) and score ~150 on `muaddib
764
+ // scan` — the recurring @recapp/mcp-style false positives in the daily report.
765
+ //
766
+ // F15 instead allows a lifecycle that is only flagged as a plain MEDIUM/LOW
767
+ // `lifecycle_script`, and vetoes the moment the lifecycle does anything
768
+ // malicious. Ground-truth safety (verified by replay before/after):
769
+ // GT-060 mcp-config-inject → vetoed by lifecycle_file_exec (malicious postinstall)
770
+ // GT-088 defi-threat-scanner → vetoed by HARD exfil (suspicious_domain) + cred files
771
+ // GT-066 ai-agent-exploit → never emits mcp_config_injection (C2 excludes it)
772
+ // GT-097 / GT-099 → HARD exfil / not an mcp_config_injection JS package
773
+ // Same cap (30 = MEDIUM) and identity/provider-key machinery as F9.
774
// Threat types that mark an install lifecycle as malicious; any one of them vetoes F15.
const F15_LIFECYCLE_MALICE_TYPES = new Set([
  'lifecycle_file_exec',       // postinstall executes a file containing HIGH/CRIT threats
  'lifecycle_dataflow',        // install-time credential read + network send (compound)
  'lifecycle_shell_pipe',      // curl | sh during install
  'lifecycle_missing_script',  // phantom install script (payload injected later)
  'intent_credential_exfil',   // multi-file credential→network intent
  'intent_command_exfil',
  'detached_credential_exfil',
  'staged_payload'
]);

// Threat types whose message text is inspected for cited variable names (C4).
const F15_ENV_CITATION_TYPES = new Set([
  'env_access',
  'credential_regex_harvest',
  'env_charcode_reconstruction'
]);

/**
 * Feature 15 predicate: an MCP package whose install lifecycle is benign.
 *
 * Returns true only when every condition holds:
 *   C1  `_f9HasMcpIdentity(meta)` accepts the package;
 *   C2  at least one `mcp_config_injection` threat is present;
 *   C3' no threat type is in F15_LIFECYCLE_MALICE_TYPES and no `lifecycle_script`
 *       threat is HIGH or CRITICAL;
 *   C4  env/credential threat messages match no credential-file pattern and cite
 *       only upper-case identifiers that are known provider keys, match the
 *       provider-key suffix pattern, or are infra keys;
 *   C5  no threat type is in HARD_EXFIL_TYPES.
 *
 * @param {{threats?: Array<{type: string, severity?: string, message?: string}>}} result
 * @param {*} meta package metadata, forwarded to `_f9HasMcpIdentity`
 * @returns {boolean}
 */
function mcpServerBenignLifecycle(result, meta) {
  // C1 — MCP identity
  if (!_f9HasMcpIdentity(meta)) return false;

  const findings = (result && result.threats) || [];
  if (findings.length === 0) return false;

  // C2 — the package must actually emit mcp_config_injection
  const touchesMcpConfig = findings.some(f => f.type === 'mcp_config_injection');
  if (!touchesMcpConfig) return false;

  // C3' — a lifecycle may exist, but nothing about it may look malicious
  for (const finding of findings) {
    const severeLifecycleScript = finding.type === 'lifecycle_script' &&
      (finding.severity === 'HIGH' || finding.severity === 'CRITICAL');
    if (severeLifecycleScript || F15_LIFECYCLE_MALICE_TYPES.has(finding.type)) return false;
  }

  // C4 — env/credential findings may only cite allow-listed variable names
  for (const finding of findings) {
    if (!F15_ENV_CITATION_TYPES.has(finding.type)) continue;

    const text = String(finding.message || '');
    if (F9_CREDENTIAL_FILE_RE.test(text)) return false;

    const citedNames = text.match(/\b[A-Z][A-Z0-9_]{2,}\b/g);
    if (!citedNames) continue;

    for (const cited of citedNames) {
      const allowed = KNOWN_PROVIDER_KEYS_LITERAL.has(cited) ||
        PROVIDER_KEY_SUFFIX_RE.test(cited) ||
        F9_INFRA_KEYS.has(cited);
      if (!allowed) return false;
    }
  }

  // C5 — any HARD exfil capability vetoes the cap
  return !findings.some(f => HARD_EXFIL_TYPES.has(f.type));
}
822
+
754
823
  // ============================================================================
755
824
  // Feature 10 — vendor_cli_sdk (v2.11.23, audit week3 cluster, 96 FP)
756
825
  // ============================================================================
@@ -1426,6 +1495,7 @@ module.exports = {
1426
1495
  placeholderAntiDepConfusion,
1427
1496
  installScriptNoNetworkEgress,
1428
1497
  mcpServerEnvAccess,
1498
+ mcpServerBenignLifecycle,
1429
1499
  vendorCliSdk,
1430
1500
  aiAgentBot,
1431
1501
  vendorMinifiedBundle,
@@ -80,7 +80,7 @@ const {
80
80
 
81
81
  // From ./ingestion.js
82
82
  const { getNpmLatestTarball, getPyPITarballUrl } = require('./ingestion.js');
83
- const { enqueueScan } = require('./scan-queue.js');
83
+ const { enqueueScan, dequeueScan } = require('./scan-queue.js');
84
84
 
85
85
  // From ./tarball-archive.js
86
86
  const { archiveSuspectTarball } = require('./tarball-archive.js');
@@ -259,7 +259,9 @@ function recordTrainingSample(result, params) {
259
259
  maxSeverity: result.summary ? result.summary.riskLevel : null,
260
260
  types: [...new Set((result.threats || []).map(t => t.type))],
261
261
  sandbox: params.sandboxResult ? 'run' : 'none',
262
- source: 'scan'
262
+ source: 'scan',
263
+ // AUDIT-A1: stamped on `result` in scanPackage (single source of truth)
264
+ firstPublish: !!(result && result._firstPublish)
263
265
  });
264
266
  } catch (err) {
265
267
  // Non-fatal: ML export must never crash the monitor
@@ -673,6 +675,12 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
673
675
 
674
676
  // First-publish detection: used for sandbox priority below
675
677
  const isFirstPublish = cacheTrigger && cacheTrigger.reason === 'first_publish';
678
+ // AUDIT-A1 observability: stamp once so every recordTrainingSample(result, …) call
679
+ // below carries firstPublish into the scan-ledger (all ~10 call sites share this
680
+ // `result`). Pairs with the firstPublish flag on the eviction-drop ledger entries so
681
+ // first-publish coverage (scanned vs dropped) becomes measurable. The "Phase 2a"
682
+ // comment below promised this; the threading was missing until now.
683
+ result._firstPublish = isFirstPublish;
676
684
 
677
685
  // npm registry metadata was fetched ONCE before the worker spawn (hoisted above
678
686
  // to feed scanContext.npmRegistryMeta) and is reused here for: isFirstPublishHigh-
@@ -1171,9 +1179,14 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
1171
1179
  console.log(`[MONITOR] REPUTATION BYPASS: ${name} has high-confidence threat — using raw score`);
1172
1180
  }
1173
1181
 
1174
- // Record daily alert with post-reputation score for top suspects ranking
1182
+ // Record daily alert with post-reputation score for top suspects ranking.
1183
+ // AUDIT-C: carry the distinct CRITICAL/HIGH threat types so the daily report
1184
+ // can annotate MCP suspects with their signals (visual triage, no scoring change).
1175
1185
  if (dailyAlerts.length < MAX_DAILY_ALERTS) {
1176
- dailyAlerts.push({ name, version, ecosystem, findingsCount: result.summary.total, score: adjustedResult.summary.riskScore || 0, tier });
1186
+ const signals = [...new Set((result.threats || [])
1187
+ .filter(t => t.severity === 'CRITICAL' || t.severity === 'HIGH')
1188
+ .map(t => t.type))].slice(0, 6);
1189
+ dailyAlerts.push({ name, version, ecosystem, findingsCount: result.summary.total, score: adjustedResult.summary.riskScore || 0, tier, signals });
1177
1190
  }
1178
1191
  // LLM Detective: AI-powered analysis for T1a/T1b suspects
1179
1192
  // Skip for fast-track (large boring packages — LLM analysis adds 10-30s for no value)
@@ -1354,7 +1367,8 @@ async function _spawnWorker(scanQueue, stats, dailyAlerts, recentlyScanned, down
1354
1367
  _activeWorkers++;
1355
1368
  try {
1356
1369
  while (scanQueue.length > 0 && _activeWorkers <= _targetConcurrency) {
1357
- const item = scanQueue.shift();
1370
+ // AUDIT A2: FIFO by default; priority dequeue when MUADDIB_PRIORITY_DEQUEUE=1.
1371
+ const item = dequeueScan(scanQueue);
1358
1372
  if (!item) break;
1359
1373
  await processQueueItem(item, stats, dailyAlerts, recentlyScanned, downloadsCache, scanQueue, sandboxAvailable);
1360
1374
  }
@@ -68,7 +68,9 @@ function enqueueScan(scanQueue, item, stats, max = MAX_SCAN_QUEUE) {
68
68
  if (evicted && evicted.name) {
69
69
  require('./state.js').appendScanLedger({
70
70
  name: evicted.name, version: evicted.version, ecosystem: evicted.ecosystem,
71
- outcome: 'dropped', source: protectedFallback ? 'queue_cap_protected' : 'queue_cap'
71
+ outcome: 'dropped', source: protectedFallback ? 'queue_cap_protected' : 'queue_cap',
72
+ // AUDIT-A1 observability (see evictFromScanQueueBulk)
73
+ firstPublish: !!evicted.firstPublish, isBurstExtra: !!evicted.isATOBurstExtra
72
74
  });
73
75
  }
74
76
  } catch { /* ledger is best-effort */ }
@@ -136,7 +138,12 @@ function evictFromScanQueueBulk(scanQueue, targetKeep, source = 'bulk_evict', le
136
138
  appendLedger({
137
139
  name: item.name, version: item.version, ecosystem: item.ecosystem,
138
140
  outcome: 'dropped',
139
- source: _isProtected(item) ? `${source}_protected` : source
141
+ source: _isProtected(item) ? `${source}_protected` : source,
142
+ // AUDIT-A1 observability: record whether a DROPPED item was a first-publish
143
+ // (real coverage loss) vs a burst-extra (version-spam, expected). Lets us
144
+ // measure if the memory breaker is evicting genuine new packages.
145
+ firstPublish: !!item.firstPublish,
146
+ isBurstExtra: !!item.isATOBurstExtra
140
147
  });
141
148
  } catch { /* ledger is best-effort — must never break the breaker */ }
142
149
  }
@@ -149,4 +156,41 @@ function evictFromScanQueueBulk(scanQueue, targetKeep, source = 'bulk_evict', le
149
156
  return { dropped: toDrop, droppedProtected };
150
157
  }
151
158
 
152
- module.exports = { enqueueScan, evictFromScanQueueBulk, isProtected: _isProtected, MAX_SCAN_QUEUE };
159
// ── AUDIT A2: optional priority dequeue (gated OFF by default) ──────────────
// Default dequeue is strict FIFO (scanQueue.shift()). When enabled, the worker pulls
// the OLDEST high-value item (first-publish / known-malicious / burst-MAIN) within a
// bounded head-window before falling back to FIFO — so a genuine new package never
// ages out behind a deep version-spam backlog. Gated behind an env flag so deploying
// the code is INERT until ops flips it on (tune on the AUDIT-A1 first-publish-coverage
// data first — see brief). Burst EXTRAS (isATOBurstExtra) and regular items stay FIFO.

// Enabled only by the exact values '1' or 'true'; anything else keeps strict FIFO.
const PRIORITY_DEQUEUE = (() => {
  const flag = process.env.MUADDIB_PRIORITY_DEQUEUE;
  return flag === '1' || flag === 'true';
})();

// Number of head-of-queue entries inspected for a priority item; defaults to 2048
// when the env var is unset, non-numeric or not positive.
const PRIORITY_DEQUEUE_WINDOW = (() => {
  const parsed = Number.parseInt(process.env.MUADDIB_PRIORITY_DEQUEUE_WINDOW, 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : 2048;
})();

/**
 * True when a queue item should jump the queue: it is flagged `firstPublish`,
 * `isIOCMatch`, or `isBurst` without `isATOBurstExtra`.
 * @param {object} [item]
 * @returns {boolean}
 */
function _isPriority(item) {
  if (!item) return false;
  return !!(item.firstPublish || item.isIOCMatch || (item.isBurst && !item.isATOBurstExtra));
}

/**
 * Remove and return the next item to scan. Strict FIFO by default (unchanged). With
 * MUADDIB_PRIORITY_DEQUEUE=1: oldest priority item within a bounded head-window, else
 * FIFO. Returns undefined when the queue is empty.
 *
 * A missing or `null` `opts` behaves like `{}`. An `opts.window` that is not a
 * positive number falls back to the configured window, matching how the
 * environment-derived window is validated.
 *
 * @param {Array} scanQueue mutated in place
 * @param {{priority?: boolean, window?: number}} [opts] test overrides
 * @returns {object|undefined}
 */
function dequeueScan(scanQueue, opts = {}) {
  // The default parameter only covers `undefined`; an explicit null must not throw.
  const overrides = opts || {};
  const usePriority = overrides.priority !== undefined ? overrides.priority : PRIORITY_DEQUEUE;
  if (!usePriority || scanQueue.length === 0) return scanQueue.shift();

  const requested = Number(overrides.window);
  const windowSize = requested > 0 ? requested : PRIORITY_DEQUEUE_WINDOW;
  const limit = Math.min(scanQueue.length, windowSize);

  for (let i = 0; i < limit; i++) {
    if (!_isPriority(scanQueue[i])) continue;
    return i === 0 ? scanQueue.shift() : scanQueue.splice(i, 1)[0];
  }
  // No priority item inside the window: plain FIFO.
  return scanQueue.shift();
}
195
+
196
+ module.exports = { enqueueScan, evictFromScanQueueBulk, dequeueScan, isProtected: _isProtected, MAX_SCAN_QUEUE };
@@ -1010,6 +1010,10 @@ function appendScanLedger(e) {
1010
1010
  types: Array.isArray(e.types) ? e.types.slice(0, 12) : [],
1011
1011
  sandbox: e.sandbox || 'none',
1012
1012
  firstPublish: !!e.firstPublish,
1013
+ // AUDIT-A1: version-spam marker on dropped burst-extras — lets the coverage
1014
+ // rollup separate "first-publish lost" from "spam extra dropped (expected)".
1015
+ // Only written when true to keep the 127MB ledger lean.
1016
+ ...(e.isBurstExtra ? { isBurstExtra: true } : {}),
1013
1017
  source: e.source || 'scan'
1014
1018
  };
1015
1019
  fs.appendFileSync(SCAN_LEDGER_FILE, JSON.stringify(entry) + '\n', 'utf8');
@@ -1094,6 +1094,22 @@ function formatLedgerField(rollup) {
1094
1094
  return { name: 'Ledger (24h)', value: lines.join('\n'), inline: false };
1095
1095
  }
1096
1096
 
1097
// AUDIT-C: MCP self-identity by package name (matches the F9/F15 MCP_NAME_RE family in
// feature-extractor.js — kept local to avoid importing the ML module into the embed path).
const _MCP_TRIAGE_NAME_RE = /(?:^|[/_-])mcp(?:[_-]|$)|mcp[_-](?:server|init|bridge|installer|memory|plugin|core|router|host|client|gateway|relay|stdio|transport|orchestrator)/i;

/**
 * Triage tag for a daily-report top-suspect. Returns ' 🔌 [MCP: sig1, sig2]' when the
 * package name (`a.name`, else `a.package`) matches the MCP name pattern, else ''.
 * Signals come from `a.signals`; at most the first three usable entries are shown.
 * Entries that are not non-empty strings are dropped so a malformed alert cannot
 * render as '[MCP: , ]'. With no usable signals the tag is ' 🔌 [MCP]'.
 * Presentation only.
 *
 * @param {{name?: string, package?: string, signals?: string[]}} [a] daily alert entry
 * @returns {string}
 */
function mcpTriageTag(a) {
  const name = (a && (a.name || a.package)) || '';
  if (!_MCP_TRIAGE_NAME_RE.test(name)) return '';
  const sigs = Array.isArray(a.signals)
    ? a.signals.filter(s => typeof s === 'string' && s.length > 0).slice(0, 3)
    : [];
  return sigs.length ? ` 🔌 [MCP: ${sigs.join(', ')}]` : ' 🔌 [MCP]';
}
1112
+
1097
1113
  function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
1098
1114
  // Use in-memory stats (accumulated since last reset, restored from disk on restart)
1099
1115
  // instead of disk-based daily entries which can undercount due to UTC/Paris date mismatch
@@ -1110,7 +1126,10 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
1110
1126
  const version = a.version || 'N/A';
1111
1127
  const count = a.findingsCount || (a.findings ? a.findings.length : 0);
1112
1128
  const scoreText = a.score != null ? `score ${a.score}, ` : '';
1113
- return `${i + 1}. **${name}@${version}**${scoreText}${count} finding(s)`;
1129
+ // AUDIT-C: annotate MCP suspects (identity + signals) for visual triage. MCP
1130
+ // servers score high but are statically ambiguous vs MCP-malware (see AUDIT 2).
1131
+ // Pure presentation, no scoring change.
1132
+ return `${i + 1}. **${name}@${version}** — ${scoreText}${count} finding(s)${mcpTriageTag(a)}`;
1114
1133
  }).join('\n')
1115
1134
  : 'None';
1116
1135
 
package/src/scoring.js CHANGED
@@ -1506,6 +1506,7 @@ const {
1506
1506
  obfuscationWithoutVector,
1507
1507
  placeholderAntiDepConfusion,
1508
1508
  mcpServerEnvAccess,
1509
+ mcpServerBenignLifecycle,
1509
1510
  vendorCliSdk,
1510
1511
  aiAgentBot,
1511
1512
  vendorMinifiedBundle,
@@ -1559,6 +1560,13 @@ function applyContextualFPCaps(result, pkgMeta) {
1559
1560
  if (mcpServerEnvAccess(result, meta)) {
1560
1561
  applied.push({ feature: 'mcp_server_env_access', cap: 30 });
1561
1562
  }
1563
+ // F15: legit MCP installer/server WITH a benign install lifecycle (AUDIT 2) →
1564
+ // MAX 30. Extends F9 to the ~77% of MCP installers that ship a build/setup hook
1565
+ // (husky install, node build.js). Vetoes on malicious lifecycle (lifecycle_file_exec
1566
+ // etc.), HARD exfil, or credential-file access — so GT MCP malware stays uncapped.
1567
+ if (mcpServerBenignLifecycle(result, meta)) {
1568
+ applied.push({ feature: 'mcp_server_benign_lifecycle', cap: 30 });
1569
+ }
1562
1570
  // F2: binary installer from GitHub Releases → MAX 35
1563
1571
  if (installUrlGithubReleases(result)) {
1564
1572
  applied.push({ feature: 'install_url_github_releases', cap: 35 });