npm - muaddib-scanner - Versions diffs - 2.11.82 → 2.11.84 - Mend

muaddib-scanner 2.11.82 → 2.11.84

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/package.json +1 -1
package/{self-scan-v2.11.82.json → self-scan-v2.11.84.json} +1 -1
package/src/monitor/queue.js +19 -5
package/src/monitor/scan-queue.js +47 -3
package/src/monitor/state.js +4 -0
package/src/monitor/webhook.js +20 -1
package/src/response/playbooks.js +10 -0
package/src/rules/index.js +26 -0
package/src/scanner/github-actions.js +52 -0
package/src/scanner/npm-registry.js +19 -0
package/src/scanner/pypi-registry.js +19 -1
package/src/scoring.js +161 -8

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "muaddib-scanner",
-  "version": "2.11.82",
+  "version": "2.11.84",
   "description": "Supply-chain threat detection & response for npm & PyPI/Python",
   "main": "src/index.js",
   "bin": {

package/{self-scan-v2.11.82.json → self-scan-v2.11.84.json} RENAMED

@@ -1,6 +1,6 @@
 {
   "target": "node_modules",
-  "timestamp": "2026-06-10T12:51:04.328Z",
+  "timestamp": "2026-06-10T20:04:48.914Z",
   "threats": [
     {
       "type": "string_mutation_obfuscation",

package/src/monitor/queue.js CHANGED

@@ -80,7 +80,7 @@ const {
 // From ./ingestion.js
 const { getNpmLatestTarball, getPyPITarballUrl } = require('./ingestion.js');
-const { enqueueScan } = require('./scan-queue.js');
+const { enqueueScan, dequeueScan } = require('./scan-queue.js');
 // From ./tarball-archive.js
 const { archiveSuspectTarball } = require('./tarball-archive.js');
@@ -259,7 +259,9 @@ function recordTrainingSample(result, params) {
       maxSeverity: result.summary ? result.summary.riskLevel : null,
       types: [...new Set((result.threats || []).map(t => t.type))],
       sandbox: params.sandboxResult ? 'run' : 'none',
-      source: 'scan'
+      source: 'scan',
+      // AUDIT-A1: stamped on `result` in scanPackage (single source of truth)
+      firstPublish: !!(result && result._firstPublish)
     });
   } catch (err) {
     // Non-fatal: ML export must never crash the monitor
@@ -673,6 +675,12 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
     // First-publish detection: used for sandbox priority below
     const isFirstPublish = cacheTrigger && cacheTrigger.reason === 'first_publish';
+    // AUDIT-A1 observability: stamp once so every recordTrainingSample(result, …) call
+    // below carries firstPublish into the scan-ledger (all ~10 call sites share this
+    // `result`). Pairs with the firstPublish flag on the eviction-drop ledger entries so
+    // first-publish coverage (scanned vs dropped) becomes measurable. The "Phase 2a"
+    // comment below promised this; the threading was missing until now.
+    result._firstPublish = isFirstPublish;
     // npm registry metadata was fetched ONCE before the worker spawn (hoisted above
     // to feed scanContext.npmRegistryMeta) and is reused here for: isFirstPublishHigh-
@@ -1171,9 +1179,14 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
           console.log(`[MONITOR] REPUTATION BYPASS: ${name} has high-confidence threat — using raw score`);
         }
-        // Record daily alert with post-reputation score for top suspects ranking
+        // Record daily alert with post-reputation score for top suspects ranking.
+        // AUDIT-C: carry the distinct CRITICAL/HIGH threat types so the daily report
+        // can annotate MCP suspects with their signals (visual triage, no scoring change).
         if (dailyAlerts.length < MAX_DAILY_ALERTS) {
-          dailyAlerts.push({ name, version, ecosystem, findingsCount: result.summary.total, score: adjustedResult.summary.riskScore || 0, tier });
+          const signals = [...new Set((result.threats || [])
+            .filter(t => t.severity === 'CRITICAL' || t.severity === 'HIGH')
+            .map(t => t.type))].slice(0, 6);
+          dailyAlerts.push({ name, version, ecosystem, findingsCount: result.summary.total, score: adjustedResult.summary.riskScore || 0, tier, signals });
         }
         // LLM Detective: AI-powered analysis for T1a/T1b suspects
         // Skip for fast-track (large boring packages — LLM analysis adds 10-30s for no value)
@@ -1354,7 +1367,8 @@ async function _spawnWorker(scanQueue, stats, dailyAlerts, recentlyScanned, down
   _activeWorkers++;
   try {
     while (scanQueue.length > 0 && _activeWorkers <= _targetConcurrency) {
-      const item = scanQueue.shift();
+      // AUDIT A2: FIFO by default; priority dequeue when MUADDIB_PRIORITY_DEQUEUE=1.
+      const item = dequeueScan(scanQueue);
       if (!item) break;
       await processQueueItem(item, stats, dailyAlerts, recentlyScanned, downloadsCache, scanQueue, sandboxAvailable);
     }

package/src/monitor/scan-queue.js CHANGED

@@ -68,7 +68,9 @@ function enqueueScan(scanQueue, item, stats, max = MAX_SCAN_QUEUE) {
       if (evicted && evicted.name) {
         require('./state.js').appendScanLedger({
           name: evicted.name, version: evicted.version, ecosystem: evicted.ecosystem,
-          outcome: 'dropped', source: protectedFallback ? 'queue_cap_protected' : 'queue_cap'
+          outcome: 'dropped', source: protectedFallback ? 'queue_cap_protected' : 'queue_cap',
+          // AUDIT-A1 observability (see evictFromScanQueueBulk)
+          firstPublish: !!evicted.firstPublish, isBurstExtra: !!evicted.isATOBurstExtra
         });
       }
     } catch { /* ledger is best-effort */ }
@@ -136,7 +138,12 @@ function evictFromScanQueueBulk(scanQueue, targetKeep, source = 'bulk_evict', le
           appendLedger({
             name: item.name, version: item.version, ecosystem: item.ecosystem,
             outcome: 'dropped',
-            source: _isProtected(item) ? `${source}_protected` : source
+            source: _isProtected(item) ? `${source}_protected` : source,
+            // AUDIT-A1 observability: record whether a DROPPED item was a first-publish
+            // (real coverage loss) vs a burst-extra (version-spam, expected). Lets us
+            // measure if the memory breaker is evicting genuine new packages.
+            firstPublish: !!item.firstPublish,
+            isBurstExtra: !!item.isATOBurstExtra
           });
         } catch { /* ledger is best-effort — must never break the breaker */ }
       }
@@ -149,4 +156,41 @@ function evictFromScanQueueBulk(scanQueue, targetKeep, source = 'bulk_evict', le
   return { dropped: toDrop, droppedProtected };
 }
-module.exports = { enqueueScan, evictFromScanQueueBulk, isProtected: _isProtected, MAX_SCAN_QUEUE };
+// ── AUDIT A2: optional priority dequeue (gated OFF by default) ──────────────
+// Default dequeue is strict FIFO (scanQueue.shift()). When enabled, the worker pulls
+// the OLDEST high-value item (first-publish / known-malicious / burst-MAIN) within a
+// bounded head-window before falling back to FIFO — so a genuine new package never
+// ages out behind a deep version-spam backlog. Gated behind an env flag so deploying
+// the code is INERT until ops flips it on (tune on the AUDIT-A1 first-publish-coverage
+// data first — see brief). Burst EXTRAS (isATOBurstExtra) and regular items stay FIFO.
+const PRIORITY_DEQUEUE = (() => {
+  const v = process.env.MUADDIB_PRIORITY_DEQUEUE;
+  return v === '1' || v === 'true';
+})();
+const PRIORITY_DEQUEUE_WINDOW = (() => {
+  const v = parseInt(process.env.MUADDIB_PRIORITY_DEQUEUE_WINDOW, 10);
+  return Number.isFinite(v) && v > 0 ? v : 2048;
+})();
+function _isPriority(item) {
+  return !!(item && (item.firstPublish || item.isIOCMatch || (item.isBurst && !item.isATOBurstExtra)));
+}
+/**
+ * Remove and return the next item to scan. Strict FIFO by default (unchanged). With
+ * MUADDIB_PRIORITY_DEQUEUE=1: oldest priority item within a bounded head-window, else
+ * FIFO. Single-threaded → splice/shift are atomic w.r.t. other workers.
+ * @param {Array} scanQueue
+ * @param {{priority?: boolean, window?: number}} [opts] test overrides
+ */
+function dequeueScan(scanQueue, opts = {}) {
+  const priority = opts.priority !== undefined ? opts.priority : PRIORITY_DEQUEUE;
+  if (!priority || scanQueue.length === 0) return scanQueue.shift();
+  const win = Math.min(scanQueue.length, opts.window || PRIORITY_DEQUEUE_WINDOW);
+  for (let i = 0; i < win; i++) {
+    if (_isPriority(scanQueue[i])) return i === 0 ? scanQueue.shift() : scanQueue.splice(i, 1)[0];
+  }
+  return scanQueue.shift();
+}
+module.exports = { enqueueScan, evictFromScanQueueBulk, dequeueScan, isProtected: _isProtected, MAX_SCAN_QUEUE };

package/src/monitor/state.js CHANGED

@@ -1010,6 +1010,10 @@ function appendScanLedger(e) {
       types: Array.isArray(e.types) ? e.types.slice(0, 12) : [],
       sandbox: e.sandbox || 'none',
       firstPublish: !!e.firstPublish,
+      // AUDIT-A1: version-spam marker on dropped burst-extras — lets the coverage
+      // rollup separate "first-publish lost" from "spam extra dropped (expected)".
+      // Only written when true to keep the 127MB ledger lean.
+      ...(e.isBurstExtra ? { isBurstExtra: true } : {}),
       source: e.source || 'scan'
     };
     fs.appendFileSync(SCAN_LEDGER_FILE, JSON.stringify(entry) + '\n', 'utf8');

package/src/monitor/webhook.js CHANGED

@@ -1094,6 +1094,22 @@ function formatLedgerField(rollup) {
   return { name: 'Ledger (24h)', value: lines.join('\n'), inline: false };
 }
+// AUDIT-C: MCP self-identity by package name (matches the F9/F15 MCP_NAME_RE family in
+// feature-extractor.js — kept local to avoid importing the ML module into the embed path).
+const _MCP_TRIAGE_NAME_RE = /(?:^|[/_-])mcp(?:[_-]|$)|mcp[_-](?:server|init|bridge|installer|memory|plugin|core|router|host|client|gateway|relay|stdio|transport|orchestrator)/i;
+/**
+ * Triage tag for a daily-report top-suspect. Returns ' 🔌 [MCP: sig1, sig2]' when the
+ * package self-identifies as an MCP server/installer, else ''. Signals come from the
+ * alert's recorded CRITICAL/HIGH threat types (AUDIT-C). Presentation only.
+ */
+function mcpTriageTag(a) {
+  const name = (a && (a.name || a.package)) || '';
+  if (!_MCP_TRIAGE_NAME_RE.test(name)) return '';
+  const sigs = Array.isArray(a.signals) ? a.signals.slice(0, 3) : [];
+  return sigs.length ? ` 🔌 [MCP: ${sigs.join(', ')}]` : ' 🔌 [MCP]';
+}
 function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
   // Use in-memory stats (accumulated since last reset, restored from disk on restart)
   // instead of disk-based daily entries which can undercount due to UTC/Paris date mismatch
@@ -1110,7 +1126,10 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
         const version = a.version || 'N/A';
         const count = a.findingsCount || (a.findings ? a.findings.length : 0);
         const scoreText = a.score != null ? `score ${a.score}, ` : '';
-        return `${i + 1}. **${name}@${version}** — ${scoreText}${count} finding(s)`;
+        // AUDIT-C: annotate MCP suspects (identity + signals) for visual triage — MCP
+        // servers score high but are statically ambiguous vs MCP-malware (see AUDIT 2).
+        // Pure presentation, no scoring change.
+        return `${i + 1}. **${name}@${version}** — ${scoreText}${count} finding(s)${mcpTriageTag(a)}`;
       }).join('\n')
     : 'None';

package/src/response/playbooks.js CHANGED

@@ -251,6 +251,16 @@ const PLAYBOOKS = {
     'Technique Shai-Hulud (TeamPCP). Supprimer le workflow immediatement. ' +
     'Si le workflow a ete execute, considerer tous les secrets du repository compromis et les regenerer.',
+  unpinned_action:
+    'INFO: Action GitHub tierce epinglee a une ref mutable (tag/branche) au lieu d\'un commit SHA. ' +
+    'Epingler au SHA complet du commit (ex: uses: owner/repo@<40-hex-sha>) pour empecher qu\'une release retaggee ' +
+    'injecte du code malveillant (cf. tj-actions/changed-files CVE-2025-30066).',
+  unpinned_action_in_risky_workflow:
+    'CRITIQUE: Action tierce non-epinglee combinee a un workflow controlable par un attaquant (injection ou pwn-request). ' +
+    'Vecteur de livraison supply-chain (pattern tj-actions/Ultralytics). Epingler toutes les actions au SHA, ' +
+    'supprimer le trigger risque (pull_request_target / contexte attaquant), et auditer l\'historique d\'execution du workflow.',
   sandbox_sensitive_file_read:
     'CRITIQUE: Package lit des fichiers sensibles (credentials) lors de l\'installation. Ne pas installer. Supprimer immediatement.',
   sandbox_sensitive_file_write:

package/src/rules/index.js CHANGED

@@ -1592,6 +1592,32 @@ const RULES = {
     ],
     mitre: 'T1552.001'
   },
+  unpinned_action: {
+    id: 'MUADDIB-GHA-005',
+    name: 'Unpinned Third-Party GitHub Action',
+    severity: 'LOW',
+    confidence: 'low',
+    domain: 'engineering',
+    description: 'Action GitHub tierce epinglee a une ref mutable (tag/branche) au lieu d\'un commit SHA. Une release retaggee livre du code malveillant a tous les consommateurs — cause racine de tj-actions/changed-files (CVE-2025-30066) et reviewdog (CVE-2025-30154). Informatif seul ; le signal fort est le compound MUADDIB-GHA-006.',
+    references: [
+      'https://www.cisa.gov/news-events/alerts/2025/03/18/supply-chain-compromise-third-party-tj-actionschanged-files-cve-2025-30066-and-reviewdogaction',
+      'https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-third-party-actions'
+    ],
+    mitre: 'T1195.002'
+  },
+  unpinned_action_in_risky_workflow: {
+    id: 'MUADDIB-GHA-006',
+    name: 'Unpinned Action in Attacker-Controllable Workflow',
+    severity: 'CRITICAL',
+    confidence: 'high',
+    domain: 'malware',
+    description: 'Compound: action tierce non-epinglee (ref mutable) dans un workflow egalement controlable par un attaquant (injection de contexte ou pwn-request). La ref mutable est le vecteur de livraison, le trigger risque est la portee — pattern tj-actions/Ultralytics. FP~0 par construction (requiert les deux moities independantes).',
+    references: [
+      'https://www.cisa.gov/news-events/alerts/2025/03/18/supply-chain-compromise-third-party-tj-actionschanged-files-cve-2025-30066-and-reviewdogaction',
+      'https://orca.security/resources/blog/pull-request-nightmare-part-2-exploits/'
+    ],
+    mitre: 'T1195.002'
+  },
   // Sandbox detections
   sandbox_sensitive_file_read: {

package/src/scanner/github-actions.js CHANGED

@@ -62,6 +62,10 @@ function scanDirRecursive(dirPath, targetPath, threats, depth = 0) {
       const activeLines = yamlLines.filter(l => !l.trim().startsWith('#'));
       const activeContent = activeLines.join('\n');
+      // Per-file risk flags, consumed by the GHA-006 compound below.
+      let fileHasInjection = false;
+      let fileHasPwn = false;
       // Détection du backdoor Shai-Hulud discussion.yaml
       if (file === 'discussion.yaml' || file === 'discussion.yml') {
         if (activeContent.includes('github.event.discussion.body')) {
@@ -82,6 +86,7 @@ function scanDirRecursive(dirPath, targetPath, threats, depth = 0) {
       for (const { regex, msg } of injectionPatterns) {
         if (regex.test(activeContent)) {
+          fileHasInjection = true;
           threats.push({
             type: 'workflow_injection',
             severity: 'HIGH',
@@ -95,6 +100,7 @@ function scanDirRecursive(dirPath, targetPath, threats, depth = 0) {
       const hasPRTarget = /pull_request_target/m.test(activeContent);
       const hasCheckoutPRHead = /actions\/checkout[\s\S]*?ref:\s*\$\{\{\s*github\.event\.pull_request\.head\.(ref|sha)\s*\}\}/m.test(activeContent);
       if (hasPRTarget && hasCheckoutPRHead) {
+        fileHasPwn = true;
         threats.push({
           type: 'workflow_pwn_request',
           severity: 'CRITICAL',
@@ -114,6 +120,52 @@ function scanDirRecursive(dirPath, targetPath, threats, depth = 0) {
           file: relFile
         });
       }
+      // GHA-005: Unpinned THIRD-PARTY action — pinned to a mutable tag/branch ref
+      // instead of an immutable commit SHA. Root cause of the tj-actions/changed-files
+      // (CVE-2025-30066) and reviewdog (CVE-2025-30154) compromises: a retagged release
+      // silently ships malicious code to every consumer. LOW/informational on its own —
+      // pinning to a major tag is ubiquitous and usually benign — and restricted to
+      // third-party orgs (official actions/* and github/* are conventionally trusted) to
+      // avoid noise on the near-universal `actions/checkout@v4`. The real signal is the
+      // GHA-006 compound below.
+      let fileHasUnpinnedThirdParty = false;
+      const usesRe = /^\s*-?\s*uses:\s*['"]?([^'"\s#]+)/gm;
+      let um;
+      while ((um = usesRe.exec(activeContent)) !== null) {
+        const ref = um[1];
+        // Local actions (./, ../) and docker refs carry no upstream tag to retag.
+        if (ref.startsWith('./') || ref.startsWith('../') || ref.startsWith('.\\') || ref.startsWith('docker://')) continue;
+        const at = ref.lastIndexOf('@');
+        if (at === -1) continue;
+        const repo = ref.slice(0, at);
+        const pin = ref.slice(at + 1);
+        if (/^[0-9a-f]{40}$/i.test(pin)) continue; // immutable SHA — correctly pinned
+        const org = repo.split('/')[0].toLowerCase();
+        if (org === 'actions' || org === 'github') continue; // first-party trusted orgs
+        fileHasUnpinnedThirdParty = true;
+        threats.push({
+          type: 'unpinned_action',
+          severity: 'LOW',
+          confidence: 'low',
+          message: `Third-party GitHub Action "${ref}" is pinned to a mutable ref ("${pin}") instead of a commit SHA — a retagged release (cf. tj-actions CVE-2025-30066) would execute attacker-controlled code.`,
+          file: relFile
+        });
+      }
+      // GHA-006 compound: an unpinned third-party action in a workflow that is ALSO
+      // attacker-controllable (context injection or pwn-request). This is the
+      // tj-actions / Ultralytics shape — the mutable ref is the delivery vector and the
+      // risky trigger is the reach. FP≈0 by construction: requires both independent halves.
+      if (fileHasUnpinnedThirdParty && (fileHasInjection || fileHasPwn)) {
+        threats.push({
+          type: 'unpinned_action_in_risky_workflow',
+          severity: 'CRITICAL',
+          compound: true,
+          message: 'Unpinned third-party action combined with an attacker-controllable workflow trigger (injection/pwn-request) — supply-chain delivery vector (tj-actions/Ultralytics pattern).',
+          file: relFile
+        });
+      }
     }
 }

package/src/scanner/npm-registry.js CHANGED

@@ -142,6 +142,21 @@ async function getPackageMetadata(packageName) {
   const hasRepository = !!(latestMeta?.repository || meta.repository);
+  // P3 (provenance): npm publish provenance / attestations (npm `--provenance`,
+  // Sigstore-backed, GA since 2023) appear as `dist.attestations` on the version.
+  // Presence on the live latest version is a trust signal (downweight, fewer FP);
+  // a mature package whose latest version LOST the provenance that earlier versions
+  // carried is a build-divergence / takeover signal (Ultralytics shape — upweight).
+  const latestHasProvenance = !!(latestMeta?.dist?.attestations);
+  let anyPriorHadProvenance = false;
+  if (!latestHasProvenance && meta.versions) {
+    for (const [v, vm] of Object.entries(meta.versions)) {
+      if (v === latestVersion) continue;
+      if (vm?.dist?.attestations) { anyPriorHadProvenance = true; break; }
+    }
+  }
+  const provenanceRegressed = !latestHasProvenance && anyPriorHadProvenance;
   // 2. Weekly downloads + author search (parallel)
   const downloadsUrl = DOWNLOADS_URL + '/' + encodeURIComponent(packageName);
   const authorUrl = maintainer
@@ -207,6 +222,10 @@ async function getPackageMetadata(packageName) {
     maintainer_emails: maintainerEmails,
     // C3 : per-version publish timestamps for delta-mode selectPriorVersions.
     time: versionTimes,
+    // P3 : Sigstore-backed publish provenance on the live latest version, and
+    // whether it regressed (earlier versions had it, latest does not).
+    has_provenance: latestHasProvenance,
+    provenance_regressed: provenanceRegressed,
     ...advancedSignals
   };
 }

package/src/scanner/pypi-registry.js CHANGED

@@ -206,6 +206,22 @@ async function getPyPIPackageMetadata(packageName) {
     yanked = releases[latestVersion].every(f => f && f.yanked === true);
   }
+  // P3 (provenance): PEP 740 digital attestations (Trusted Publishing, supported
+  // since Nov 2024) surface as a `provenance` field on a release file. Same dual
+  // signal as npm: present on the latest version → trust (downweight); regressed
+  // from earlier versions → build-divergence / takeover suspicion (upweight).
+  let latestHasProvenance = false;
+  if (latestVersion && Array.isArray(releases[latestVersion])) {
+    latestHasProvenance = releases[latestVersion].some(f => f && f.provenance);
+  }
+  let anyPriorHadProvenance = false;
+  if (!latestHasProvenance) {
+    for (const [v, files] of Object.entries(releases)) {
+      if (v === latestVersion || !Array.isArray(files)) continue;
+      if (files.some(f => f && f.provenance)) { anyPriorHadProvenance = true; break; }
+    }
+  }
   const data = {
     created_at: createdAt,
     latest_release_at: latestReleaseAt,
@@ -218,7 +234,9 @@ async function getPyPIPackageMetadata(packageName) {
       : (typeof info.description === 'string' ? info.description.slice(0, 1000) : ''),
     home_page: typeof info.home_page === 'string' && info.home_page ? info.home_page : null,
     project_urls: (info.project_urls && typeof info.project_urls === 'object') ? info.project_urls : null,
-    releases: releaseTimes
+    releases: releaseTimes,
+    has_provenance: latestHasProvenance,
+    provenance_regressed: !latestHasProvenance && anyPriorHadProvenance
   };
   _pypiMetadataCache.set(normalized, { fetchedAt: Date.now(), data });

package/src/scoring.js CHANGED

@@ -170,6 +170,34 @@ const SINGLE_FIRE_CRITICAL_TYPES = new Set([
 ]);
 const SINGLE_FIRE_CRITICAL_FLOOR = 75;
 const SINGLE_FIRE_MIN_SEVERITY_RANK = 2; // HIGH
+// MT-1 / PyPI unblock: import-time RCE on PyPI is the lifecycle-equivalent of an
+// npm install hook — code that runs at `pip install` time via __init__.py / setup.py.
+// PyPI packages emit no `lifecycle_script` (an npm-only signal), so confirmed
+// import-time RCE would otherwise be capped at 35 and buried in the benign 25-35
+// cluster. These types are emitted ONLY by the Python scanners (python-source.js /
+// python-ast-detectors) on .py files, so their presence is itself the PyPI signal —
+// no ecosystem flag needed, and npm packages are unaffected (they never emit them).
+const PYPI_IMPORT_TIME_RCE_TYPES = new Set([
+  'import_time_exec',
+  'import_time_subprocess',
+  'import_time_os_system',
+  'import_time_deserialization',
+  'import_time_fetch_exec',
+  'fetch_to_fork_exec_inline',
+  'pyast_module_level_exec',
+  'pyast_module_level_subprocess_shell',
+  'pyast_module_level_unsafe_deserialization',
+  'pyast_setup_cmdclass_override',
+  'pyast_ctypes_shellcode_load'
+]);
+// Track R: the reputation multiplier (applyReputationFactor) may suppress noise on
+// mature/popular packages down to ×0.10, but it must never pull a CONFIRMED malice
+// detection below the operational alert threshold. Account-takeover of a popular
+// package (Shai-Hulud / event-stream shape) is the #1 real-world vector and would
+// otherwise inherit the victim package's reputation and be silently dropped.
+const REPUTATION_MALICE_FLOOR = 20;
 const _SEV_RANK = { LOW: 0, MEDIUM: 1, HIGH: 2, CRITICAL: 3 };
 /**
@@ -672,9 +700,13 @@ const SCORING_COMPOUNDS = [
     type: 'recon_exfil_direct_ip',
     requires: ['linux_fingerprint_exec', 'direct_ip_exfil'],
     severity: 'CRITICAL',
-    message: 'Linux system fingerprint (id/uname/lsb_release/hostname/whoami) + direct-IP exfil in same file — targeted device fingerprinting for C2 grouping (scoring compound).',
+    message: 'Linux system fingerprint (id/uname/lsb_release/hostname/whoami) + direct-IP exfil in the same module — targeted device fingerprinting for C2 grouping (scoring compound).',
     fileFrom: 'direct_ip_exfil',
-    sameFile: true
+    sameFile: true,
+    // P2c: also fire when the two halves are split across statically-import-linked
+    // files (anti-fragmentation). Both components are individually high-signal, so
+    // extending from sameFile to sameModule keeps FP≈0 while closing the evasion.
+    sameModule: true
   },
 ];
@@ -707,6 +739,50 @@ function _extractStaticImports(filePath) {
   return imports;
 }
+// P2c (anti-fragmentation): resolve a file's 1-hop static import targets to
+// normalized relative paths (forward slashes), matching the threat.file format.
+// Mirrors the resolution inside _resolveLifecycleScopeGate so sameModule and
+// lifecycleScoped agree on what "linked by import" means.
+function _resolveImports1Hop(relFile, targetPath) {
+  const fs = require('fs');
+  const pathMod = require('path');
+  const out = new Set();
+  if (!relFile || relFile === 'package.json' || relFile === '(unknown)') return out;
+  const absFile = pathMod.resolve(targetPath, relFile);
+  const imports = _extractStaticImports(absFile);
+  const impDir = pathMod.dirname(absFile);
+  for (const imp of imports) {
+    let resolved = pathMod.relative(targetPath, pathMod.resolve(impDir, imp)).replace(/\\/g, '/');
+    if (!resolved.match(/\.(js|mjs|cjs)$/)) {
+      if (fs.existsSync(pathMod.resolve(targetPath, resolved + '.js'))) {
+        resolved += '.js';
+      } else if (fs.existsSync(pathMod.resolve(targetPath, resolved, 'index.js'))) {
+        resolved = resolved + '/index.js';
+      }
+    }
+    out.add(resolved);
+  }
+  return out;
+}
+// P2c: two files are "in the same module" if they are the same file or linked by a
+// 1-hop static import in either direction. Closes the fragmentation evasion where an
+// attacker splits the two halves of a payload across an importing file and its helper
+// to break a sameFile compound. Dynamic require() is intentionally NOT resolved
+// (mirrors the module-graph) — linkage must be a literal static import.
+function _filesSameModule(fileA, fileB, targetPath) {
+  if (!fileA || !fileB) return false;
+  if (fileA === 'package.json' || fileB === 'package.json') return false;
+  if (fileA === '(unknown)' || fileB === '(unknown)') return false;
+  const a = fileA.replace(/\\/g, '/');
+  const b = fileB.replace(/\\/g, '/');
+  if (a === b) return true;
+  if (!targetPath) return false;
+  if (_resolveImports1Hop(a, targetPath).has(b)) return true;
+  if (_resolveImports1Hop(b, targetPath).has(a)) return true;
+  return false;
+}
 // v2.11.11: Lifecycle scope resolution. Determines if a lifecycleScoped compound
 // should fire based on whether the non-lifecycle threats are in the lifecycle
 // target file or its direct static imports.
@@ -889,7 +965,22 @@ function applyCompoundBoosts(threats, targetPath) {
       const commonFiles = [...filesByType[0]].filter(f =>
         filesByType.every(s => s.has(f))
       );
-      if (commonFiles.length === 0) continue;
+      if (commonFiles.length === 0) {
+        // P2c (anti-fragmentation): sameModule fallback — accept two component files
+        // linked by a 1-hop static import, so splitting the payload across an importer
+        // and its helper no longer evades the compound. Opt-in per compound and limited
+        // to the two-type case to bound the FP surface to the highest-confidence rules.
+        let linked = false;
+        if (compound.sameModule && filesByType.length === 2 && targetPath) {
+          for (const fa of filesByType[0]) {
+            for (const fb of filesByType[1]) {
+              if (_filesSameModule(fa, fb, targetPath)) { linked = true; break; }
+            }
+            if (linked) break;
+          }
+        }
+        if (!linked) continue;
+      }
     }
     if (!compoundAlreadyPresent) {
@@ -1464,7 +1555,11 @@ function calculateRiskScore(deduped, intentResult) {
   // json-spacer, reactvora: eval(data.content) from jsonkeeper.com is always malicious
   const _hasStagedC2 = deduped.some(t => t.type === 'staged_payload') &&
     deduped.some(t => t.type === 'suspicious_domain' && t.severity === 'HIGH');
-  if (!_hasLifecycle && !_hasHC && !_hasCompound && !_hasStagedC2) {
+  // PyPI unblock: import-time RCE is the PyPI lifecycle-equivalent — bypass the cap so
+  // confirmed Python install-time malware reaches its true score and separates from the
+  // benign 25-35 cluster (which carries no import-time-exec signal).
+  const _hasPyPIImportRCE = deduped.some(t => PYPI_IMPORT_TIME_RCE_TYPES.has(t.type));
+  if (!_hasLifecycle && !_hasHC && !_hasCompound && !_hasStagedC2 && !_hasPyPIImportRCE) {
     riskScore = Math.min(riskScore, 35);
   }
@@ -1652,7 +1747,8 @@ const REPUTATION_FACTOR_BOUNDS = { min: 0.10, max: 1.5 };
 function _hasNumeric(v) { return typeof v === 'number' && !Number.isNaN(v); }
-function _factorFromMetadata(meta) {
+function _factorFromMetadata(meta, opts) {
+  const allowProvenanceBonus = !opts || opts.allowProvenanceBonus !== false;
   let factor = 1.0;
   let signalsApplied = 0;
   // Age (AUC 0.81 — strongest single discriminator). Old packages = benign.
@@ -1725,6 +1821,25 @@ function _factorFromMetadata(meta) {
     factor -= 0.15;
     signalsApplied++;
   }
+  // P3 (provenance) : Sigstore-backed publish provenance (npm --provenance / PyPI
+  // PEP 740). Two ASYMMETRIC signals:
+  //   - regressed (earlier versions attested, latest is not) → build divergence /
+  //     takeover suspicion (Ultralytics shape) → upweight. Always applies.
+  //   - present on the live latest version → mild downweight, BUT only when the
+  //     package shows no malice signal. A valid attestation proves WHICH pipeline
+  //     built the package, NOT that the code is safe: the TeamPCP / "Mini Shai-Hulud"
+  //     campaign (May 2026, 84 malicious TanStack versions) shipped VALID SLSA L3
+  //     Sigstore attestations by hijacking the legitimate release runner's OIDC
+  //     identity. Granting a trust bonus to an attested-but-malicious package would
+  //     actively help the attacker, so the bonus is suppressed whenever malice is
+  //     present (allowProvenanceBonus=false, set by applyReputationFactor).
+  if (meta.provenance_regressed === true) {
+    factor += 0.20;
+    signalsApplied++;
+  } else if (meta.has_provenance === true && allowProvenanceBonus) {
+    factor -= 0.10;
+    signalsApplied++;
+  }
   // If no signals applied (metadata fully absent), return neutral 1.0 rather
   // than the default-shaped factor — avoid spurious adjustments on rows where
   // the registry data is simply missing.
@@ -1732,6 +1847,33 @@ function _factorFromMetadata(meta) {
   return Math.max(REPUTATION_FACTOR_BOUNDS.min, Math.min(REPUTATION_FACTOR_BOUNDS.max, factor));
 }
+// Track R: "confirmed malice" predicate, kept identical to the MT-1 ceiling bypass
+// (HIGH_CONFIDENCE_MALICE_TYPES / compound / staged-C2). These are the signals the
+// pipeline already trusts as never-benign-regardless-of-context; reusing the exact
+// same definition keeps the reputation floor symmetric with the cap and bounds the
+// FP cost to zero (a benign popular package carries none of these).
+function _hasConfirmedMalice(threats) {
+  if (!Array.isArray(threats)) return false;
+  const hasHC = threats.some(t => HIGH_CONFIDENCE_MALICE_TYPES.has(t.type));
+  const hasCompound = threats.some(t => t.compound === true);
+  const hasStagedC2 = threats.some(t => t.type === 'staged_payload') &&
+    threats.some(t => t.type === 'suspicious_domain' && t.severity === 'HIGH');
+  return hasHC || hasCompound || hasStagedC2;
+}
+// P3 (TeamPCP / Mini Shai-Hulud hardening): broader malice predicate used to
+// SUPPRESS the provenance-presence trust bonus. A valid Sigstore/PEP-740 attestation
+// only proves the build pipeline's identity, not code safety — a compromised pipeline
+// emits valid attestations for malicious code. So any HIGH/CRITICAL signal (not just
+// the confirmed-malice set) must veto the provenance bonus, denying the attacker a
+// confidence boost. Broader than _hasConfirmedMalice on purpose: the bonus is a
+// trust grant, so we withhold it on weaker suspicion too.
+function _hasMaliceSignal(threats) {
+  if (!Array.isArray(threats)) return false;
+  if (_hasConfirmedMalice(threats)) return true;
+  return threats.some(t => t.severity === 'HIGH' || t.severity === 'CRITICAL');
+}
 function applyReputationFactor(result, metadata) {
   if (!result || !result.summary || !metadata) return null;
   // FPR plan : the reputation factor describes "how trustworthy this package
@@ -1755,13 +1897,24 @@ function applyReputationFactor(result, metadata) {
   ) {
     return null;
   }
-  const factor = _factorFromMetadata(metadata);
+  // P3 hardening: a valid attestation must NOT earn a trust bonus on a package that
+  // also shows malice (TeamPCP attested-malware scenario). Withhold it here, where
+  // the threat list is available.
+  const factor = _factorFromMetadata(metadata, {
+    allowProvenanceBonus: !_hasMaliceSignal(result.threats)
+  });
   if (factor === 1.0) {
     result.summary.reputationFactor = 1.0;
     return null;
   }
   const oldScore = result.summary.riskScore;
-  const newScore = Math.max(0, Math.min(MAX_RISK_SCORE, Math.round(oldScore * factor)));
+  let newScore = Math.max(0, Math.min(MAX_RISK_SCORE, Math.round(oldScore * factor)));
+  // Track R: malice-aware floor. Only raises the score when the reputation multiplier
+  // would otherwise bury a confirmed-malice detection under the alert threshold; never
+  // touches benign packages (no confirmed-malice signal) so FPR is unaffected.
+  if (newScore < REPUTATION_MALICE_FLOOR && _hasConfirmedMalice(result.threats)) {
+    newScore = REPUTATION_MALICE_FLOOR;
+  }
   result.summary.riskScore = newScore;
   result.summary.reputationFactor = factor;
   const rs = newScore;
@@ -2058,7 +2211,7 @@ const { applyDeltaMultiplier } = require('./scoring/delta-multiplier.js');
 module.exports = {
   SEVERITY_WEIGHTS, RISK_THRESHOLDS, MAX_RISK_SCORE, CONFIDENCE_FACTORS,
   SINGLE_FIRE_CRITICAL_TYPES, SINGLE_FIRE_CRITICAL_FLOOR, DECAY_ALPHA,
-  REPUTATION_FACTOR_BOUNDS,
+  REPUTATION_FACTOR_BOUNDS, REPUTATION_MALICE_FLOOR,
   MATURE_CAP_SCORE, MATURE_MIN_AGE_DAYS, MATURE_MIN_VERSION_COUNT, MATURE_MIN_WEEKLY_DOWNLOADS,
   SANDBOX_VERDICT_CONFIRMED_FLOOR, SANDBOX_VERDICT_CHAIN_FLOOR, SANDBOX_VERDICT_CLEAN_DELTA,
   applyMatureStableCap, applySandboxVerdict, applyDeltaMultiplier,