muaddib-scanner 2.11.82 → 2.11.84

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.82",
3
+ "version": "2.11.84",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-06-10T12:51:04.328Z",
3
+ "timestamp": "2026-06-10T20:04:48.914Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -80,7 +80,7 @@ const {
80
80
 
81
81
  // From ./ingestion.js
82
82
  const { getNpmLatestTarball, getPyPITarballUrl } = require('./ingestion.js');
83
- const { enqueueScan } = require('./scan-queue.js');
83
+ const { enqueueScan, dequeueScan } = require('./scan-queue.js');
84
84
 
85
85
  // From ./tarball-archive.js
86
86
  const { archiveSuspectTarball } = require('./tarball-archive.js');
@@ -259,7 +259,9 @@ function recordTrainingSample(result, params) {
259
259
  maxSeverity: result.summary ? result.summary.riskLevel : null,
260
260
  types: [...new Set((result.threats || []).map(t => t.type))],
261
261
  sandbox: params.sandboxResult ? 'run' : 'none',
262
- source: 'scan'
262
+ source: 'scan',
263
+ // AUDIT-A1: stamped on `result` in scanPackage (single source of truth)
264
+ firstPublish: !!(result && result._firstPublish)
263
265
  });
264
266
  } catch (err) {
265
267
  // Non-fatal: ML export must never crash the monitor
@@ -673,6 +675,12 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
673
675
 
674
676
  // First-publish detection: used for sandbox priority below
675
677
  const isFirstPublish = cacheTrigger && cacheTrigger.reason === 'first_publish';
678
+ // AUDIT-A1 observability: stamp once so every recordTrainingSample(result, …) call
679
+ // below carries firstPublish into the scan-ledger (all ~10 call sites share this
680
+ // `result`). Pairs with the firstPublish flag on the eviction-drop ledger entries so
681
+ // first-publish coverage (scanned vs dropped) becomes measurable. The "Phase 2a"
682
+ // comment below promised this; the threading was missing until now.
683
+ result._firstPublish = isFirstPublish;
676
684
 
677
685
  // npm registry metadata was fetched ONCE before the worker spawn (hoisted above
678
686
  // to feed scanContext.npmRegistryMeta) and is reused here for: isFirstPublishHigh-
@@ -1171,9 +1179,14 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
1171
1179
  console.log(`[MONITOR] REPUTATION BYPASS: ${name} has high-confidence threat — using raw score`);
1172
1180
  }
1173
1181
 
1174
- // Record daily alert with post-reputation score for top suspects ranking
1182
+ // Record daily alert with post-reputation score for top suspects ranking.
1183
+ // AUDIT-C: carry the distinct CRITICAL/HIGH threat types so the daily report
1184
+ // can annotate MCP suspects with their signals (visual triage, no scoring change).
1175
1185
  if (dailyAlerts.length < MAX_DAILY_ALERTS) {
1176
- dailyAlerts.push({ name, version, ecosystem, findingsCount: result.summary.total, score: adjustedResult.summary.riskScore || 0, tier });
1186
+ const signals = [...new Set((result.threats || [])
1187
+ .filter(t => t.severity === 'CRITICAL' || t.severity === 'HIGH')
1188
+ .map(t => t.type))].slice(0, 6);
1189
+ dailyAlerts.push({ name, version, ecosystem, findingsCount: result.summary.total, score: adjustedResult.summary.riskScore || 0, tier, signals });
1177
1190
  }
1178
1191
  // LLM Detective: AI-powered analysis for T1a/T1b suspects
1179
1192
  // Skip for fast-track (large boring packages — LLM analysis adds 10-30s for no value)
@@ -1354,7 +1367,8 @@ async function _spawnWorker(scanQueue, stats, dailyAlerts, recentlyScanned, down
1354
1367
  _activeWorkers++;
1355
1368
  try {
1356
1369
  while (scanQueue.length > 0 && _activeWorkers <= _targetConcurrency) {
1357
- const item = scanQueue.shift();
1370
+ // AUDIT A2: FIFO by default; priority dequeue when MUADDIB_PRIORITY_DEQUEUE=1.
1371
+ const item = dequeueScan(scanQueue);
1358
1372
  if (!item) break;
1359
1373
  await processQueueItem(item, stats, dailyAlerts, recentlyScanned, downloadsCache, scanQueue, sandboxAvailable);
1360
1374
  }
@@ -68,7 +68,9 @@ function enqueueScan(scanQueue, item, stats, max = MAX_SCAN_QUEUE) {
68
68
  if (evicted && evicted.name) {
69
69
  require('./state.js').appendScanLedger({
70
70
  name: evicted.name, version: evicted.version, ecosystem: evicted.ecosystem,
71
- outcome: 'dropped', source: protectedFallback ? 'queue_cap_protected' : 'queue_cap'
71
+ outcome: 'dropped', source: protectedFallback ? 'queue_cap_protected' : 'queue_cap',
72
+ // AUDIT-A1 observability (see evictFromScanQueueBulk)
73
+ firstPublish: !!evicted.firstPublish, isBurstExtra: !!evicted.isATOBurstExtra
72
74
  });
73
75
  }
74
76
  } catch { /* ledger is best-effort */ }
@@ -136,7 +138,12 @@ function evictFromScanQueueBulk(scanQueue, targetKeep, source = 'bulk_evict', le
136
138
  appendLedger({
137
139
  name: item.name, version: item.version, ecosystem: item.ecosystem,
138
140
  outcome: 'dropped',
139
- source: _isProtected(item) ? `${source}_protected` : source
141
+ source: _isProtected(item) ? `${source}_protected` : source,
142
+ // AUDIT-A1 observability: record whether a DROPPED item was a first-publish
143
+ // (real coverage loss) vs a burst-extra (version-spam, expected). Lets us
144
+ // measure if the memory breaker is evicting genuine new packages.
145
+ firstPublish: !!item.firstPublish,
146
+ isBurstExtra: !!item.isATOBurstExtra
140
147
  });
141
148
  } catch { /* ledger is best-effort — must never break the breaker */ }
142
149
  }
@@ -149,4 +156,41 @@ function evictFromScanQueueBulk(scanQueue, targetKeep, source = 'bulk_evict', le
149
156
  return { dropped: toDrop, droppedProtected };
150
157
  }
151
158
 
152
- module.exports = { enqueueScan, evictFromScanQueueBulk, isProtected: _isProtected, MAX_SCAN_QUEUE };
// ── AUDIT A2: optional priority dequeue (gated OFF by default) ──────────────
// Default dequeue is strict FIFO (scanQueue.shift()). When enabled, the worker pulls
// the OLDEST high-value item (first-publish / known-malicious / burst-MAIN) within a
// bounded head-window before falling back to FIFO — so a genuine new package never
// ages out behind a deep version-spam backlog. Gated behind an env flag so deploying
// the code is INERT until ops flips it on. Burst EXTRAS (isATOBurstExtra) and regular
// items stay FIFO.
const PRIORITY_DEQUEUE_DEFAULT_WINDOW = 2048;

/**
 * Coerce a head-window setting to a usable size.
 * Strings are parsed as base-10 integers (env vars); numbers are taken as-is.
 * Anything that is not a finite value > 0 yields `fallback`.
 * @param {*} value raw setting (env string, test override, or undefined)
 * @param {number} fallback size to use when `value` is unusable
 * @returns {number}
 */
function _positiveWindow(value, fallback) {
  const n = typeof value === 'number' ? value : Number.parseInt(value, 10);
  return Number.isFinite(n) && n > 0 ? n : fallback;
}

// Read once at module load: only the literal values '1' and 'true' enable the feature.
const PRIORITY_DEQUEUE = (() => {
  const v = process.env.MUADDIB_PRIORITY_DEQUEUE;
  return v === '1' || v === 'true';
})();

// Read once at module load: how many items from the head are searched for a priority item.
const PRIORITY_DEQUEUE_WINDOW = _positiveWindow(
  process.env.MUADDIB_PRIORITY_DEQUEUE_WINDOW,
  PRIORITY_DEQUEUE_DEFAULT_WINDOW
);

/**
 * True when a queue item should jump the FIFO order: flagged firstPublish, flagged
 * isIOCMatch, or a burst item that is not a burst extra (isATOBurstExtra).
 * @param {object} item queue entry (may be null/undefined)
 * @returns {boolean}
 */
function _isPriority(item) {
  return !!(item && (item.firstPublish || item.isIOCMatch || (item.isBurst && !item.isATOBurstExtra)));
}

/**
 * Remove and return the next item to scan. Strict FIFO by default (unchanged). With
 * MUADDIB_PRIORITY_DEQUEUE=1: oldest priority item within a bounded head-window, else
 * FIFO. The queue array is mutated in place (shift/splice).
 * @param {Array} scanQueue
 * @param {{priority?: boolean, window?: number}} [opts] test overrides; `null` is
 *   treated like "no overrides", and a window that is not a finite value > 0 falls
 *   back to the configured window instead of silently disabling the priority search.
 * @returns {*} the dequeued item, or undefined when the queue is empty
 */
function dequeueScan(scanQueue, opts = {}) {
  // A default parameter only covers `undefined`; guard an explicit `null` too.
  const overrides = opts || {};
  const priority = overrides.priority !== undefined ? overrides.priority : PRIORITY_DEQUEUE;
  if (!priority || scanQueue.length === 0) return scanQueue.shift();
  const win = Math.min(scanQueue.length, _positiveWindow(overrides.window, PRIORITY_DEQUEUE_WINDOW));
  for (let i = 0; i < win; i++) {
    // First (= oldest) priority item inside the window wins.
    if (_isPriority(scanQueue[i])) return i === 0 ? scanQueue.shift() : scanQueue.splice(i, 1)[0];
  }
  // No priority item in the window: plain FIFO.
  return scanQueue.shift();
}
195
+
196
+ module.exports = { enqueueScan, evictFromScanQueueBulk, dequeueScan, isProtected: _isProtected, MAX_SCAN_QUEUE };
@@ -1010,6 +1010,10 @@ function appendScanLedger(e) {
1010
1010
  types: Array.isArray(e.types) ? e.types.slice(0, 12) : [],
1011
1011
  sandbox: e.sandbox || 'none',
1012
1012
  firstPublish: !!e.firstPublish,
1013
+ // AUDIT-A1: version-spam marker on dropped burst-extras — lets the coverage
1014
+ // rollup separate "first-publish lost" from "spam extra dropped (expected)".
1015
+ // Only written when true to keep the 127MB ledger lean.
1016
+ ...(e.isBurstExtra ? { isBurstExtra: true } : {}),
1013
1017
  source: e.source || 'scan'
1014
1018
  };
1015
1019
  fs.appendFileSync(SCAN_LEDGER_FILE, JSON.stringify(entry) + '\n', 'utf8');
@@ -1094,6 +1094,22 @@ function formatLedgerField(rollup) {
1094
1094
  return { name: 'Ledger (24h)', value: lines.join('\n'), inline: false };
1095
1095
  }
1096
1096
 
1097
+ // AUDIT-C: MCP self-identity by package name (matches the F9/F15 MCP_NAME_RE family in
1098
+ // feature-extractor.js — kept local to avoid importing the ML module into the embed path).
1099
+ const _MCP_TRIAGE_NAME_RE = /(?:^|[/_-])mcp(?:[_-]|$)|mcp[_-](?:server|init|bridge|installer|memory|plugin|core|router|host|client|gateway|relay|stdio|transport|orchestrator)/i;
1100
+
/**
 * Build the triage suffix appended to a daily-report top-suspect line.
 * Presentation only.
 *
 * The alert's name (`a.name`, falling back to `a.package`) is tested against
 * _MCP_TRIAGE_NAME_RE. On a match, up to three entries of `a.signals` are listed.
 *
 * @param {object} a daily-alert entry
 * @returns {string} ' 🔌 [MCP: sig1, sig2]' when the name matches and signals exist,
 *   ' 🔌 [MCP]' when the name matches but no signals are recorded, '' otherwise
 */
function mcpTriageTag(a) {
  const label = (a && (a.name || a.package)) || '';
  const looksLikeMcp = _MCP_TRIAGE_NAME_RE.test(label);
  if (!looksLikeMcp) {
    return '';
  }

  // Alerts without a `signals` array simply get the bare tag.
  let shown = [];
  if (Array.isArray(a.signals)) {
    shown = a.signals.slice(0, 3);
  }

  if (shown.length === 0) {
    return ' 🔌 [MCP]';
  }
  return ` 🔌 [MCP: ${shown.join(', ')}]`;
}
1112
+
1097
1113
  function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
1098
1114
  // Use in-memory stats (accumulated since last reset, restored from disk on restart)
1099
1115
  // instead of disk-based daily entries which can undercount due to UTC/Paris date mismatch
@@ -1110,7 +1126,10 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
1110
1126
  const version = a.version || 'N/A';
1111
1127
  const count = a.findingsCount || (a.findings ? a.findings.length : 0);
1112
1128
  const scoreText = a.score != null ? `score ${a.score}, ` : '';
1113
- return `${i + 1}. **${name}@${version}**${scoreText}${count} finding(s)`;
1129
+ // AUDIT-C: annotate MCP suspects (identity + signals) for visual triage — MCP
1130
+ // servers score high but are statically ambiguous vs MCP-malware (see AUDIT 2).
1131
+ // Pure presentation, no scoring change.
1132
+ return `${i + 1}. **${name}@${version}** — ${scoreText}${count} finding(s)${mcpTriageTag(a)}`;
1114
1133
  }).join('\n')
1115
1134
  : 'None';
1116
1135
 
@@ -251,6 +251,16 @@ const PLAYBOOKS = {
251
251
  'Technique Shai-Hulud (TeamPCP). Supprimer le workflow immediatement. ' +
252
252
  'Si le workflow a ete execute, considerer tous les secrets du repository compromis et les regenerer.',
253
253
 
254
+ unpinned_action:
255
+ 'INFO: Action GitHub tierce epinglee a une ref mutable (tag/branche) au lieu d\'un commit SHA. ' +
256
+ 'Epingler au SHA complet du commit (ex: uses: owner/repo@<40-hex-sha>) pour empecher qu\'une release retaggee ' +
257
+ 'injecte du code malveillant (cf. tj-actions/changed-files CVE-2025-30066).',
258
+
259
+ unpinned_action_in_risky_workflow:
260
+ 'CRITIQUE: Action tierce non-epinglee combinee a un workflow controlable par un attaquant (injection ou pwn-request). ' +
261
+ 'Vecteur de livraison supply-chain (pattern tj-actions/Ultralytics). Epingler toutes les actions au SHA, ' +
262
+ 'supprimer le trigger risque (pull_request_target / contexte attaquant), et auditer l\'historique d\'execution du workflow.',
263
+
254
264
  sandbox_sensitive_file_read:
255
265
  'CRITIQUE: Package lit des fichiers sensibles (credentials) lors de l\'installation. Ne pas installer. Supprimer immediatement.',
256
266
  sandbox_sensitive_file_write:
@@ -1592,6 +1592,32 @@ const RULES = {
1592
1592
  ],
1593
1593
  mitre: 'T1552.001'
1594
1594
  },
1595
+ unpinned_action: {
1596
+ id: 'MUADDIB-GHA-005',
1597
+ name: 'Unpinned Third-Party GitHub Action',
1598
+ severity: 'LOW',
1599
+ confidence: 'low',
1600
+ domain: 'engineering',
1601
+ description: 'Action GitHub tierce epinglee a une ref mutable (tag/branche) au lieu d\'un commit SHA. Une release retaggee livre du code malveillant a tous les consommateurs — cause racine de tj-actions/changed-files (CVE-2025-30066) et reviewdog (CVE-2025-30154). Informatif seul ; le signal fort est le compound MUADDIB-GHA-006.',
1602
+ references: [
1603
+ 'https://www.cisa.gov/news-events/alerts/2025/03/18/supply-chain-compromise-third-party-tj-actionschanged-files-cve-2025-30066-and-reviewdogaction',
1604
+ 'https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-third-party-actions'
1605
+ ],
1606
+ mitre: 'T1195.002'
1607
+ },
1608
+ unpinned_action_in_risky_workflow: {
1609
+ id: 'MUADDIB-GHA-006',
1610
+ name: 'Unpinned Action in Attacker-Controllable Workflow',
1611
+ severity: 'CRITICAL',
1612
+ confidence: 'high',
1613
+ domain: 'malware',
1614
+ description: 'Compound: action tierce non-epinglee (ref mutable) dans un workflow egalement controlable par un attaquant (injection de contexte ou pwn-request). La ref mutable est le vecteur de livraison, le trigger risque est la portee — pattern tj-actions/Ultralytics. FP~0 par construction (requiert les deux moities independantes).',
1615
+ references: [
1616
+ 'https://www.cisa.gov/news-events/alerts/2025/03/18/supply-chain-compromise-third-party-tj-actionschanged-files-cve-2025-30066-and-reviewdogaction',
1617
+ 'https://orca.security/resources/blog/pull-request-nightmare-part-2-exploits/'
1618
+ ],
1619
+ mitre: 'T1195.002'
1620
+ },
1595
1621
 
1596
1622
  // Sandbox detections
1597
1623
  sandbox_sensitive_file_read: {
@@ -62,6 +62,10 @@ function scanDirRecursive(dirPath, targetPath, threats, depth = 0) {
62
62
  const activeLines = yamlLines.filter(l => !l.trim().startsWith('#'));
63
63
  const activeContent = activeLines.join('\n');
64
64
 
65
+ // Per-file risk flags, consumed by the GHA-006 compound below.
66
+ let fileHasInjection = false;
67
+ let fileHasPwn = false;
68
+
65
69
  // Détection du backdoor Shai-Hulud discussion.yaml
66
70
  if (file === 'discussion.yaml' || file === 'discussion.yml') {
67
71
  if (activeContent.includes('github.event.discussion.body')) {
@@ -82,6 +86,7 @@ function scanDirRecursive(dirPath, targetPath, threats, depth = 0) {
82
86
 
83
87
  for (const { regex, msg } of injectionPatterns) {
84
88
  if (regex.test(activeContent)) {
89
+ fileHasInjection = true;
85
90
  threats.push({
86
91
  type: 'workflow_injection',
87
92
  severity: 'HIGH',
@@ -95,6 +100,7 @@ function scanDirRecursive(dirPath, targetPath, threats, depth = 0) {
95
100
  const hasPRTarget = /pull_request_target/m.test(activeContent);
96
101
  const hasCheckoutPRHead = /actions\/checkout[\s\S]*?ref:\s*\$\{\{\s*github\.event\.pull_request\.head\.(ref|sha)\s*\}\}/m.test(activeContent);
97
102
  if (hasPRTarget && hasCheckoutPRHead) {
103
+ fileHasPwn = true;
98
104
  threats.push({
99
105
  type: 'workflow_pwn_request',
100
106
  severity: 'CRITICAL',
@@ -114,6 +120,52 @@ function scanDirRecursive(dirPath, targetPath, threats, depth = 0) {
114
120
  file: relFile
115
121
  });
116
122
  }
123
+
124
+ // GHA-005: Unpinned THIRD-PARTY action — pinned to a mutable tag/branch ref
125
+ // instead of an immutable commit SHA. Root cause of the tj-actions/changed-files
126
+ // (CVE-2025-30066) and reviewdog (CVE-2025-30154) compromises: a retagged release
127
+ // silently ships malicious code to every consumer. LOW/informational on its own —
128
+ // pinning to a major tag is ubiquitous and usually benign — and restricted to
129
+ // third-party orgs (official actions/* and github/* are conventionally trusted) to
130
+ // avoid noise on the near-universal `actions/checkout@v4`. The real signal is the
131
+ // GHA-006 compound below.
132
+ let fileHasUnpinnedThirdParty = false;
133
+ const usesRe = /^\s*-?\s*uses:\s*['"]?([^'"\s#]+)/gm;
134
+ let um;
135
+ while ((um = usesRe.exec(activeContent)) !== null) {
136
+ const ref = um[1];
137
+ // Local actions (./, ../) and docker refs carry no upstream tag to retag.
138
+ if (ref.startsWith('./') || ref.startsWith('../') || ref.startsWith('.\\') || ref.startsWith('docker://')) continue;
139
+ const at = ref.lastIndexOf('@');
140
+ if (at === -1) continue;
141
+ const repo = ref.slice(0, at);
142
+ const pin = ref.slice(at + 1);
143
+ if (/^[0-9a-f]{40}$/i.test(pin)) continue; // immutable SHA — correctly pinned
144
+ const org = repo.split('/')[0].toLowerCase();
145
+ if (org === 'actions' || org === 'github') continue; // first-party trusted orgs
146
+ fileHasUnpinnedThirdParty = true;
147
+ threats.push({
148
+ type: 'unpinned_action',
149
+ severity: 'LOW',
150
+ confidence: 'low',
151
+ message: `Third-party GitHub Action "${ref}" is pinned to a mutable ref ("${pin}") instead of a commit SHA — a retagged release (cf. tj-actions CVE-2025-30066) would execute attacker-controlled code.`,
152
+ file: relFile
153
+ });
154
+ }
155
+
156
+ // GHA-006 compound: an unpinned third-party action in a workflow that is ALSO
157
+ // attacker-controllable (context injection or pwn-request). This is the
158
+ // tj-actions / Ultralytics shape — the mutable ref is the delivery vector and the
159
+ // risky trigger is the reach. FP≈0 by construction: requires both independent halves.
160
+ if (fileHasUnpinnedThirdParty && (fileHasInjection || fileHasPwn)) {
161
+ threats.push({
162
+ type: 'unpinned_action_in_risky_workflow',
163
+ severity: 'CRITICAL',
164
+ compound: true,
165
+ message: 'Unpinned third-party action combined with an attacker-controllable workflow trigger (injection/pwn-request) — supply-chain delivery vector (tj-actions/Ultralytics pattern).',
166
+ file: relFile
167
+ });
168
+ }
117
169
  }
118
170
  }
119
171
 
@@ -142,6 +142,21 @@ async function getPackageMetadata(packageName) {
142
142
 
143
143
  const hasRepository = !!(latestMeta?.repository || meta.repository);
144
144
 
145
+ // P3 (provenance): npm publish provenance / attestations (npm `--provenance`,
146
+ // Sigstore-backed, GA since 2023) appear as `dist.attestations` on the version.
147
+ // Presence on the live latest version is a trust signal (downweight, fewer FP);
148
+ // a mature package whose latest version LOST the provenance that earlier versions
149
+ // carried is a build-divergence / takeover signal (Ultralytics shape — upweight).
150
+ const latestHasProvenance = !!(latestMeta?.dist?.attestations);
151
+ let anyPriorHadProvenance = false;
152
+ if (!latestHasProvenance && meta.versions) {
153
+ for (const [v, vm] of Object.entries(meta.versions)) {
154
+ if (v === latestVersion) continue;
155
+ if (vm?.dist?.attestations) { anyPriorHadProvenance = true; break; }
156
+ }
157
+ }
158
+ const provenanceRegressed = !latestHasProvenance && anyPriorHadProvenance;
159
+
145
160
  // 2. Weekly downloads + author search (parallel)
146
161
  const downloadsUrl = DOWNLOADS_URL + '/' + encodeURIComponent(packageName);
147
162
  const authorUrl = maintainer
@@ -207,6 +222,10 @@ async function getPackageMetadata(packageName) {
207
222
  maintainer_emails: maintainerEmails,
208
223
  // C3 : per-version publish timestamps for delta-mode selectPriorVersions.
209
224
  time: versionTimes,
225
+ // P3 : Sigstore-backed publish provenance on the live latest version, and
226
+ // whether it regressed (earlier versions had it, latest does not).
227
+ has_provenance: latestHasProvenance,
228
+ provenance_regressed: provenanceRegressed,
210
229
  ...advancedSignals
211
230
  };
212
231
  }
@@ -206,6 +206,22 @@ async function getPyPIPackageMetadata(packageName) {
206
206
  yanked = releases[latestVersion].every(f => f && f.yanked === true);
207
207
  }
208
208
 
209
+ // P3 (provenance): PEP 740 digital attestations (Trusted Publishing, supported
210
+ // since Nov 2024) surface as a `provenance` field on a release file. Same dual
211
+ // signal as npm: present on the latest version → trust (downweight); regressed
212
+ // from earlier versions → build-divergence / takeover suspicion (upweight).
213
+ let latestHasProvenance = false;
214
+ if (latestVersion && Array.isArray(releases[latestVersion])) {
215
+ latestHasProvenance = releases[latestVersion].some(f => f && f.provenance);
216
+ }
217
+ let anyPriorHadProvenance = false;
218
+ if (!latestHasProvenance) {
219
+ for (const [v, files] of Object.entries(releases)) {
220
+ if (v === latestVersion || !Array.isArray(files)) continue;
221
+ if (files.some(f => f && f.provenance)) { anyPriorHadProvenance = true; break; }
222
+ }
223
+ }
224
+
209
225
  const data = {
210
226
  created_at: createdAt,
211
227
  latest_release_at: latestReleaseAt,
@@ -218,7 +234,9 @@ async function getPyPIPackageMetadata(packageName) {
218
234
  : (typeof info.description === 'string' ? info.description.slice(0, 1000) : ''),
219
235
  home_page: typeof info.home_page === 'string' && info.home_page ? info.home_page : null,
220
236
  project_urls: (info.project_urls && typeof info.project_urls === 'object') ? info.project_urls : null,
221
- releases: releaseTimes
237
+ releases: releaseTimes,
238
+ has_provenance: latestHasProvenance,
239
+ provenance_regressed: !latestHasProvenance && anyPriorHadProvenance
222
240
  };
223
241
 
224
242
  _pypiMetadataCache.set(normalized, { fetchedAt: Date.now(), data });
package/src/scoring.js CHANGED
@@ -170,6 +170,34 @@ const SINGLE_FIRE_CRITICAL_TYPES = new Set([
170
170
  ]);
171
171
  const SINGLE_FIRE_CRITICAL_FLOOR = 75;
172
172
  const SINGLE_FIRE_MIN_SEVERITY_RANK = 2; // HIGH
173
+
174
+ // MT-1 / PyPI unblock: import-time RCE on PyPI is the lifecycle-equivalent of an
175
+ // npm install hook — code that runs at `pip install` time via __init__.py / setup.py.
176
+ // PyPI packages emit no `lifecycle_script` (an npm-only signal), so confirmed
177
+ // import-time RCE would otherwise be capped at 35 and buried in the benign 25-35
178
+ // cluster. These types are emitted ONLY by the Python scanners (python-source.js /
179
+ // python-ast-detectors) on .py files, so their presence is itself the PyPI signal —
180
+ // no ecosystem flag needed, and npm packages are unaffected (they never emit them).
181
+ const PYPI_IMPORT_TIME_RCE_TYPES = new Set([
182
+ 'import_time_exec',
183
+ 'import_time_subprocess',
184
+ 'import_time_os_system',
185
+ 'import_time_deserialization',
186
+ 'import_time_fetch_exec',
187
+ 'fetch_to_fork_exec_inline',
188
+ 'pyast_module_level_exec',
189
+ 'pyast_module_level_subprocess_shell',
190
+ 'pyast_module_level_unsafe_deserialization',
191
+ 'pyast_setup_cmdclass_override',
192
+ 'pyast_ctypes_shellcode_load'
193
+ ]);
194
+
195
+ // Track R: the reputation multiplier (applyReputationFactor) may suppress noise on
196
+ // mature/popular packages down to ×0.10, but it must never pull a CONFIRMED malice
197
+ // detection below the operational alert threshold. Account-takeover of a popular
198
+ // package (Shai-Hulud / event-stream shape) is the #1 real-world vector and would
199
+ // otherwise inherit the victim package's reputation and be silently dropped.
200
+ const REPUTATION_MALICE_FLOOR = 20;
173
201
  const _SEV_RANK = { LOW: 0, MEDIUM: 1, HIGH: 2, CRITICAL: 3 };
174
202
 
175
203
  /**
@@ -672,9 +700,13 @@ const SCORING_COMPOUNDS = [
672
700
  type: 'recon_exfil_direct_ip',
673
701
  requires: ['linux_fingerprint_exec', 'direct_ip_exfil'],
674
702
  severity: 'CRITICAL',
675
- message: 'Linux system fingerprint (id/uname/lsb_release/hostname/whoami) + direct-IP exfil in same file — targeted device fingerprinting for C2 grouping (scoring compound).',
703
+ message: 'Linux system fingerprint (id/uname/lsb_release/hostname/whoami) + direct-IP exfil in the same module — targeted device fingerprinting for C2 grouping (scoring compound).',
676
704
  fileFrom: 'direct_ip_exfil',
677
- sameFile: true
705
+ sameFile: true,
706
+ // P2c: also fire when the two halves are split across statically-import-linked
707
+ // files (anti-fragmentation). Both components are individually high-signal, so
708
+ // extending from sameFile to sameModule keeps FP≈0 while closing the evasion.
709
+ sameModule: true
678
710
  },
679
711
  ];
680
712
 
@@ -707,6 +739,50 @@ function _extractStaticImports(filePath) {
707
739
  return imports;
708
740
  }
709
741
 
/**
 * P2c (anti-fragmentation): collect the 1-hop static import targets of a file as
 * paths relative to `targetPath`, using forward slashes.
 *
 * Specifiers come from _extractStaticImports. A resolved path that does not already
 * end in .js/.mjs/.cjs is completed with '.js' or '/index.js' when such a file exists
 * on disk; otherwise it is recorded unchanged.
 *
 * @param {string} relFile importing file, relative to targetPath
 * @param {string} targetPath root directory of the scanned package
 * @returns {Set<string>} resolved import targets (empty for falsy input,
 *   'package.json' or '(unknown)')
 */
function _resolveImports1Hop(relFile, targetPath) {
  const fs = require('fs');
  const pathMod = require('path');
  const targets = new Set();

  const skip = !relFile || relFile === 'package.json' || relFile === '(unknown)';
  if (skip) return targets;

  const importerAbs = pathMod.resolve(targetPath, relFile);
  const importerDir = pathMod.dirname(importerAbs);
  const hasJsExtension = /\.(js|mjs|cjs)$/;

  for (const spec of _extractStaticImports(importerAbs)) {
    const rel = pathMod
      .relative(targetPath, pathMod.resolve(importerDir, spec))
      .replace(/\\/g, '/');

    if (hasJsExtension.test(rel)) {
      targets.add(rel);
      continue;
    }
    // Extension-less specifier: prefer "<path>.js", then "<path>/index.js".
    if (fs.existsSync(pathMod.resolve(targetPath, rel + '.js'))) {
      targets.add(rel + '.js');
    } else if (fs.existsSync(pathMod.resolve(targetPath, rel, 'index.js'))) {
      targets.add(rel + '/index.js');
    } else {
      targets.add(rel);
    }
  }
  return targets;
}
767
+
/**
 * P2c: decide whether two threat files belong to the "same module" — i.e. they are
 * the same file, or one of them lists the other among its 1-hop static imports
 * (checked in both directions via _resolveImports1Hop).
 *
 * Backslashes are normalized to forward slashes before comparing. Falsy names,
 * 'package.json' and '(unknown)' never link. Without a targetPath only the
 * same-file case can succeed.
 *
 * @param {string} fileA first file, relative to targetPath
 * @param {string} fileB second file, relative to targetPath
 * @param {string} targetPath root directory of the scanned package
 * @returns {boolean}
 */
function _filesSameModule(fileA, fileB, targetPath) {
  const unusable = f => !f || f === 'package.json' || f === '(unknown)';
  if (unusable(fileA) || unusable(fileB)) return false;

  const [left, right] = [fileA, fileB].map(f => f.replace(/\\/g, '/'));
  if (left === right) return true;
  if (!targetPath) return false;

  // Import linkage counts in either direction.
  return _resolveImports1Hop(left, targetPath).has(right) ||
    _resolveImports1Hop(right, targetPath).has(left);
}
785
+
710
786
  // v2.11.11: Lifecycle scope resolution. Determines if a lifecycleScoped compound
711
787
  // should fire based on whether the non-lifecycle threats are in the lifecycle
712
788
  // target file or its direct static imports.
@@ -889,7 +965,22 @@ function applyCompoundBoosts(threats, targetPath) {
889
965
  const commonFiles = [...filesByType[0]].filter(f =>
890
966
  filesByType.every(s => s.has(f))
891
967
  );
892
- if (commonFiles.length === 0) continue;
968
+ if (commonFiles.length === 0) {
969
+ // P2c (anti-fragmentation): sameModule fallback — accept two component files
970
+ // linked by a 1-hop static import, so splitting the payload across an importer
971
+ // and its helper no longer evades the compound. Opt-in per compound and limited
972
+ // to the two-type case to bound the FP surface to the highest-confidence rules.
973
+ let linked = false;
974
+ if (compound.sameModule && filesByType.length === 2 && targetPath) {
975
+ for (const fa of filesByType[0]) {
976
+ for (const fb of filesByType[1]) {
977
+ if (_filesSameModule(fa, fb, targetPath)) { linked = true; break; }
978
+ }
979
+ if (linked) break;
980
+ }
981
+ }
982
+ if (!linked) continue;
983
+ }
893
984
  }
894
985
 
895
986
  if (!compoundAlreadyPresent) {
@@ -1464,7 +1555,11 @@ function calculateRiskScore(deduped, intentResult) {
1464
1555
  // json-spacer, reactvora: eval(data.content) from jsonkeeper.com is always malicious
1465
1556
  const _hasStagedC2 = deduped.some(t => t.type === 'staged_payload') &&
1466
1557
  deduped.some(t => t.type === 'suspicious_domain' && t.severity === 'HIGH');
1467
- if (!_hasLifecycle && !_hasHC && !_hasCompound && !_hasStagedC2) {
1558
+ // PyPI unblock: import-time RCE is the PyPI lifecycle-equivalent — bypass the cap so
1559
+ // confirmed Python install-time malware reaches its true score and separates from the
1560
+ // benign 25-35 cluster (which carries no import-time-exec signal).
1561
+ const _hasPyPIImportRCE = deduped.some(t => PYPI_IMPORT_TIME_RCE_TYPES.has(t.type));
1562
+ if (!_hasLifecycle && !_hasHC && !_hasCompound && !_hasStagedC2 && !_hasPyPIImportRCE) {
1468
1563
  riskScore = Math.min(riskScore, 35);
1469
1564
  }
1470
1565
 
@@ -1652,7 +1747,8 @@ const REPUTATION_FACTOR_BOUNDS = { min: 0.10, max: 1.5 };
1652
1747
 
1653
1748
  function _hasNumeric(v) { return typeof v === 'number' && !Number.isNaN(v); }
1654
1749
 
1655
- function _factorFromMetadata(meta) {
1750
+ function _factorFromMetadata(meta, opts) {
1751
+ const allowProvenanceBonus = !opts || opts.allowProvenanceBonus !== false;
1656
1752
  let factor = 1.0;
1657
1753
  let signalsApplied = 0;
1658
1754
  // Age (AUC 0.81 — strongest single discriminator). Old packages = benign.
@@ -1725,6 +1821,25 @@ function _factorFromMetadata(meta) {
1725
1821
  factor -= 0.15;
1726
1822
  signalsApplied++;
1727
1823
  }
1824
+ // P3 (provenance) : Sigstore-backed publish provenance (npm --provenance / PyPI
1825
+ // PEP 740). Two ASYMMETRIC signals:
1826
+ // - regressed (earlier versions attested, latest is not) → build divergence /
1827
+ // takeover suspicion (Ultralytics shape) → upweight. Always applies.
1828
+ // - present on the live latest version → mild downweight, BUT only when the
1829
+ // package shows no malice signal. A valid attestation proves WHICH pipeline
1830
+ // built the package, NOT that the code is safe: the TeamPCP / "Mini Shai-Hulud"
1831
+ // campaign (May 2026, 84 malicious TanStack versions) shipped VALID SLSA L3
1832
+ // Sigstore attestations by hijacking the legitimate release runner's OIDC
1833
+ // identity. Granting a trust bonus to an attested-but-malicious package would
1834
+ // actively help the attacker, so the bonus is suppressed whenever malice is
1835
+ // present (allowProvenanceBonus=false, set by applyReputationFactor).
1836
+ if (meta.provenance_regressed === true) {
1837
+ factor += 0.20;
1838
+ signalsApplied++;
1839
+ } else if (meta.has_provenance === true && allowProvenanceBonus) {
1840
+ factor -= 0.10;
1841
+ signalsApplied++;
1842
+ }
1728
1843
  // If no signals applied (metadata fully absent), return neutral 1.0 rather
1729
1844
  // than the default-shaped factor — avoid spurious adjustments on rows where
1730
1845
  // the registry data is simply missing.
@@ -1732,6 +1847,33 @@ function _factorFromMetadata(meta) {
1732
1847
  return Math.max(REPUTATION_FACTOR_BOUNDS.min, Math.min(REPUTATION_FACTOR_BOUNDS.max, factor));
1733
1848
  }
1734
1849
 
/**
 * Track R: "confirmed malice" predicate. True when the threat list contains
 *   - a type listed in HIGH_CONFIDENCE_MALICE_TYPES, or
 *   - any threat flagged `compound === true`, or
 *   - a 'staged_payload' together with a HIGH-severity 'suspicious_domain'.
 *
 * @param {Array<object>} threats
 * @returns {boolean} false for non-array input
 */
function _hasConfirmedMalice(threats) {
  if (!Array.isArray(threats)) return false;

  const isHighConfidence = t => HIGH_CONFIDENCE_MALICE_TYPES.has(t.type);
  const isCompound = t => t.compound === true;
  const isStagedPayload = t => t.type === 'staged_payload';
  const isHighSuspiciousDomain = t => t.type === 'suspicious_domain' && t.severity === 'HIGH';

  // All three checks are evaluated up front, then combined.
  const highConfidence = threats.some(isHighConfidence);
  const compound = threats.some(isCompound);
  const stagedC2 = threats.some(isStagedPayload) && threats.some(isHighSuspiciousDomain);

  return highConfidence || compound || stagedC2;
}
1863
+
/**
 * P3 hardening: broader malice predicate used to SUPPRESS the provenance-presence
 * trust bonus. True when _hasConfirmedMalice(threats) holds, or when any threat has
 * severity 'HIGH' or 'CRITICAL'. Deliberately broader than _hasConfirmedMalice: the
 * bonus is a trust grant, so it is withheld on weaker suspicion too.
 *
 * @param {Array<object>} threats
 * @returns {boolean} false for non-array input
 */
function _hasMaliceSignal(threats) {
  const vetoSeverities = ['HIGH', 'CRITICAL'];
  return Array.isArray(threats) && (
    _hasConfirmedMalice(threats) ||
    threats.some(({ severity }) => vetoSeverities.includes(severity))
  );
}
1876
+
1735
1877
  function applyReputationFactor(result, metadata) {
1736
1878
  if (!result || !result.summary || !metadata) return null;
1737
1879
  // FPR plan : the reputation factor describes "how trustworthy this package
@@ -1755,13 +1897,24 @@ function applyReputationFactor(result, metadata) {
1755
1897
  ) {
1756
1898
  return null;
1757
1899
  }
1758
- const factor = _factorFromMetadata(metadata);
1900
+ // P3 hardening: a valid attestation must NOT earn a trust bonus on a package that
1901
+ // also shows malice (TeamPCP attested-malware scenario). Withhold it here, where
1902
+ // the threat list is available.
1903
+ const factor = _factorFromMetadata(metadata, {
1904
+ allowProvenanceBonus: !_hasMaliceSignal(result.threats)
1905
+ });
1759
1906
  if (factor === 1.0) {
1760
1907
  result.summary.reputationFactor = 1.0;
1761
1908
  return null;
1762
1909
  }
1763
1910
  const oldScore = result.summary.riskScore;
1764
- const newScore = Math.max(0, Math.min(MAX_RISK_SCORE, Math.round(oldScore * factor)));
1911
+ let newScore = Math.max(0, Math.min(MAX_RISK_SCORE, Math.round(oldScore * factor)));
1912
+ // Track R: malice-aware floor. Only raises the score when the reputation multiplier
1913
+ // would otherwise bury a confirmed-malice detection under the alert threshold; never
1914
+ // touches benign packages (no confirmed-malice signal) so FPR is unaffected.
1915
+ if (newScore < REPUTATION_MALICE_FLOOR && _hasConfirmedMalice(result.threats)) {
1916
+ newScore = REPUTATION_MALICE_FLOOR;
1917
+ }
1765
1918
  result.summary.riskScore = newScore;
1766
1919
  result.summary.reputationFactor = factor;
1767
1920
  const rs = newScore;
@@ -2058,7 +2211,7 @@ const { applyDeltaMultiplier } = require('./scoring/delta-multiplier.js');
2058
2211
  module.exports = {
2059
2212
  SEVERITY_WEIGHTS, RISK_THRESHOLDS, MAX_RISK_SCORE, CONFIDENCE_FACTORS,
2060
2213
  SINGLE_FIRE_CRITICAL_TYPES, SINGLE_FIRE_CRITICAL_FLOOR, DECAY_ALPHA,
2061
- REPUTATION_FACTOR_BOUNDS,
2214
+ REPUTATION_FACTOR_BOUNDS, REPUTATION_MALICE_FLOOR,
2062
2215
  MATURE_CAP_SCORE, MATURE_MIN_AGE_DAYS, MATURE_MIN_VERSION_COUNT, MATURE_MIN_WEEKLY_DOWNLOADS,
2063
2216
  SANDBOX_VERDICT_CONFIRMED_FLOOR, SANDBOX_VERDICT_CHAIN_FLOOR, SANDBOX_VERDICT_CLEAN_DELTA,
2064
2217
  applyMatureStableCap, applySandboxVerdict, applyDeltaMultiplier,