muaddib-scanner 2.11.67 → 2.11.68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.67",
3
+ "version": "2.11.68",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-06-07T12:54:23.816Z",
3
+ "timestamp": "2026-06-07T13:41:08.649Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -1059,6 +1059,116 @@ function loadScanLedger() {
1059
1059
  return entries;
1060
1060
  }
1061
1061
 
1062
// Bounded distinct-key tracking for the `vanished` cross-reference (CLAUDE.md §2).
// Sits above the MAX_SCAN_LEDGER file ceiling so it is a pure safety valve: in normal
// operation the in-window key sets are far smaller than the file, so `exactVanished`
// stays true. Only an operator setting MUADDIB_SCAN_LEDGER_MAX above this would trip it.
const MAX_ROLLUP_KEYS = 1_200_000;

/**
 * Phase 0b: roll up the per-scan ledger into operational-coverage metrics.
 *
 * Makes a single pass over the ledger through `_iterateJsonlSync`, one entry at a
 * time. It distinguishes:
 *   - scanned : entries that reached a real verdict (outcome !== 'dropped')
 *   - dropped : queue-cap evictions (outcome === 'dropped') — never scanned
 *   - vanished: DISTINCT name@version that were dropped AND never (re)scanned in-window
 *               = a permanent coverage hole
 *
 * `vanished` is exact while `exactVanished` is true. Once the distinct-key cap is hit,
 * no new keys are recorded and `exactVanished` becomes false; from then on `vanished`
 * is a guaranteed LOWER BOUND: any later scan of a key that was recorded as dropped
 * removes it from the dropped set, so the count can undershoot but never overshoot.
 *
 * HONEST METRIC NOTE — `alertRate` is (suspect+confirmed) / scanned, i.e. "of what we
 * scanned, the fraction we flagged". It is NOT a true-positive rate: the ledger carries
 * no ground truth. Do not relabel `alertRate` as TPR (CLAUDE.md: no embellishing of
 * metrics).
 *
 * @param {number|string|null} [sinceTs] window start — ms epoch, ISO string, or null for
 *   "whole ledger". When a window is set, entries with ts < sinceTs (or an unparseable
 *   ts) are skipped. A non-finite number or unparseable string is treated as "no window".
 * @param {object} [opts]
 * @param {string} [opts.file] ledger path override (tests). Defaults to SCAN_LEDGER_FILE.
 * @param {number} [opts.maxKeys] distinct-key cap override (tests). Must be a
 *   non-negative integer; anything else falls back to MAX_ROLLUP_KEYS.
 * @returns {{
 *   generatedAt:string, since:string|null, windowStart:string|null, windowEnd:string|null,
 *   total:number, scanned:number, dropped:number, vanished:number, exactVanished:boolean,
 *   alerted:number, alertRate:number|null,
 *   byOutcome:Object.<string,number>,
 *   byEcosystem:Object.<string,{total:number,scanned:number,dropped:number,alerted:number}>
 * }}
 */
function computeLedgerRollup(sinceTs, opts = {}) {
  const file = opts.file || SCAN_LEDGER_FILE;
  const maxKeys = (Number.isInteger(opts.maxKeys) && opts.maxKeys >= 0)
    ? opts.maxKeys
    : MAX_ROLLUP_KEYS;

  let sinceMs = null;
  if (typeof sinceTs === 'number' && Number.isFinite(sinceTs)) {
    sinceMs = sinceTs;
  } else if (typeof sinceTs === 'string') {
    const p = Date.parse(sinceTs);
    if (!Number.isNaN(p)) sinceMs = p;
  }

  // Null-prototype maps: outcome / ecosystem labels come from ledger data, so keys
  // such as "__proto__" must not reach Object.prototype.
  const byOutcome = Object.create(null);
  const byEcosystem = Object.create(null);
  let total = 0, scanned = 0, dropped = 0, alerted = 0;
  let earliest = null, latest = null;
  // Two sets so `vanished` is correct regardless of drop/scan ordering in the file.
  const scannedKeys = new Set();
  const droppedKeys = new Set();
  let exactVanished = true;

  // Record `key` in `set` if it is already there or the combined cap allows it.
  // Returns false (and marks the rollup inexact) when a NEW key could not be stored.
  // A repeat of an already-tracked key never grows the sets, so it never trips the cap.
  const remember = (set, key) => {
    if (set.has(key)) return true;
    if (exactVanished && (scannedKeys.size + droppedKeys.size) < maxKeys) {
      set.add(key);
      return true;
    }
    exactVanished = false;
    return false;
  };

  _iterateJsonlSync(file, (e) => {
    if (!e || !e.name) return;
    let t = null;
    if (e.ts) { const p = Date.parse(e.ts); if (!Number.isNaN(p)) t = p; }
    // With a window set, an entry without a usable timestamp cannot be placed in it.
    if (sinceMs !== null && (t === null || t < sinceMs)) return;

    total++;
    if (t !== null) {
      if (earliest === null || t < earliest) earliest = t;
      if (latest === null || t > latest) latest = t;
    }

    // A missing or non-string outcome is counted as 'clean'.
    const outcome = (typeof e.outcome === 'string' && e.outcome) ? e.outcome : 'clean';
    byOutcome[outcome] = (byOutcome[outcome] || 0) + 1;

    const eco = e.ecosystem || 'unknown';
    let ecoNode = byEcosystem[eco];
    if (!ecoNode) ecoNode = byEcosystem[eco] = { total: 0, scanned: 0, dropped: 0, alerted: 0 };
    ecoNode.total++;

    const key = `${e.name}@${e.version || ''}`;
    if (outcome === 'dropped') {
      dropped++; ecoNode.dropped++;
      // An unrecorded drop can only make `vanished` undershoot — safe for a lower bound.
      remember(droppedKeys, key);
    } else {
      scanned++; ecoNode.scanned++;
      if (outcome === 'suspect' || outcome === 'confirmed') { alerted++; ecoNode.alerted++; }
      // A scan we cannot record must still cancel an earlier recorded drop, otherwise
      // that key would be reported as vanished although it was scanned.
      if (!remember(scannedKeys, key)) droppedKeys.delete(key);
    }
  });

  let vanished = 0;
  for (const k of droppedKeys) { if (!scannedKeys.has(k)) vanished++; }

  return {
    generatedAt: new Date().toISOString(),
    since: sinceMs !== null ? new Date(sinceMs).toISOString() : null,
    windowStart: earliest !== null ? new Date(earliest).toISOString() : null,
    windowEnd: latest !== null ? new Date(latest).toISOString() : null,
    total,
    scanned,
    dropped,
    vanished,
    exactVanished,
    alerted,
    // NOT a TPR — it is alerted / scanned. null when nothing was scanned.
    alertRate: scanned > 0 ? alerted / scanned : null,
    byOutcome,
    byEcosystem
  };
}
1171
+
1062
1172
  // --- Scan stats (FP rate tracking) ---
1063
1173
 
1064
1174
  function loadScanStats() {
@@ -1568,6 +1678,7 @@ module.exports = {
1568
1678
  appendDetection,
1569
1679
  appendScanLedger,
1570
1680
  loadScanLedger,
1681
+ computeLedgerRollup,
1571
1682
  _compactScanLedgerJsonl,
1572
1683
  getDetectionStats,
1573
1684
  runStateMigrations,
@@ -28,7 +28,8 @@ const {
28
28
  saveState,
29
29
  loadStateRaw,
30
30
  getScansSinceLastMemoryPersist,
31
- setScansSinceLastMemoryPersist
31
+ setScansSinceLastMemoryPersist,
32
+ computeLedgerRollup
32
33
  } = require('./state.js');
33
34
  const {
34
35
  HIGH_CONFIDENCE_MALICE_TYPES,
@@ -897,7 +898,52 @@ function formatDelta(current, previous) {
897
898
  return '=0';
898
899
  }
899
900
 
900
- function buildDailyReportEmbed(stats, dailyAlerts) {
901
/**
 * Parse the ledger-rollup window override (milliseconds).
 *
 * Uses strict numeric conversion rather than `parseInt`, which stops at the first
 * non-digit: "12h" would otherwise be read as a 12 ms window and "1e3" as 1 ms.
 * Fractional values are truncated to whole milliseconds.
 *
 * @param {string|undefined} raw raw environment value
 * @returns {number} a positive integer window in ms; 24h when `raw` is missing,
 *   non-numeric, non-finite, or not at least 1 ms
 */
function _parseLedgerRollupWindowMs(raw) {
  const DEFAULT_WINDOW_MS = 24 * 60 * 60 * 1000;
  const ms = Math.trunc(Number(raw));
  return Number.isFinite(ms) && ms > 0 ? ms : DEFAULT_WINDOW_MS;
}

// Phase 0b: rolling window for the daily report's ledger section. The report runs
// once/day, so 24h is the natural "what happened today" view and keeps the rollup's
// distinct-key sets small. Env-tunable via MUADDIB_LEDGER_ROLLUP_WINDOW_MS; read once
// at module load.
const LEDGER_ROLLUP_WINDOW_MS = _parseLedgerRollupWindowMs(process.env.MUADDIB_LEDGER_ROLLUP_WINDOW_MS);
908
+
909
/**
 * Best-effort ledger rollup over the daily-report window
 * (from `Date.now() - LEDGER_ROLLUP_WINDOW_MS` onwards).
 *
 * Never throws: any error raised while computing the rollup is deliberately
 * swallowed so that the daily report cannot be broken by it.
 *
 * @returns {object|null} the rollup, or null when computing it failed or when it
 *   holds no entries (so the report can omit the section instead of printing zeros)
 */
function safeLedgerRollup() {
  let result;
  try {
    const windowStartMs = Date.now() - LEDGER_ROLLUP_WINDOW_MS;
    result = computeLedgerRollup(windowStartMs);
  } catch (_err) {
    // Intentional: the ledger section is optional.
    return null;
  }
  if (!result || !(result.total > 0)) {
    return null;
  }
  return result;
}
923
+
924
/**
 * Format the ledger rollup as a Discord embed field, or null to omit it (no data).
 *
 * Lines, in order:
 *   1. scanned count, alerted count and alert rate as a percentage (NOT a TPR — it is
 *      alerted / scanned; rendered as 0.00 when `alertRate` is null)
 *   2. dropped count with the vanished count, only when something was dropped; the
 *      vanished count is prefixed with "≥" when `exactVanished` is false
 *   3. the four largest ecosystems by entry count
 *
 * Ecosystem labels come from ledger data and are unbounded, so the value is clamped
 * to Discord's 1024-character field limit: an oversized field would make Discord
 * reject the whole embed.
 *
 * NOTE(review): the field name is fixed at "Ledger (24h)" even though the window used
 * by the daily report is configurable — confirm whether the label should follow it.
 *
 * @param {object|null|undefined} rollup result of computeLedgerRollup
 * @returns {{name:string, value:string, inline:boolean}|null}
 */
function formatLedgerField(rollup) {
  if (!rollup || rollup.total <= 0) return null;

  const DISCORD_FIELD_VALUE_MAX = 1024;

  const pct = rollup.alertRate != null ? (rollup.alertRate * 100).toFixed(2) : '0.00';
  const lines = [`Scanned ${rollup.scanned} · Alerted ${rollup.alerted} (${pct}%)`];

  if (rollup.dropped > 0) {
    const vanishedNote = rollup.exactVanished ? `${rollup.vanished}` : `≥${rollup.vanished}`;
    lines.push(`Dropped ${rollup.dropped} (${vanishedNote} vanished)`);
  }

  // Tolerate a rollup without a per-ecosystem breakdown instead of throwing.
  const byEcosystem = rollup.byEcosystem || {};
  const ecos = Object.keys(byEcosystem)
    .sort((a, b) => byEcosystem[b].total - byEcosystem[a].total);
  if (ecos.length > 0) {
    lines.push(ecos.slice(0, 4).map(e => `${e} ${byEcosystem[e].total}`).join(' · '));
  }

  let value = lines.join('\n');
  if (value.length > DISCORD_FIELD_VALUE_MAX) {
    // Keep one slot for the ellipsis so the result is exactly at the limit.
    value = `${value.slice(0, DISCORD_FIELD_VALUE_MAX - 1)}…`;
  }

  return { name: 'Ledger (24h)', value, inline: false };
}
945
+
946
+ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
901
947
  // Use in-memory stats (accumulated since last reset, restored from disk on restart)
902
948
  // instead of disk-based daily entries which can undercount due to UTC/Paris date mismatch
903
949
  const { top3: diskTop3 } = buildReportFromDisk();
@@ -1000,6 +1046,12 @@ function buildDailyReportEmbed(stats, dailyAlerts) {
1000
1046
  } catch { /* non-fatal */ }
1001
1047
  const healthText = `Up ${uptimeH}h${uptimeM}m | Heap ${heapMB}MB${jsonlInfo}`;
1002
1048
 
1049
+ // --- Phase 0b: per-scan ledger rollup (operational coverage) ---
1050
+ // Caller may pass a precomputed rollup (sendDailyReport does, to persist the same
1051
+ // numbers it displays); undefined → compute here; explicit null → omit the section.
1052
+ const ledger = ledgerRollup !== undefined ? ledgerRollup : safeLedgerRollup();
1053
+ const ledgerField = formatLedgerField(ledger);
1054
+
1003
1055
  const now = new Date();
1004
1056
  const readableTime = now.toISOString().replace('T', ' ').replace(/\.\d+Z$/, ' UTC');
1005
1057
 
@@ -1022,6 +1074,7 @@ function buildDailyReportEmbed(stats, dailyAlerts) {
1022
1074
  ? [{ name: 'Deferred Sandbox', value: `Enqueued: ${stats.sandboxDeferred || 0} | Processed: ${stats.deferredProcessed || 0} | Expired: ${stats.deferredExpired || 0}`, inline: false }]
1023
1075
  : []),
1024
1076
  { name: 'Stability', value: `Restarts (24h): ${stats.restartsToday || 0} | Temporal load-shed: ${stats.temporalLoadShed || 0} | Queue hard-drops: ${stats.queueHardDrops || 0}`, inline: false },
1077
+ ...(ledgerField ? [ledgerField] : []),
1025
1078
  { name: 'System', value: healthText, inline: false }
1026
1079
  ],
1027
1080
  footer: {
@@ -1060,7 +1113,10 @@ async function sendDailyReport(stats, dailyAlerts, recentlyScanned, downloadsCac
1060
1113
  // delta. Written before the (now last) webhook so a mid-send kill can't double-count.
1061
1114
  saveLastDailyReportDate(today, captureScanStatsBaseline());
1062
1115
 
1063
- const payload = buildDailyReportEmbed(stats, dailyAlerts);
1116
+ // Phase 0b: compute the ledger rollup ONCE so the embed shows exactly the numbers
1117
+ // we persist (no double-scan, no drift between Discord and the on-disk metrics).
1118
+ const ledgerRollup = safeLedgerRollup();
1119
+ const payload = buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup);
1064
1120
 
1065
1121
  // Persist locally with full raw metrics (independent of webhook — enables trend analysis)
1066
1122
  persistDailyReport(payload, {
@@ -1081,6 +1137,7 @@ async function sendDailyReport(stats, dailyAlerts, recentlyScanned, downloadsCac
1081
1137
  restartsToday: stats.restartsToday || 0,
1082
1138
  temporalLoadShed: stats.temporalLoadShed || 0,
1083
1139
  queueHardDrops: stats.queueHardDrops || 0,
1140
+ ledger: ledgerRollup || null,
1084
1141
  topSuspects: dailyAlerts.slice().sort((a, b) => (b.score || 0) - (a.score || 0) || b.findingsCount - a.findingsCount).slice(0, 10)
1085
1142
  });
1086
1143
 
@@ -1337,6 +1394,7 @@ module.exports = {
1337
1394
  buildMaintainerChangeWebhookEmbed,
1338
1395
  buildCanaryExfiltrationWebhookEmbed,
1339
1396
  buildDailyReportEmbed,
1397
+ formatLedgerField,
1340
1398
  sendDailyReport,
1341
1399
  buildReportFromDisk,
1342
1400
  buildReportEmbedFromDisk,