muaddib-scanner 2.11.90 → 2.11.92

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/muaddib.js CHANGED
@@ -659,6 +659,19 @@ if (command === 'version' || command === '--version' || command === '-v') {
659
659
  console.error('[ERROR]', err.message);
660
660
  process.exit(1);
661
661
  });
662
+ } else if (command === 'shadow-report') {
663
+ const { runShadowReport } = require('../src/commands/shadow-report.js');
664
+ const shOpts = { json: jsonOutput };
665
+ for (let i = 0; i < options.length; i++) {
666
+ if (options[i] === '--since' && options[i + 1]) { shOpts.since = options[i + 1]; i++; }
667
+ else if (options[i] === '--detector' && options[i + 1]) { shOpts.detector = options[i + 1]; i++; }
668
+ }
669
+ runShadowReport(shOpts).then(() => {
670
+ process.exit(0);
671
+ }).catch(err => {
672
+ console.error('[ERROR]', err.message);
673
+ process.exit(1);
674
+ });
662
675
  } else if (command === 'evaluate') {
663
676
  if (wantHelp) showHelp('evaluate');
664
677
  const { evaluate } = require('../src/commands/evaluate.js');
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.90",
3
+ "version": "2.11.92",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-06-11T08:32:51.994Z",
3
+ "timestamp": "2026-06-11T11:05:03.615Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -0,0 +1,106 @@
1
+ 'use strict';
2
+
3
+ // muaddib shadow-report — read the shadow-mode divergence log and print the
4
+ // V1-vs-V2 adjudication split per detector. This is the read side of
5
+ // src/shared/shadow.js: detectors compute a candidate semantics alongside the
6
+ // live one and log disagreements; this command turns the log into the table a
7
+ // human adjudicates before flipping the semantics.
8
+ //
9
+ // The log only contains DIVERGENCES (agreements are not recorded), so:
10
+ // old-only = oldVerdict truthy, newVerdict falsy → alerts V2 would drop (FP killed)
11
+ // new-only = newVerdict truthy, oldVerdict falsy → NEW flags (review every one)
12
+ // changed = both truthy but different (e.g. severity reclassification)
13
+
14
+ const { readShadowDivergences } = require('../shared/shadow.js');
15
+
16
+ const DAY_MS = 24 * 60 * 60 * 1000;
17
+
18
+ /** Parse `--since 7d` / `--since 12h` / ISO string → ms epoch (null = all; unparseable input also yields null). */
19
+ function parseSince(s) {
20
+ if (!s) return null;
21
+ const m = /^(\d+)([dh])$/.exec(s);
22
+ if (m) {
23
+ const n = parseInt(m[1], 10);
24
+ return Date.now() - n * (m[2] === 'd' ? DAY_MS : 3600 * 1000);
25
+ }
26
+ const p = Date.parse(s);
27
+ return Number.isNaN(p) ? null : p;
28
+ }
29
+
30
+ function classify(e) {
31
+ const oldT = !!e.oldVerdict, newT = !!e.newVerdict;
32
+ if (oldT && !newT) return 'oldOnly';
33
+ if (!oldT && newT) return 'newOnly';
34
+ return 'changed';
35
+ }
36
+
37
+ async function runShadowReport(opts = {}) {
38
+ const sinceMs = parseSince(opts.since);
39
+ const entries = readShadowDivergences({
40
+ detector: opts.detector || undefined,
41
+ sinceTs: sinceMs !== null ? sinceMs : undefined
42
+ });
43
+
44
+ if (entries.length === 0) {
45
+ console.log('\n No shadow divergences recorded' +
46
+ (opts.detector ? ` for detector "${opts.detector}"` : '') +
47
+ (opts.since ? ` since ${opts.since}` : '') +
48
+ '.\n (Shadow mode logs only V1≠V2 disagreements; enable with MUADDIB_SHADOW=1.)\n');
49
+ return;
50
+ }
51
+
52
+ // Group by detector, dedup by package@version (a package rescanned N times
53
+ // diverges N times — the adjudication unit is the package, not the event).
54
+ const byDetector = new Map();
55
+ for (const e of entries) {
56
+ let d = byDetector.get(e.detector);
57
+ if (!d) { d = { events: 0, byKey: new Map() }; byDetector.set(e.detector, d); }
58
+ d.events++;
59
+ const key = `${e.package || '?'}@${e.version || ''}`;
60
+ if (!d.byKey.has(key)) d.byKey.set(key, e); // first divergence wins for the listing
61
+ }
62
+
63
+ if (opts.json) {
64
+ const out = {};
65
+ for (const [det, d] of byDetector) {
66
+ const split = { oldOnly: [], newOnly: [], changed: [] };
67
+ for (const [key, e] of d.byKey) split[classify(e)].push({ key, evidence: e.evidence });
68
+ out[det] = {
69
+ events: d.events, distinct: d.byKey.size,
70
+ oldOnly: split.oldOnly.length, newOnly: split.newOnly.length, changed: split.changed.length,
71
+ newOnlyList: split.newOnly, oldOnlyExamples: split.oldOnly.slice(0, 20)
72
+ };
73
+ }
74
+ console.log(JSON.stringify(out, null, 2));
75
+ return;
76
+ }
77
+
78
+ console.log('\n MUAD\'DIB Shadow Divergence Report' + (opts.since ? ` (since ${opts.since})` : '') + '\n');
79
+ for (const [det, d] of byDetector) {
80
+ const split = { oldOnly: [], newOnly: [], changed: [] };
81
+ for (const [key, e] of d.byKey) split[classify(e)].push({ key, e });
82
+ console.log(` ${det}`);
83
+ console.log(` divergence events: ${d.events} | distinct pkg@version: ${d.byKey.size}`);
84
+ console.log(` old-only (V2 drops the alert — FP killed): ${split.oldOnly.length}`);
85
+ console.log(` new-only (V2 adds a flag — REVIEW): ${split.newOnly.length}`);
86
+ if (split.changed.length) {
87
+ console.log(` changed (both fire, different verdict): ${split.changed.length}`);
88
+ }
89
+ const show = (label, list, max) => {
90
+ if (!list.length) return;
91
+ console.log(` ${label}:`);
92
+ for (const { key, e } of list.slice(0, max)) {
93
+ const ev = e.evidence ? JSON.stringify(e.evidence) : '';
94
+ console.log(` - ${key} old=${JSON.stringify(e.oldVerdict)} new=${JSON.stringify(e.newVerdict)} ${ev.slice(0, 140)}`);
95
+ }
96
+ if (list.length > max) console.log(` ... and ${list.length - max} more`);
97
+ };
98
+ // Every NEW flag must be human-reviewed (possible FN risk if wrong) — list up to 50 here; the --json output carries the full new-only list.
99
+ show('new-only detail', split.newOnly, 50);
100
+ show('old-only examples', split.oldOnly, 20);
101
+ show('changed detail', split.changed, 20);
102
+ console.log('');
103
+ }
104
+ }
105
+
106
+ module.exports = { runShadowReport, parseSince };
@@ -975,6 +975,14 @@ const SCAN_LEDGER_OUTCOMES = new Set([
975
975
  'static_timeout', 'size_skip', 'dropped', 'error'
976
976
  ]);
977
977
 
978
+ // Benign terminal verdicts — the ledger-headline "clean" bucket. Mirrors the
979
+ // in-memory stats.clean semantics (every path that increments stats.clean writes
980
+ // one of these outcomes). sandbox_inconclusive/unconfirmed and size_skip are
981
+ // deliberately in neither bucket: scanned but not vouched-for.
982
+ const CLEAN_LEDGER_OUTCOMES = new Set([
983
+ 'clean', 'clean_low_signal', 'clean_tooling', 'ml_clean', 'llm_benign'
984
+ ]);
985
+
978
986
  /**
979
987
  * Append one per-scan ledger entry recording the terminal outcome of a dequeued
980
988
  * package. Best-effort: NEVER throws (a ledger failure must not break scanning).
@@ -1112,6 +1120,12 @@ function computeLedgerRollup(sinceTs, opts = {}) {
1112
1120
  const byOutcome = Object.create(null);
1113
1121
  const byEcosystem = Object.create(null);
1114
1122
  let total = 0, scanned = 0, dropped = 0, alerted = 0;
1123
+ // Headline counters (ledger-derived daily-report headline — restart-proof, unlike
1124
+ // the in-memory stats counters). clean buckets all the benign terminal verdicts;
1125
+ // errors only the ledgerized failure outcomes (HTTP/tar failures live in the
1126
+ // in-memory errorsByType breakdown, not the ledger).
1127
+ let hClean = 0, hErrors = 0;
1128
+ const hByTier = { t1: 0, t1a: 0, t1b: 0, t2: 0, t3: 0 };
1115
1129
  let earliest = null, latest = null;
1116
1130
  // Two sets so `vanished` is correct regardless of drop/scan ordering in the file.
1117
1131
  // droppedKeys is small (drops only happen under queue-cap pressure); scannedKeys is
@@ -1153,10 +1167,24 @@ function computeLedgerRollup(sinceTs, opts = {}) {
1153
1167
  if (underCap) { droppedKeys.add(key); allNames.add(e.name); } else exactVanished = false;
1154
1168
  } else {
1155
1169
  scanned++; ecoNode.scanned++;
1156
- if (outcome === 'suspect' || outcome === 'confirmed') { alerted++; ecoNode.alerted++; }
1170
+ if (outcome === 'suspect' || outcome === 'confirmed') {
1171
+ alerted++; ecoNode.alerted++;
1172
+ const t = e.tier !== undefined && e.tier !== null ? String(e.tier) : null;
1173
+ if (t === '1a') hByTier.t1a++;
1174
+ else if (t === '1b') hByTier.t1b++;
1175
+ else if (t === '1') hByTier.t1++;
1176
+ else if (t === '2') hByTier.t2++;
1177
+ else if (t === '3') hByTier.t3++;
1178
+ } else if (CLEAN_LEDGER_OUTCOMES.has(outcome)) {
1179
+ hClean++;
1180
+ } else if (outcome === 'error' || outcome === 'static_timeout') {
1181
+ hErrors++;
1182
+ }
1157
1183
  if (underCap) { scannedKeys.add(key); allNames.add(e.name); scannedNames.add(e.name); } else exactVanished = false;
1158
1184
  }
1159
1185
  });
1186
+ // Match the in-memory suspectByTier semantics where t1 = t1a + t1b (+ legacy '1').
1187
+ hByTier.t1 += hByTier.t1a + hByTier.t1b;
1160
1188
 
1161
1189
  let vanished = 0;
1162
1190
  for (const k of droppedKeys) { if (!scannedKeys.has(k)) vanished++; }
@@ -1181,6 +1209,16 @@ function computeLedgerRollup(sinceTs, opts = {}) {
1181
1209
  distinctPackages: allNames.size,
1182
1210
  distinctScanned: scannedNames.size,
1183
1211
  distinctCoverage: allNames.size > 0 ? scannedNames.size / allNames.size : null,
1212
+ // Ledger-derived daily-report headline (window-exact, restart-proof). `suspect`
1213
+ // mirrors `alerted` (suspect+confirmed); `scanned` mirrors the non-dropped count
1214
+ // above. The in-memory counters remain the fallback when the ledger is unavailable.
1215
+ headline: {
1216
+ scanned,
1217
+ clean: hClean,
1218
+ suspect: alerted,
1219
+ errors: hErrors,
1220
+ byTier: hByTier
1221
+ },
1184
1222
  byOutcome,
1185
1223
  byEcosystem
1186
1224
  };
@@ -1421,6 +1459,21 @@ function loadLastDailyReportDate() {
1421
1459
  }
1422
1460
  }
1423
1461
 
1462
+ /**
1463
+ * Load the exact ISO timestamp of the last daily report send (the start of the
1464
+ * current reporting window). Returns null when absent (pre-upgrade file, first
1465
+ * report ever, corrupt file) — callers fall back to a fixed 24h window.
1466
+ */
1467
+ function loadLastDailyReportTs() {
1468
+ try {
1469
+ const raw = fs.readFileSync(LAST_DAILY_REPORT_FILE, 'utf8');
1470
+ const data = JSON.parse(raw);
1471
+ return typeof data.lastReportTs === 'string' ? data.lastReportTs : null;
1472
+ } catch {
1473
+ return null;
1474
+ }
1475
+ }
1476
+
1424
1477
  /**
1425
1478
  * Persist the date of the last daily report sent (YYYY-MM-DD), and optionally the
1426
1479
  * monotonic scan-stats baseline captured at that moment (used by the next report's
@@ -1430,6 +1483,10 @@ function saveLastDailyReportDate(dateStr, scanStatsBaseline) {
1430
1483
  try {
1431
1484
  const payload = { lastReportDate: dateStr };
1432
1485
  if (scanStatsBaseline) payload.scanStatsBaseline = scanStatsBaseline;
1486
+ // Exact send timestamp = start of the NEXT report's ledger window (8h→8h
1487
+ // semantics, restart-proof). Written in the same write-ahead as the date
1488
+ // stamp, so a mid-send kill can neither hole nor double-count the window.
1489
+ payload.lastReportTs = new Date().toISOString();
1433
1490
  atomicWriteFileSync(LAST_DAILY_REPORT_FILE, JSON.stringify(payload, null, 2));
1434
1491
  } catch (err) {
1435
1492
  console.error(`[MONITOR] Failed to save last daily report date: ${err.message}`);
@@ -1735,6 +1792,7 @@ module.exports = {
1735
1792
  captureScanStatsBaseline,
1736
1793
  reconcileDailyHeadline,
1737
1794
  loadLastDailyReportDate,
1795
+ loadLastDailyReportTs,
1738
1796
  saveLastDailyReportDate,
1739
1797
  hasReportBeenSentToday,
1740
1798
  saveRecentlyScanned,
@@ -30,7 +30,8 @@ const {
30
30
  loadStateRaw,
31
31
  getScansSinceLastMemoryPersist,
32
32
  setScansSinceLastMemoryPersist,
33
- computeLedgerRollup
33
+ computeLedgerRollup,
34
+ loadLastDailyReportTs
34
35
  } = require('./state.js');
35
36
  const {
36
37
  HIGH_CONFIDENCE_MALICE_TYPES,
@@ -1049,24 +1050,59 @@ function formatDelta(current, previous) {
1049
1050
  return '=0';
1050
1051
  }
1051
1052
 
1052
- // Phase 0b: rolling window for the daily report's ledger section. The report runs
1053
- // once/day, so 24h is the natural "what happened today" view and keeps the rollup's
1054
- // distinct-key sets small (one day of scans, far below MAX_ROLLUP_KEYS). Env-tunable.
1053
+ // Phase 0b: fallback window for the daily report's ledger section when no
1054
+ // last-report timestamp exists yet (first report ever / pre-upgrade stamp file).
1055
+ // Normal operation derives the window from lastReportTs instead (8h→8h Paris,
1056
+ // restart-proof). Env-tunable.
1055
1057
  const LEDGER_ROLLUP_WINDOW_MS = (() => {
1056
1058
  const v = parseInt(process.env.MUADDIB_LEDGER_ROLLUP_WINDOW_MS, 10);
1057
1059
  return Number.isFinite(v) && v > 0 ? v : 24 * 60 * 60 * 1000;
1058
1060
  })();
1059
1061
 
1062
+ // Hard ceiling on the report window. A multi-day daemon outage would otherwise make
1063
+ // the next report's window (and the rollup's distinct-key sets) span the whole gap;
1064
+ // clamp to 48h and flag it so the report stays honest about the truncation.
1065
+ const LEDGER_ROLLUP_MAX_WINDOW_MS = 48 * 60 * 60 * 1000;
1066
+
1060
1067
  /**
1061
- * Compute the per-scan ledger rollup for the daily-report window. Best-effort: a
1062
- * rollup failure (corrupt ledger, I/O) must NEVER break the daily report, so this
1063
- * swallows errors and returns null. Also returns null when the ledger is empty so
1064
- * the report omits the section instead of showing a noise row of zeros.
1068
+ * Compute the per-scan ledger rollup for the daily-report window. The window is
1069
+ * [last report send → now] (8h→8h Paris semantics, exact across restarts) when the
1070
+ * lastReportTs stamp exists, else the fixed fallback window. Best-effort: a rollup
1071
+ * failure (corrupt ledger, I/O) must NEVER break the daily report, so this swallows
1072
+ * errors and returns null. Also returns null when the ledger is empty so the report
1073
+ * omits the section instead of showing a noise row of zeros.
1065
1074
  */
1066
1075
  function safeLedgerRollup() {
1067
1076
  try {
1068
- const rollup = computeLedgerRollup(Date.now() - LEDGER_ROLLUP_WINDOW_MS);
1069
- return (rollup && rollup.total > 0) ? rollup : null;
1077
+ const now = Date.now();
1078
+ let sinceMs = now - LEDGER_ROLLUP_WINDOW_MS;
1079
+ let windowClamped = false;
1080
+ let windowSource = 'fallback_24h';
1081
+ const lastTs = loadLastDailyReportTs();
1082
+ if (lastTs) {
1083
+ const p = Date.parse(lastTs);
1084
+ // Guard against clock skew (stamp in the future) — fall back to 24h.
1085
+ if (!Number.isNaN(p) && p <= now) {
1086
+ if (p < now - LEDGER_ROLLUP_MAX_WINDOW_MS) {
1087
+ sinceMs = now - LEDGER_ROLLUP_MAX_WINDOW_MS;
1088
+ windowClamped = true;
1089
+ } else {
1090
+ sinceMs = p;
1091
+ }
1092
+ windowSource = 'last_report';
1093
+ }
1094
+ }
1095
+ // Ledger source resolved at CALL time (not module load) so tests can point the
1096
+ // rollup at a synthetic/empty ledger after the module graph is already loaded.
1097
+ // Unset env → computeLedgerRollup falls back to its SCAN_LEDGER_FILE default.
1098
+ const fileOverride = process.env.MUADDIB_SCAN_LEDGER_FILE;
1099
+ const rollup = computeLedgerRollup(sinceMs, fileOverride ? { file: fileOverride } : {});
1100
+ if (rollup && rollup.total > 0) {
1101
+ rollup.windowClamped = windowClamped;
1102
+ rollup.windowSource = windowSource;
1103
+ return rollup;
1104
+ }
1105
+ return null;
1070
1106
  } catch {
1071
1107
  return null;
1072
1108
  }
@@ -1091,7 +1127,10 @@ function formatLedgerField(rollup) {
1091
1127
  if (ecos.length > 0) {
1092
1128
  lines.push(ecos.slice(0, 4).map(e => `${e} ${rollup.byEcosystem[e].total}`).join(' · '));
1093
1129
  }
1094
- return { name: 'Ledger (24h)', value: lines.join('\n'), inline: false };
1130
+ const label = rollup.windowSource === 'last_report'
1131
+ ? `Ledger (since last report${rollup.windowClamped ? ', clamped 48h' : ''})`
1132
+ : 'Ledger (24h)';
1133
+ return { name: label, value: lines.join('\n'), inline: false };
1095
1134
  }
1096
1135
 
1097
1136
  // AUDIT-C: MCP self-identity by package name (matches the F9/F15 MCP_NAME_RE family in
@@ -1115,6 +1154,22 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
1115
1154
  // instead of disk-based daily entries which can undercount due to UTC/Paris date mismatch
1116
1155
  const { top3: diskTop3 } = buildReportFromDisk();
1117
1156
 
1157
+ // --- Phase 0b: per-scan ledger rollup (resolved early so the headline can use it) ---
1158
+ // Caller may pass a precomputed rollup (sendDailyReport does, to persist the same
1159
+ // numbers it displays); undefined → compute here; explicit null → omit the section.
1160
+ const ledger = ledgerRollup !== undefined ? ledgerRollup : safeLedgerRollup();
1161
+
1162
+ // HEADLINE BOUNDARY — scanned/clean/suspect come from the ledger window
1163
+ // [last report → now] when available: window-exact and restart-proof, unlike the
1164
+ // in-memory counters (reset-restore cycles can under-count after a restart storm).
1165
+ // Everything NOT in the ledger (errorsByType breakdown, changes-stream/publish-event
1166
+ // counts, pypi*, avg scan time) stays on the in-memory counters + daily-stats.json:
1167
+ // best-effort since the last reset, may under-count after a restart.
1168
+ const headline = (ledger && ledger.headline && ledger.headline.scanned > 0) ? ledger.headline : null;
1169
+ const hScanned = headline ? headline.scanned : stats.scanned;
1170
+ const hClean = headline ? headline.clean : stats.clean;
1171
+ const hSuspect = headline ? headline.suspect : stats.suspect;
1172
+
1118
1173
  // Prefer in-memory dailyAlerts for top suspects (richer data), fallback to disk
1119
1174
  const top3 = dailyAlerts.length > 0
1120
1175
  ? dailyAlerts.slice().sort((a, b) => (b.score || 0) - (a.score || 0) || b.findingsCount - a.findingsCount).slice(0, 3)
@@ -1133,14 +1188,9 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
1133
1188
  }).join('\n')
1134
1189
  : 'None';
1135
1190
 
1136
- // Avg scan time from in-memory stats
1191
+ // Avg scan time from in-memory stats (totalTimeMs is not ledgerized — best-effort)
1137
1192
  const avg = stats.scanned > 0 ? (stats.totalTimeMs / stats.scanned / 1000).toFixed(1) : '0.0';
1138
1193
 
1139
- // --- Phase 0b: per-scan ledger rollup (resolved early so Coverage can use it) ---
1140
- // Caller may pass a precomputed rollup (sendDailyReport does, to persist the same
1141
- // numbers it displays); undefined → compute here; explicit null → omit the section.
1142
- const ledger = ledgerRollup !== undefined ? ledgerRollup : safeLedgerRollup();
1143
-
1144
1194
  // --- Coverage ---
1145
1195
  // HEADLINE: honest, version-collapsed coverage from the scan-ledger — distinct
1146
1196
  // package NAMES actually scanned vs distinct names seen (scanned + dropped) in
@@ -1155,8 +1205,8 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
1155
1205
  const published = npmPub + pypiPub;
1156
1206
  const catchupSkipped = (stats.npmCatchupSkippedSeqs || 0) + (stats.pypiCatchupSkippedEvents || 0);
1157
1207
  const opsSuffix = catchupSkipped > 0
1158
- ? `\nOps: ${stats.scanned} | Catch-up skip: ${catchupSkipped}`
1159
- : `\nOps: ${stats.scanned}`;
1208
+ ? `\nOps: ${hScanned} | Catch-up skip: ${catchupSkipped}`
1209
+ : `\nOps: ${hScanned}`;
1160
1210
  let coverageText;
1161
1211
  if (ledger && ledger.distinctPackages > 0 && ledger.distinctCoverage != null) {
1162
1212
  const pct = (ledger.distinctCoverage * 100).toFixed(0);
@@ -1183,8 +1233,8 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
1183
1233
  const yesterday = loadYesterdayMetrics();
1184
1234
  let trendsText = 'No data (first day or missing)';
1185
1235
  if (yesterday) {
1186
- const dScanned = formatDelta(stats.scanned, yesterday.scanned || 0);
1187
- const dSuspect = formatDelta(stats.suspect, yesterday.suspect || 0);
1236
+ const dScanned = formatDelta(hScanned, yesterday.scanned || 0);
1237
+ const dSuspect = formatDelta(hSuspect, yesterday.suspect || 0);
1188
1238
  const dErrors = formatDelta(stats.errors, yesterday.errors || 0);
1189
1239
  trendsText = `${dScanned} scanned, ${dSuspect} suspects, ${dErrors} errors`;
1190
1240
  }
@@ -1245,8 +1295,8 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
1245
1295
  color: 0x3498db,
1246
1296
  fields: [
1247
1297
  { name: 'Coverage', value: coverageText, inline: true },
1248
- { name: 'Clean', value: `${stats.clean}`, inline: true },
1249
- { name: 'Suspects', value: `${stats.suspect}`, inline: true },
1298
+ { name: 'Clean', value: `${hClean}`, inline: true },
1299
+ { name: 'Suspects', value: `${hSuspect}`, inline: true },
1250
1300
  { name: 'Errors', value: formatErrorBreakdown(stats.errors, stats.errorsByType), inline: true },
1251
1301
  { name: 'Avg Scan Time', value: `${avg}s/pkg`, inline: true },
1252
1302
  { name: 'Timeouts', value: timeoutText, inline: true },
@@ -1262,7 +1312,9 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
1262
1312
  { name: 'System', value: healthText, inline: false }
1263
1313
  ],
1264
1314
  footer: {
1265
- text: `MUAD'DIB - Daily summary | ${readableTime}`
1315
+ // Headline-source annotation: 'ledger' = window-exact [last report → now],
1316
+ // 'counters' = in-memory fallback (ledger unavailable — pre-upgrade behavior).
1317
+ text: `MUAD'DIB - Daily summary | headline: ${headline ? 'ledger (since last report)' : 'counters'} | ${readableTime}`
1266
1318
  },
1267
1319
  timestamp: now.toISOString()
1268
1320
  }]
@@ -1285,20 +1337,34 @@ async function sendDailyReport(stats, dailyAlerts, recentlyScanned, downloadsCac
1285
1337
  console.log(`[MONITOR] Daily report suppressed: before ${DAILY_REPORT_HOUR}:00 Paris (hour=${getParisHour()})`);
1286
1338
  return;
1287
1339
  }
1288
- // Crash-safe headline: a restart-storm around report time can zero the in-memory
1289
- // counter (the monitor OOM-restarts ~10×/day). Floor scanned/clean/suspect at the
1290
- // durable scan-stats delta so we never publish "5" when ~44k were really scanned.
1291
- reconcileDailyHeadline(stats);
1340
+ // Phase 0b: compute the ledger rollup ONCE so the embed shows exactly the numbers
1341
+ // we persist (no double-scan, no drift between Discord and the on-disk metrics).
1342
+ // Resolved BEFORE the empty-skip and the reconcile: when the ledger headline is
1343
+ // available it IS the published number (window [last report → now], restart-proof),
1344
+ // and the counter-based machinery below only runs as fallback.
1345
+ const ledgerRollup = safeLedgerRollup();
1346
+ const headline = (ledgerRollup && ledgerRollup.headline && ledgerRollup.headline.scanned > 0)
1347
+ ? ledgerRollup.headline : null;
1348
+
1349
+ if (!headline) {
1350
+ // Crash-safe FALLBACK headline: a restart-storm around report time can zero the
1351
+ // in-memory counter (the monitor OOM-restarts ~10×/day). Floor scanned/clean/suspect
1352
+ // at the durable scan-stats delta so we never publish "5" when ~44k were really
1353
+ // scanned. Not applied when the ledger headline is used — that one is window-exact.
1354
+ reconcileDailyHeadline(stats);
1355
+ }
1292
1356
 
1293
1357
  // Never send an empty report (0 scanned — restart with no work done)
1294
- if (stats.scanned === 0) {
1358
+ const publishedScanned = headline ? headline.scanned : stats.scanned;
1359
+ if (publishedScanned === 0) {
1295
1360
  console.log('[MONITOR] Daily report skipped (0 packages scanned)');
1296
1361
  return;
1297
1362
  }
1298
1363
 
1299
1364
  // Write-ahead: mark today's report as sent BEFORE the webhook HTTP request.
1300
1365
  // If the process is killed (SIGKILL) during sendWebhook, the date is already
1301
- // recorded on disk and prevents duplicate reports on next startup.
1366
+ // recorded on disk and prevents duplicate reports on next startup. The same
1367
+ // write-ahead stamps lastReportTs = start of the next report's ledger window.
1302
1368
  const today = getParisDateString();
1303
1369
  stats.lastDailyReportDate = today;
1304
1370
  // Persist the monotonic scan-stats counter as the baseline for the NEXT report's
@@ -1306,23 +1372,23 @@ async function sendDailyReport(stats, dailyAlerts, recentlyScanned, downloadsCac
1306
1372
  saveLastDailyReportDate(today, captureScanStatsBaseline());
1307
1373
  // Observability: the success path previously logged nothing, which made the late-fire bug
1308
1374
  // invisible in the journal. Log the stamped date + the actual Paris hour (an on-time 08:00
1309
- // fire vs a catch-up at hour 14 are now distinguishable) + the headline count.
1310
- console.log(`[MONITOR] Daily report firing for ${today} (hour=${getParisHour()} Paris, scanned=${stats.scanned})`);
1375
+ // fire vs a catch-up at hour 14 are now distinguishable) + the headline count + source.
1376
+ console.log(`[MONITOR] Daily report firing for ${today} (hour=${getParisHour()} Paris, scanned=${publishedScanned}, headline=${headline ? 'ledger' : 'counters'})`);
1311
1377
 
1312
- // Phase 0b: compute the ledger rollup ONCE so the embed shows exactly the numbers
1313
- // we persist (no double-scan, no drift between Discord and the on-disk metrics).
1314
- const ledgerRollup = safeLedgerRollup();
1315
1378
  const payload = buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup);
1316
1379
 
1317
- // Persist locally with full raw metrics (independent of webhook — enables trend analysis)
1380
+ // Persist locally with full raw metrics (independent of webhook — enables trend analysis).
1381
+ // Headline (scanned/clean/suspect/byTier) follows the same source as the embed: ledger
1382
+ // window when available, in-memory counters otherwise. headlineSource records which.
1318
1383
  persistDailyReport(payload, {
1319
- scanned: stats.scanned,
1320
- clean: stats.clean,
1321
- suspect: stats.suspect,
1384
+ headlineSource: headline ? 'ledger' : 'counters',
1385
+ scanned: publishedScanned,
1386
+ clean: headline ? headline.clean : stats.clean,
1387
+ suspect: headline ? headline.suspect : stats.suspect,
1322
1388
  errors: stats.errors,
1323
1389
  errorsByType: { ...stats.errorsByType },
1324
1390
  avgScanTimeMs: stats.scanned > 0 ? Math.round(stats.totalTimeMs / stats.scanned) : 0,
1325
- suspectByTier: { ...stats.suspectByTier },
1391
+ suspectByTier: headline ? { ...headline.byTier } : { ...stats.suspectByTier },
1326
1392
  mlFiltered: stats.mlFiltered || 0,
1327
1393
  llmAnalyzed: stats.llmAnalyzed || 0,
1328
1394
  llmSuppressed: stats.llmSuppressed || 0,
@@ -270,7 +270,11 @@ async function process(threats, targetPath, options, pythonDeps, warnings, scann
270
270
  debugLog('[EMAIL-DOMAIN] check failed: ' + err.message);
271
271
  }
272
272
  try {
273
- const rdapThreats = await checkCompromisedDomain(_pkgMeta.npmRegistryMeta);
273
+ // shadowCtx identifies the package in shadow-divergence records (V2
274
+ // candidate semantics logged alongside V1 — zero effect on threats).
275
+ const rdapThreats = await checkCompromisedDomain(_pkgMeta.npmRegistryMeta, {
276
+ shadowCtx: { name: packageName, version: packageVersion, ecosystem: 'npm' }
277
+ });
274
278
  for (const t of rdapThreats) deduped.push(t);
275
279
  } catch (err) {
276
280
  debugLog('[RDAP] check failed: ' + err.message);
@@ -17,6 +17,7 @@
17
17
 
18
18
  const dns = require('dns');
19
19
  const { debugLog } = require('../utils.js');
20
+ const { isShadowEnabled, recordShadowDivergence } = require('../shared/shadow.js');
20
21
 
21
22
  const MX_TIMEOUT_MS = 3000;
22
23
  const MX_CACHE_TTL = 30 * 24 * 60 * 60 * 1000; // 30 days
@@ -236,10 +237,67 @@ function isCompromisedDomain(creationDateISO, packageCreatedAtISO) {
236
237
  return cDate > (rDate - COMPROMISE_MARGIN_MS);
237
238
  }
238
239
 
240
+ // =============================================================================
241
+ // V2 candidate semantics (SHADOW-ONLY until adjudicated — V1 above still emits
242
+ // every threat). Two changes vs V1, both validated by the node-ipc takeover
243
+ // (May 2026: domain atlantis-software.net re-registered 2026-05-07, malicious
244
+ // 9.2.3/12.0.1 published 05-14, FIRST publish years earlier):
245
+ //
246
+ // 1. STRICT comparison — creation > first_publish, the 30-day pre-publish
247
+ // margin removed. A dev who buys their domain a few weeks before shipping
248
+ // v1 is the NORMAL case (the margin was the main source of the 850+ FP);
249
+ // a dev cannot have published with an email on a domain that did not
250
+ // exist yet, so creation strictly after first publish stays a hard signal.
251
+ // RDAP caveat that makes this work: many registries RESET the creation
252
+ // date on re-registration (.net/Namecheap do — node-ipc's signal).
253
+ // 2. Public email providers excluded — gmail.com etc. can never be "taken
254
+ // over" by re-registration; any weird RDAP answer for them is noise.
255
+ // This is a domain-CLASS exclusion, not a package whitelist.
256
+ // =============================================================================
257
+
258
+ // Consumer email providers — domain takeover does not apply (the provider
259
+ // owns the domain; accounts are compromised via other vectors, out of scope
260
+ // for this RDAP signal).
261
+ const PUBLIC_EMAIL_PROVIDERS = new Set([
262
+ 'gmail.com', 'googlemail.com',
263
+ 'outlook.com', 'hotmail.com', 'live.com', 'msn.com',
264
+ 'yahoo.com', 'ymail.com', 'rocketmail.com',
265
+ 'proton.me', 'protonmail.com', 'pm.me',
266
+ 'icloud.com', 'me.com', 'mac.com',
267
+ 'aol.com',
268
+ 'gmx.com', 'gmx.de', 'gmx.net',
269
+ 'mail.ru', 'inbox.ru', 'list.ru', 'bk.ru',
270
+ 'qq.com', 'foxmail.com', '163.com', '126.com', 'yeah.net', 'sina.com',
271
+ 'yandex.ru', 'yandex.com',
272
+ 'zoho.com', 'fastmail.com', 'hey.com',
273
+ 'tutanota.com', 'tuta.com', 'tuta.io',
274
+ 'web.de', 't-online.de', 'freenet.de',
275
+ 'free.fr', 'orange.fr', 'laposte.net', 'wanadoo.fr', 'sfr.fr',
276
+ 'naver.com', 'daum.net', 'hanmail.net',
277
+ 'rediffmail.com', 'seznam.cz', 'wp.pl', 'o2.pl', 'interia.pl',
278
+ 'duck.com', 'pobox.com', 'hushmail.com', 'mailbox.org', 'posteo.de'
279
+ ]);
280
+
281
+ /**
282
+ * V2: strict creation-after-first-publish, public providers excluded.
283
+ * Pure — used by the shadow hook below and by scripts/backtest-email-domain.js.
284
+ */
285
+ function isCompromisedDomainV2(creationDateISO, firstPublishISO, domain) {
286
+ if (!creationDateISO || !firstPublishISO) return false;
287
+ if (domain && PUBLIC_EMAIL_PROVIDERS.has(String(domain).toLowerCase())) return false;
288
+ const cDate = new Date(creationDateISO).getTime();
289
+ const rDate = new Date(firstPublishISO).getTime();
290
+ if (isNaN(cDate) || isNaN(rDate)) return false;
291
+ return cDate > rDate;
292
+ }
293
+
239
294
  /**
240
295
  * F1 entry point.
241
- * @param {object|null} meta - Digested metadata. Reads maintainer_emails + created_at.
296
+ * @param {object|null} meta - Digested metadata. Reads maintainer_emails + created_at
297
+ * (= the package's FIRST publish date, both npm and PyPI sides).
242
298
  * @param {object} options - { fetchRdap } for tests to inject a mock.
299
+ * { shadowCtx: {name, version, ecosystem} } identifies the scanned package in
300
+ * shadow-divergence records (optional — without it divergences log package:null).
243
301
  * @returns {Promise<Array>} threats array
244
302
  */
245
303
  async function checkCompromisedDomain(meta, options = {}) {
@@ -263,6 +321,24 @@ async function checkCompromisedDomain(meta, options = {}) {
263
321
  continue;
264
322
  }
265
323
  if (!rdap || !rdap.creationDate) continue;
324
+ // SHADOW (zero effect on the threats emitted below): compare the live V1
325
+ // verdict with the V2 candidate and log only disagreements. Adjudication =
326
+ // scripts/backtest-email-domain.js replay + `muaddib shadow-report`.
327
+ try {
328
+ if (isShadowEnabled()) {
329
+ const v1 = isCompromisedDomain(rdap.creationDate, meta.created_at);
330
+ const v2 = isCompromisedDomainV2(rdap.creationDate, meta.created_at, domain);
331
+ if (v1 !== v2) {
332
+ const ctx = options.shadowCtx || {};
333
+ recordShadowDivergence({
334
+ detector: 'compromised_email_domain',
335
+ package: ctx.name, version: ctx.version, ecosystem: ctx.ecosystem,
336
+ oldVerdict: v1, newVerdict: v2,
337
+ evidence: { domain, creationDate: rdap.creationDate, firstPublish: meta.created_at, oldMarginDays: 30 }
338
+ });
339
+ }
340
+ }
341
+ } catch { /* shadow must never affect the scan */ }
266
342
  if (isCompromisedDomain(rdap.creationDate, meta.created_at)) {
267
343
  const cd = rdap.creationDate.slice(0, 10);
268
344
  const pd = meta.created_at.slice(0, 10);
@@ -297,6 +373,9 @@ module.exports = {
297
373
  checkCompromisedDomain,
298
374
  fetchRdap,
299
375
  isCompromisedDomain,
376
+ // V2 candidate (shadow-only until adjudicated; used by the backtest script)
377
+ isCompromisedDomainV2,
378
+ PUBLIC_EMAIL_PROVIDERS,
300
379
  _resetRdapCache,
301
380
  RDAP_TIMEOUT_MS,
302
381
  RDAP_CACHE_TTL,
@@ -72,7 +72,10 @@ async function runPyPIMaintainerChecks(packageName, pypiRegistryMeta, options =
72
72
  let rdapThreats = [];
73
73
  try {
74
74
  rdapThreats = await checkCompromisedDomain(helperMeta, {
75
- fetchRdap: options.fetchRdap
75
+ fetchRdap: options.fetchRdap,
76
+ // PyPI created_at is the earliest release time (pypi-registry.js) =
77
+ // first publish, so the V2 shadow comparison is valid on this side too.
78
+ shadowCtx: { name: packageName, ecosystem: 'pypi' }
76
79
  });
77
80
  } catch { /* silent */ }
78
81
  for (const t of rdapThreats) threats.push(adaptThreatToPyPI(t, declarationFile));
@@ -0,0 +1,190 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Shadow-mode divergence framework.
5
+ *
6
+ * Lets a detector compute a CANDIDATE new semantics (V2) alongside its live
7
+ * semantics (V1) and log the cases where the two verdicts disagree — with ZERO
8
+ * effect on emitted threats, scores, or tiers. The divergence log is the
9
+ * adjudication input for flipping V1 → V2: replay historical alerts through
10
+ * the shadow (backtest) or let it run live as a post-merge safety net, then
11
+ * read the split with `muaddib shadow-report`.
12
+ *
13
+ * Contract (fail-safe by construction):
14
+ * - Nothing here returns a value the scan pipeline can act on. The framework
15
+ * cannot change a verdict even if misused.
16
+ * - recordShadowDivergence NEVER throws — a shadow failure must never break a
17
+ * scan (same posture as appendScanLedger).
18
+ * - Disabled by default. The daemon opts in via MUADDIB_SHADOW=1 in its
19
+ * service environment; CLI scans and tests stay inert unless they set it.
20
+ * - Bounded: the JSONL file is capped at MUADDIB_SHADOW_MAX entries (default
21
+ * 50 000) with streaming FIFO compaction — same pattern as the scan-ledger.
22
+ *
23
+ * Concurrency: unlike the scan-ledger (main-thread-only writer), this module
24
+ * is called from INSIDE scan workers (pipeline/processor.js runs there), so N
25
+ * worker_threads may append concurrently. Each record is serialized to ONE
26
+ * appendFileSync call of one full line (flag 'a' = O_APPEND; small writes are
27
+ * serialized by the inode lock on ext4) — never two writes per line. The
28
+ * reader skips unparsable lines (a crash mid-write can truncate at most the
29
+ * final line).
30
+ *
31
+ * Env (all read at CALL time so tests can re-point after module load):
32
+ * MUADDIB_SHADOW=1 enable (default off)
33
+ * MUADDIB_SHADOW_FILE=path divergence log override (tests)
34
+ * MUADDIB_SHADOW_MAX=n entry cap (default 50000)
35
+ */
36
+
37
+ const fs = require('fs');
38
+ const path = require('path');
39
+
40
+ const DEFAULT_SHADOW_FILE = path.join(__dirname, '..', '..', 'data', 'shadow-divergence.jsonl');
41
+ const DEFAULT_MAX_ENTRIES = 50_000;
42
+ const EVIDENCE_MAX_BYTES = 2048;
43
+ // Re-check the entry count (a synchronous full read of the log) only every N appends, not on every write.
44
+ const COMPACT_CHECK_INTERVAL = 500;
45
+
46
+ let _appendsSinceCheck = 0;
47
+
48
/**
 * Whether shadow mode is switched on. The environment is consulted on every
 * call, so a change to MUADDIB_SHADOW after module load takes effect.
 *
 * @returns {boolean} true only when MUADDIB_SHADOW is exactly the string '1'.
 */
function isShadowEnabled() {
  const { MUADDIB_SHADOW: flag } = globalThis.process.env;
  return flag === '1';
}
51
+
52
/**
 * Resolve the divergence log path at call time.
 *
 * @returns {string} MUADDIB_SHADOW_FILE when set to a non-empty value,
 *   otherwise DEFAULT_SHADOW_FILE.
 */
function _shadowFile() {
  const override = globalThis.process.env.MUADDIB_SHADOW_FILE;
  return override ? override : DEFAULT_SHADOW_FILE;
}
55
+
56
/**
 * Resolve the entry cap for the divergence log at call time.
 *
 * MUADDIB_SHADOW_MAX is parsed as a base-10 integer and accepted only within
 * [10, 5 000 000]; anything unset, unparsable or out of range falls back to
 * DEFAULT_MAX_ENTRIES.
 *
 * @returns {number} the effective cap.
 */
function _maxEntries() {
  const configured = globalThis.process.env.MUADDIB_SHADOW_MAX;
  if (!configured) return DEFAULT_MAX_ENTRIES;

  const parsed = parseInt(configured, 10);
  const inRange = Number.isFinite(parsed) && parsed >= 10 && parsed <= 5_000_000;
  return inRange ? parsed : DEFAULT_MAX_ENTRIES;
}
61
+
62
/**
 * Serialize evidence with a hard size cap so a log line stays small (the
 * single-write append argument relies on short lines).
 *
 * - null / undefined evidence becomes null.
 * - Evidence whose JSON form fits the cap is returned as its JSON round-trip
 *   (so only JSON-representable data reaches the log entry).
 * - Evidence that JSON cannot represent — JSON.stringify throws (cycles,
 *   BigInt) or yields undefined (functions, symbols, toJSON returning
 *   undefined) — falls back to its String() form instead of throwing.
 * - Oversized evidence is replaced by `{ _truncated: true, head }` where head
 *   is the first EVIDENCE_MAX_BYTES characters of the serialized text.
 *
 * Note: the cap is compared against the serialized string's length (UTF-16
 * code units), so non-ASCII evidence can occupy more bytes than the constant.
 *
 * @param {*} evidence
 * @returns {*} null, the JSON round-trip of the evidence, a string, or a
 *   truncation marker object. Never throws.
 */
function _capEvidence(evidence) {
  if (evidence === undefined || evidence === null) return null;

  let serialized;
  try {
    serialized = JSON.stringify(evidence);
  } catch {
    serialized = undefined; // cycles, BigInt, throwing toJSON — use the string form below
  }

  // JSON.stringify returns undefined (without throwing) for functions and
  // symbols; without this guard the length check below would throw.
  if (typeof serialized !== 'string') {
    try {
      serialized = String(evidence);
    } catch {
      // e.g. a cyclic object with no prototype: neither form is available.
      serialized = '[unserializable evidence]';
    }
  }

  if (serialized.length <= EVIDENCE_MAX_BYTES) {
    try {
      return JSON.parse(serialized);
    } catch {
      return serialized; // plain string fallback, not valid JSON
    }
  }
  return { _truncated: true, head: serialized.slice(0, EVIDENCE_MAX_BYTES) };
}
80
+
81
/**
 * Append one shadow divergence (a case where the live and candidate verdicts
 * differ) to the JSONL log. Callers compare the verdicts themselves and call
 * this only on disagreement; agreements are not logged.
 *
 * Does nothing when shadow mode is disabled or when `d` / `d.detector` is
 * missing. Any failure is swallowed: this function never throws.
 *
 * @param {object} d
 * @param {string} d.detector     e.g. 'compromised_email_domain'
 * @param {string} [d.package]    stored as null when falsy
 * @param {string} [d.version]    stored as null when falsy
 * @param {string} [d.ecosystem]  stored as null when falsy
 * @param {*} d.oldVerdict        live semantics result (null when undefined)
 * @param {*} d.newVerdict        candidate semantics result (null when undefined)
 * @param {*} [d.evidence]        size-capped through _capEvidence
 * @returns {void}
 */
function recordShadowDivergence(d) {
  try {
    const usable = isShadowEnabled() && d && d.detector;
    if (!usable) return;

    const target = _shadowFile();
    const parentDir = path.dirname(target);
    if (!fs.existsSync(parentDir)) {
      fs.mkdirSync(parentDir, { recursive: true });
    }

    const orNull = (value) => value || null;
    const definedOrNull = (value) => (value === undefined ? null : value);
    const record = {
      ts: new Date().toISOString(),
      detector: String(d.detector),
      package: orNull(d.package),
      version: orNull(d.version),
      ecosystem: orNull(d.ecosystem),
      oldVerdict: definedOrNull(d.oldVerdict),
      newVerdict: definedOrNull(d.newVerdict),
      evidence: _capEvidence(d.evidence)
    };

    // The whole line goes out in a single append call — never split a record
    // across two writes, since several writers may append to the same file.
    const line = `${JSON.stringify(record)}\n`;
    fs.appendFileSync(target, line, { encoding: 'utf8', flag: 'a' });

    // Compaction is only attempted once every COMPACT_CHECK_INTERVAL appends.
    _appendsSinceCheck += 1;
    if (_appendsSinceCheck < COMPACT_CHECK_INTERVAL) return;
    _appendsSinceCheck = 0;
    _compactShadowJsonl(target);
  } catch {
    // Never throw, never log loudly — a shadow failure must not affect scans.
  }
}
124
+
125
/**
 * FIFO compaction: when the log holds more non-empty lines than the
 * configured cap, rewrite it with only the most recent `max` lines.
 *
 * The file is read in full, the tail is written to a temporary sibling file
 * and then renamed over the log. The temporary name is unique per call so
 * that concurrent compactions (this module may run in several workers) never
 * write to each other's temp file; if anything fails the temp file is removed.
 * Lines appended by another writer between the read and the rename can still
 * be lost — compaction is best-effort by design.
 *
 * Local minimal implementation (not shared with state.js) so the worker-side
 * require graph stays free of the monitor state module.
 *
 * @param {string} file - Path of the JSONL divergence log.
 * @returns {void} Never throws.
 */
function _compactShadowJsonl(file) {
  let tmp = null;
  try {
    const max = _maxEntries();
    const lines = _readLines(file);
    if (lines.length <= max) return;

    const kept = lines.slice(lines.length - max);
    const unique = [
      globalThis.process.pid,
      Date.now().toString(36),
      Math.random().toString(36).slice(2, 10)
    ].join('.');
    tmp = `${file}.${unique}.tmp`;

    fs.writeFileSync(tmp, kept.join('\n') + '\n', 'utf8');
    fs.renameSync(tmp, file);
    tmp = null; // renamed into place — nothing left to clean up
  } catch {
    // Best-effort; an oversized shadow log is preferable to a crashed scan.
    if (tmp !== null) {
      try {
        fs.unlinkSync(tmp);
      } catch {
        // The temp file was never created or is already gone.
      }
    }
  }
}
143
+
144
/**
 * Read a text file synchronously and return its lines, omitting lines that
 * are empty or whitespace-only. Lines are split on '\n' and otherwise
 * returned untouched.
 *
 * @param {string} file - Path to read as UTF-8.
 * @returns {string[]} the non-blank lines, or [] if the file cannot be read.
 */
function _readLines(file) {
  let text;
  try {
    text = fs.readFileSync(file, 'utf8');
  } catch {
    return [];
  }

  const nonBlank = [];
  for (const line of text.split('\n')) {
    if (line.trim().length > 0) nonBlank.push(line);
  }
  return nonBlank;
}
152
+
153
/**
 * Load divergence entries from the shadow log, in file order.
 *
 * Lines that are not valid JSON, that do not parse to an object, or that lack
 * a `detector` field are skipped silently.
 *
 * @param {object} [opts]
 * @param {string} [opts.detector] keep only entries with this exact detector
 * @param {number|string} [opts.sinceTs] cutoff as ms epoch or a Date.parse-able
 *   string; entries older than it, or with a missing/unparsable `ts`, are
 *   dropped. A non-finite number or unparsable string disables the cutoff.
 * @returns {Array<object>}
 */
function readShadowDivergences(opts = {}) {
  const { sinceTs, detector } = opts;

  let cutoffMs = null;
  if (typeof sinceTs === 'number') {
    if (Number.isFinite(sinceTs)) cutoffMs = sinceTs;
  } else if (typeof sinceTs === 'string') {
    const parsed = Date.parse(sinceTs);
    if (!Number.isNaN(parsed)) cutoffMs = parsed;
  }

  // A truncated or corrupt line parses to null and is filtered out below.
  const parseEntry = (line) => {
    try {
      return JSON.parse(line);
    } catch {
      return null;
    }
  };

  const isWellFormed = (entry) => Boolean(entry && typeof entry === 'object' && entry.detector);
  const matchesDetector = (entry) => !detector || entry.detector === detector;
  const isRecentEnough = (entry) => {
    if (cutoffMs === null) return true;
    const at = entry.ts ? Date.parse(entry.ts) : NaN;
    return !Number.isNaN(at) && at >= cutoffMs;
  };

  return _readLines(_shadowFile())
    .map(parseEntry)
    .filter(isWellFormed)
    .filter(matchesDetector)
    .filter(isRecentEnough);
}
181
+
182
+ module.exports = {
183
+ isShadowEnabled,
184
+ recordShadowDivergence,
185
+ readShadowDivergences,
186
+ // test seams
187
+ _capEvidence,
188
+ _compactShadowJsonl,
189
+ EVIDENCE_MAX_BYTES
190
+ };