muaddib-scanner 2.11.67 → 2.11.68
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
package/src/monitor/state.js
CHANGED
|
@@ -1059,6 +1059,116 @@ function loadScanLedger() {
|
|
|
1059
1059
|
return entries;
|
|
1060
1060
|
}
|
|
1061
1061
|
|
|
1062
|
+
// Bounded distinct-key tracking for the `vanished` cross-reference (CLAUDE.md §2).
|
|
1063
|
+
// Sits above the MAX_SCAN_LEDGER file ceiling so it is a pure safety valve: in normal
|
|
1064
|
+
// operation the in-window key sets are far smaller than the file, so `exactVanished`
|
|
1065
|
+
// stays true. Only an operator setting MUADDIB_SCAN_LEDGER_MAX above this would trip it.
|
|
1066
|
+
// Upper bound on the COMBINED number of distinct name@version keys the ledger rollup
// keeps across its scanned-key and dropped-key sets (the two sizes are summed and
// compared against this value before a new key is stored).
const MAX_ROLLUP_KEYS = 1_200_000;
|
|
1067
|
+
|
|
1068
|
+
/**
 * Phase 0b: roll up the per-scan ledger into operational-coverage metrics.
 *
 * Single pass over the ledger through _iterateJsonlSync (entries are consumed one at a
 * time by the callback; nothing here accumulates the entries themselves). It
 * distinguishes:
 *   - scanned : entries that reached a real verdict (outcome !== 'dropped')
 *   - dropped : queue-cap evictions (outcome === 'dropped') — never scanned
 *   - vanished: DISTINCT name@version that were dropped AND never (re)scanned in-window
 *               = a permanent coverage hole
 *
 * Entries with a missing or empty `outcome` are counted as 'clean'.
 *
 * HONEST METRIC NOTE — `alertRate` is (suspect+confirmed) / scanned, i.e. "of what we
 * scanned, the fraction we flagged". It is NOT a true-positive rate: the ledger carries
 * no ground truth. Do not relabel `alertRate` as TPR (CLAUDE.md: no embellishing of
 * metrics).
 *
 * Key-cap semantics — distinct keys are tracked in two sets whose combined size is
 * capped at MAX_ROLLUP_KEYS:
 *   - A key that is already tracked never costs capacity, so it never affects
 *     `exactVanished`.
 *   - When a NEW key cannot be stored, `exactVanished` becomes false and no further
 *     keys are stored. From then on a scan of an untracked key removes that key from
 *     the dropped set, so `vanished` is a guaranteed LOWER BOUND of the true value
 *     (untracked drops are missed; scanned keys are never miscounted as vanished).
 *
 * NOTE(review): the distinct key is `name@version` only; it does not include the
 * ecosystem, so identical name@version pairs from two ecosystems share one key —
 * confirm this is intended.
 *
 * @param {number|string|null} [sinceTs] window start — ms epoch, ISO string, or null for
 *   "whole ledger". Entries with ts < sinceTs (or unparseable ts) are skipped. A string
 *   that Date.parse cannot read is treated like null (whole ledger).
 * @param {object} [opts]
 * @param {string} [opts.file] ledger path override (tests). Defaults to SCAN_LEDGER_FILE.
 * @returns {{
 *   generatedAt:string, since:string|null, windowStart:string|null, windowEnd:string|null,
 *   total:number, scanned:number, dropped:number, vanished:number, exactVanished:boolean,
 *   alerted:number, alertRate:number|null,
 *   byOutcome:Object.<string,number>,
 *   byEcosystem:Object.<string,{total:number,scanned:number,dropped:number,alerted:number}>
 * }}
 */
function computeLedgerRollup(sinceTs, opts = {}) {
  // Tolerate an explicit null/undefined opts instead of throwing on property access.
  const file = (opts && opts.file) || SCAN_LEDGER_FILE;

  let sinceMs = null;
  if (typeof sinceTs === 'number' && Number.isFinite(sinceTs)) {
    sinceMs = sinceTs;
  } else if (typeof sinceTs === 'string') {
    const parsedSince = Date.parse(sinceTs);
    if (!Number.isNaN(parsedSince)) sinceMs = parsedSince;
  }

  // Prototype-less dictionaries: outcome / ecosystem names come from ledger data.
  const byOutcome = Object.create(null);
  const byEcosystem = Object.create(null);
  let total = 0;
  let scanned = 0;
  let dropped = 0;
  let alerted = 0;
  let earliest = null;
  let latest = null;

  // Two sets so `vanished` is correct regardless of drop/scan ordering in the file.
  const scannedKeys = new Set();
  const droppedKeys = new Set();
  let exactVanished = true;

  // Record `key` in `set` if capacity allows. Returns true when the key is tracked
  // after the call (already present, or newly stored); false when it had to be
  // discarded, in which case the rollup is flagged as inexact.
  const trackKey = (set, key) => {
    if (set.has(key)) return true;
    if (exactVanished && (scannedKeys.size + droppedKeys.size) < MAX_ROLLUP_KEYS) {
      set.add(key);
      return true;
    }
    exactVanished = false;
    return false;
  };

  _iterateJsonlSync(file, (e) => {
    if (!e || !e.name) return;

    let t = null;
    if (e.ts) {
      const parsedTs = Date.parse(e.ts);
      if (!Number.isNaN(parsedTs)) t = parsedTs;
    }
    // With a window start, entries without a usable timestamp cannot be placed: skip.
    if (sinceMs !== null && (t === null || t < sinceMs)) return;

    total++;
    if (t !== null) {
      if (earliest === null || t < earliest) earliest = t;
      if (latest === null || t > latest) latest = t;
    }

    const outcome = (typeof e.outcome === 'string' && e.outcome) ? e.outcome : 'clean';
    byOutcome[outcome] = (byOutcome[outcome] || 0) + 1;

    const eco = e.ecosystem || 'unknown';
    let ecoNode = byEcosystem[eco];
    if (!ecoNode) {
      ecoNode = { total: 0, scanned: 0, dropped: 0, alerted: 0 };
      byEcosystem[eco] = ecoNode;
    }
    ecoNode.total++;

    const key = `${e.name}@${e.version || ''}`;
    if (outcome === 'dropped') {
      dropped++;
      ecoNode.dropped++;
      trackKey(droppedKeys, key);
    } else {
      scanned++;
      ecoNode.scanned++;
      if (outcome === 'suspect' || outcome === 'confirmed') {
        alerted++;
        ecoNode.alerted++;
      }
      // Over the cap the scan itself cannot be remembered, but a tracked drop of the
      // same key is now known NOT to be vanished — forget it so `vanished` never
      // overcounts.
      if (!trackKey(scannedKeys, key)) droppedKeys.delete(key);
    }
  });

  let vanished = 0;
  for (const k of droppedKeys) {
    if (!scannedKeys.has(k)) vanished++;
  }

  return {
    generatedAt: new Date().toISOString(),
    since: sinceMs !== null ? new Date(sinceMs).toISOString() : null,
    windowStart: earliest !== null ? new Date(earliest).toISOString() : null,
    windowEnd: latest !== null ? new Date(latest).toISOString() : null,
    total,
    scanned,
    dropped,
    vanished,
    exactVanished,
    alerted,
    // NOT a TPR — fraction of scanned entries that were flagged. null when nothing was scanned.
    alertRate: scanned > 0 ? alerted / scanned : null,
    byOutcome,
    byEcosystem
  };
}
|
|
1171
|
+
|
|
1062
1172
|
// --- Scan stats (FP rate tracking) ---
|
|
1063
1173
|
|
|
1064
1174
|
function loadScanStats() {
|
|
@@ -1568,6 +1678,7 @@ module.exports = {
|
|
|
1568
1678
|
appendDetection,
|
|
1569
1679
|
appendScanLedger,
|
|
1570
1680
|
loadScanLedger,
|
|
1681
|
+
computeLedgerRollup,
|
|
1571
1682
|
_compactScanLedgerJsonl,
|
|
1572
1683
|
getDetectionStats,
|
|
1573
1684
|
runStateMigrations,
|
package/src/monitor/webhook.js
CHANGED
|
@@ -28,7 +28,8 @@ const {
|
|
|
28
28
|
saveState,
|
|
29
29
|
loadStateRaw,
|
|
30
30
|
getScansSinceLastMemoryPersist,
|
|
31
|
-
setScansSinceLastMemoryPersist
|
|
31
|
+
setScansSinceLastMemoryPersist,
|
|
32
|
+
computeLedgerRollup
|
|
32
33
|
} = require('./state.js');
|
|
33
34
|
const {
|
|
34
35
|
HIGH_CONFIDENCE_MALICE_TYPES,
|
|
@@ -897,7 +898,52 @@ function formatDelta(current, previous) {
|
|
|
897
898
|
return '=0';
|
|
898
899
|
}
|
|
899
900
|
|
|
900
|
-
|
|
901
|
+
/**
 * Parse the raw MUADDIB_LEDGER_ROLLUP_WINDOW_MS value into a window length in ms.
 *
 * Only a plain positive decimal integer (optionally surrounded by whitespace, with an
 * optional leading '+') is accepted. Anything else — unset, empty, zero, negative,
 * fractional, or carrying a suffix such as "12h" — falls back to 24 hours instead of
 * being truncated to a prefix the way parseInt would (e.g. "12h" -> 12 ms).
 *
 * @param {string|undefined} raw value read from the environment
 * @returns {number} window length in milliseconds (always a positive integer)
 */
function _parseLedgerRollupWindowMs(raw) {
  const DEFAULT_WINDOW_MS = 24 * 60 * 60 * 1000;
  if (typeof raw !== 'string') return DEFAULT_WINDOW_MS;
  const text = raw.trim();
  if (!/^\+?\d+$/.test(text)) return DEFAULT_WINDOW_MS;
  const ms = Number(text);
  return Number.isSafeInteger(ms) && ms > 0 ? ms : DEFAULT_WINDOW_MS;
}

// Phase 0b: rolling window for the daily report's ledger section. The report runs
// once/day, so 24h is the natural "what happened today" view and keeps the rollup's
// distinct-key sets small. Env-tunable via MUADDIB_LEDGER_ROLLUP_WINDOW_MS (read once
// at module load).
const LEDGER_ROLLUP_WINDOW_MS = _parseLedgerRollupWindowMs(process.env.MUADDIB_LEDGER_ROLLUP_WINDOW_MS);
|
|
908
|
+
|
|
909
|
+
/**
 * Compute the per-scan ledger rollup for the daily-report window
 * (now - LEDGER_ROLLUP_WINDOW_MS up to now).
 *
 * Best-effort: a rollup failure must NEVER break the daily report, so any error thrown
 * by computeLedgerRollup is caught and null is returned. The failure is logged as a
 * warning so that a persistently broken rollup can be told apart from an empty ledger.
 *
 * Also returns null when the rollup counted no entries, so the report omits the
 * section instead of showing a row of zeros.
 *
 * @returns {object|null} the rollup object, or null when empty or on failure
 */
function safeLedgerRollup() {
  try {
    const windowStart = Date.now() - LEDGER_ROLLUP_WINDOW_MS;
    const rollup = computeLedgerRollup(windowStart);
    return (rollup && rollup.total > 0) ? rollup : null;
  } catch (err) {
    const reason = (err && err.message) ? err.message : String(err);
    console.warn(`Ledger rollup failed; section omitted from daily report: ${reason}`);
    return null;
  }
}
|
|
923
|
+
|
|
924
|
+
/**
 * Format the ledger rollup as a Discord embed field, or null to omit it (no data).
 *
 * Surfaces operational scan coverage: scanned count, alerted count with the alert rate
 * (alerted / scanned — NOT a true-positive rate), the dropped/vanished coverage holes
 * (only when something was dropped), and the four largest ecosystems by entry count.
 *
 * Defensive by design: this runs while the daily report is being assembled, so a
 * partially populated rollup must not throw. Missing numeric fields render as 0, a
 * missing or non-finite alert rate renders as 0.00%, a missing `byEcosystem` simply
 * omits the ecosystem line, and the value is clamped to Discord's 1024-character
 * embed-field limit.
 *
 * NOTE(review): the field name is hard-coded to "24h" while the rollup window can be
 * overridden through MUADDIB_LEDGER_ROLLUP_WINDOW_MS — the label is only accurate for
 * the default window.
 *
 * @param {object|null|undefined} rollup result of computeLedgerRollup (or null)
 * @returns {{name:string, value:string, inline:boolean}|null} embed field, or null when
 *   there is no rollup or it counted no entries
 */
function formatLedgerField(rollup) {
  if (!rollup || !(rollup.total > 0)) return null;

  const DISCORD_FIELD_VALUE_MAX = 1024;
  const count = (n) => (Number.isFinite(n) ? n : 0);

  const pct = Number.isFinite(rollup.alertRate) ? (rollup.alertRate * 100).toFixed(2) : '0.00';
  const lines = [`Scanned ${count(rollup.scanned)} · Alerted ${count(rollup.alerted)} (${pct}%)`];

  if (rollup.dropped > 0) {
    // When key tracking overflowed, `vanished` is not exact: show it as a lower bound.
    const vanishedCount = count(rollup.vanished);
    const vanishedNote = rollup.exactVanished ? `${vanishedCount}` : `≥${vanishedCount}`;
    lines.push(`Dropped ${rollup.dropped} (${vanishedNote} vanished)`);
  }

  const byEcosystem = (rollup.byEcosystem && typeof rollup.byEcosystem === 'object')
    ? rollup.byEcosystem
    : {};
  const ecoTotal = (eco) => count(byEcosystem[eco] && byEcosystem[eco].total);
  const ecos = Object.keys(byEcosystem).sort((a, b) => ecoTotal(b) - ecoTotal(a));
  if (ecos.length > 0) {
    lines.push(ecos.slice(0, 4).map((eco) => `${eco} ${ecoTotal(eco)}`).join(' · '));
  }

  let value = lines.join('\n');
  if (value.length > DISCORD_FIELD_VALUE_MAX) {
    value = `${value.slice(0, DISCORD_FIELD_VALUE_MAX - 3)}...`;
  }
  return { name: 'Ledger (24h)', value, inline: false };
}
|
|
945
|
+
|
|
946
|
+
function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
|
|
901
947
|
// Use in-memory stats (accumulated since last reset, restored from disk on restart)
|
|
902
948
|
// instead of disk-based daily entries which can undercount due to UTC/Paris date mismatch
|
|
903
949
|
const { top3: diskTop3 } = buildReportFromDisk();
|
|
@@ -1000,6 +1046,12 @@ function buildDailyReportEmbed(stats, dailyAlerts) {
|
|
|
1000
1046
|
} catch { /* non-fatal */ }
|
|
1001
1047
|
const healthText = `Up ${uptimeH}h${uptimeM}m | Heap ${heapMB}MB${jsonlInfo}`;
|
|
1002
1048
|
|
|
1049
|
+
// --- Phase 0b: per-scan ledger rollup (operational coverage) ---
|
|
1050
|
+
// Caller may pass a precomputed rollup (sendDailyReport does, to persist the same
|
|
1051
|
+
// numbers it displays); undefined → compute here; explicit null → omit the section.
|
|
1052
|
+
const ledger = ledgerRollup !== undefined ? ledgerRollup : safeLedgerRollup();
|
|
1053
|
+
const ledgerField = formatLedgerField(ledger);
|
|
1054
|
+
|
|
1003
1055
|
const now = new Date();
|
|
1004
1056
|
const readableTime = now.toISOString().replace('T', ' ').replace(/\.\d+Z$/, ' UTC');
|
|
1005
1057
|
|
|
@@ -1022,6 +1074,7 @@ function buildDailyReportEmbed(stats, dailyAlerts) {
|
|
|
1022
1074
|
? [{ name: 'Deferred Sandbox', value: `Enqueued: ${stats.sandboxDeferred || 0} | Processed: ${stats.deferredProcessed || 0} | Expired: ${stats.deferredExpired || 0}`, inline: false }]
|
|
1023
1075
|
: []),
|
|
1024
1076
|
{ name: 'Stability', value: `Restarts (24h): ${stats.restartsToday || 0} | Temporal load-shed: ${stats.temporalLoadShed || 0} | Queue hard-drops: ${stats.queueHardDrops || 0}`, inline: false },
|
|
1077
|
+
...(ledgerField ? [ledgerField] : []),
|
|
1025
1078
|
{ name: 'System', value: healthText, inline: false }
|
|
1026
1079
|
],
|
|
1027
1080
|
footer: {
|
|
@@ -1060,7 +1113,10 @@ async function sendDailyReport(stats, dailyAlerts, recentlyScanned, downloadsCac
|
|
|
1060
1113
|
// delta. Written before the (now last) webhook so a mid-send kill can't double-count.
|
|
1061
1114
|
saveLastDailyReportDate(today, captureScanStatsBaseline());
|
|
1062
1115
|
|
|
1063
|
-
|
|
1116
|
+
// Phase 0b: compute the ledger rollup ONCE so the embed shows exactly the numbers
|
|
1117
|
+
// we persist (no double-scan, no drift between Discord and the on-disk metrics).
|
|
1118
|
+
const ledgerRollup = safeLedgerRollup();
|
|
1119
|
+
const payload = buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup);
|
|
1064
1120
|
|
|
1065
1121
|
// Persist locally with full raw metrics (independent of webhook — enables trend analysis)
|
|
1066
1122
|
persistDailyReport(payload, {
|
|
@@ -1081,6 +1137,7 @@ async function sendDailyReport(stats, dailyAlerts, recentlyScanned, downloadsCac
|
|
|
1081
1137
|
restartsToday: stats.restartsToday || 0,
|
|
1082
1138
|
temporalLoadShed: stats.temporalLoadShed || 0,
|
|
1083
1139
|
queueHardDrops: stats.queueHardDrops || 0,
|
|
1140
|
+
ledger: ledgerRollup || null,
|
|
1084
1141
|
topSuspects: dailyAlerts.slice().sort((a, b) => (b.score || 0) - (a.score || 0) || b.findingsCount - a.findingsCount).slice(0, 10)
|
|
1085
1142
|
});
|
|
1086
1143
|
|
|
@@ -1337,6 +1394,7 @@ module.exports = {
|
|
|
1337
1394
|
buildMaintainerChangeWebhookEmbed,
|
|
1338
1395
|
buildCanaryExfiltrationWebhookEmbed,
|
|
1339
1396
|
buildDailyReportEmbed,
|
|
1397
|
+
formatLedgerField,
|
|
1340
1398
|
sendDailyReport,
|
|
1341
1399
|
buildReportFromDisk,
|
|
1342
1400
|
buildReportEmbedFromDisk,
|