muaddib-scanner 2.11.75 → 2.11.77
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.githooks/pre-commit +18 -0
- package/README.md +15 -6
- package/package.json +1 -2
- package/{self-scan-v2.11.75.json → self-scan-v2.11.77.json} +1 -1
- package/src/commands/safe-install.js +8 -3
- package/src/monitor/daemon.js +34 -22
- package/src/monitor/ingestion.js +43 -6
- package/src/monitor/queue.js +120 -21
- package/src/monitor/scan-queue.js +100 -7
- package/src/monitor/state.js +24 -1
- package/src/monitor/webhook.js +71 -11
- package/src/scanner/temporal-analysis.js +8 -0
- package/src/scanner/temporal-ast-diff.js +5 -0
- package/.dockerignore +0 -7
- package/.env.example +0 -43
- package/ml-retrain/auto-labeler/auto_labeler.py +0 -312
- package/ml-retrain/auto-labeler/ghsa_checker.py +0 -169
- package/ml-retrain/auto-labeler/labeler.py +0 -256
- package/ml-retrain/auto-labeler/npm_checker.py +0 -228
- package/ml-retrain/auto-labeler/ossf_index.py +0 -178
- package/ml-retrain/auto-labeler/requirements.txt +0 -1
- package/ml-retrain/confusion-matrix.png +0 -0
- package/ml-retrain/model-trees-retrained.js +0 -12
- package/ml-retrain/retrain-report.json +0 -225
- package/ml-retrain/retrain.py +0 -974
- package/sbom.json +0 -0
- package/src/ml/train-bundler-detector.py +0 -725
- package/src/ml/train-xgboost.py +0 -957
- package/tools/export-model-js.py +0 -160
- package/tools/requirements-ml.txt +0 -5
- package/tools/train-classifier.py +0 -333
|
@@ -24,36 +24,129 @@ const MAX_SCAN_QUEUE = (() => {
|
|
|
24
24
|
const HARD_DROP_LOG_INTERVAL_MS = 10_000;
|
|
25
25
|
let _lastHardDropLog = 0;
|
|
26
26
|
|
|
27
|
+
/**
 * Phase 2b protection predicate used by queue eviction.
 *
 * An item is protected when it carries at least one of the targeted-scan flags
 * (`isIOCMatch`, `isBurst`, `firstPublish`, `atoSignal`, `isATOBurstExtra`) — the
 * known-malicious, burst/ATO and first-publish scans that must not be dropped blindly
 * when the queue caps out. Eviction drops the oldest UNPROTECTED item instead; only if
 * a bounded head-window is entirely protected does it fall back to strict-oldest
 * (still ledgered, with a distinct source).
 *
 * @param {object|null|undefined} item - Queue entry to classify.
 * @returns {boolean} true when any protection flag on the item is truthy.
 */
function _isProtected(item) {
  // Missing entries are never protected.
  if (!item) return false;
  if (item.isIOCMatch) return true;
  if (item.isBurst) return true;
  if (item.firstPublish) return true;
  if (item.atoSignal) return true;
  return Boolean(item.isATOBurstExtra);
}
|
|
34
|
+
|
|
35
|
+
// Size of the head-window scanned for an unprotected eviction victim. Protected items are
// a small fraction of the flood, so a victim is almost always found within a few slots;
// bounding the scan keeps eviction O(window) under sustained overflow
// (CLAUDE.md §2 bounded resources).
// Overridable via MUADDIB_PROTECTED_EVICTION_SCAN_MAX; anything that does not parse to a
// positive integer falls back to 1024.
const PROTECTED_EVICTION_SCAN_MAX = (() => {
  const DEFAULT_WINDOW = 1024;
  const parsed = parseInt(process.env.MUADDIB_PROTECTED_EVICTION_SCAN_MAX, 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return DEFAULT_WINDOW;
  return parsed;
})();
|
|
42
|
+
|
|
27
43
|
/**
 * Push an item onto the scan queue, enforcing the hard cap when at capacity.
 *
 * When the queue is non-empty and already holds `max` (or more) entries, exactly one
 * entry is evicted before the push: the oldest UNPROTECTED item found within the first
 * PROTECTED_EVICTION_SCAN_MAX slots, or — if that head-window is entirely protected —
 * the strict-oldest item. An empty queue has nothing to evict, so no drop is counted
 * even when `max` is 0 or negative.
 *
 * Every eviction increments `stats.queueHardDrops` (when `stats` is given), is written
 * to the scan ledger on a best-effort basis, and emits a warning at most once per
 * HARD_DROP_LOG_INTERVAL_MS.
 *
 * @param {Array<object>} scanQueue - Queue array, mutated in place.
 * @param {object} item - Entry to append.
 * @param {object} [stats] - Optional counters object; `queueHardDrops` is incremented on eviction.
 * @param {number} [max=MAX_SCAN_QUEUE] - Capacity (overridable for tests).
 * @returns {boolean} true iff an item was dropped to make room.
 */
function enqueueScan(scanQueue, item, stats, max = MAX_SCAN_QUEUE) {
  let dropped = false;
  // The length > 0 guard prevents a phantom drop: with nothing queued, shift() would
  // return undefined while the drop counter and return value still claimed an eviction.
  if (scanQueue.length > 0 && scanQueue.length >= max) {
    // Victim = oldest unprotected item within the bounded head-window; else strict oldest.
    let victimIdx = -1;
    const scanLimit = Math.min(scanQueue.length, PROTECTED_EVICTION_SCAN_MAX);
    for (let i = 0; i < scanLimit; i++) {
      if (!_isProtected(scanQueue[i])) { victimIdx = i; break; }
    }
    const protectedFallback = victimIdx === -1;
    const evicted = protectedFallback ? scanQueue.shift() : scanQueue.splice(victimIdx, 1)[0];
    dropped = true;
    if (stats) stats.queueHardDrops = (stats.queueHardDrops || 0) + 1;
    // Phase 0a: record the dropped item so a coverage loss keeps an identity — answers
    // "which versions were never scanned" (e.g. the Miasma 72s/96-version burst). Lazy
    // require avoids any top-level coupling with state.js; best-effort, never throws.
    // A dropped PROTECTED item (all-protected head-window) gets a distinct source so the
    // rare case stays visible in the 0b ledger rollup.
    try {
      if (evicted && evicted.name) {
        require('./state.js').appendScanLedger({
          name: evicted.name, version: evicted.version, ecosystem: evicted.ecosystem,
          outcome: 'dropped', source: protectedFallback ? 'queue_cap_protected' : 'queue_cap'
        });
      }
    } catch { /* ledger is best-effort */ }
    // Rate-limit the warning so a sustained overflow does not flood the journal.
    const now = Date.now();
    if (now - _lastHardDropLog > HARD_DROP_LOG_INTERVAL_MS) {
      _lastHardDropLog = now;
      console.warn(`[MONITOR] QUEUE_HARD_DROP: scan queue at cap ${max} — dropping ${protectedFallback ? 'OLDEST (head-window all protected)' : 'oldest unprotected'} item(s) (total dropped this session: ${stats ? stats.queueHardDrops : '?'}). Ingestion is outrunning scanning.`);
    }
  }
  scanQueue.push(item);
  return dropped;
}
|
|
58
84
|
|
|
59
|
-
|
|
85
|
+
/**
 * Bulk-evict the scan queue down to `targetKeep` entries, honoring the SAME protection
 * predicate as enqueueScan (`_isProtected`) and ledgering EVERY dropped item that has a
 * name. Intended as the single eviction path for the daemon's EMERGENCY memory breaker
 * instead of a raw `splice(0, n)`.
 *
 * Selection: the oldest UNPROTECTED items are dropped first; protected items are dropped
 * (oldest-first) only when there are not enough unprotected ones to reach the target.
 *
 * The queue is compacted in place with a write pointer (O(n), insertion order of the
 * survivors preserved), so any other holder of the same array reference observes the
 * mutation. Ledger writes are best-effort and never throw.
 *
 * @param {Array<object>} scanQueue - Queue array, mutated in place.
 * @param {number} targetKeep - Number of entries to keep. Fractions are truncated,
 *   negatives clamp to 0, `Infinity` keeps everything, and a value that is not a number
 *   (NaN / undefined) is treated as 0.
 * @param {string} [source='bulk_evict'] - Ledger `source` label; protected drops get a
 *   `_protected` suffix.
 * @param {Function|null} [ledgerFn=null] - Ledger sink (injectable for tests); defaults
 *   to `appendScanLedger` from ./state.js.
 * @returns {{dropped:number, droppedProtected:number}}
 */
function evictFromScanQueueBulk(scanQueue, targetKeep, source = 'bulk_evict', ledgerFn = null) {
  const before = scanQueue.length;
  // Do NOT use `targetKeep | 0` here: ToInt32 wraps values >= 2^31 to negatives and maps
  // Infinity to 0, which would silently evict the entire queue.
  const requested = Number(targetKeep);
  const keep = Number.isNaN(requested) ? 0 : Math.max(0, Math.trunc(requested));
  if (before <= keep) return { dropped: 0, droppedProtected: 0 };
  const toDrop = before - keep;

  // Victim set: oldest unprotected first, then (only if short) oldest protected.
  const dropSet = new Set();
  for (let i = 0; i < before && dropSet.size < toDrop; i++) {
    if (!_isProtected(scanQueue[i])) dropSet.add(i);
  }
  let droppedProtected = 0;
  if (dropSet.size < toDrop) {
    // Not enough unprotected items: every unprotected one is already marked, so the
    // remaining oldest-first items are protected — drop them as a last resort.
    for (let i = 0; i < before && dropSet.size < toDrop; i++) {
      if (!dropSet.has(i)) { dropSet.add(i); droppedProtected++; }
    }
  }

  // Resolve the ledger sink once instead of calling require() for every dropped item.
  let appendLedger = ledgerFn;
  if (!appendLedger) {
    try { appendLedger = require('./state.js').appendScanLedger; } catch { appendLedger = null; }
  }

  // Compact survivors in place, ledgering each evicted item with an identity-preserving
  // source (protected drops get a distinct suffix so the rare case stays visible in the rollup).
  let w = 0;
  for (let r = 0; r < before; r++) {
    if (dropSet.has(r)) {
      const item = scanQueue[r];
      if (appendLedger && item && item.name) {
        try {
          appendLedger({
            name: item.name, version: item.version, ecosystem: item.ecosystem,
            outcome: 'dropped',
            source: _isProtected(item) ? `${source}_protected` : source
          });
        } catch { /* ledger is best-effort — must never break the breaker */ }
      }
    } else {
      scanQueue[w++] = scanQueue[r];
    }
  }
  scanQueue.length = w;

  return { dropped: toDrop, droppedProtected };
}
|
|
151
|
+
|
|
152
|
+
module.exports = { enqueueScan, evictFromScanQueueBulk, isProtected: _isProtected, MAX_SCAN_QUEUE };
|
package/src/monitor/state.js
CHANGED
|
@@ -972,7 +972,7 @@ let _scanLedgerAppendedSinceCompact = 0;
|
|
|
972
972
|
const SCAN_LEDGER_OUTCOMES = new Set([
|
|
973
973
|
'clean', 'clean_low_signal', 'clean_tooling', 'suspect', 'ml_clean', 'llm_benign',
|
|
974
974
|
'sandbox_inconclusive', 'sandbox_unconfirmed', 'confirmed',
|
|
975
|
-
'static_timeout', 'size_skip', 'dropped'
|
|
975
|
+
'static_timeout', 'size_skip', 'dropped', 'error'
|
|
976
976
|
]);
|
|
977
977
|
|
|
978
978
|
/**
|
|
@@ -1453,6 +1453,27 @@ function getParisDateString() {
|
|
|
1453
1453
|
return formatter.format(new Date());
|
|
1454
1454
|
}
|
|
1455
1455
|
|
|
1456
|
+
// Hour (Europe/Paris) at/after which the once-daily report may fire. Single source of
|
|
1457
|
+
// truth — imported by webhook.js, daemon.js and queue.js (each previously redefined it,
|
|
1458
|
+
// and webhook.js still re-exports it for back-compat).
|
|
1459
|
+
const DAILY_REPORT_HOUR = 8; // 08:00 Paris time (Europe/Paris)
|
|
1460
|
+
|
|
1461
|
+
/**
 * Canonical "is the daily report due?" gate, defined once in state.js so the other
 * monitor modules can share it instead of carrying divergent copies
 * (previously daemon.js `!== 8` and queue.js `< 8`).
 *
 * Semantics:
 *  - Before DAILY_REPORT_HOUR (Paris time) the answer is always false. A fire in that
 *    00:00–07:59 "dead zone" would stamp the NEW day's date ahead of its window and,
 *    because hasReportBeenSentToday() keys off the Paris calendar date, suppress that
 *    day's real report.
 *  - At or after DAILY_REPORT_HOUR the report is due until it has been sent, so a missed
 *    08:00 (e.g. the daemon was down at that minute) still fires later the same day.
 *
 * @param {object} stats - Stats object handed to hasReportBeenSentToday().
 * @returns {boolean} true when the report should fire now.
 */
function isDailyReportDue(stats) {
  const insideDeadZone = getParisHour() < DAILY_REPORT_HOUR;
  // Short-circuit keeps hasReportBeenSentToday() uncalled during the dead zone.
  return !insideDeadZone && !hasReportBeenSentToday(stats);
}
|
|
1476
|
+
|
|
1456
1477
|
// --- recentlyScanned dedup-set persistence (survives restarts → no re-scan storm) ---
|
|
1457
1478
|
//
|
|
1458
1479
|
// The dedup Set is in-memory only, so every restart starts it empty and re-scans the
|
|
@@ -1703,5 +1724,7 @@ module.exports = {
|
|
|
1703
1724
|
loadRecentlyScanned,
|
|
1704
1725
|
getParisHour,
|
|
1705
1726
|
getParisDateString,
|
|
1727
|
+
DAILY_REPORT_HOUR,
|
|
1728
|
+
isDailyReportDue,
|
|
1706
1729
|
loadStateRaw
|
|
1707
1730
|
};
|
package/src/monitor/webhook.js
CHANGED
|
@@ -16,6 +16,7 @@ const {
|
|
|
16
16
|
DAILY_REPORTS_LOG_DIR,
|
|
17
17
|
getParisDateString,
|
|
18
18
|
getParisHour,
|
|
19
|
+
DAILY_REPORT_HOUR,
|
|
19
20
|
loadScanStats,
|
|
20
21
|
loadDetections,
|
|
21
22
|
saveLastDailyReportDate,
|
|
@@ -60,7 +61,8 @@ const HIGH_INTENT_TYPES = new Set([
|
|
|
60
61
|
'remote_code_load', 'obfuscation_detected'
|
|
61
62
|
]);
|
|
62
63
|
|
|
63
|
-
|
|
64
|
+
// DAILY_REPORT_HOUR (=8) is imported from state.js (single source of truth) and
|
|
65
|
+
// re-exported below for back-compat (monitor.js / tests import it via webhook).
|
|
64
66
|
|
|
65
67
|
// --- Webhook alerting ---
|
|
66
68
|
|
|
@@ -240,6 +242,43 @@ async function sendCampaignPreAlert(name, campaign, ecosystem = 'npm') {
|
|
|
240
242
|
await sendWebhook(url, buildCampaignPreAlertEmbed(name, campaign, ecosystem), { rawPayload: true });
|
|
241
243
|
}
|
|
242
244
|
|
|
245
|
+
/**
 * Layer 1c: Build the burst pre-alert embed (no network). Exported for tests.
 * Describes ≥K versions of one package landing in a short window (account-takeover /
 * "Miasma" burst-publish). Amber (0xf39c12) to distinguish it from IOC (red) and
 * campaign (orange) alerts.
 *
 * The clock is read once so the human-readable footer and the embed `timestamp`
 * always describe the same instant.
 *
 * @param {string} name - Package name
 * @param {number} count - Number of versions seen in the burst window
 * @param {string} [ecosystem='npm'] - 'npm' | 'pypi' | 'crates' (link target)
 * @returns {{embeds: Array<object>}} Webhook payload containing a single embed.
 */
function buildBurstPreAlertEmbed(name, count, ecosystem = 'npm') {
  const isoNow = new Date().toISOString();
  // "2024-01-02T03:04:05.678Z" -> "2024-01-02 03:04:05 UTC"
  const footerStamp = isoNow.replace('T', ' ').replace(/\.\d+Z$/, ' UTC');
  return {
    embeds: [{
      title: '⚠️ BURST PRE-ALERT — Rapid Multi-Version Publish',
      color: 0xf39c12,
      fields: [
        { name: 'Package', value: `[${ecosystem}/${name}](${registryLink(ecosystem, name)})`, inline: true },
        { name: 'Versions', value: `${count} in a short window`, inline: true },
        { name: 'Detection', value: 'Burst-publish (possible ATO / Miasma)', inline: true },
        { name: 'Status', value: 'Multiple versions published rapidly — every version queued for scan and protected from queue-cap eviction. Treat as suspect until verdicts land.', inline: false }
      ],
      footer: {
        text: `MUAD'DIB Burst Pre-Alert | ${footerStamp}`
      },
      timestamp: isoNow
    }]
  };
}
|
|
271
|
+
|
|
272
|
+
/**
 * Layer 1c: Send a burst pre-alert webhook.
 *
 * Does nothing when no webhook URL is configured. Otherwise builds the burst embed and
 * awaits sendWebhook() with `rawPayload: true`; a rejection from sendWebhook propagates
 * to the caller. Callers are expected to dedupe per name/window so a burst pings once,
 * not once per version.
 *
 * @param {string} name - Package name
 * @param {number} count - Number of versions seen in the burst window
 * @param {string} [ecosystem='npm'] - Ecosystem forwarded to the embed builder
 * @returns {Promise<void>}
 */
async function sendBurstPreAlert(name, count, ecosystem = 'npm') {
  const webhookUrl = getWebhookUrl();
  if (webhookUrl) {
    const payload = buildBurstPreAlertEmbed(name, count, ecosystem);
    await sendWebhook(webhookUrl, payload, { rawPayload: true });
  }
}
|
|
281
|
+
|
|
243
282
|
/**
|
|
244
283
|
* Check if a specific package@version matches a versioned IOC entry.
|
|
245
284
|
* Returns the matching IOC entry or null.
|
|
@@ -1115,6 +1154,14 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
|
|
|
1115
1154
|
* @param {Map} downloadsCache - In-memory downloads cache (will be cleared)
|
|
1116
1155
|
*/
|
|
1117
1156
|
async function sendDailyReport(stats, dailyAlerts, recentlyScanned, downloadsCache) {
|
|
1157
|
+
// Dead-zone guard (defense in depth): never send or stamp before the 08:00 Paris window.
|
|
1158
|
+
// The scheduled gate (isDailyReportDue) already excludes 00:00–07:59, but an ungated /
|
|
1159
|
+
// manual / test caller firing at e.g. 00:43 would otherwise write-ahead the NEW day's date
|
|
1160
|
+
// (below) and suppress that day's real report. This makes the early stamp impossible.
|
|
1161
|
+
if (getParisHour() < DAILY_REPORT_HOUR) {
|
|
1162
|
+
console.log(`[MONITOR] Daily report suppressed: before ${DAILY_REPORT_HOUR}:00 Paris (hour=${getParisHour()})`);
|
|
1163
|
+
return;
|
|
1164
|
+
}
|
|
1118
1165
|
// Crash-safe headline: a restart-storm around report time can zero the in-memory
|
|
1119
1166
|
// counter (the monitor OOM-restarts ~10×/day). Floor scanned/clean/suspect at the
|
|
1120
1167
|
// durable scan-stats delta so we never publish "5" when ~44k were really scanned.
|
|
@@ -1134,6 +1181,10 @@ async function sendDailyReport(stats, dailyAlerts, recentlyScanned, downloadsCac
|
|
|
1134
1181
|
// Persist the monotonic scan-stats counter as the baseline for the NEXT report's
|
|
1135
1182
|
// delta. Written before the (now last) webhook so a mid-send kill can't double-count.
|
|
1136
1183
|
saveLastDailyReportDate(today, captureScanStatsBaseline());
|
|
1184
|
+
// Observability: the success path previously logged nothing, which made the late-fire bug
|
|
1185
|
+
// invisible in the journal. Log the stamped date + the actual Paris hour (an on-time 08:00
|
|
1186
|
+
// fire vs a catch-up at hour 14 are now distinguishable) + the headline count.
|
|
1187
|
+
console.log(`[MONITOR] Daily report firing for ${today} (hour=${getParisHour()} Paris, scanned=${stats.scanned})`);
|
|
1137
1188
|
|
|
1138
1189
|
// Phase 0b: compute the ledger rollup ONCE so the embed shows exactly the numbers
|
|
1139
1190
|
// we persist (no double-scan, no drift between Discord and the on-disk metrics).
|
|
@@ -1328,16 +1379,23 @@ async function sendReportNow(stats) {
|
|
|
1328
1379
|
return { sent: false, message: `Webhook failed: ${err.message}` };
|
|
1329
1380
|
}
|
|
1330
1381
|
|
|
1331
|
-
// Update lastDailyReportDate on disk
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1335
|
-
|
|
1336
|
-
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
|
|
1340
|
-
|
|
1382
|
+
// Update lastDailyReportDate on disk — but ONLY at/after 08:00 Paris. A manual report run
|
|
1383
|
+
// before 08:00 is a deliberate operator override (we still SEND it), but it must NOT stamp
|
|
1384
|
+
// today's date: hasReportBeenSentToday() keys off the Paris calendar date, so an early
|
|
1385
|
+
// stamp would suppress that day's scheduled 08:00 report (the exact failure we're fixing).
|
|
1386
|
+
if (getParisHour() >= DAILY_REPORT_HOUR) {
|
|
1387
|
+
const today = getParisDateString();
|
|
1388
|
+
const stateRaw = loadStateRaw();
|
|
1389
|
+
const state = {
|
|
1390
|
+
npmLastPackage: stateRaw.npmLastPackage || '',
|
|
1391
|
+
pypiLastPackage: stateRaw.pypiLastPackage || ''
|
|
1392
|
+
};
|
|
1393
|
+
stats.lastDailyReportDate = today;
|
|
1394
|
+
saveState(state, stats);
|
|
1395
|
+
saveLastDailyReportDate(today);
|
|
1396
|
+
} else {
|
|
1397
|
+
console.log(`[MONITOR] Manual report sent; not stamping (before ${DAILY_REPORT_HOUR}:00 Paris — the scheduled report will still fire today)`);
|
|
1398
|
+
}
|
|
1341
1399
|
|
|
1342
1400
|
return { sent: true, message: 'Daily report sent' };
|
|
1343
1401
|
}
|
|
@@ -1399,6 +1457,8 @@ module.exports = {
|
|
|
1399
1457
|
sendIOCPreAlert,
|
|
1400
1458
|
buildCampaignPreAlertEmbed,
|
|
1401
1459
|
sendCampaignPreAlert,
|
|
1460
|
+
buildBurstPreAlertEmbed,
|
|
1461
|
+
sendBurstPreAlert,
|
|
1402
1462
|
matchVersionedIOC,
|
|
1403
1463
|
computeRiskLevel,
|
|
1404
1464
|
computeRiskScore,
|
|
@@ -121,6 +121,14 @@ function _fetchPackageMetadataHttp(packageName) {
|
|
|
121
121
|
return;
|
|
122
122
|
}
|
|
123
123
|
|
|
124
|
+
if (res.statusCode === 429) {
|
|
125
|
+
res.resume();
|
|
126
|
+
// Coordinated backoff on the shared registry limiter — the temporal scanners must
|
|
127
|
+
// signal 429 like the metadata path, not hammer through a rate limit (CLAUDE.md storm).
|
|
128
|
+
try { require('../shared/http-limiter.js').signal429(); } catch { /* limiter best-effort */ }
|
|
129
|
+
reject(new Error(`Registry rate limited (HTTP 429) for ${packageName}`));
|
|
130
|
+
return;
|
|
131
|
+
}
|
|
124
132
|
if (res.statusCode < 200 || res.statusCode >= 300) {
|
|
125
133
|
res.resume();
|
|
126
134
|
reject(new Error(`Registry returned HTTP ${res.statusCode} for ${packageName}`));
|
|
@@ -71,6 +71,11 @@ function _fetchVersionMetadataHttp(packageName, version) {
|
|
|
71
71
|
res.resume();
|
|
72
72
|
return reject(new Error(`Version ${version} not found for package ${packageName}`));
|
|
73
73
|
}
|
|
74
|
+
if (res.statusCode === 429) {
|
|
75
|
+
res.resume();
|
|
76
|
+
try { require('../shared/http-limiter.js').signal429(); } catch { /* limiter best-effort */ }
|
|
77
|
+
return reject(new Error(`Registry rate limited (HTTP 429) for ${packageName}@${version}`));
|
|
78
|
+
}
|
|
74
79
|
if (res.statusCode < 200 || res.statusCode >= 300) {
|
|
75
80
|
res.resume();
|
|
76
81
|
return reject(new Error(`Registry returned HTTP ${res.statusCode} for ${packageName}@${version}`));
|
package/.dockerignore
DELETED
package/.env.example
DELETED
|
@@ -1,43 +0,0 @@
|
|
|
1
|
-
# MUAD'DIB environment variables — template
|
|
2
|
-
# Copy to .env (local dev) or /opt/muaddib/.env (VPS) and fill in real values.
|
|
3
|
-
# .env files are gitignored. NEVER commit a real token.
|
|
4
|
-
|
|
5
|
-
# ----------------------------------------------------------------------------
|
|
6
|
-
# Threat-feed API tokens (all OPTIONAL — scrapers degrade gracefully if absent)
|
|
7
|
-
# ----------------------------------------------------------------------------
|
|
8
|
-
|
|
9
|
-
# OpenSourceMalware.com — community-verified threat intel
|
|
10
|
-
# Free tier: 60 req/min, /query-latest gives 100 most recent threats per ecosystem.
|
|
11
|
-
# Sign up + generate at: https://opensourcemalware.com/auth → profile → API Tokens
|
|
12
|
-
# Format: osm_<random-32+chars>
|
|
13
|
-
# Used by: src/ioc/scraper.js → scrapeOSMQueryLatest()
|
|
14
|
-
OSM_API_TOKEN=
|
|
15
|
-
|
|
16
|
-
# ----------------------------------------------------------------------------
|
|
17
|
-
# Webhook destinations (optional — monitor alerts)
|
|
18
|
-
# ----------------------------------------------------------------------------
|
|
19
|
-
|
|
20
|
-
# Discord webhook for monitor alerts (P1/P2/P3 triage)
|
|
21
|
-
# DISCORD_WEBHOOK_URL=
|
|
22
|
-
|
|
23
|
-
# ----------------------------------------------------------------------------
|
|
24
|
-
# FPR plan gates — DEFAULT ON since v2.11.9 (no need to set these unless opting OUT)
|
|
25
|
-
# ----------------------------------------------------------------------------
|
|
26
|
-
# Measured impact on the v2.11.4 evaluation corpus (1054 packages):
|
|
27
|
-
# FPR curated 15.6% -> 9.36% (-6.24 pp), FPR random 7.0% -> 2.0% (-5.00 pp).
|
|
28
|
-
# TPR@3 / TPR@20 / ADR strictly unchanged.
|
|
29
|
-
#
|
|
30
|
-
# Opt-OUT individual gates (uncomment + set to 0):
|
|
31
|
-
# MUADDIB_FN_REACHABILITY=0 # function-level reachability gating
|
|
32
|
-
# MUADDIB_DECAY=0 # group score decay on bundled outputs
|
|
33
|
-
# MUADDIB_MATURE_CAP=0 # cap mature, well-trafficked packages at MEDIUM
|
|
34
|
-
# MUADDIB_METADATA_FACTOR=0 # registry signals -> reputation multiplier
|
|
35
|
-
# MUADDIB_DELTA_MODE=0 # delta scoring against prior versions
|
|
36
|
-
#
|
|
37
|
-
# Skip ALL network fetches (npm registry packument + GitHub Releases IOC
|
|
38
|
-
# bootstrap) in one shot. Disables MATURE_CAP + METADATA_FACTOR + DELTA_MODE
|
|
39
|
-
# at the per-scan level AND the first-run IOC database download. Useful for:
|
|
40
|
-
# - air-gap / offline CI environments
|
|
41
|
-
# - test runners (set automatically by tests/run-tests.js)
|
|
42
|
-
# - perf-critical batch scans where you've pre-warmed the IOC cache
|
|
43
|
-
# MUADDIB_NO_REGISTRY_FETCH=1
|