muaddib-scanner 2.11.82 → 2.11.83
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
package/src/monitor/queue.js
CHANGED
|
@@ -80,7 +80,7 @@ const {
|
|
|
80
80
|
|
|
81
81
|
// From ./ingestion.js
|
|
82
82
|
const { getNpmLatestTarball, getPyPITarballUrl } = require('./ingestion.js');
|
|
83
|
-
const { enqueueScan } = require('./scan-queue.js');
|
|
83
|
+
const { enqueueScan, dequeueScan } = require('./scan-queue.js');
|
|
84
84
|
|
|
85
85
|
// From ./tarball-archive.js
|
|
86
86
|
const { archiveSuspectTarball } = require('./tarball-archive.js');
|
|
@@ -259,7 +259,9 @@ function recordTrainingSample(result, params) {
|
|
|
259
259
|
maxSeverity: result.summary ? result.summary.riskLevel : null,
|
|
260
260
|
types: [...new Set((result.threats || []).map(t => t.type))],
|
|
261
261
|
sandbox: params.sandboxResult ? 'run' : 'none',
|
|
262
|
-
source: 'scan'
|
|
262
|
+
source: 'scan',
|
|
263
|
+
// AUDIT-A1: stamped on `result` in scanPackage (single source of truth)
|
|
264
|
+
firstPublish: !!(result && result._firstPublish)
|
|
263
265
|
});
|
|
264
266
|
} catch (err) {
|
|
265
267
|
// Non-fatal: ML export must never crash the monitor
|
|
@@ -673,6 +675,12 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
673
675
|
|
|
674
676
|
// First-publish detection: used for sandbox priority below
|
|
675
677
|
const isFirstPublish = cacheTrigger && cacheTrigger.reason === 'first_publish';
|
|
678
|
+
// AUDIT-A1 observability: stamp once so every recordTrainingSample(result, …) call
|
|
679
|
+
// below carries firstPublish into the scan-ledger (all ~10 call sites share this
|
|
680
|
+
// `result`). Pairs with the firstPublish flag on the eviction-drop ledger entries so
|
|
681
|
+
// first-publish coverage (scanned vs dropped) becomes measurable. The "Phase 2a"
|
|
682
|
+
// comment below promised this; the threading was missing until now.
|
|
683
|
+
result._firstPublish = isFirstPublish;
|
|
676
684
|
|
|
677
685
|
// npm registry metadata was fetched ONCE before the worker spawn (hoisted above
|
|
678
686
|
// to feed scanContext.npmRegistryMeta) and is reused here for: isFirstPublishHigh-
|
|
@@ -1171,9 +1179,14 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
1171
1179
|
console.log(`[MONITOR] REPUTATION BYPASS: ${name} has high-confidence threat — using raw score`);
|
|
1172
1180
|
}
|
|
1173
1181
|
|
|
1174
|
-
// Record daily alert with post-reputation score for top suspects ranking
|
|
1182
|
+
// Record daily alert with post-reputation score for top suspects ranking.
|
|
1183
|
+
// AUDIT-C: carry the distinct CRITICAL/HIGH threat types so the daily report
|
|
1184
|
+
// can annotate MCP suspects with their signals (visual triage, no scoring change).
|
|
1175
1185
|
if (dailyAlerts.length < MAX_DAILY_ALERTS) {
|
|
1176
|
-
|
|
1186
|
+
const signals = [...new Set((result.threats || [])
|
|
1187
|
+
.filter(t => t.severity === 'CRITICAL' || t.severity === 'HIGH')
|
|
1188
|
+
.map(t => t.type))].slice(0, 6);
|
|
1189
|
+
dailyAlerts.push({ name, version, ecosystem, findingsCount: result.summary.total, score: adjustedResult.summary.riskScore || 0, tier, signals });
|
|
1177
1190
|
}
|
|
1178
1191
|
// LLM Detective: AI-powered analysis for T1a/T1b suspects
|
|
1179
1192
|
// Skip for fast-track (large boring packages — LLM analysis adds 10-30s for no value)
|
|
@@ -1354,7 +1367,8 @@ async function _spawnWorker(scanQueue, stats, dailyAlerts, recentlyScanned, down
|
|
|
1354
1367
|
_activeWorkers++;
|
|
1355
1368
|
try {
|
|
1356
1369
|
while (scanQueue.length > 0 && _activeWorkers <= _targetConcurrency) {
|
|
1357
|
-
|
|
1370
|
+
// AUDIT A2: FIFO by default; priority dequeue when MUADDIB_PRIORITY_DEQUEUE=1.
|
|
1371
|
+
const item = dequeueScan(scanQueue);
|
|
1358
1372
|
if (!item) break;
|
|
1359
1373
|
await processQueueItem(item, stats, dailyAlerts, recentlyScanned, downloadsCache, scanQueue, sandboxAvailable);
|
|
1360
1374
|
}
|
|
@@ -68,7 +68,9 @@ function enqueueScan(scanQueue, item, stats, max = MAX_SCAN_QUEUE) {
|
|
|
68
68
|
if (evicted && evicted.name) {
|
|
69
69
|
require('./state.js').appendScanLedger({
|
|
70
70
|
name: evicted.name, version: evicted.version, ecosystem: evicted.ecosystem,
|
|
71
|
-
outcome: 'dropped', source: protectedFallback ? 'queue_cap_protected' : 'queue_cap'
|
|
71
|
+
outcome: 'dropped', source: protectedFallback ? 'queue_cap_protected' : 'queue_cap',
|
|
72
|
+
// AUDIT-A1 observability (see evictFromScanQueueBulk)
|
|
73
|
+
firstPublish: !!evicted.firstPublish, isBurstExtra: !!evicted.isATOBurstExtra
|
|
72
74
|
});
|
|
73
75
|
}
|
|
74
76
|
} catch { /* ledger is best-effort */ }
|
|
@@ -136,7 +138,12 @@ function evictFromScanQueueBulk(scanQueue, targetKeep, source = 'bulk_evict', le
|
|
|
136
138
|
appendLedger({
|
|
137
139
|
name: item.name, version: item.version, ecosystem: item.ecosystem,
|
|
138
140
|
outcome: 'dropped',
|
|
139
|
-
source: _isProtected(item) ? `${source}_protected` : source
|
|
141
|
+
source: _isProtected(item) ? `${source}_protected` : source,
|
|
142
|
+
// AUDIT-A1 observability: record whether a DROPPED item was a first-publish
|
|
143
|
+
// (real coverage loss) vs a burst-extra (version-spam, expected). Lets us
|
|
144
|
+
// measure if the memory breaker is evicting genuine new packages.
|
|
145
|
+
firstPublish: !!item.firstPublish,
|
|
146
|
+
isBurstExtra: !!item.isATOBurstExtra
|
|
140
147
|
});
|
|
141
148
|
} catch { /* ledger is best-effort — must never break the breaker */ }
|
|
142
149
|
}
|
|
@@ -149,4 +156,41 @@ function evictFromScanQueueBulk(scanQueue, targetKeep, source = 'bulk_evict', le
|
|
|
149
156
|
return { dropped: toDrop, droppedProtected };
|
|
150
157
|
}
|
|
151
158
|
|
|
152
|
-
|
|
159
|
+
// ── AUDIT A2: optional priority dequeue (gated OFF by default) ──────────────
|
|
160
|
+
// Default dequeue is strict FIFO (scanQueue.shift()). When enabled, the worker pulls
|
|
161
|
+
// the OLDEST high-value item (first-publish / known-malicious / burst-MAIN) within a
|
|
162
|
+
// bounded head-window before falling back to FIFO — so a genuine new package never
|
|
163
|
+
// ages out behind a deep version-spam backlog. Gated behind an env flag so deploying
|
|
164
|
+
// the code is INERT until ops flips it on (tune on the AUDIT-A1 first-publish-coverage
|
|
165
|
+
// data first — see brief). Burst EXTRAS (isATOBurstExtra) and regular items stay FIFO.
|
|
166
|
+
// Feature flag, read once at module load: priority dequeue is on only when
// MUADDIB_PRIORITY_DEQUEUE is exactly '1' or 'true'. Anything else (including unset)
// leaves the default FIFO behaviour in place.
const PRIORITY_DEQUEUE = ['1', 'true'].includes(process.env.MUADDIB_PRIORITY_DEQUEUE);

// Number of entries at the head of the queue inspected for a priority item. Taken from
// MUADDIB_PRIORITY_DEQUEUE_WINDOW when it parses (base 10) to a positive integer,
// otherwise 2048.
const PRIORITY_DEQUEUE_WINDOW = (() => {
  const parsed = Number.parseInt(process.env.MUADDIB_PRIORITY_DEQUEUE_WINDOW, 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return 2048;
  return parsed;
})();
|
|
174
|
+
|
|
175
|
+
/**
 * Decide whether a queue item counts as high-value for priority dequeue.
 *
 * An item is priority when it is flagged `firstPublish` or `isIOCMatch`, or when it is
 * a burst item (`isBurst`) that is not a burst extra (`isATOBurstExtra`).
 *
 * @param {object} [item] queue entry; a missing/falsy item is never priority
 * @returns {boolean}
 */
function _isPriority(item) {
  if (!item) return false;
  if (item.firstPublish || item.isIOCMatch) return true;
  return Boolean(item.isBurst && !item.isATOBurstExtra);
}
|
|
178
|
+
|
|
179
|
+
/**
 * Remove and return the next item to scan.
 *
 * Strict FIFO by default (plain `shift()`). When priority dequeue is enabled
 * (MUADDIB_PRIORITY_DEQUEUE=1, or `opts.priority`), the first item accepted by
 * `_isPriority` within the first `window` entries is removed and returned; when no
 * such item exists in the window, the head of the queue is returned instead.
 *
 * The function is fully synchronous (no await between the lookup and the
 * shift/splice), so concurrent async workers cannot interleave with it.
 *
 * @param {Array} scanQueue queue to take from; mutated in place
 * @param {{priority?: boolean, window?: number}} [opts] test overrides. `window` is
 *   honoured only when it is a positive number (same rule as the env variable);
 *   any other value falls back to PRIORITY_DEQUEUE_WINDOW.
 * @returns {*} the removed item, or undefined when the queue is empty
 */
function dequeueScan(scanQueue, opts = {}) {
  // Default parameters only cover `undefined`; tolerate an explicit null as well.
  const overrides = opts || {};
  const usePriority = overrides.priority !== undefined ? overrides.priority : PRIORITY_DEQUEUE;
  if (!usePriority || scanQueue.length === 0) return scanQueue.shift();

  // A negative or NaN override used to yield an empty scan window, silently turning
  // priority mode back into FIFO. Validate it like the env-derived default instead.
  const requested = Number(overrides.window);
  const limit = requested > 0 ? Math.ceil(requested) : PRIORITY_DEQUEUE_WINDOW;
  const win = Math.min(scanQueue.length, limit);

  for (let i = 0; i < win; i++) {
    if (_isPriority(scanQueue[i])) {
      return i === 0 ? scanQueue.shift() : scanQueue.splice(i, 1)[0];
    }
  }
  return scanQueue.shift();
}
|
|
195
|
+
|
|
196
|
+
module.exports = { enqueueScan, evictFromScanQueueBulk, dequeueScan, isProtected: _isProtected, MAX_SCAN_QUEUE };
|
package/src/monitor/state.js
CHANGED
|
@@ -1010,6 +1010,10 @@ function appendScanLedger(e) {
|
|
|
1010
1010
|
types: Array.isArray(e.types) ? e.types.slice(0, 12) : [],
|
|
1011
1011
|
sandbox: e.sandbox || 'none',
|
|
1012
1012
|
firstPublish: !!e.firstPublish,
|
|
1013
|
+
// AUDIT-A1: version-spam marker on dropped burst-extras — lets the coverage
|
|
1014
|
+
// rollup separate "first-publish lost" from "spam extra dropped (expected)".
|
|
1015
|
+
// Only written when true to keep the 127MB ledger lean.
|
|
1016
|
+
...(e.isBurstExtra ? { isBurstExtra: true } : {}),
|
|
1013
1017
|
source: e.source || 'scan'
|
|
1014
1018
|
};
|
|
1015
1019
|
fs.appendFileSync(SCAN_LEDGER_FILE, JSON.stringify(entry) + '\n', 'utf8');
|
package/src/monitor/webhook.js
CHANGED
|
@@ -1094,6 +1094,22 @@ function formatLedgerField(rollup) {
|
|
|
1094
1094
|
return { name: 'Ledger (24h)', value: lines.join('\n'), inline: false };
|
|
1095
1095
|
}
|
|
1096
1096
|
|
|
1097
|
+
// AUDIT-C: MCP self-identity by package name (matches the F9/F15 MCP_NAME_RE family in
// feature-extractor.js — kept local to avoid importing the ML module into the embed path).
const _MCP_TRIAGE_NAME_RE = /(?:^|[/_-])mcp(?:[_-]|$)|mcp[_-](?:server|init|bridge|installer|memory|plugin|core|router|host|client|gateway|relay|stdio|transport|orchestrator)/i;

/**
 * Triage tag for a daily-report top-suspect. Presentation only.
 *
 * Returns '' when the alert's package name (`a.name`, else `a.package`) does not match
 * the MCP name pattern. Otherwise returns ' 🔌 [MCP: sig1, sig2]' listing up to three
 * of the alert's recorded signals, or ' 🔌 [MCP]' when there are none to show.
 *
 * @param {{name?: string, package?: string, signals?: Array}} [a] daily alert entry
 * @returns {string} tag to append to the report line (leading space included), or ''
 */
function mcpTriageTag(a) {
  const name = (a && (a.name || a.package)) || '';
  if (!_MCP_TRIAGE_NAME_RE.test(name)) return '';
  // Keep only non-blank string signals BEFORE capping at three. Missing threat types
  // (undefined, or null after a JSON round-trip) would otherwise be joined as empty
  // slots such as '[MCP: ]' or '[MCP: , x]'.
  const sigs = Array.isArray(a.signals)
    ? a.signals.filter((s) => typeof s === 'string' && s.trim() !== '').slice(0, 3)
    : [];
  return sigs.length ? ` 🔌 [MCP: ${sigs.join(', ')}]` : ' 🔌 [MCP]';
}
|
|
1112
|
+
|
|
1097
1113
|
function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
|
|
1098
1114
|
// Use in-memory stats (accumulated since last reset, restored from disk on restart)
|
|
1099
1115
|
// instead of disk-based daily entries which can undercount due to UTC/Paris date mismatch
|
|
@@ -1110,7 +1126,10 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
|
|
|
1110
1126
|
const version = a.version || 'N/A';
|
|
1111
1127
|
const count = a.findingsCount || (a.findings ? a.findings.length : 0);
|
|
1112
1128
|
const scoreText = a.score != null ? `score ${a.score}, ` : '';
|
|
1113
|
-
|
|
1129
|
+
// AUDIT-C: annotate MCP suspects (identity + signals) for visual triage — MCP
|
|
1130
|
+
// servers score high but are statically ambiguous vs MCP-malware (see AUDIT 2).
|
|
1131
|
+
// Pure presentation, no scoring change.
|
|
1132
|
+
return `${i + 1}. **${name}@${version}** — ${scoreText}${count} finding(s)${mcpTriageTag(a)}`;
|
|
1114
1133
|
}).join('\n')
|
|
1115
1134
|
: 'None';
|
|
1116
1135
|
|