muaddib-scanner 2.11.76 → 2.11.78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.githooks/pre-commit +18 -0
- package/README.md +15 -6
- package/bin/muaddib.js +18 -4
- package/package.json +1 -2
- package/{self-scan-v2.11.76.json → self-scan-v2.11.78.json} +1 -1
- package/src/commands/interactive.js +5 -6
- package/src/commands/safe-install.js +19 -19
- package/src/ioc/scraper.js +46 -10
- package/src/monitor/daemon.js +39 -28
- package/src/monitor/ingestion.js +32 -2
- package/src/monitor/queue.js +84 -21
- package/src/monitor/scan-queue.js +68 -1
- package/src/monitor/state.js +24 -1
- package/src/monitor/webhook.js +32 -11
- package/src/output/formatter.js +3 -4
- package/src/pipeline/executor.js +9 -1
- package/src/runtime/daemon.js +27 -28
- package/src/runtime/watch.js +7 -7
- package/src/sandbox/index.js +11 -9
- package/src/scanner/temporal-analysis.js +8 -0
- package/src/scanner/temporal-ast-diff.js +5 -0
- package/src/utils.js +60 -1
- package/.dockerignore +0 -7
- package/.env.example +0 -43
- package/ml-retrain/auto-labeler/auto_labeler.py +0 -312
- package/ml-retrain/auto-labeler/ghsa_checker.py +0 -169
- package/ml-retrain/auto-labeler/labeler.py +0 -256
- package/ml-retrain/auto-labeler/npm_checker.py +0 -228
- package/ml-retrain/auto-labeler/ossf_index.py +0 -178
- package/ml-retrain/auto-labeler/requirements.txt +0 -1
- package/ml-retrain/confusion-matrix.png +0 -0
- package/ml-retrain/model-trees-retrained.js +0 -12
- package/ml-retrain/retrain-report.json +0 -225
- package/ml-retrain/retrain.py +0 -974
- package/sbom.json +0 -0
- package/src/ml/train-bundler-detector.py +0 -725
- package/src/ml/train-xgboost.py +0 -957
- package/tools/export-model-js.py +0 -160
- package/tools/requirements-ml.txt +0 -5
- package/tools/train-classifier.py +0 -333
package/src/monitor/queue.js
CHANGED
|
@@ -32,8 +32,7 @@ const {
|
|
|
32
32
|
tarballCacheKey,
|
|
33
33
|
tarballCachePath,
|
|
34
34
|
appendAlert,
|
|
35
|
-
|
|
36
|
-
hasReportBeenSentToday,
|
|
35
|
+
isDailyReportDue,
|
|
37
36
|
MAX_DAILY_ALERTS,
|
|
38
37
|
loadScanMemory,
|
|
39
38
|
shouldSuppressByMemory,
|
|
@@ -64,8 +63,7 @@ const {
|
|
|
64
63
|
computeReputationFactor,
|
|
65
64
|
triageRisk,
|
|
66
65
|
sendDailyReport,
|
|
67
|
-
alertedPackageRules
|
|
68
|
-
DAILY_REPORT_HOUR
|
|
66
|
+
alertedPackageRules
|
|
69
67
|
} = require('./webhook.js');
|
|
70
68
|
|
|
71
69
|
// From ./temporal.js
|
|
@@ -99,10 +97,11 @@ let _targetConcurrency = BASE_CONCURRENCY;
|
|
|
99
97
|
const SCAN_CONCURRENCY = BASE_CONCURRENCY; // legacy export — tests check this value
|
|
100
98
|
let _activeWorkers = 0;
|
|
101
99
|
const _workerPromises = new Set();
|
|
102
|
-
// Live static-scan Worker threads
|
|
103
|
-
//
|
|
104
|
-
// ASTs)
|
|
105
|
-
|
|
100
|
+
// Live static-scan Worker threads, mapped to the {name,version,ecosystem} of the scan they
|
|
101
|
+
// run — tracked so the daemon's EMERGENCY memory handler can terminate orphaned workers
|
|
102
|
+
// (each retains its isolate heap + parsed ASTs) AND name the in-flight scans it kills.
|
|
103
|
+
// Bounded by concurrency, so it stays tiny.
|
|
104
|
+
const _liveWorkers = new Map();
|
|
106
105
|
|
|
107
106
|
function getTargetConcurrency() { return _targetConcurrency; }
|
|
108
107
|
function setTargetConcurrency(n) { _targetConcurrency = Math.max(MIN_CONCURRENCY, Math.min(MAX_CONCURRENCY, n)); }
|
|
@@ -115,10 +114,20 @@ function getActiveWorkers() { return _activeWorkers; }
|
|
|
115
114
|
*/
|
|
116
115
|
function terminateAllWorkers() {
|
|
117
116
|
let n = 0;
|
|
118
|
-
|
|
119
|
-
|
|
117
|
+
const dropped = [];
|
|
118
|
+
for (const [w, item] of Array.from(_liveWorkers.entries())) {
|
|
119
|
+
try {
|
|
120
|
+
w.terminate(); n++;
|
|
121
|
+
if (item && item.name) dropped.push(`${item.name}@${item.version || '?'}`);
|
|
122
|
+
} catch { /* already gone */ }
|
|
120
123
|
_liveWorkers.delete(w);
|
|
121
124
|
}
|
|
125
|
+
if (dropped.length) {
|
|
126
|
+
// The terminate rejects each scan's worker promise; that reject propagates to
|
|
127
|
+
// scanPackage's catch, which ledgers it (outcome:'error', source scan_error) — so these
|
|
128
|
+
// in-flight scans are NOT lost from the scan-ledger. This line names them for the operator.
|
|
129
|
+
console.error(`[MONITOR] EMERGENCY worker-terminate killed ${dropped.length} in-flight scan(s): ${dropped.slice(0, 20).join(', ')}${dropped.length > 20 ? ` (+${dropped.length - 20} more)` : ''}`);
|
|
130
|
+
}
|
|
122
131
|
return n;
|
|
123
132
|
}
|
|
124
133
|
const SCAN_TIMEOUT_MS = 300_000; // 5 minutes per package (3 sandbox runs × 90s + static scan headroom)
|
|
@@ -388,7 +397,8 @@ function runScanInWorker(extractedDir, timeoutMs, scanContext = null, signal = n
|
|
|
388
397
|
const worker = new Worker(SCAN_WORKER_PATH, {
|
|
389
398
|
workerData: { extractedDir, scanContext: scanContext || {} }
|
|
390
399
|
});
|
|
391
|
-
|
|
400
|
+
const _sc = scanContext || {};
|
|
401
|
+
_liveWorkers.set(worker, { name: _sc.name, version: _sc.version, ecosystem: _sc.ecosystem });
|
|
392
402
|
|
|
393
403
|
let settled = false;
|
|
394
404
|
let timer = null;
|
|
@@ -639,6 +649,11 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
639
649
|
// deliberately hangs the parser to evade analysis would otherwise be relabelled
|
|
640
650
|
// benign. Count as inconclusive (excluded from the FP/TP denominator).
|
|
641
651
|
updateScanStats('sandbox_inconclusive');
|
|
652
|
+
// Ledger the inconclusive timeout — the 'static_timeout' outcome existed but was
|
|
653
|
+
// emitted nowhere, so a parser-hang evasion vanished from coverage. Best-effort.
|
|
654
|
+
try {
|
|
655
|
+
appendScanLedger({ name, version, ecosystem, outcome: 'static_timeout', source: 'static_timeout' });
|
|
656
|
+
} catch { /* ledger is best-effort */ }
|
|
642
657
|
return { sandboxResult: null, staticClean: false };
|
|
643
658
|
}
|
|
644
659
|
throw staticErr;
|
|
@@ -1215,6 +1230,12 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
1215
1230
|
stats.scanned++;
|
|
1216
1231
|
stats.totalTimeMs += Date.now() - startTime;
|
|
1217
1232
|
console.error(`[MONITOR] ERROR scanning ${name}@${version}: ${err.message}`);
|
|
1233
|
+
// Ledger the terminal failure so the scan-ledger never over-states coverage (an errored
|
|
1234
|
+
// package is NOT clean). Also captures EMERGENCY worker-terminate losses, whose reject
|
|
1235
|
+
// propagates here (CLAUDE.md "no silent caps"). Best-effort; never throws.
|
|
1236
|
+
try {
|
|
1237
|
+
appendScanLedger({ name, version, ecosystem, outcome: 'error', source: 'scan_error' });
|
|
1238
|
+
} catch { /* ledger is best-effort */ }
|
|
1218
1239
|
return { sandboxResult: null, staticClean: false };
|
|
1219
1240
|
} finally {
|
|
1220
1241
|
// Cleanup temp dir
|
|
@@ -1256,15 +1277,9 @@ function timeoutPromise(ms) {
|
|
|
1256
1277
|
});
|
|
1257
1278
|
}
|
|
1258
1279
|
|
|
1259
|
-
|
|
1260
|
-
|
|
1261
|
-
|
|
1262
|
-
*/
|
|
1263
|
-
function isDailyReportDue(stats) {
|
|
1264
|
-
const parisHour = getParisHour();
|
|
1265
|
-
if (parisHour < DAILY_REPORT_HOUR) return false;
|
|
1266
|
-
return !hasReportBeenSentToday(stats);
|
|
1267
|
-
}
|
|
1280
|
+
// isDailyReportDue is the canonical gate in state.js (imported above), called per scan in
|
|
1281
|
+
// processQueueItem below. Previously a local `parisHour < 8` copy here diverged from the
|
|
1282
|
+
// daemon's `!== 8` copy; unifying in state.js removes the divergence. Still re-exported below.
|
|
1268
1283
|
|
|
1269
1284
|
/**
|
|
1270
1285
|
* Process a single item from the scan queue.
|
|
@@ -1358,6 +1373,37 @@ function computeWorkersToSpawn(targetConcurrency, activeWorkers, queueLength) {
|
|
|
1358
1373
|
return Math.max(0, Math.min(targetConcurrency - activeWorkers, queueLength));
|
|
1359
1374
|
}
|
|
1360
1375
|
|
|
1376
|
+
// ── RSS-aware worker admission (P1 OOM durable fix) ──
|
|
1377
|
+
// The pressure breaker is reactive: it stops spawning at HIGH, but the workers already in
|
|
1378
|
+
// flight overshoot RSS by ~2GB (each isolate + gVisor sandbox ~0.55GB, draining up to
|
|
1379
|
+
// SCAN_TIMEOUT) before EMERGENCY truncates the queue + kills them. This caps the OVERSHOOT at
|
|
1380
|
+
// the source — refuse a new spawn when current RSS + one worker's footprint would breach a
|
|
1381
|
+
// soft ceiling (default 80% of the EMERGENCY RSS limit), leaving headroom for in-flight drain.
|
|
1382
|
+
const RSS_SOFT_LIMIT_MB = (() => {
|
|
1383
|
+
const parsed = parseInt(process.env.MUADDIB_RSS_SOFT_LIMIT_MB, 10);
|
|
1384
|
+
if (Number.isFinite(parsed) && parsed > 0) return parsed;
|
|
1385
|
+
const hard = parseInt(process.env.MUADDIB_RSS_LIMIT_MB, 10);
|
|
1386
|
+
const base = (Number.isFinite(hard) && hard > 0) ? hard : 8500;
|
|
1387
|
+
return Math.round(base * 0.80);
|
|
1388
|
+
})();
|
|
1389
|
+
const EST_WORKER_RSS_MB = (() => {
|
|
1390
|
+
const parsed = parseInt(process.env.MUADDIB_EST_WORKER_RSS_MB, 10);
|
|
1391
|
+
return (Number.isFinite(parsed) && parsed > 0) ? parsed : 600;
|
|
1392
|
+
})();
|
|
1393
|
+
|
|
1394
|
+
/**
|
|
1395
|
+
* Pure: how many NEW scan workers the current RSS headroom allows under the soft ceiling.
|
|
1396
|
+
* `currentRssBytes` already includes the active workers, so this answers "how many MORE fit".
|
|
1397
|
+
* Returns 0 (never negative) once RSS reaches the soft limit — existing workers are NOT killed
|
|
1398
|
+
* here, they drain and free memory; ensureWorkers keeps the queue alive with 1 worker if
|
|
1399
|
+
* nothing is running. softLimitMb / estWorkerMb are injectable for tests.
|
|
1400
|
+
*/
|
|
1401
|
+
function rssAdmissionCap(currentRssBytes, softLimitMb = RSS_SOFT_LIMIT_MB, estWorkerMb = EST_WORKER_RSS_MB) {
|
|
1402
|
+
const headroomMb = softLimitMb - (currentRssBytes / 1024 / 1024);
|
|
1403
|
+
if (headroomMb <= 0) return 0;
|
|
1404
|
+
return Math.max(0, Math.floor(headroomMb / estWorkerMb));
|
|
1405
|
+
}
|
|
1406
|
+
|
|
1361
1407
|
/**
|
|
1362
1408
|
* Ensure the target number of workers are running. Non-blocking: spawns
|
|
1363
1409
|
* missing workers as background promises. Called from the daemon main loop
|
|
@@ -1365,7 +1411,23 @@ function computeWorkersToSpawn(targetConcurrency, activeWorkers, queueLength) {
|
|
|
1365
1411
|
*/
|
|
1366
1412
|
function ensureWorkers(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailable) {
|
|
1367
1413
|
if (scanQueue.length === 0) return;
|
|
1368
|
-
|
|
1414
|
+
let toSpawn = computeWorkersToSpawn(_targetConcurrency, _activeWorkers, scanQueue.length);
|
|
1415
|
+
if (toSpawn <= 0) return;
|
|
1416
|
+
|
|
1417
|
+
// RSS-aware admission (P1 OOM durable fix): cap NEW spawns by memory headroom so the
|
|
1418
|
+
// in-flight worker set can't overshoot the soft RSS ceiling. Never fully deadlock: if
|
|
1419
|
+
// headroom is gone AND nothing is running, allow exactly one so the queue still makes
|
|
1420
|
+
// forward progress (its completion frees memory). Bounds peak RSS BEFORE the reactive breaker.
|
|
1421
|
+
const rssNow = process.memoryUsage().rss;
|
|
1422
|
+
const rssCap = rssAdmissionCap(rssNow);
|
|
1423
|
+
if (toSpawn > rssCap) {
|
|
1424
|
+
if (rssCap === 0 && _activeWorkers === 0) {
|
|
1425
|
+
toSpawn = 1;
|
|
1426
|
+
} else {
|
|
1427
|
+
console.log(`[MONITOR] RSS admission: capping spawn ${toSpawn}->${rssCap} (rss=${Math.round(rssNow / 1024 / 1024)}MB soft=${RSS_SOFT_LIMIT_MB}MB active=${_activeWorkers})`);
|
|
1428
|
+
toSpawn = rssCap;
|
|
1429
|
+
}
|
|
1430
|
+
}
|
|
1369
1431
|
if (toSpawn <= 0) return;
|
|
1370
1432
|
|
|
1371
1433
|
console.log(`[MONITOR] Spawning ${toSpawn} worker(s) (active: ${_activeWorkers}, target: ${_targetConcurrency}, queue: ${scanQueue.length})`);
|
|
@@ -1757,6 +1819,7 @@ module.exports = {
|
|
|
1757
1819
|
getActiveWorkers,
|
|
1758
1820
|
terminateAllWorkers,
|
|
1759
1821
|
computeWorkersToSpawn,
|
|
1822
|
+
rssAdmissionCap,
|
|
1760
1823
|
ensureWorkers,
|
|
1761
1824
|
drainWorkers,
|
|
1762
1825
|
|
|
@@ -82,4 +82,71 @@ function enqueueScan(scanQueue, item, stats, max = MAX_SCAN_QUEUE) {
|
|
|
82
82
|
return dropped;
|
|
83
83
|
}
|
|
84
84
|
|
|
85
|
-
|
|
85
|
+
/**
|
|
86
|
+
* Bulk-evict the scan queue down to `targetKeep`, honoring the SAME protection predicate
|
|
87
|
+
* as enqueueScan and ledgering EVERY dropped item — the single-source-of-truth eviction
|
|
88
|
+
* the daemon's EMERGENCY memory breaker must use instead of a raw `splice(0, n)`.
|
|
89
|
+
*
|
|
90
|
+
* Selection: drop the oldest UNPROTECTED items first; only dip into protected items
|
|
91
|
+
* (oldest-first) if there aren't enough unprotected ones to reach the target. This keeps
|
|
92
|
+
* IOC-match / burst / first-publish / ATO scans alive through a memory emergency, exactly
|
|
93
|
+
* like the per-item cap path — closing the gap where the v2.10.88 circuit breaker silently
|
|
94
|
+
* dropped protected scans (CLAUDE.md "ne jamais perdre de scan" / "no silent caps").
|
|
95
|
+
*
|
|
96
|
+
* In-place compaction (write-pointer, O(n), preserves insertion order, no giant spread) so
|
|
97
|
+
* the daemon (which holds the same array reference) sees the mutation. Best-effort ledger;
|
|
98
|
+
* never throws. `ledgerFn` is injectable for tests; defaults to state.appendScanLedger.
|
|
99
|
+
*
|
|
100
|
+
* @returns {{dropped:number, droppedProtected:number}}
|
|
101
|
+
*/
|
|
102
|
+
function evictFromScanQueueBulk(scanQueue, targetKeep, source = 'bulk_evict', ledgerFn = null) {
|
|
103
|
+
const before = scanQueue.length;
|
|
104
|
+
const keep = Math.max(0, targetKeep | 0);
|
|
105
|
+
if (before <= keep) return { dropped: 0, droppedProtected: 0 };
|
|
106
|
+
const toDrop = before - keep;
|
|
107
|
+
|
|
108
|
+
// Victim set: oldest unprotected first, then (only if short) oldest protected.
|
|
109
|
+
const dropSet = new Set();
|
|
110
|
+
for (let i = 0; i < before && dropSet.size < toDrop; i++) {
|
|
111
|
+
if (!_isProtected(scanQueue[i])) dropSet.add(i);
|
|
112
|
+
}
|
|
113
|
+
let droppedProtected = 0;
|
|
114
|
+
if (dropSet.size < toDrop) {
|
|
115
|
+
// Not enough unprotected items: every unprotected one is already marked, so the
|
|
116
|
+
// remaining oldest-first items are protected — drop them as a last resort.
|
|
117
|
+
for (let i = 0; i < before && dropSet.size < toDrop; i++) {
|
|
118
|
+
if (!dropSet.has(i)) { dropSet.add(i); droppedProtected++; }
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
// Resolve the ledger sink once (per-call require would be 500+ lookups under emergency).
|
|
123
|
+
let appendLedger = ledgerFn;
|
|
124
|
+
if (!appendLedger) {
|
|
125
|
+
try { appendLedger = require('./state.js').appendScanLedger; } catch { appendLedger = null; }
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
// Compact survivors in place, ledgering each evicted item with an identity-preserving
|
|
129
|
+
// source (protected drops get a distinct suffix so the rare case stays visible in the rollup).
|
|
130
|
+
let w = 0;
|
|
131
|
+
for (let r = 0; r < before; r++) {
|
|
132
|
+
if (dropSet.has(r)) {
|
|
133
|
+
const item = scanQueue[r];
|
|
134
|
+
if (appendLedger && item && item.name) {
|
|
135
|
+
try {
|
|
136
|
+
appendLedger({
|
|
137
|
+
name: item.name, version: item.version, ecosystem: item.ecosystem,
|
|
138
|
+
outcome: 'dropped',
|
|
139
|
+
source: _isProtected(item) ? `${source}_protected` : source
|
|
140
|
+
});
|
|
141
|
+
} catch { /* ledger is best-effort — must never break the breaker */ }
|
|
142
|
+
}
|
|
143
|
+
} else {
|
|
144
|
+
scanQueue[w++] = scanQueue[r];
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
scanQueue.length = w;
|
|
148
|
+
|
|
149
|
+
return { dropped: toDrop, droppedProtected };
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
module.exports = { enqueueScan, evictFromScanQueueBulk, isProtected: _isProtected, MAX_SCAN_QUEUE };
|
package/src/monitor/state.js
CHANGED
|
@@ -972,7 +972,7 @@ let _scanLedgerAppendedSinceCompact = 0;
|
|
|
972
972
|
const SCAN_LEDGER_OUTCOMES = new Set([
|
|
973
973
|
'clean', 'clean_low_signal', 'clean_tooling', 'suspect', 'ml_clean', 'llm_benign',
|
|
974
974
|
'sandbox_inconclusive', 'sandbox_unconfirmed', 'confirmed',
|
|
975
|
-
'static_timeout', 'size_skip', 'dropped'
|
|
975
|
+
'static_timeout', 'size_skip', 'dropped', 'error'
|
|
976
976
|
]);
|
|
977
977
|
|
|
978
978
|
/**
|
|
@@ -1453,6 +1453,27 @@ function getParisDateString() {
|
|
|
1453
1453
|
return formatter.format(new Date());
|
|
1454
1454
|
}
|
|
1455
1455
|
|
|
1456
|
+
// Hour (Europe/Paris) at/after which the once-daily report may fire. Single source of
|
|
1457
|
+
// truth — imported by webhook.js, daemon.js and queue.js (each previously redefined it,
|
|
1458
|
+
// and webhook.js still re-exports it for back-compat).
|
|
1459
|
+
const DAILY_REPORT_HOUR = 8; // 08:00 Paris time (Europe/Paris)
|
|
1460
|
+
|
|
1461
|
+
/**
|
|
1462
|
+
* Canonical "is the daily report due?" predicate — the ONE gate, defined here in state.js
|
|
1463
|
+
* (a leaf module that daemon.js and queue.js already import, so no require cycle).
|
|
1464
|
+
*
|
|
1465
|
+
* Catch-up semantics: fire at OR AFTER 08:00 Paris, so a missed 08:00 (e.g. the daemon was
|
|
1466
|
+
* down/OOM-restarting at that minute) still fires later the SAME day — losing a whole day
|
|
1467
|
+
* was the old daemon.js `hour === 8` behaviour. But NEVER fire during the 00:00–07:59 Paris
|
|
1468
|
+
* "dead zone": a fire then stamps the NEW day's date before its 08:00 window and, because
|
|
1469
|
+
* hasReportBeenSentToday() keys off the Paris CALENDAR date, permanently suppresses that
|
|
1470
|
+
* day's real report. Replaces the two divergent copies (daemon.js `!== 8`, queue.js `< 8`).
|
|
1471
|
+
*/
|
|
1472
|
+
function isDailyReportDue(stats) {
|
|
1473
|
+
if (getParisHour() < DAILY_REPORT_HOUR) return false;
|
|
1474
|
+
return !hasReportBeenSentToday(stats);
|
|
1475
|
+
}
|
|
1476
|
+
|
|
1456
1477
|
// --- recentlyScanned dedup-set persistence (survives restarts → no re-scan storm) ---
|
|
1457
1478
|
//
|
|
1458
1479
|
// The dedup Set is in-memory only, so every restart starts it empty and re-scans the
|
|
@@ -1703,5 +1724,7 @@ module.exports = {
|
|
|
1703
1724
|
loadRecentlyScanned,
|
|
1704
1725
|
getParisHour,
|
|
1705
1726
|
getParisDateString,
|
|
1727
|
+
DAILY_REPORT_HOUR,
|
|
1728
|
+
isDailyReportDue,
|
|
1706
1729
|
loadStateRaw
|
|
1707
1730
|
};
|
package/src/monitor/webhook.js
CHANGED
|
@@ -16,6 +16,7 @@ const {
|
|
|
16
16
|
DAILY_REPORTS_LOG_DIR,
|
|
17
17
|
getParisDateString,
|
|
18
18
|
getParisHour,
|
|
19
|
+
DAILY_REPORT_HOUR,
|
|
19
20
|
loadScanStats,
|
|
20
21
|
loadDetections,
|
|
21
22
|
saveLastDailyReportDate,
|
|
@@ -60,7 +61,8 @@ const HIGH_INTENT_TYPES = new Set([
|
|
|
60
61
|
'remote_code_load', 'obfuscation_detected'
|
|
61
62
|
]);
|
|
62
63
|
|
|
63
|
-
|
|
64
|
+
// DAILY_REPORT_HOUR (=8) is imported from state.js (single source of truth) and
|
|
65
|
+
// re-exported below for back-compat (monitor.js / tests import it via webhook).
|
|
64
66
|
|
|
65
67
|
// --- Webhook alerting ---
|
|
66
68
|
|
|
@@ -1152,6 +1154,14 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
|
|
|
1152
1154
|
* @param {Map} downloadsCache - In-memory downloads cache (will be cleared)
|
|
1153
1155
|
*/
|
|
1154
1156
|
async function sendDailyReport(stats, dailyAlerts, recentlyScanned, downloadsCache) {
|
|
1157
|
+
// Dead-zone guard (defense in depth): never send or stamp before the 08:00 Paris window.
|
|
1158
|
+
// The scheduled gate (isDailyReportDue) already excludes 00:00–07:59, but an ungated /
|
|
1159
|
+
// manual / test caller firing at e.g. 00:43 would otherwise write-ahead the NEW day's date
|
|
1160
|
+
// (below) and suppress that day's real report. This makes the early stamp impossible.
|
|
1161
|
+
if (getParisHour() < DAILY_REPORT_HOUR) {
|
|
1162
|
+
console.log(`[MONITOR] Daily report suppressed: before ${DAILY_REPORT_HOUR}:00 Paris (hour=${getParisHour()})`);
|
|
1163
|
+
return;
|
|
1164
|
+
}
|
|
1155
1165
|
// Crash-safe headline: a restart-storm around report time can zero the in-memory
|
|
1156
1166
|
// counter (the monitor OOM-restarts ~10×/day). Floor scanned/clean/suspect at the
|
|
1157
1167
|
// durable scan-stats delta so we never publish "5" when ~44k were really scanned.
|
|
@@ -1171,6 +1181,10 @@ async function sendDailyReport(stats, dailyAlerts, recentlyScanned, downloadsCac
|
|
|
1171
1181
|
// Persist the monotonic scan-stats counter as the baseline for the NEXT report's
|
|
1172
1182
|
// delta. Written before the (now last) webhook so a mid-send kill can't double-count.
|
|
1173
1183
|
saveLastDailyReportDate(today, captureScanStatsBaseline());
|
|
1184
|
+
// Observability: the success path previously logged nothing, which made the late-fire bug
|
|
1185
|
+
// invisible in the journal. Log the stamped date + the actual Paris hour (an on-time 08:00
|
|
1186
|
+
// fire vs a catch-up at hour 14 are now distinguishable) + the headline count.
|
|
1187
|
+
console.log(`[MONITOR] Daily report firing for ${today} (hour=${getParisHour()} Paris, scanned=${stats.scanned})`);
|
|
1174
1188
|
|
|
1175
1189
|
// Phase 0b: compute the ledger rollup ONCE so the embed shows exactly the numbers
|
|
1176
1190
|
// we persist (no double-scan, no drift between Discord and the on-disk metrics).
|
|
@@ -1365,16 +1379,23 @@ async function sendReportNow(stats) {
|
|
|
1365
1379
|
return { sent: false, message: `Webhook failed: ${err.message}` };
|
|
1366
1380
|
}
|
|
1367
1381
|
|
|
1368
|
-
// Update lastDailyReportDate on disk
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1372
|
-
|
|
1373
|
-
|
|
1374
|
-
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
|
|
1382
|
+
// Update lastDailyReportDate on disk — but ONLY at/after 08:00 Paris. A manual report run
|
|
1383
|
+
// before 08:00 is a deliberate operator override (we still SEND it), but it must NOT stamp
|
|
1384
|
+
// today's date: hasReportBeenSentToday() keys off the Paris calendar date, so an early
|
|
1385
|
+
// stamp would suppress that day's scheduled 08:00 report (the exact failure we're fixing).
|
|
1386
|
+
if (getParisHour() >= DAILY_REPORT_HOUR) {
|
|
1387
|
+
const today = getParisDateString();
|
|
1388
|
+
const stateRaw = loadStateRaw();
|
|
1389
|
+
const state = {
|
|
1390
|
+
npmLastPackage: stateRaw.npmLastPackage || '',
|
|
1391
|
+
pypiLastPackage: stateRaw.pypiLastPackage || ''
|
|
1392
|
+
};
|
|
1393
|
+
stats.lastDailyReportDate = today;
|
|
1394
|
+
saveState(state, stats);
|
|
1395
|
+
saveLastDailyReportDate(today);
|
|
1396
|
+
} else {
|
|
1397
|
+
console.log(`[MONITOR] Manual report sent; not stamping (before ${DAILY_REPORT_HOUR}:00 Paris — the scheduled report will still fire today)`);
|
|
1398
|
+
}
|
|
1378
1399
|
|
|
1379
1400
|
return { sent: true, message: 'Daily report sent' };
|
|
1380
1401
|
}
|
package/src/output/formatter.js
CHANGED
|
@@ -3,6 +3,7 @@ const { saveSARIF } = require('../sarif.js');
|
|
|
3
3
|
const { saveCycloneDX } = require('./cyclonedx.js');
|
|
4
4
|
const { getPlaybook } = require('../response/playbooks.js');
|
|
5
5
|
const { DOMAIN_CODES, getRuleDomain } = require('../rules/index.js');
|
|
6
|
+
const { renderScoreBar } = require('../utils.js');
|
|
6
7
|
|
|
7
8
|
// P0a — domain tag formatter for CLI text output.
|
|
8
9
|
// Returns a bracketed 3-letter code like "[MAL]" / "[AUT]" / "[ENG]" / "[VUL]"
|
|
@@ -63,8 +64,7 @@ function formatOutput(result, options, ctx) {
|
|
|
63
64
|
if (!spinner) console.log(`\n[MUADDIB] Scanning ${targetPath}\n`);
|
|
64
65
|
else console.log('');
|
|
65
66
|
|
|
66
|
-
|
|
67
|
-
console.log(`[SCORE] ${result.summary.riskScore}/100 [${explainScoreBar}] ${result.summary.riskLevel}`);
|
|
67
|
+
console.log(`[SCORE] ${result.summary.riskScore}/100 [${renderScoreBar(result.summary.riskScore)}] ${result.summary.riskLevel}`);
|
|
68
68
|
if (mostSuspiciousFile) {
|
|
69
69
|
console.log(` Max file: ${mostSuspiciousFile} (${maxFileScore} pts)`);
|
|
70
70
|
if (packageScore > 0) {
|
|
@@ -140,8 +140,7 @@ function formatOutput(result, options, ctx) {
|
|
|
140
140
|
if (!spinner) console.log(`\n[MUADDIB] Scanning ${targetPath}\n`);
|
|
141
141
|
else console.log('');
|
|
142
142
|
|
|
143
|
-
|
|
144
|
-
console.log(`[SCORE] ${result.summary.riskScore}/100 [${scoreBar}] ${result.summary.riskLevel}`);
|
|
143
|
+
console.log(`[SCORE] ${result.summary.riskScore}/100 [${renderScoreBar(result.summary.riskScore)}] ${result.summary.riskLevel}`);
|
|
145
144
|
if (mostSuspiciousFile) {
|
|
146
145
|
console.log(` Max file: ${mostSuspiciousFile} (${maxFileScore} pts)`);
|
|
147
146
|
if (packageScore > 0) {
|
package/src/pipeline/executor.js
CHANGED
|
@@ -121,6 +121,11 @@ async function execute(targetPath, options, pythonDeps, warnings) {
|
|
|
121
121
|
spinner.start(`[MUADDIB] Scanning ${targetPath}...`);
|
|
122
122
|
}
|
|
123
123
|
|
|
124
|
+
// try/finally guarantees the spinner's setInterval is always cleared. A scanner
|
|
125
|
+
// throwing before the succeed() below would otherwise leave it animating AND keep
|
|
126
|
+
// the event loop alive (process hang). _stop() is idempotent.
|
|
127
|
+
try {
|
|
128
|
+
|
|
124
129
|
// Deobfuscation pre-processor (pass to AST/dataflow scanners unless disabled)
|
|
125
130
|
const deobfuscateFn = options.noDeobfuscate ? null : deobfuscate;
|
|
126
131
|
|
|
@@ -152,7 +157,7 @@ async function execute(targetPath, options, pythonDeps, warnings) {
|
|
|
152
157
|
moduleGraphThreats.push({
|
|
153
158
|
type: 'large_package_graph_truncated',
|
|
154
159
|
severity: 'MEDIUM',
|
|
155
|
-
message: `Cross-file analysis
|
|
160
|
+
message: `Cross-file analysis disabled: ${graphMeta.fileCount} files exceed the limit (${graphMeta.maxNodes}). Risk of a blind spot on a monorepo / large package — audit the sub-modules manually.`,
|
|
156
161
|
file: 'package.json',
|
|
157
162
|
line: 0,
|
|
158
163
|
fileCount: graphMeta.fileCount,
|
|
@@ -441,6 +446,9 @@ async function execute(targetPath, options, pythonDeps, warnings) {
|
|
|
441
446
|
}
|
|
442
447
|
|
|
443
448
|
return { threats, scannerErrors };
|
|
449
|
+
} finally {
|
|
450
|
+
if (spinner) spinner._stop();
|
|
451
|
+
}
|
|
444
452
|
}
|
|
445
453
|
|
|
446
454
|
module.exports = { execute, matchPythonIOCs, checkPyPITyposquatting };
|
package/src/runtime/daemon.js
CHANGED
|
@@ -1,24 +1,23 @@
|
|
|
1
1
|
const fs = require('fs');
|
|
2
2
|
const path = require('path');
|
|
3
3
|
const { run } = require('../index.js');
|
|
4
|
+
const { banner } = require('../utils.js');
|
|
4
5
|
|
|
5
6
|
let webhookUrl = null;
|
|
6
7
|
|
|
7
8
|
async function startDaemon(options = {}) {
|
|
8
9
|
webhookUrl = options.webhook || null;
|
|
9
10
|
|
|
10
|
-
console.log(
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
╚════════════════════════════════════════════╝
|
|
15
|
-
`);
|
|
11
|
+
console.log('\n' + banner([
|
|
12
|
+
"MUAD'DIB Security Daemon",
|
|
13
|
+
'Monitoring npm installs'
|
|
14
|
+
]) + '\n');
|
|
16
15
|
|
|
17
|
-
console.log('[DAEMON]
|
|
18
|
-
console.log(`[DAEMON] Webhook: ${webhookUrl ? '
|
|
19
|
-
console.log('[DAEMON] Ctrl+C
|
|
16
|
+
console.log('[DAEMON] Starting...');
|
|
17
|
+
console.log(`[DAEMON] Webhook: ${webhookUrl ? 'Configured' : 'Not configured'}`);
|
|
18
|
+
console.log('[DAEMON] Press Ctrl+C to stop\n');
|
|
20
19
|
|
|
21
|
-
//
|
|
20
|
+
// Watch the current directory
|
|
22
21
|
const cwd = process.cwd();
|
|
23
22
|
const watchers = watchDirectory(cwd);
|
|
24
23
|
|
|
@@ -32,7 +31,7 @@ async function startDaemon(options = {}) {
|
|
|
32
31
|
// Keep process alive until SIGINT
|
|
33
32
|
await new Promise((resolve) => {
|
|
34
33
|
process.once('SIGINT', () => {
|
|
35
|
-
console.log('\n[DAEMON]
|
|
34
|
+
console.log('\n[DAEMON] Stopping...');
|
|
36
35
|
cleanup();
|
|
37
36
|
resolve();
|
|
38
37
|
});
|
|
@@ -47,26 +46,26 @@ function watchDirectory(dir) {
|
|
|
47
46
|
const packageLockPath = path.join(dir, 'package-lock.json');
|
|
48
47
|
const yarnLockPath = path.join(dir, 'yarn.lock');
|
|
49
48
|
|
|
50
|
-
console.log(`[DAEMON]
|
|
49
|
+
console.log(`[DAEMON] Watching ${dir}`);
|
|
51
50
|
|
|
52
|
-
//
|
|
51
|
+
// Watch package-lock.json
|
|
53
52
|
if (fs.existsSync(packageLockPath)) {
|
|
54
53
|
const w = watchFile(packageLockPath, dir);
|
|
55
54
|
if (w) watchers.push(w);
|
|
56
55
|
}
|
|
57
56
|
|
|
58
|
-
//
|
|
57
|
+
// Watch yarn.lock
|
|
59
58
|
if (fs.existsSync(yarnLockPath)) {
|
|
60
59
|
const w = watchFile(yarnLockPath, dir);
|
|
61
60
|
if (w) watchers.push(w);
|
|
62
61
|
}
|
|
63
62
|
|
|
64
|
-
//
|
|
63
|
+
// Watch node_modules
|
|
65
64
|
if (fs.existsSync(nodeModulesPath)) {
|
|
66
65
|
watchers.push(watchNodeModules(nodeModulesPath, dir));
|
|
67
66
|
}
|
|
68
67
|
|
|
69
|
-
//
|
|
68
|
+
// Watch for node_modules creation
|
|
70
69
|
if (process.platform === 'linux') {
|
|
71
70
|
console.log('[DAEMON] Note: recursive fs.watch may not work on Linux');
|
|
72
71
|
}
|
|
@@ -75,12 +74,12 @@ function watchDirectory(dir) {
|
|
|
75
74
|
if (filename === 'node_modules' && eventType === 'rename') {
|
|
76
75
|
const nmPath = path.join(dir, 'node_modules');
|
|
77
76
|
if (fs.existsSync(nmPath)) {
|
|
78
|
-
console.log('[DAEMON] node_modules
|
|
77
|
+
console.log('[DAEMON] node_modules detected, scanning...');
|
|
79
78
|
triggerScan(dir);
|
|
80
79
|
}
|
|
81
80
|
}
|
|
82
81
|
if (filename === 'package-lock.json' || filename === 'yarn.lock') {
|
|
83
|
-
console.log(`[DAEMON] ${filename}
|
|
82
|
+
console.log(`[DAEMON] ${filename} modified, scanning...`);
|
|
84
83
|
triggerScan(dir);
|
|
85
84
|
}
|
|
86
85
|
});
|
|
@@ -106,7 +105,7 @@ function watchFile(filePath, projectDir) {
|
|
|
106
105
|
const currentMtime = fs.statSync(filePath).mtime.getTime();
|
|
107
106
|
if (currentMtime !== lastMtime) {
|
|
108
107
|
lastMtime = currentMtime;
|
|
109
|
-
console.log(`[DAEMON] ${path.basename(filePath)}
|
|
108
|
+
console.log(`[DAEMON] ${path.basename(filePath)} modified`);
|
|
110
109
|
triggerScan(projectDir);
|
|
111
110
|
}
|
|
112
111
|
} catch {
|
|
@@ -123,7 +122,7 @@ function watchFile(filePath, projectDir) {
|
|
|
123
122
|
function watchNodeModules(nodeModulesPath, projectDir) {
|
|
124
123
|
const watcher = fs.watch(nodeModulesPath, { recursive: true }, (eventType, filename) => {
|
|
125
124
|
if (filename && filename.includes('package.json')) {
|
|
126
|
-
console.log(`[DAEMON]
|
|
125
|
+
console.log(`[DAEMON] New package detected: ${filename}`);
|
|
127
126
|
triggerScan(projectDir);
|
|
128
127
|
}
|
|
129
128
|
});
|
|
@@ -147,12 +146,12 @@ function triggerScan(dir) {
|
|
|
147
146
|
const now = Date.now();
|
|
148
147
|
const state = getScanState(dir);
|
|
149
148
|
|
|
150
|
-
// Debounce:
|
|
149
|
+
// Debounce: wait 3 seconds before scanning
|
|
151
150
|
if (state.timeout) {
|
|
152
151
|
clearTimeout(state.timeout);
|
|
153
152
|
}
|
|
154
153
|
|
|
155
|
-
//
|
|
154
|
+
// Avoid over-frequent scans (minimum 10 seconds between each)
|
|
156
155
|
if (now - state.lastScanTime < 10000) {
|
|
157
156
|
state.timeout = setTimeout(() => triggerScan(dir), 10000 - (now - state.lastScanTime));
|
|
158
157
|
return;
|
|
@@ -160,19 +159,19 @@ function triggerScan(dir) {
|
|
|
160
159
|
|
|
161
160
|
state.timeout = setTimeout(async () => {
|
|
162
161
|
state.lastScanTime = Date.now();
|
|
163
|
-
console.log(`\n[DAEMON] ========== SCAN
|
|
164
|
-
console.log(`[DAEMON]
|
|
165
|
-
console.log(`[DAEMON]
|
|
162
|
+
console.log(`\n[DAEMON] ========== AUTOMATIC SCAN ==========`);
|
|
163
|
+
console.log(`[DAEMON] Target: ${dir}`);
|
|
164
|
+
console.log(`[DAEMON] Time: ${new Date().toLocaleTimeString()}\n`);
|
|
166
165
|
|
|
167
166
|
try {
|
|
168
167
|
await run(dir, { webhook: webhookUrl });
|
|
169
168
|
} catch (err) {
|
|
170
|
-
console.log(`[DAEMON]
|
|
169
|
+
console.log(`[DAEMON] Scan error: ${err.message}`);
|
|
171
170
|
}
|
|
172
171
|
|
|
173
172
|
console.log(`\n[DAEMON] ======================================\n`);
|
|
174
|
-
console.log('[DAEMON]
|
|
173
|
+
console.log('[DAEMON] Waiting for changes...');
|
|
175
174
|
}, 3000);
|
|
176
175
|
}
|
|
177
176
|
|
|
178
|
-
module.exports = { startDaemon, watchDirectory, watchFile, watchNodeModules, triggerScan, getScanState };
|
|
177
|
+
module.exports = { startDaemon, watchDirectory, watchFile, watchNodeModules, triggerScan, getScanState };
|
package/src/runtime/watch.js
CHANGED
|
@@ -6,13 +6,13 @@ function watch(targetPath) {
|
|
|
6
6
|
let debounceTimer = null;
|
|
7
7
|
const watchers = [];
|
|
8
8
|
|
|
9
|
-
console.log(`[MUADDIB]
|
|
10
|
-
console.log('[INFO] Ctrl+C
|
|
9
|
+
console.log(`[MUADDIB] Watching ${targetPath}\n`);
|
|
10
|
+
console.log('[INFO] Press Ctrl+C to stop\n');
|
|
11
11
|
|
|
12
|
-
//
|
|
12
|
+
// Initial scan
|
|
13
13
|
run(targetPath, { json: false }).catch(err => console.error('[ERROR]', err.message));
|
|
14
14
|
|
|
15
|
-
//
|
|
15
|
+
// Watch for changes
|
|
16
16
|
const watchPaths = [
|
|
17
17
|
path.join(targetPath, 'package.json'),
|
|
18
18
|
path.join(targetPath, 'package-lock.json'),
|
|
@@ -30,7 +30,7 @@ function watch(targetPath) {
|
|
|
30
30
|
if (debounceTimer) clearTimeout(debounceTimer);
|
|
31
31
|
|
|
32
32
|
debounceTimer = setTimeout(() => {
|
|
33
|
-
console.log(`\n[CHANGE] ${filename || 'unknown file'}
|
|
33
|
+
console.log(`\n[CHANGE] ${filename || 'unknown file'} modified`);
|
|
34
34
|
console.log('[MUADDIB] Re-scan...\n');
|
|
35
35
|
run(targetPath, { json: false }).catch(err => console.error('[ERROR]', err.message));
|
|
36
36
|
}, 1000);
|
|
@@ -45,7 +45,7 @@ function watch(targetPath) {
|
|
|
45
45
|
|
|
46
46
|
// Cleanup on SIGINT
|
|
47
47
|
process.once('SIGINT', () => {
|
|
48
|
-
console.log('\n[MUADDIB]
|
|
48
|
+
console.log('\n[MUADDIB] Stopping watch...');
|
|
49
49
|
for (const w of watchers) {
|
|
50
50
|
try { w.close(); } catch { /* ignore */ }
|
|
51
51
|
}
|
|
@@ -53,4 +53,4 @@ function watch(targetPath) {
|
|
|
53
53
|
});
|
|
54
54
|
}
|
|
55
55
|
|
|
56
|
-
module.exports = { watch };
|
|
56
|
+
module.exports = { watch };
|
package/src/sandbox/index.js
CHANGED
|
@@ -1035,16 +1035,18 @@ function scoreFindings(report) {
|
|
|
1035
1035
|
|
|
1036
1036
|
// ── Network report (detailed, colored) ──
|
|
1037
1037
|
|
|
1038
|
-
function generateNetworkReport(report) {
|
|
1038
|
+
function generateNetworkReport(report, useColor = process.stdout.isTTY) {
|
|
1039
1039
|
const lines = [];
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
const
|
|
1043
|
-
const
|
|
1044
|
-
const
|
|
1045
|
-
const
|
|
1046
|
-
const
|
|
1047
|
-
const
|
|
1040
|
+
// Gate ANSI on TTY so piping `sandbox-report` to a file yields clean text
|
|
1041
|
+
// (was unconditionally colored — escape codes leaked into redirected output).
|
|
1042
|
+
const RED = useColor ? '\x1b[31m' : '';
|
|
1043
|
+
const YELLOW = useColor ? '\x1b[33m' : '';
|
|
1044
|
+
const GREEN = useColor ? '\x1b[32m' : '';
|
|
1045
|
+
const CYAN = useColor ? '\x1b[36m' : '';
|
|
1046
|
+
const MAGENTA = useColor ? '\x1b[35m' : '';
|
|
1047
|
+
const BOLD = useColor ? '\x1b[1m' : '';
|
|
1048
|
+
const DIM = useColor ? '\x1b[2m' : '';
|
|
1049
|
+
const RESET = useColor ? '\x1b[0m' : '';
|
|
1048
1050
|
|
|
1049
1051
|
lines.push('');
|
|
1050
1052
|
lines.push(`${BOLD}${MAGENTA}╔══════════════════════════════════════════════════╗${RESET}`);
|