muaddib-scanner 2.11.76 → 2.11.78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. package/.githooks/pre-commit +18 -0
  2. package/README.md +15 -6
  3. package/bin/muaddib.js +18 -4
  4. package/package.json +1 -2
  5. package/{self-scan-v2.11.76.json → self-scan-v2.11.78.json} +1 -1
  6. package/src/commands/interactive.js +5 -6
  7. package/src/commands/safe-install.js +19 -19
  8. package/src/ioc/scraper.js +46 -10
  9. package/src/monitor/daemon.js +39 -28
  10. package/src/monitor/ingestion.js +32 -2
  11. package/src/monitor/queue.js +84 -21
  12. package/src/monitor/scan-queue.js +68 -1
  13. package/src/monitor/state.js +24 -1
  14. package/src/monitor/webhook.js +32 -11
  15. package/src/output/formatter.js +3 -4
  16. package/src/pipeline/executor.js +9 -1
  17. package/src/runtime/daemon.js +27 -28
  18. package/src/runtime/watch.js +7 -7
  19. package/src/sandbox/index.js +11 -9
  20. package/src/scanner/temporal-analysis.js +8 -0
  21. package/src/scanner/temporal-ast-diff.js +5 -0
  22. package/src/utils.js +60 -1
  23. package/.dockerignore +0 -7
  24. package/.env.example +0 -43
  25. package/ml-retrain/auto-labeler/auto_labeler.py +0 -312
  26. package/ml-retrain/auto-labeler/ghsa_checker.py +0 -169
  27. package/ml-retrain/auto-labeler/labeler.py +0 -256
  28. package/ml-retrain/auto-labeler/npm_checker.py +0 -228
  29. package/ml-retrain/auto-labeler/ossf_index.py +0 -178
  30. package/ml-retrain/auto-labeler/requirements.txt +0 -1
  31. package/ml-retrain/confusion-matrix.png +0 -0
  32. package/ml-retrain/model-trees-retrained.js +0 -12
  33. package/ml-retrain/retrain-report.json +0 -225
  34. package/ml-retrain/retrain.py +0 -974
  35. package/sbom.json +0 -0
  36. package/src/ml/train-bundler-detector.py +0 -725
  37. package/src/ml/train-xgboost.py +0 -957
  38. package/tools/export-model-js.py +0 -160
  39. package/tools/requirements-ml.txt +0 -5
  40. package/tools/train-classifier.py +0 -333
@@ -32,8 +32,7 @@ const {
32
32
  tarballCacheKey,
33
33
  tarballCachePath,
34
34
  appendAlert,
35
- getParisHour,
36
- hasReportBeenSentToday,
35
+ isDailyReportDue,
37
36
  MAX_DAILY_ALERTS,
38
37
  loadScanMemory,
39
38
  shouldSuppressByMemory,
@@ -64,8 +63,7 @@ const {
64
63
  computeReputationFactor,
65
64
  triageRisk,
66
65
  sendDailyReport,
67
- alertedPackageRules,
68
- DAILY_REPORT_HOUR
66
+ alertedPackageRules
69
67
  } = require('./webhook.js');
70
68
 
71
69
  // From ./temporal.js
@@ -99,10 +97,11 @@ let _targetConcurrency = BASE_CONCURRENCY;
99
97
  const SCAN_CONCURRENCY = BASE_CONCURRENCY; // legacy export — tests check this value
100
98
  let _activeWorkers = 0;
101
99
  const _workerPromises = new Set();
102
- // Live static-scan Worker threads tracked so the daemon's EMERGENCY memory
103
- // handler can terminate orphaned workers (each retains its isolate heap + parsed
104
- // ASTs). Bounded by concurrency, so it stays tiny.
105
- const _liveWorkers = new Set();
100
+ // Live static-scan Worker threads, mapped to the {name,version,ecosystem} of the scan they
101
+ // run — tracked so the daemon's EMERGENCY memory handler can terminate orphaned workers
102
+ // (each retains its isolate heap + parsed ASTs) AND name the in-flight scans it kills.
103
+ // Bounded by concurrency, so it stays tiny.
104
+ const _liveWorkers = new Map();
106
105
 
107
106
  function getTargetConcurrency() { return _targetConcurrency; }
108
107
  function setTargetConcurrency(n) { _targetConcurrency = Math.max(MIN_CONCURRENCY, Math.min(MAX_CONCURRENCY, n)); }
@@ -115,10 +114,20 @@ function getActiveWorkers() { return _activeWorkers; }
115
114
  */
116
115
  function terminateAllWorkers() {
117
116
  let n = 0;
118
- for (const w of Array.from(_liveWorkers)) {
119
- try { w.terminate(); n++; } catch { /* already gone */ }
117
+ const dropped = [];
118
+ for (const [w, item] of Array.from(_liveWorkers.entries())) {
119
+ try {
120
+ w.terminate(); n++;
121
+ if (item && item.name) dropped.push(`${item.name}@${item.version || '?'}`);
122
+ } catch { /* already gone */ }
120
123
  _liveWorkers.delete(w);
121
124
  }
125
+ if (dropped.length) {
126
+ // The terminate rejects each scan's worker promise; that reject propagates to
127
+ // scanPackage's catch, which ledgers it (outcome:'error', source scan_error) — so these
128
+ // in-flight scans are NOT lost from the scan-ledger. This line names them for the operator.
129
+ console.error(`[MONITOR] EMERGENCY worker-terminate killed ${dropped.length} in-flight scan(s): ${dropped.slice(0, 20).join(', ')}${dropped.length > 20 ? ` (+${dropped.length - 20} more)` : ''}`);
130
+ }
122
131
  return n;
123
132
  }
124
133
  const SCAN_TIMEOUT_MS = 300_000; // 5 minutes per package (3 sandbox runs × 90s + static scan headroom)
@@ -388,7 +397,8 @@ function runScanInWorker(extractedDir, timeoutMs, scanContext = null, signal = n
388
397
  const worker = new Worker(SCAN_WORKER_PATH, {
389
398
  workerData: { extractedDir, scanContext: scanContext || {} }
390
399
  });
391
- _liveWorkers.add(worker);
400
+ const _sc = scanContext || {};
401
+ _liveWorkers.set(worker, { name: _sc.name, version: _sc.version, ecosystem: _sc.ecosystem });
392
402
 
393
403
  let settled = false;
394
404
  let timer = null;
@@ -639,6 +649,11 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
639
649
  // deliberately hangs the parser to evade analysis would otherwise be relabelled
640
650
  // benign. Count as inconclusive (excluded from the FP/TP denominator).
641
651
  updateScanStats('sandbox_inconclusive');
652
+ // Ledger the inconclusive timeout — the 'static_timeout' outcome existed but was
653
+ // emitted nowhere, so a parser-hang evasion vanished from coverage. Best-effort.
654
+ try {
655
+ appendScanLedger({ name, version, ecosystem, outcome: 'static_timeout', source: 'static_timeout' });
656
+ } catch { /* ledger is best-effort */ }
642
657
  return { sandboxResult: null, staticClean: false };
643
658
  }
644
659
  throw staticErr;
@@ -1215,6 +1230,12 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
1215
1230
  stats.scanned++;
1216
1231
  stats.totalTimeMs += Date.now() - startTime;
1217
1232
  console.error(`[MONITOR] ERROR scanning ${name}@${version}: ${err.message}`);
1233
+ // Ledger the terminal failure so the scan-ledger never over-states coverage (an errored
1234
+ // package is NOT clean). Also captures EMERGENCY worker-terminate losses, whose reject
1235
+ // propagates here (CLAUDE.md "no silent caps"). Best-effort; never throws.
1236
+ try {
1237
+ appendScanLedger({ name, version, ecosystem, outcome: 'error', source: 'scan_error' });
1238
+ } catch { /* ledger is best-effort */ }
1218
1239
  return { sandboxResult: null, staticClean: false };
1219
1240
  } finally {
1220
1241
  // Cleanup temp dir
@@ -1256,15 +1277,9 @@ function timeoutPromise(ms) {
1256
1277
  });
1257
1278
  }
1258
1279
 
1259
- /**
1260
- * Helper: check if a daily report is due (Paris timezone).
1261
- * Extracted here to avoid circular dependency with monitor.js.
1262
- */
1263
- function isDailyReportDue(stats) {
1264
- const parisHour = getParisHour();
1265
- if (parisHour < DAILY_REPORT_HOUR) return false;
1266
- return !hasReportBeenSentToday(stats);
1267
- }
1280
+ // isDailyReportDue is the canonical gate in state.js (imported above), called per scan in
1281
+ // processQueueItem below. Previously a local `parisHour < 8` copy here diverged from the
1282
+ // daemon's `!== 8` copy; unifying in state.js removes the divergence. Still re-exported below.
1268
1283
 
1269
1284
  /**
1270
1285
  * Process a single item from the scan queue.
@@ -1358,6 +1373,37 @@ function computeWorkersToSpawn(targetConcurrency, activeWorkers, queueLength) {
1358
1373
  return Math.max(0, Math.min(targetConcurrency - activeWorkers, queueLength));
1359
1374
  }
1360
1375
 
1376
+ // ── RSS-aware worker admission (P1 OOM durable fix) ──
1377
+ // The pressure breaker is reactive: it stops spawning at HIGH, but the workers already in
1378
+ // flight overshoot RSS by ~2GB (each isolate + gVisor sandbox ~0.55GB, draining up to
1379
+ // SCAN_TIMEOUT) before EMERGENCY truncates the queue + kills them. This caps the OVERSHOOT at
1380
+ // the source — refuse a new spawn when current RSS + one worker's footprint would breach a
1381
+ // soft ceiling (default 80% of the EMERGENCY RSS limit), leaving headroom for in-flight drain.
1382
+ const RSS_SOFT_LIMIT_MB = (() => {
1383
+ const parsed = parseInt(process.env.MUADDIB_RSS_SOFT_LIMIT_MB, 10);
1384
+ if (Number.isFinite(parsed) && parsed > 0) return parsed;
1385
+ const hard = parseInt(process.env.MUADDIB_RSS_LIMIT_MB, 10);
1386
+ const base = (Number.isFinite(hard) && hard > 0) ? hard : 8500;
1387
+ return Math.round(base * 0.80);
1388
+ })();
1389
+ const EST_WORKER_RSS_MB = (() => {
1390
+ const parsed = parseInt(process.env.MUADDIB_EST_WORKER_RSS_MB, 10);
1391
+ return (Number.isFinite(parsed) && parsed > 0) ? parsed : 600;
1392
+ })();
1393
+
1394
+ /**
1395
+ * Pure: how many NEW scan workers the current RSS headroom allows under the soft ceiling.
1396
+ * `currentRssBytes` already includes the active workers, so this answers "how many MORE fit".
1397
+ * Returns 0 (never negative) once RSS reaches the soft limit — existing workers are NOT killed
1398
+ * here, they drain and free memory; ensureWorkers keeps the queue alive with 1 worker if
1399
+ * nothing is running. softLimitMb / estWorkerMb are injectable for tests.
1400
+ */
1401
+ function rssAdmissionCap(currentRssBytes, softLimitMb = RSS_SOFT_LIMIT_MB, estWorkerMb = EST_WORKER_RSS_MB) {
1402
+ const headroomMb = softLimitMb - (currentRssBytes / 1024 / 1024);
1403
+ if (headroomMb <= 0) return 0;
1404
+ return Math.max(0, Math.floor(headroomMb / estWorkerMb));
1405
+ }
1406
+
1361
1407
  /**
1362
1408
  * Ensure the target number of workers are running. Non-blocking: spawns
1363
1409
  * missing workers as background promises. Called from the daemon main loop
@@ -1365,7 +1411,23 @@ function computeWorkersToSpawn(targetConcurrency, activeWorkers, queueLength) {
1365
1411
  */
1366
1412
  function ensureWorkers(scanQueue, stats, dailyAlerts, recentlyScanned, downloadsCache, sandboxAvailable) {
1367
1413
  if (scanQueue.length === 0) return;
1368
- const toSpawn = computeWorkersToSpawn(_targetConcurrency, _activeWorkers, scanQueue.length);
1414
+ let toSpawn = computeWorkersToSpawn(_targetConcurrency, _activeWorkers, scanQueue.length);
1415
+ if (toSpawn <= 0) return;
1416
+
1417
+ // RSS-aware admission (P1 OOM durable fix): cap NEW spawns by memory headroom so the
1418
+ // in-flight worker set can't overshoot the soft RSS ceiling. Never fully deadlock: if
1419
+ // headroom is gone AND nothing is running, allow exactly one so the queue still makes
1420
+ // forward progress (its completion frees memory). Bounds peak RSS BEFORE the reactive breaker.
1421
+ const rssNow = process.memoryUsage().rss;
1422
+ const rssCap = rssAdmissionCap(rssNow);
1423
+ if (toSpawn > rssCap) {
1424
+ if (rssCap === 0 && _activeWorkers === 0) {
1425
+ toSpawn = 1;
1426
+ } else {
1427
+ console.log(`[MONITOR] RSS admission: capping spawn ${toSpawn}->${rssCap} (rss=${Math.round(rssNow / 1024 / 1024)}MB soft=${RSS_SOFT_LIMIT_MB}MB active=${_activeWorkers})`);
1428
+ toSpawn = rssCap;
1429
+ }
1430
+ }
1369
1431
  if (toSpawn <= 0) return;
1370
1432
 
1371
1433
  console.log(`[MONITOR] Spawning ${toSpawn} worker(s) (active: ${_activeWorkers}, target: ${_targetConcurrency}, queue: ${scanQueue.length})`);
@@ -1757,6 +1819,7 @@ module.exports = {
1757
1819
  getActiveWorkers,
1758
1820
  terminateAllWorkers,
1759
1821
  computeWorkersToSpawn,
1822
+ rssAdmissionCap,
1760
1823
  ensureWorkers,
1761
1824
  drainWorkers,
1762
1825
 
@@ -82,4 +82,71 @@ function enqueueScan(scanQueue, item, stats, max = MAX_SCAN_QUEUE) {
82
82
  return dropped;
83
83
  }
84
84
 
85
- module.exports = { enqueueScan, MAX_SCAN_QUEUE };
85
+ /**
86
+ * Bulk-evict the scan queue down to `targetKeep`, honoring the SAME protection predicate
87
+ * as enqueueScan and ledgering EVERY dropped item — the single-source-of-truth eviction
88
+ * the daemon's EMERGENCY memory breaker must use instead of a raw `splice(0, n)`.
89
+ *
90
+ * Selection: drop the oldest UNPROTECTED items first; only dip into protected items
91
+ * (oldest-first) if there aren't enough unprotected ones to reach the target. This keeps
92
+ * IOC-match / burst / first-publish / ATO scans alive through a memory emergency, exactly
93
+ * like the per-item cap path — closing the gap where the v2.10.88 circuit breaker silently
94
+ * dropped protected scans (CLAUDE.md "ne jamais perdre de scan" / "no silent caps").
95
+ *
96
+ * In-place compaction (write-pointer, O(n), preserves insertion order, no giant spread) so
97
+ * the daemon (which holds the same array reference) sees the mutation. Best-effort ledger;
98
+ * never throws. `ledgerFn` is injectable for tests; defaults to state.appendScanLedger.
99
+ *
100
+ * @returns {{dropped:number, droppedProtected:number}}
101
+ */
102
+ function evictFromScanQueueBulk(scanQueue, targetKeep, source = 'bulk_evict', ledgerFn = null) {
103
+ const before = scanQueue.length;
104
+ const keep = Math.max(0, targetKeep | 0);
105
+ if (before <= keep) return { dropped: 0, droppedProtected: 0 };
106
+ const toDrop = before - keep;
107
+
108
+ // Victim set: oldest unprotected first, then (only if short) oldest protected.
109
+ const dropSet = new Set();
110
+ for (let i = 0; i < before && dropSet.size < toDrop; i++) {
111
+ if (!_isProtected(scanQueue[i])) dropSet.add(i);
112
+ }
113
+ let droppedProtected = 0;
114
+ if (dropSet.size < toDrop) {
115
+ // Not enough unprotected items: every unprotected one is already marked, so the
116
+ // remaining oldest-first items are protected — drop them as a last resort.
117
+ for (let i = 0; i < before && dropSet.size < toDrop; i++) {
118
+ if (!dropSet.has(i)) { dropSet.add(i); droppedProtected++; }
119
+ }
120
+ }
121
+
122
+ // Resolve the ledger sink once (per-call require would be 500+ lookups under emergency).
123
+ let appendLedger = ledgerFn;
124
+ if (!appendLedger) {
125
+ try { appendLedger = require('./state.js').appendScanLedger; } catch { appendLedger = null; }
126
+ }
127
+
128
+ // Compact survivors in place, ledgering each evicted item with an identity-preserving
129
+ // source (protected drops get a distinct suffix so the rare case stays visible in the rollup).
130
+ let w = 0;
131
+ for (let r = 0; r < before; r++) {
132
+ if (dropSet.has(r)) {
133
+ const item = scanQueue[r];
134
+ if (appendLedger && item && item.name) {
135
+ try {
136
+ appendLedger({
137
+ name: item.name, version: item.version, ecosystem: item.ecosystem,
138
+ outcome: 'dropped',
139
+ source: _isProtected(item) ? `${source}_protected` : source
140
+ });
141
+ } catch { /* ledger is best-effort — must never break the breaker */ }
142
+ }
143
+ } else {
144
+ scanQueue[w++] = scanQueue[r];
145
+ }
146
+ }
147
+ scanQueue.length = w;
148
+
149
+ return { dropped: toDrop, droppedProtected };
150
+ }
151
+
152
+ module.exports = { enqueueScan, evictFromScanQueueBulk, isProtected: _isProtected, MAX_SCAN_QUEUE };
@@ -972,7 +972,7 @@ let _scanLedgerAppendedSinceCompact = 0;
972
972
  const SCAN_LEDGER_OUTCOMES = new Set([
973
973
  'clean', 'clean_low_signal', 'clean_tooling', 'suspect', 'ml_clean', 'llm_benign',
974
974
  'sandbox_inconclusive', 'sandbox_unconfirmed', 'confirmed',
975
- 'static_timeout', 'size_skip', 'dropped'
975
+ 'static_timeout', 'size_skip', 'dropped', 'error'
976
976
  ]);
977
977
 
978
978
  /**
@@ -1453,6 +1453,27 @@ function getParisDateString() {
1453
1453
  return formatter.format(new Date());
1454
1454
  }
1455
1455
 
1456
+ // Hour (Europe/Paris) at/after which the once-daily report may fire. Single source of
1457
+ // truth — imported by webhook.js, daemon.js and queue.js (each previously redefined it,
1458
+ // and webhook.js still re-exports it for back-compat).
1459
+ const DAILY_REPORT_HOUR = 8; // 08:00 Paris time (Europe/Paris)
1460
+
1461
+ /**
1462
+ * Canonical "is the daily report due?" predicate — the ONE gate, defined here in state.js
1463
+ * (a leaf module that daemon.js and queue.js already import, so no require cycle).
1464
+ *
1465
+ * Catch-up semantics: fire at OR AFTER 08:00 Paris, so a missed 08:00 (e.g. the daemon was
1466
+ * down/OOM-restarting at that minute) still fires later the SAME day — losing a whole day
1467
+ * was the old daemon.js `hour === 8` behaviour. But NEVER fire during the 00:00–07:59 Paris
1468
+ * "dead zone": a fire then stamps the NEW day's date before its 08:00 window and, because
1469
+ * hasReportBeenSentToday() keys off the Paris CALENDAR date, permanently suppresses that
1470
+ * day's real report. Replaces the two divergent copies (daemon.js `!== 8`, queue.js `< 8`).
1471
+ */
1472
+ function isDailyReportDue(stats) {
1473
+ if (getParisHour() < DAILY_REPORT_HOUR) return false;
1474
+ return !hasReportBeenSentToday(stats);
1475
+ }
1476
+
1456
1477
  // --- recentlyScanned dedup-set persistence (survives restarts → no re-scan storm) ---
1457
1478
  //
1458
1479
  // The dedup Set is in-memory only, so every restart starts it empty and re-scans the
@@ -1703,5 +1724,7 @@ module.exports = {
1703
1724
  loadRecentlyScanned,
1704
1725
  getParisHour,
1705
1726
  getParisDateString,
1727
+ DAILY_REPORT_HOUR,
1728
+ isDailyReportDue,
1706
1729
  loadStateRaw
1707
1730
  };
@@ -16,6 +16,7 @@ const {
16
16
  DAILY_REPORTS_LOG_DIR,
17
17
  getParisDateString,
18
18
  getParisHour,
19
+ DAILY_REPORT_HOUR,
19
20
  loadScanStats,
20
21
  loadDetections,
21
22
  saveLastDailyReportDate,
@@ -60,7 +61,8 @@ const HIGH_INTENT_TYPES = new Set([
60
61
  'remote_code_load', 'obfuscation_detected'
61
62
  ]);
62
63
 
63
- const DAILY_REPORT_HOUR = 8; // 08:00 Paris time (Europe/Paris)
64
+ // DAILY_REPORT_HOUR (=8) is imported from state.js (single source of truth) and
65
+ // re-exported below for back-compat (monitor.js / tests import it via webhook).
64
66
 
65
67
  // --- Webhook alerting ---
66
68
 
@@ -1152,6 +1154,14 @@ function buildDailyReportEmbed(stats, dailyAlerts, ledgerRollup) {
1152
1154
  * @param {Map} downloadsCache - In-memory downloads cache (will be cleared)
1153
1155
  */
1154
1156
  async function sendDailyReport(stats, dailyAlerts, recentlyScanned, downloadsCache) {
1157
+ // Dead-zone guard (defense in depth): never send or stamp before the 08:00 Paris window.
1158
+ // The scheduled gate (isDailyReportDue) already excludes 00:00–07:59, but an ungated /
1159
+ // manual / test caller firing at e.g. 00:43 would otherwise write-ahead the NEW day's date
1160
+ // (below) and suppress that day's real report. This makes the early stamp impossible.
1161
+ if (getParisHour() < DAILY_REPORT_HOUR) {
1162
+ console.log(`[MONITOR] Daily report suppressed: before ${DAILY_REPORT_HOUR}:00 Paris (hour=${getParisHour()})`);
1163
+ return;
1164
+ }
1155
1165
  // Crash-safe headline: a restart-storm around report time can zero the in-memory
1156
1166
  // counter (the monitor OOM-restarts ~10×/day). Floor scanned/clean/suspect at the
1157
1167
  // durable scan-stats delta so we never publish "5" when ~44k were really scanned.
@@ -1171,6 +1181,10 @@ async function sendDailyReport(stats, dailyAlerts, recentlyScanned, downloadsCac
1171
1181
  // Persist the monotonic scan-stats counter as the baseline for the NEXT report's
1172
1182
  // delta. Written before the (now last) webhook so a mid-send kill can't double-count.
1173
1183
  saveLastDailyReportDate(today, captureScanStatsBaseline());
1184
+ // Observability: the success path previously logged nothing, which made the late-fire bug
1185
+ // invisible in the journal. Log the stamped date + the actual Paris hour (an on-time 08:00
1186
+ // fire vs a catch-up at hour 14 are now distinguishable) + the headline count.
1187
+ console.log(`[MONITOR] Daily report firing for ${today} (hour=${getParisHour()} Paris, scanned=${stats.scanned})`);
1174
1188
 
1175
1189
  // Phase 0b: compute the ledger rollup ONCE so the embed shows exactly the numbers
1176
1190
  // we persist (no double-scan, no drift between Discord and the on-disk metrics).
@@ -1365,16 +1379,23 @@ async function sendReportNow(stats) {
1365
1379
  return { sent: false, message: `Webhook failed: ${err.message}` };
1366
1380
  }
1367
1381
 
1368
- // Update lastDailyReportDate on disk
1369
- const today = getParisDateString();
1370
- const stateRaw = loadStateRaw();
1371
- const state = {
1372
- npmLastPackage: stateRaw.npmLastPackage || '',
1373
- pypiLastPackage: stateRaw.pypiLastPackage || ''
1374
- };
1375
- stats.lastDailyReportDate = today;
1376
- saveState(state, stats);
1377
- saveLastDailyReportDate(today);
1382
+ // Update lastDailyReportDate on disk — but ONLY at/after 08:00 Paris. A manual report run
1383
+ // before 08:00 is a deliberate operator override (we still SEND it), but it must NOT stamp
1384
+ // today's date: hasReportBeenSentToday() keys off the Paris calendar date, so an early
1385
+ // stamp would suppress that day's scheduled 08:00 report (the exact failure we're fixing).
1386
+ if (getParisHour() >= DAILY_REPORT_HOUR) {
1387
+ const today = getParisDateString();
1388
+ const stateRaw = loadStateRaw();
1389
+ const state = {
1390
+ npmLastPackage: stateRaw.npmLastPackage || '',
1391
+ pypiLastPackage: stateRaw.pypiLastPackage || ''
1392
+ };
1393
+ stats.lastDailyReportDate = today;
1394
+ saveState(state, stats);
1395
+ saveLastDailyReportDate(today);
1396
+ } else {
1397
+ console.log(`[MONITOR] Manual report sent; not stamping (before ${DAILY_REPORT_HOUR}:00 Paris — the scheduled report will still fire today)`);
1398
+ }
1378
1399
 
1379
1400
  return { sent: true, message: 'Daily report sent' };
1380
1401
  }
@@ -3,6 +3,7 @@ const { saveSARIF } = require('../sarif.js');
3
3
  const { saveCycloneDX } = require('./cyclonedx.js');
4
4
  const { getPlaybook } = require('../response/playbooks.js');
5
5
  const { DOMAIN_CODES, getRuleDomain } = require('../rules/index.js');
6
+ const { renderScoreBar } = require('../utils.js');
6
7
 
7
8
  // P0a — domain tag formatter for CLI text output.
8
9
  // Returns a bracketed 3-letter code like "[MAL]" / "[AUT]" / "[ENG]" / "[VUL]"
@@ -63,8 +64,7 @@ function formatOutput(result, options, ctx) {
63
64
  if (!spinner) console.log(`\n[MUADDIB] Scanning ${targetPath}\n`);
64
65
  else console.log('');
65
66
 
66
- const explainScoreBar = '█'.repeat(Math.floor(result.summary.riskScore / 5)) + '░'.repeat(20 - Math.floor(result.summary.riskScore / 5));
67
- console.log(`[SCORE] ${result.summary.riskScore}/100 [${explainScoreBar}] ${result.summary.riskLevel}`);
67
+ console.log(`[SCORE] ${result.summary.riskScore}/100 [${renderScoreBar(result.summary.riskScore)}] ${result.summary.riskLevel}`);
68
68
  if (mostSuspiciousFile) {
69
69
  console.log(` Max file: ${mostSuspiciousFile} (${maxFileScore} pts)`);
70
70
  if (packageScore > 0) {
@@ -140,8 +140,7 @@ function formatOutput(result, options, ctx) {
140
140
  if (!spinner) console.log(`\n[MUADDIB] Scanning ${targetPath}\n`);
141
141
  else console.log('');
142
142
 
143
- const scoreBar = '█'.repeat(Math.floor(result.summary.riskScore / 5)) + '░'.repeat(20 - Math.floor(result.summary.riskScore / 5));
144
- console.log(`[SCORE] ${result.summary.riskScore}/100 [${scoreBar}] ${result.summary.riskLevel}`);
143
+ console.log(`[SCORE] ${result.summary.riskScore}/100 [${renderScoreBar(result.summary.riskScore)}] ${result.summary.riskLevel}`);
145
144
  if (mostSuspiciousFile) {
146
145
  console.log(` Max file: ${mostSuspiciousFile} (${maxFileScore} pts)`);
147
146
  if (packageScore > 0) {
@@ -121,6 +121,11 @@ async function execute(targetPath, options, pythonDeps, warnings) {
121
121
  spinner.start(`[MUADDIB] Scanning ${targetPath}...`);
122
122
  }
123
123
 
124
+ // try/finally guarantees the spinner's setInterval is always cleared. A scanner
125
+ // throwing before the succeed() below would otherwise leave it animating AND keep
126
+ // the event loop alive (process hang). _stop() is idempotent.
127
+ try {
128
+
124
129
  // Deobfuscation pre-processor (pass to AST/dataflow scanners unless disabled)
125
130
  const deobfuscateFn = options.noDeobfuscate ? null : deobfuscate;
126
131
 
@@ -152,7 +157,7 @@ async function execute(targetPath, options, pythonDeps, warnings) {
152
157
  moduleGraphThreats.push({
153
158
  type: 'large_package_graph_truncated',
154
159
  severity: 'MEDIUM',
155
- message: `Cross-file analysis désactivée : ${graphMeta.fileCount} fichiers dépassent la limite (${graphMeta.maxNodes}). Risque de blind spot sur monorepo / large package — auditer les sous-modules manuellement.`,
160
+ message: `Cross-file analysis disabled: ${graphMeta.fileCount} files exceed the limit (${graphMeta.maxNodes}). Risk of a blind spot on a monorepo / large package — audit the sub-modules manually.`,
156
161
  file: 'package.json',
157
162
  line: 0,
158
163
  fileCount: graphMeta.fileCount,
@@ -441,6 +446,9 @@ async function execute(targetPath, options, pythonDeps, warnings) {
441
446
  }
442
447
 
443
448
  return { threats, scannerErrors };
449
+ } finally {
450
+ if (spinner) spinner._stop();
451
+ }
444
452
  }
445
453
 
446
454
  module.exports = { execute, matchPythonIOCs, checkPyPITyposquatting };
@@ -1,24 +1,23 @@
1
1
  const fs = require('fs');
2
2
  const path = require('path');
3
3
  const { run } = require('../index.js');
4
+ const { banner } = require('../utils.js');
4
5
 
5
6
  let webhookUrl = null;
6
7
 
7
8
  async function startDaemon(options = {}) {
8
9
  webhookUrl = options.webhook || null;
9
10
 
10
- console.log(`
11
- ╔════════════════════════════════════════════╗
12
- ║ MUAD'DIB Security Daemon ║
13
- ║ Surveillance npm install active ║
14
- ╚════════════════════════════════════════════╝
15
- `);
11
+ console.log('\n' + banner([
12
+ "MUAD'DIB Security Daemon",
13
+ 'Monitoring npm installs'
14
+ ]) + '\n');
16
15
 
17
- console.log('[DAEMON] Demarrage...');
18
- console.log(`[DAEMON] Webhook: ${webhookUrl ? 'Configure' : 'Non configure'}`);
19
- console.log('[DAEMON] Ctrl+C pour arreter\n');
16
+ console.log('[DAEMON] Starting...');
17
+ console.log(`[DAEMON] Webhook: ${webhookUrl ? 'Configured' : 'Not configured'}`);
18
+ console.log('[DAEMON] Press Ctrl+C to stop\n');
20
19
 
21
- // Surveille le dossier courant
20
+ // Watch the current directory
22
21
  const cwd = process.cwd();
23
22
  const watchers = watchDirectory(cwd);
24
23
 
@@ -32,7 +31,7 @@ async function startDaemon(options = {}) {
32
31
  // Keep process alive until SIGINT
33
32
  await new Promise((resolve) => {
34
33
  process.once('SIGINT', () => {
35
- console.log('\n[DAEMON] Arret...');
34
+ console.log('\n[DAEMON] Stopping...');
36
35
  cleanup();
37
36
  resolve();
38
37
  });
@@ -47,26 +46,26 @@ function watchDirectory(dir) {
47
46
  const packageLockPath = path.join(dir, 'package-lock.json');
48
47
  const yarnLockPath = path.join(dir, 'yarn.lock');
49
48
 
50
- console.log(`[DAEMON] Surveillance de ${dir}`);
49
+ console.log(`[DAEMON] Watching ${dir}`);
51
50
 
52
- // Surveille package-lock.json
51
+ // Watch package-lock.json
53
52
  if (fs.existsSync(packageLockPath)) {
54
53
  const w = watchFile(packageLockPath, dir);
55
54
  if (w) watchers.push(w);
56
55
  }
57
56
 
58
- // Surveille yarn.lock
57
+ // Watch yarn.lock
59
58
  if (fs.existsSync(yarnLockPath)) {
60
59
  const w = watchFile(yarnLockPath, dir);
61
60
  if (w) watchers.push(w);
62
61
  }
63
62
 
64
- // Surveille node_modules
63
+ // Watch node_modules
65
64
  if (fs.existsSync(nodeModulesPath)) {
66
65
  watchers.push(watchNodeModules(nodeModulesPath, dir));
67
66
  }
68
67
 
69
- // Surveille la creation de node_modules
68
+ // Watch for node_modules creation
70
69
  if (process.platform === 'linux') {
71
70
  console.log('[DAEMON] Note: recursive fs.watch may not work on Linux');
72
71
  }
@@ -75,12 +74,12 @@ function watchDirectory(dir) {
75
74
  if (filename === 'node_modules' && eventType === 'rename') {
76
75
  const nmPath = path.join(dir, 'node_modules');
77
76
  if (fs.existsSync(nmPath)) {
78
- console.log('[DAEMON] node_modules detecte, scan en cours...');
77
+ console.log('[DAEMON] node_modules detected, scanning...');
79
78
  triggerScan(dir);
80
79
  }
81
80
  }
82
81
  if (filename === 'package-lock.json' || filename === 'yarn.lock') {
83
- console.log(`[DAEMON] ${filename} modifie, scan en cours...`);
82
+ console.log(`[DAEMON] ${filename} modified, scanning...`);
84
83
  triggerScan(dir);
85
84
  }
86
85
  });
@@ -106,7 +105,7 @@ function watchFile(filePath, projectDir) {
106
105
  const currentMtime = fs.statSync(filePath).mtime.getTime();
107
106
  if (currentMtime !== lastMtime) {
108
107
  lastMtime = currentMtime;
109
- console.log(`[DAEMON] ${path.basename(filePath)} modifie`);
108
+ console.log(`[DAEMON] ${path.basename(filePath)} modified`);
110
109
  triggerScan(projectDir);
111
110
  }
112
111
  } catch {
@@ -123,7 +122,7 @@ function watchFile(filePath, projectDir) {
123
122
  function watchNodeModules(nodeModulesPath, projectDir) {
124
123
  const watcher = fs.watch(nodeModulesPath, { recursive: true }, (eventType, filename) => {
125
124
  if (filename && filename.includes('package.json')) {
126
- console.log(`[DAEMON] Nouveau package detecte: ${filename}`);
125
+ console.log(`[DAEMON] New package detected: ${filename}`);
127
126
  triggerScan(projectDir);
128
127
  }
129
128
  });
@@ -147,12 +146,12 @@ function triggerScan(dir) {
147
146
  const now = Date.now();
148
147
  const state = getScanState(dir);
149
148
 
150
- // Debounce: attend 3 secondes avant de scanner
149
+ // Debounce: wait 3 seconds before scanning
151
150
  if (state.timeout) {
152
151
  clearTimeout(state.timeout);
153
152
  }
154
153
 
155
- // Evite les scans trop frequents (minimum 10 secondes entre chaque)
154
+ // Avoid over-frequent scans (minimum 10 seconds between each)
156
155
  if (now - state.lastScanTime < 10000) {
157
156
  state.timeout = setTimeout(() => triggerScan(dir), 10000 - (now - state.lastScanTime));
158
157
  return;
@@ -160,19 +159,19 @@ function triggerScan(dir) {
160
159
 
161
160
  state.timeout = setTimeout(async () => {
162
161
  state.lastScanTime = Date.now();
163
- console.log(`\n[DAEMON] ========== SCAN AUTOMATIQUE ==========`);
164
- console.log(`[DAEMON] Cible: ${dir}`);
165
- console.log(`[DAEMON] Heure: ${new Date().toLocaleTimeString()}\n`);
162
+ console.log(`\n[DAEMON] ========== AUTOMATIC SCAN ==========`);
163
+ console.log(`[DAEMON] Target: ${dir}`);
164
+ console.log(`[DAEMON] Time: ${new Date().toLocaleTimeString()}\n`);
166
165
 
167
166
  try {
168
167
  await run(dir, { webhook: webhookUrl });
169
168
  } catch (err) {
170
- console.log(`[DAEMON] Erreur scan: ${err.message}`);
169
+ console.log(`[DAEMON] Scan error: ${err.message}`);
171
170
  }
172
171
 
173
172
  console.log(`\n[DAEMON] ======================================\n`);
174
- console.log('[DAEMON] En attente de modifications...');
173
+ console.log('[DAEMON] Waiting for changes...');
175
174
  }, 3000);
176
175
  }
177
176
 
178
- module.exports = { startDaemon, watchDirectory, watchFile, watchNodeModules, triggerScan, getScanState };
177
+ module.exports = { startDaemon, watchDirectory, watchFile, watchNodeModules, triggerScan, getScanState };
@@ -6,13 +6,13 @@ function watch(targetPath) {
6
6
  let debounceTimer = null;
7
7
  const watchers = [];
8
8
 
9
- console.log(`[MUADDIB] Surveillance de ${targetPath}\n`);
10
- console.log('[INFO] Ctrl+C pour arreter\n');
9
+ console.log(`[MUADDIB] Watching ${targetPath}\n`);
10
+ console.log('[INFO] Press Ctrl+C to stop\n');
11
11
 
12
- // Scan initial
12
+ // Initial scan
13
13
  run(targetPath, { json: false }).catch(err => console.error('[ERROR]', err.message));
14
14
 
15
- // Surveille les changements
15
+ // Watch for changes
16
16
  const watchPaths = [
17
17
  path.join(targetPath, 'package.json'),
18
18
  path.join(targetPath, 'package-lock.json'),
@@ -30,7 +30,7 @@ function watch(targetPath) {
30
30
  if (debounceTimer) clearTimeout(debounceTimer);
31
31
 
32
32
  debounceTimer = setTimeout(() => {
33
- console.log(`\n[CHANGE] ${filename || 'unknown file'} modifie`);
33
+ console.log(`\n[CHANGE] ${filename || 'unknown file'} modified`);
34
34
  console.log('[MUADDIB] Re-scan...\n');
35
35
  run(targetPath, { json: false }).catch(err => console.error('[ERROR]', err.message));
36
36
  }, 1000);
@@ -45,7 +45,7 @@ function watch(targetPath) {
45
45
 
46
46
  // Cleanup on SIGINT
47
47
  process.once('SIGINT', () => {
48
- console.log('\n[MUADDIB] Arret surveillance...');
48
+ console.log('\n[MUADDIB] Stopping watch...');
49
49
  for (const w of watchers) {
50
50
  try { w.close(); } catch { /* ignore */ }
51
51
  }
@@ -53,4 +53,4 @@ function watch(targetPath) {
53
53
  });
54
54
  }
55
55
 
56
- module.exports = { watch };
56
+ module.exports = { watch };
@@ -1035,16 +1035,18 @@ function scoreFindings(report) {
1035
1035
 
1036
1036
  // ── Network report (detailed, colored) ──
1037
1037
 
1038
- function generateNetworkReport(report) {
1038
+ function generateNetworkReport(report, useColor = process.stdout.isTTY) {
1039
1039
  const lines = [];
1040
- const RED = '\x1b[31m';
1041
- const YELLOW = '\x1b[33m';
1042
- const GREEN = '\x1b[32m';
1043
- const CYAN = '\x1b[36m';
1044
- const MAGENTA = '\x1b[35m';
1045
- const BOLD = '\x1b[1m';
1046
- const DIM = '\x1b[2m';
1047
- const RESET = '\x1b[0m';
1040
+ // Gate ANSI on TTY so piping `sandbox-report` to a file yields clean text
1041
+ // (was unconditionally colored — escape codes leaked into redirected output).
1042
+ const RED = useColor ? '\x1b[31m' : '';
1043
+ const YELLOW = useColor ? '\x1b[33m' : '';
1044
+ const GREEN = useColor ? '\x1b[32m' : '';
1045
+ const CYAN = useColor ? '\x1b[36m' : '';
1046
+ const MAGENTA = useColor ? '\x1b[35m' : '';
1047
+ const BOLD = useColor ? '\x1b[1m' : '';
1048
+ const DIM = useColor ? '\x1b[2m' : '';
1049
+ const RESET = useColor ? '\x1b[0m' : '';
1048
1050
 
1049
1051
  lines.push('');
1050
1052
  lines.push(`${BOLD}${MAGENTA}╔══════════════════════════════════════════════════╗${RESET}`);