muaddib-scanner 2.10.100 → 2.10.101

This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "muaddib-scanner",
-  "version": "2.10.100",
+  "version": "2.10.101",
   "description": "Supply-chain threat detection & response for npm & PyPI/Python",
   "main": "src/index.js",
   "bin": {
@@ -5,7 +5,7 @@ const os = require('os');
 const v8 = require('v8');
 const { isDockerAvailable, SANDBOX_CONCURRENCY_MAX } = require('../sandbox/index.js');
 const { setVerboseMode, isSandboxEnabled, isCanaryEnabled, isLlmDetectiveEnabled, getLlmDetectiveMode, DOWNLOADS_CACHE_TTL } = require('./classify.js');
-const { loadState, saveState, loadDailyStats, saveDailyStats, purgeTarballCache, getParisHour, atomicWriteFileSync, saveNpmSeq, ALERTS_FILE } = require('./state.js');
+const { loadState, saveState, loadDailyStats, saveDailyStats, purgeTarballCache, getParisHour, atomicWriteFileSync, saveNpmSeq, ALERTS_FILE, runStateMigrations } = require('./state.js');
 const { isTemporalEnabled, isTemporalAstEnabled, isTemporalPublishEnabled, isTemporalMaintainerEnabled } = require('./temporal.js');
 const { pendingGrouped, flushScopeGroup, sendDailyReport, DAILY_REPORT_HOUR, alertedPackageRules } = require('./webhook.js');
 const { poll } = require('./ingestion.js');
@@ -14,6 +14,12 @@ const { computeTarget, ADJUST_INTERVAL_MS, BASE_CONCURRENCY, resetDeltas } = req
 const { startHealthcheck } = require('./healthcheck.js');
 const { startDeferredWorker, stopDeferredWorker, persistDeferredQueue, restoreDeferredQueue, clearDeferredQueue } = require('./deferred-sandbox.js');
 const { clearMetadataCache } = require('../scanner/temporal-analysis.js');
+// Caches not previously cleared by handleMemoryPressure (OOM fix). These live
+// in the main thread and are populated by temporal-ast-diff and the typosquat
+// scanner, neither of which runs in the static-scan worker.
+const { clearMetadataCache: clearTyposquatMetadataCache } = require('../scanner/typosquat.js');
+const { clearFileListCache } = require('../utils.js');
+const { clearASTCache } = require('../shared/constants.js');

 const POLL_INTERVAL = 60_000;
 const PROCESS_LOOP_INTERVAL = 2_000; // Queue check interval when empty
@@ -401,6 +407,13 @@ function handleMemoryPressure(level, ratio, recentlyScanned, downloadsCache, sca
   console.error(`[MONITOR] MEMORY PRESSURE CRITICAL: heap at ${pct}% — stopping ingestion, clearing scanner caches`);
   // temporal-analysis._metadataCache (200 entries × full npm registry metadata)
   try { clearMetadataCache(); } catch {}
+  // typosquat metadataCache (500 entries × npm registry metadata for typosquat scoring)
+  try { clearTyposquatMetadataCache(); } catch {}
+  // utils._fileListCache, utils._fileContentCache, shared/constants._astCache
+  // — populated by temporal-ast-diff (main-thread tarball download + AST parse).
+  // Each AST entry can be MB-sized for bundled outputs.
+  try { clearFileListCache(); } catch {}
+  try { clearASTCache(); } catch {}
   // pendingGrouped webhook buffers
   for (const [scope, group] of pendingGrouped) {
     clearTimeout(group.timer);
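The cache clears above run at the critical level of handleMemoryPressure. A minimal sketch of how such a level can be derived from V8 heap statistics; the 90% threshold and the function name here are illustrative assumptions, not taken from this diff:

    // Illustrative only: thresholds and wiring are assumed, not shown in the diff.
    const v8 = require('v8');

    function heapPressureLevel() {
      const { used_heap_size, heap_size_limit } = v8.getHeapStatistics();
      const ratio = used_heap_size / heap_size_limit;
      if (ratio >= 0.90) return 'critical'; // would trigger the cache clears above
      if (ratio >= 0.75) return 'warning';
      return 'ok';
    }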
@@ -567,6 +580,13 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
   // External healthcheck (Healthchecks.io) — sends /start ping now, heartbeat every 10 min
   const healthcheck = startHealthcheck();

+  // OOM fix: convert legacy detections.json / temporal-detections.json into
+  // append-only JSONL on first boot after upgrade. Idempotent and safe to call
+  // every boot (skips when JSONL already exists).
+  try { runStateMigrations(); } catch (err) {
+    console.error(`[MONITOR] runStateMigrations failed: ${err.message}`);
+  }
+
   const state = loadState(stats);
   loadDailyStats(stats, dailyAlerts); // Restore counters from previous run (survives restarts)
   console.log(`[MONITOR] State loaded — npm last: ${state.npmLastPackage || 'none'}, pypi last: ${state.pypiLastPackage || 'none'}, npm seq: ${state.npmLastSeq || 'none'}`);
@@ -11,16 +11,26 @@ const { sanitizePackageName } = require('../shared/download.js');

 const STATE_FILE = path.join(__dirname, '..', '..', 'data', 'monitor-state.json');
 const ALERTS_FILE = path.join(__dirname, '..', '..', 'data', 'monitor-alerts.jsonl');
-const DETECTIONS_FILE = path.join(__dirname, '..', '..', 'data', 'detections.json');
+// Detections + temporal detections are append-only JSONL since the OOM fix.
+// Legacy *.json files are migrated once at boot via runStateMigrations() and
+// kept as *.json.migrated for forensic recovery (no longer read by the monitor).
+const DETECTIONS_FILE = path.join(__dirname, '..', '..', 'data', 'detections.jsonl');
+const DETECTIONS_FILE_LEGACY = path.join(__dirname, '..', '..', 'data', 'detections.json');
 const SCAN_STATS_FILE = path.join(__dirname, '..', '..', 'data', 'scan-stats.json');
 const LAST_DAILY_REPORT_FILE = path.join(__dirname, '..', '..', 'data', 'last-daily-report.json');
 const DAILY_STATS_FILE = path.join(__dirname, '..', '..', 'data', 'daily-stats.json');
-const TEMPORAL_DETECTIONS_FILE = path.join(__dirname, '..', '..', 'data', 'temporal-detections.json');
+const TEMPORAL_DETECTIONS_FILE = path.join(__dirname, '..', '..', 'data', 'temporal-detections.jsonl');
+const TEMPORAL_DETECTIONS_FILE_LEGACY = path.join(__dirname, '..', '..', 'data', 'temporal-detections.json');

 // --- Alerts/detections persistence limits ---
 const ALERTS_MAX_SIZE = 100 * 1024 * 1024; // 100MB rotation threshold (matches ml-training.jsonl)
-const MAX_DETECTIONS = 10_000; // Cap detections array — oldest entries discarded
+const MAX_DETECTIONS = 10_000; // Cap detections JSONL — older entries pruned at compaction
+const MAX_TEMPORAL_DETECTIONS = 1000; // Cap temporal detections JSONL — pruned at compaction
 const MAX_DAILY_ALERTS = 50_000; // Cap dailyAlerts array — prevents unbounded growth between daily resets
+// Append count between automatic compactions. Compaction is O(file size) so we
+// avoid running it on every append. With 350 detections/h on the VPS, a value
+// of 100 means ~17 min between compactions, acceptable overhead for the fix.
+const DETECTION_COMPACT_INTERVAL = 100;

 // Local log persistence directories (parallel to Discord webhooks for offline analysis)
 // Primary: logs/ relative to project root. Fallback: /tmp/ if primary is read-only (EROFS/EACCES).
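A quick check of the comment's arithmetic: 350 detections/h is 350/60 ≈ 5.8 appends per minute, so 100 appends accumulate in 100/5.8 ≈ 17.1 minutes, consistent with the ~17 min figure.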
@@ -92,6 +102,13 @@ let tarballCacheIndex = null;
 let scansSinceLastPersist = 0;
 let scansSinceLastMemoryPersist = 0;

+// Detection JSONL state (OOM fix — see runStateMigrations).
+// In-memory dedup Set replaces the previous "JSON.parse(file).some(...)" lookup
+// that allocated ~15 MB of transient objects per appendDetection call.
+let _detectionDedupSet = null; // Set<"package@version">, lazy-init from JSONL
+let _detectionsAppendedSinceCompact = 0; // counter for lazy compaction trigger
+let _temporalAppendedSinceCompact = 0;
+
 // --- Mutable state getters/setters ---

 function getScanMemoryCache() { return scanMemoryCache; }
@@ -439,7 +456,83 @@ function purgeTarballCache() {
   }
 }

-// --- Temporal detections ---
+// --- JSONL streaming helper (OOM fix — keeps memory bounded for large files) ---
+
+/**
+ * Iterate JSONL lines from a file using chunked sync reads. Avoids loading the
+ * full file into memory (which is what the previous read-modify-write pattern
+ * did and what triggered the V8 OOM under 16-worker concurrency).
+ *
+ * Bad lines are silently skipped (the file is human-edited only in incidents).
+ * The callback may return `false` to stop iteration early.
+ *
+ * @param {string} filePath
+ * @param {(entry:object) => boolean|void} callback
+ */
+function _iterateJsonlSync(filePath, callback) {
+  if (!fs.existsSync(filePath)) return;
+  const BUF_SIZE = 64 * 1024;
+  const fd = fs.openSync(filePath, 'r');
+  const buf = Buffer.alloc(BUF_SIZE);
+  let leftover = '';
+  try {
+    while (true) {
+      const bytesRead = fs.readSync(fd, buf, 0, BUF_SIZE, null);
+      if (bytesRead === 0) break;
+      const chunk = leftover + buf.slice(0, bytesRead).toString('utf8');
+      const lines = chunk.split('\n');
+      leftover = lines.pop() || '';
+      for (const line of lines) {
+        if (!line.trim()) continue;
+        let entry;
+        try { entry = JSON.parse(line); } catch { continue; }
+        if (callback(entry) === false) return;
+      }
+    }
+    if (leftover.trim()) {
+      try {
+        const entry = JSON.parse(leftover);
+        callback(entry);
+      } catch { /* trailing partial line — ignore */ }
+    }
+  } finally {
+    fs.closeSync(fd);
+  }
+}
+
+/**
+ * Count newline-terminated lines without parsing JSON. Used by compaction to
+ * skip the rewrite path when the file is already under the cap.
+ */
+function _countJsonlLines(filePath) {
+  if (!fs.existsSync(filePath)) return 0;
+  const BUF_SIZE = 64 * 1024;
+  const fd = fs.openSync(filePath, 'r');
+  const buf = Buffer.alloc(BUF_SIZE);
+  let count = 0;
+  let endsWithNewline = false;
+  try {
+    while (true) {
+      const bytesRead = fs.readSync(fd, buf, 0, BUF_SIZE, null);
+      if (bytesRead === 0) break;
+      for (let i = 0; i < bytesRead; i++) {
+        if (buf[i] === 0x0a) count++;
+      }
+      endsWithNewline = (buf[bytesRead - 1] === 0x0a);
+    }
+  } finally {
+    fs.closeSync(fd);
+  }
+  // If the file's last line lacks a trailing newline it still counts as one entry.
+  if (!endsWithNewline) {
+    try {
+      if (fs.statSync(filePath).size > 0) count++;
+    } catch { /* ignore */ }
+  }
+  return count;
+}
+
+// --- Temporal detections (append-only JSONL since OOM fix) ---

 /**
  * Trim temporal findings to essential fields only.
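A usage sketch for the streaming iterator defined in the hunk above, with a hypothetical file path and severity field, showing the early-exit contract (returning false stops iteration):

    let firstCritical = null;
    _iterateJsonlSync('/tmp/example-detections.jsonl', (entry) => {
      if (entry.severity === 'critical') {
        firstCritical = entry;
        return false; // stop reading; the rest of the file is never parsed
      }
    });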
@@ -463,42 +556,79 @@ function trimTemporalFindings(findings) {
 }

 /**
- * Append a temporal detection to the temporal detections file.
+ * Append a temporal detection to the temporal detections JSONL file. Append-only
+ * (O(1) regardless of file size) — the previous read-modify-write loaded the
+ * entire file on every call which was a major OOM contributor.
+ *
  * @param {string} name - Package name
  * @param {string} version - Package version
- * @param {Array} findings - Temporal findings array
+ * @param {Array} findings - Temporal findings array (will be trimmed)
  */
 function appendTemporalDetection(name, version, findings) {
-  let detections = [];
   try {
-    if (fs.existsSync(TEMPORAL_DETECTIONS_FILE)) {
-      detections = JSON.parse(fs.readFileSync(TEMPORAL_DETECTIONS_FILE, 'utf8'));
+    const dir = path.dirname(TEMPORAL_DETECTIONS_FILE);
+    if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
+    const entry = {
+      name,
+      version,
+      findings: trimTemporalFindings(findings),
+      timestamp: new Date().toISOString()
+    };
+    fs.appendFileSync(TEMPORAL_DETECTIONS_FILE, JSON.stringify(entry) + '\n', 'utf8');
+    _temporalAppendedSinceCompact++;
+    if (_temporalAppendedSinceCompact >= DETECTION_COMPACT_INTERVAL) {
+      _temporalAppendedSinceCompact = 0;
+      _compactTemporalDetectionsJsonl();
     }
-  } catch { /* corrupted file — start fresh */ }
-  detections.push({
-    name,
-    version,
-    findings: trimTemporalFindings(findings),
-    timestamp: new Date().toISOString()
-  });
-  // Keep last 1000 entries
-  if (detections.length > 1000) {
-    detections = detections.slice(-1000);
+  } catch (err) {
+    if (err.code === 'EROFS' || err.code === 'EACCES' || err.code === 'EPERM') {
+      console.warn(`[MONITOR] Permission denied writing temporal detection: ${err.code}`);
+      return;
+    }
+    if (err.code === 'ENOSPC') {
+      console.warn('[MONITOR] WARNING: disk full (ENOSPC) — cannot persist temporal detection.');
+      return;
+    }
+    console.error(`[MONITOR] Failed to save temporal detection: ${err.message}`);
   }
-  atomicWriteFileSync(TEMPORAL_DETECTIONS_FILE, JSON.stringify(detections, null, 2));
 }

 /**
- * Load temporal detections from file.
- * @returns {Array} Array of temporal detection entries
+ * Load temporal detections from file using streaming reads.
+ * @returns {Array} Array of temporal detection entries (oldest first, capped to MAX_TEMPORAL_DETECTIONS)
  */
 function loadTemporalDetections() {
+  const detections = [];
   try {
-    if (fs.existsSync(TEMPORAL_DETECTIONS_FILE)) {
-      return JSON.parse(fs.readFileSync(TEMPORAL_DETECTIONS_FILE, 'utf8'));
-    }
+    _iterateJsonlSync(TEMPORAL_DETECTIONS_FILE, (entry) => { detections.push(entry); });
   } catch { /* ignore */ }
-  return [];
+  return detections;
+}
+
+/**
+ * Compact the temporal detections JSONL file: keep only the most recent
+ * MAX_TEMPORAL_DETECTIONS entries. No-op when the file is already under cap.
+ * Internal — called from appendTemporalDetection on a counter trigger and from
+ * runStateMigrations to enforce caps after migration.
+ */
+function _compactTemporalDetectionsJsonl() {
+  try {
+    const total = _countJsonlLines(TEMPORAL_DETECTIONS_FILE);
+    if (total <= MAX_TEMPORAL_DETECTIONS) return;
+    const toDrop = total - MAX_TEMPORAL_DETECTIONS;
+    let skipped = 0;
+    const kept = [];
+    _iterateJsonlSync(TEMPORAL_DETECTIONS_FILE, (entry) => {
+      if (skipped < toDrop) { skipped++; return; }
+      kept.push(JSON.stringify(entry));
+    });
+    const tmpFile = TEMPORAL_DETECTIONS_FILE + '.tmp';
+    fs.writeFileSync(tmpFile, kept.length ? kept.join('\n') + '\n' : '', 'utf8');
+    fs.renameSync(tmpFile, TEMPORAL_DETECTIONS_FILE);
+    console.log(`[MONITOR] COMPACT temporal-detections: ${total} -> ${kept.length} entries`);
+  } catch (err) {
+    console.error(`[MONITOR] Temporal detections compaction failed: ${err.message}`);
+  }
 }

 // --- State persistence ---
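The compaction above streams the survivors into a .tmp file and renames it over the original, which keeps the replacement atomic on the same filesystem, the property the retired atomicWriteFileSync call provided. A self-contained toy run of the same keep-last-N idea (file path and cap are illustrative; the real code streams rather than reading the whole file):

    const fs = require('fs');
    const os = require('os');
    const path = require('path');

    const file = path.join(os.tmpdir(), 'toy-detections.jsonl');
    const CAP = 3; // stands in for MAX_TEMPORAL_DETECTIONS
    fs.writeFileSync(file, '');
    for (let i = 1; i <= 5; i++) {
      fs.appendFileSync(file, JSON.stringify({ id: i }) + '\n'); // O(1) appends
    }
    // Compact: keep the newest CAP entries, write to .tmp, atomically rename.
    const kept = fs.readFileSync(file, 'utf8').trim().split('\n').slice(-CAP);
    fs.writeFileSync(file + '.tmp', kept.join('\n') + '\n');
    fs.renameSync(file + '.tmp', file);
    console.log(fs.readFileSync(file, 'utf8')); // {"id":3} {"id":4} {"id":5}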
@@ -580,17 +710,46 @@ function appendAlert(alert) {
   }
 }

-// --- Detection time logging ---
+// --- Detection time logging (append-only JSONL since OOM fix) ---
+
+/**
+ * Lazy initialization of the in-memory dedup Set. Reading the JSONL file once
+ * at first use replaces the per-call read-modify-write that allocated ~15 MB
+ * of transient parsed objects on every appendDetection invocation.
+ */
+function _initDetectionDedupSet() {
+  if (_detectionDedupSet !== null) return;
+  _detectionDedupSet = new Set();
+  try {
+    _iterateJsonlSync(DETECTIONS_FILE, (entry) => {
+      if (entry && entry.package && entry.version) {
+        _detectionDedupSet.add(`${entry.package}@${entry.version}`);
+      }
+    });
+  } catch { /* ignore — Set stays empty */ }
+}
+
+/**
+ * Reset internal detection state. Test-only: lets the test suite control file
+ * lifecycle without leaking dedup state between cases.
+ */
+function _resetDetectionState() {
+  _detectionDedupSet = null;
+  _detectionsAppendedSinceCompact = 0;
+  _temporalAppendedSinceCompact = 0;
+}

+/**
+ * Load all detections by streaming the JSONL file. Returns the same
+ * { detections: [...] } shape as before so downstream consumers
+ * (buildReportFromDisk, daily report) are unchanged.
+ */
 function loadDetections() {
+  const detections = [];
   try {
-    const raw = fs.readFileSync(DETECTIONS_FILE, 'utf8');
-    const data = JSON.parse(raw);
-    if (data && Array.isArray(data.detections)) return data;
-    return { detections: [] };
-  } catch {
-    return { detections: [] };
-  }
+    _iterateJsonlSync(DETECTIONS_FILE, (entry) => { detections.push(entry); });
+  } catch { /* ignore */ }
+  return { detections };
 }

 function appendDetection(name, version, ecosystem, findings, severity) {
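The dedup key is the package@version string built in _initDetectionDedupSet. A two-line sketch of what each append now pays, with a hypothetical key, versus re-parsing the whole file as before:

    const seen = new Set();          // built once per process from the JSONL
    seen.add('left-pad@1.3.0');      // hypothetical package@version key
    const isDup = seen.has('left-pad@1.3.0'); // O(1) per append vs O(file) JSON.parse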
@@ -599,12 +758,11 @@ function appendDetection(name, version, ecosystem, findings, severity) {
     if (!fs.existsSync(dir)) {
       fs.mkdirSync(dir, { recursive: true });
     }
-    const data = loadDetections();
+    _initDetectionDedupSet();
     const key = `${name}@${version}`;
-    if (data.detections.some(d => `${d.package}@${d.version}` === key)) {
-      return; // dedup
-    }
-    data.detections.push({
+    if (_detectionDedupSet.has(key)) return; // dedup
+
+    const entry = {
       package: name,
       version,
       ecosystem,
@@ -613,44 +771,100 @@ function appendDetection(name, version, ecosystem, findings, severity) {
       severity,
       advisory_at: null,
       lead_time_hours: null
-    });
-    // Cap at MAX_DETECTIONS — discard oldest entries
-    if (data.detections.length > MAX_DETECTIONS) {
-      data.detections = data.detections.slice(-MAX_DETECTIONS);
+    };
+    fs.appendFileSync(DETECTIONS_FILE, JSON.stringify(entry) + '\n', 'utf8');
+    _detectionDedupSet.add(key);
+
+    _detectionsAppendedSinceCompact++;
+    if (_detectionsAppendedSinceCompact >= DETECTION_COMPACT_INTERVAL) {
+      _detectionsAppendedSinceCompact = 0;
+      _compactDetectionsJsonl();
     }
-    atomicWriteFileSync(DETECTIONS_FILE, JSON.stringify(data, null, 2));
   } catch (err) {
+    if (err.code === 'EROFS' || err.code === 'EACCES' || err.code === 'EPERM') {
+      console.warn(`[MONITOR] Permission denied writing detection: ${err.code}`);
+      return;
+    }
+    if (err.code === 'ENOSPC') {
+      console.warn('[MONITOR] WARNING: disk full (ENOSPC) — cannot persist detection.');
+      return;
+    }
     console.error(`[MONITOR] Failed to save detection: ${err.message}`);
   }
 }

+/**
+ * Compute detection stats by streaming the JSONL file: a single accumulator
+ * pass that never holds more than one parsed entry in memory at a time.
+ */
 function getDetectionStats() {
-  const data = loadDetections();
-  const detections = data.detections;
-  const total = detections.length;
-
+  let total = 0;
   const bySeverity = {};
   const byEcosystem = {};
-  for (const d of detections) {
-    bySeverity[d.severity] = (bySeverity[d.severity] || 0) + 1;
-    byEcosystem[d.ecosystem] = (byEcosystem[d.ecosystem] || 0) + 1;
-  }
+  const leadHours = [];
+
+  try {
+    _iterateJsonlSync(DETECTIONS_FILE, (d) => {
+      total++;
+      if (d.severity) bySeverity[d.severity] = (bySeverity[d.severity] || 0) + 1;
+      if (d.ecosystem) byEcosystem[d.ecosystem] = (byEcosystem[d.ecosystem] || 0) + 1;
+      if (d.advisory_at && d.lead_time_hours != null) {
+        leadHours.push(d.lead_time_hours);
+      }
+    });
+  } catch { /* fallthrough — return whatever we accumulated */ }

-  const withLeadTime = detections.filter(d => d.advisory_at && d.lead_time_hours != null);
   let leadTime = null;
-  if (withLeadTime.length > 0) {
-    const hours = withLeadTime.map(d => d.lead_time_hours);
+  if (leadHours.length > 0) {
+    let min = leadHours[0];
+    let max = leadHours[0];
+    let sum = 0;
+    for (const h of leadHours) {
+      if (h < min) min = h;
+      if (h > max) max = h;
+      sum += h;
+    }
     leadTime = {
-      count: withLeadTime.length,
-      avg: hours.reduce((a, b) => a + b, 0) / hours.length,
-      min: Math.min(...hours),
-      max: Math.max(...hours)
+      count: leadHours.length,
+      avg: sum / leadHours.length,
+      min,
+      max
     };
   }

   return { total, bySeverity, byEcosystem, leadTime };
 }

+/**
+ * Compact the detections JSONL file: keep only the most recent MAX_DETECTIONS
+ * entries. Rebuilds the in-memory dedup Set from the kept entries so dedup
+ * stays consistent. No-op when the file is already under cap.
+ */
+function _compactDetectionsJsonl() {
+  try {
+    const total = _countJsonlLines(DETECTIONS_FILE);
+    if (total <= MAX_DETECTIONS) return;
+    const toDrop = total - MAX_DETECTIONS;
+    let skipped = 0;
+    const kept = [];
+    const newDedup = new Set();
+    _iterateJsonlSync(DETECTIONS_FILE, (entry) => {
+      if (skipped < toDrop) { skipped++; return; }
+      kept.push(JSON.stringify(entry));
+      if (entry && entry.package && entry.version) {
+        newDedup.add(`${entry.package}@${entry.version}`);
+      }
+    });
+    const tmpFile = DETECTIONS_FILE + '.tmp';
+    fs.writeFileSync(tmpFile, kept.length ? kept.join('\n') + '\n' : '', 'utf8');
+    fs.renameSync(tmpFile, DETECTIONS_FILE);
+    _detectionDedupSet = newDedup;
+    console.log(`[MONITOR] COMPACT detections: ${total} -> ${kept.length} entries`);
+  } catch (err) {
+    console.error(`[MONITOR] Detections compaction failed: ${err.message}`);
+  }
+}
+
 // --- Scan stats (FP rate tracking) ---

 function loadScanStats() {
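Replacing Math.min(...hours) / Math.max(...hours) with the explicit loop in getDetectionStats is more than style: spreading passes every element as a separate call argument, and a large enough array makes the call throw a RangeError before any comparison runs. A sketch of the failure mode; the array size is illustrative and the exact limit is engine-dependent:

    const hours = new Array(500_000).fill(1);
    // Math.min(...hours) can throw "RangeError: Maximum call stack size exceeded"
    // once the argument count exceeds the engine's limit. The loop never can:
    let min = Infinity;
    for (const h of hours) if (h < min) min = h;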
@@ -851,6 +1065,88 @@ function getParisDateString() {

 // --- Raw state loader (CLI report helpers) ---

+// --- JSONL migration (one-shot, idempotent) ---
+
+/**
+ * Convert a legacy JSON detections file into the new JSONL format.
+ * Idempotent: skips when the JSONL file already exists, or when the legacy
+ * file is missing. After successful migration the legacy file is renamed to
+ * `<basename>.json.migrated` so the next boot is a no-op and a forensic copy
+ * remains on disk.
+ *
+ * @param {object} opts
+ * @param {string} opts.legacyFile - Path to the legacy `*.json` file
+ * @param {string} opts.targetFile - Path to the destination `*.jsonl` file
+ * @param {(parsed:any) => any[]|null} opts.extractEntries - Returns the array of
+ *   entries from the parsed JSON, or null if the file shape is unexpected.
+ * @param {string} opts.label - Short label used in log messages
+ * @returns {{migrated:boolean, entries:number}}
+ */
+function _migrateJsonToJsonl({ legacyFile, targetFile, extractEntries, label }) {
+  if (!fs.existsSync(legacyFile)) return { migrated: false, entries: 0 };
+  if (fs.existsSync(targetFile)) {
+    // JSONL already in use. Leave the legacy file alone if it's still there
+    // (operator may want to inspect it). Renaming it could surprise scripts.
+    return { migrated: false, entries: 0 };
+  }
+  let parsed;
+  try {
+    parsed = JSON.parse(fs.readFileSync(legacyFile, 'utf8'));
+  } catch (err) {
+    console.warn(`[MONITOR] MIGRATION ${label}: legacy file unreadable (${err.message}) — leaving in place`);
+    return { migrated: false, entries: 0 };
+  }
+  const entries = extractEntries(parsed);
+  if (!Array.isArray(entries)) {
+    console.warn(`[MONITOR] MIGRATION ${label}: unexpected legacy shape — leaving in place`);
+    return { migrated: false, entries: 0 };
+  }
+  const tmpFile = targetFile + '.tmp';
+  try {
+    const dir = path.dirname(targetFile);
+    if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
+    const lines = entries.map(e => JSON.stringify(e));
+    fs.writeFileSync(tmpFile, lines.length ? lines.join('\n') + '\n' : '', 'utf8');
+    fs.renameSync(tmpFile, targetFile);
+    fs.renameSync(legacyFile, legacyFile + '.migrated');
+    console.log(`[MONITOR] MIGRATION ${label}: ${entries.length} entries -> ${path.basename(targetFile)} (legacy kept as ${path.basename(legacyFile)}.migrated)`);
+    return { migrated: true, entries: entries.length };
+  } catch (err) {
+    try { fs.unlinkSync(tmpFile); } catch { /* ignore */ }
+    console.error(`[MONITOR] MIGRATION ${label} failed: ${err.message}`);
+    return { migrated: false, entries: 0 };
+  }
+}
+
+/**
+ * Run all state migrations. Called once at startup before any append/load
+ * touches the new JSONL files. Idempotent — safe to call on every boot.
+ *
+ * After migration this function also enforces the post-migration size caps,
+ * so an oversized legacy file is immediately compacted instead of waiting
+ * for DETECTION_COMPACT_INTERVAL appends.
+ */
+function runStateMigrations() {
+  // Reset internal counters/dedup so the first append re-reads from disk.
+  _resetDetectionState();
+
+  const det = _migrateJsonToJsonl({
+    legacyFile: DETECTIONS_FILE_LEGACY,
+    targetFile: DETECTIONS_FILE,
+    extractEntries: (parsed) => (parsed && Array.isArray(parsed.detections)) ? parsed.detections : null,
+    label: 'detections'
+  });
+  if (det.migrated && det.entries > MAX_DETECTIONS) _compactDetectionsJsonl();
+
+  const tmp = _migrateJsonToJsonl({
+    legacyFile: TEMPORAL_DETECTIONS_FILE_LEGACY,
+    targetFile: TEMPORAL_DETECTIONS_FILE,
+    extractEntries: (parsed) => Array.isArray(parsed) ? parsed : null,
+    label: 'temporal-detections'
+  });
+  if (tmp.migrated && tmp.entries > MAX_TEMPORAL_DETECTIONS) _compactTemporalDetectionsJsonl();
+}
+
 /**
  * Read raw state file (without restoring into stats).
  */
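For concreteness, the shape change runStateMigrations performs on disk, shown with hypothetical data:

    // Before (legacy detections.json, one big JSON document):
    //   {"detections":[{"package":"a","version":"1.0.0"},{"package":"b","version":"2.0.0"}]}
    // After (detections.jsonl, one entry per line, appendable without re-parsing):
    //   {"package":"a","version":"1.0.0"}
    //   {"package":"b","version":"2.0.0"}
    // The legacy file survives as detections.json.migrated for forensics.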
@@ -868,10 +1164,12 @@ module.exports = {
   STATE_FILE,
   ALERTS_FILE,
   DETECTIONS_FILE,
+  DETECTIONS_FILE_LEGACY,
   SCAN_STATS_FILE,
   LAST_DAILY_REPORT_FILE,
   DAILY_STATS_FILE,
   TEMPORAL_DETECTIONS_FILE,
+  TEMPORAL_DETECTIONS_FILE_LEGACY,
   PRIMARY_DAILY_REPORTS_DIR,
   PRIMARY_ALERTS_DIR,
   FALLBACK_DAILY_REPORTS_DIR,
@@ -894,7 +1192,9 @@ module.exports = {
   DAILY_STATS_PERSIST_INTERVAL,
   ALERTS_MAX_SIZE,
   MAX_DETECTIONS,
+  MAX_TEMPORAL_DETECTIONS,
   MAX_DAILY_ALERTS,
+  DETECTION_COMPACT_INTERVAL,

   // Mutable state getters/setters
   getScanMemoryCache,
@@ -929,6 +1229,13 @@ module.exports = {
   loadDetections,
   appendDetection,
   getDetectionStats,
+  runStateMigrations,
+  // Internal — exported for tests and for the daemon hourly housekeeping.
+  _compactDetectionsJsonl,
+  _compactTemporalDetectionsJsonl,
+  _resetDetectionState,
+  _iterateJsonlSync,
+  _countJsonlLines,
   loadScanStats,
   updateScanStats,
   loadDailyStats,