muaddib-scanner 2.10.100 → 2.10.101
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/monitor/daemon.js +21 -1
- package/src/monitor/state.js +366 -59
package/package.json
CHANGED
package/src/monitor/daemon.js
CHANGED
|
@@ -5,7 +5,7 @@ const os = require('os');
|
|
|
5
5
|
const v8 = require('v8');
|
|
6
6
|
const { isDockerAvailable, SANDBOX_CONCURRENCY_MAX } = require('../sandbox/index.js');
|
|
7
7
|
const { setVerboseMode, isSandboxEnabled, isCanaryEnabled, isLlmDetectiveEnabled, getLlmDetectiveMode, DOWNLOADS_CACHE_TTL } = require('./classify.js');
|
|
8
|
-
const { loadState, saveState, loadDailyStats, saveDailyStats, purgeTarballCache, getParisHour, atomicWriteFileSync, saveNpmSeq, ALERTS_FILE } = require('./state.js');
|
|
8
|
+
const { loadState, saveState, loadDailyStats, saveDailyStats, purgeTarballCache, getParisHour, atomicWriteFileSync, saveNpmSeq, ALERTS_FILE, runStateMigrations } = require('./state.js');
|
|
9
9
|
const { isTemporalEnabled, isTemporalAstEnabled, isTemporalPublishEnabled, isTemporalMaintainerEnabled } = require('./temporal.js');
|
|
10
10
|
const { pendingGrouped, flushScopeGroup, sendDailyReport, DAILY_REPORT_HOUR, alertedPackageRules } = require('./webhook.js');
|
|
11
11
|
const { poll } = require('./ingestion.js');
|
|
@@ -14,6 +14,12 @@ const { computeTarget, ADJUST_INTERVAL_MS, BASE_CONCURRENCY, resetDeltas } = req
|
|
|
14
14
|
const { startHealthcheck } = require('./healthcheck.js');
|
|
15
15
|
const { startDeferredWorker, stopDeferredWorker, persistDeferredQueue, restoreDeferredQueue, clearDeferredQueue } = require('./deferred-sandbox.js');
|
|
16
16
|
const { clearMetadataCache } = require('../scanner/temporal-analysis.js');
|
|
17
|
+
// Caches not previously cleared by handleMemoryPressure (OOM fix). These live
|
|
18
|
+
// in the main thread and are populated by temporal-ast-diff and the typosquat
|
|
19
|
+
// scanner, neither of which runs in the static-scan worker.
|
|
20
|
+
const { clearMetadataCache: clearTyposquatMetadataCache } = require('../scanner/typosquat.js');
|
|
21
|
+
const { clearFileListCache } = require('../utils.js');
|
|
22
|
+
const { clearASTCache } = require('../shared/constants.js');
|
|
17
23
|
|
|
18
24
|
const POLL_INTERVAL = 60_000;
|
|
19
25
|
const PROCESS_LOOP_INTERVAL = 2_000; // Queue check interval when empty
|
|
@@ -401,6 +407,13 @@ function handleMemoryPressure(level, ratio, recentlyScanned, downloadsCache, sca
|
|
|
401
407
|
console.error(`[MONITOR] MEMORY PRESSURE CRITICAL: heap at ${pct}% — stopping ingestion, clearing scanner caches`);
|
|
402
408
|
// temporal-analysis._metadataCache (200 entries × full npm registry metadata)
|
|
403
409
|
try { clearMetadataCache(); } catch {}
|
|
410
|
+
// typosquat metadataCache (500 entries × npm registry metadata for typosquat scoring)
|
|
411
|
+
try { clearTyposquatMetadataCache(); } catch {}
|
|
412
|
+
// utils._fileListCache, utils._fileContentCache, shared/constants._astCache
|
|
413
|
+
// — populated by temporal-ast-diff (main-thread tarball download + AST parse).
|
|
414
|
+
// Each AST entry can be MB-sized for bundled outputs.
|
|
415
|
+
try { clearFileListCache(); } catch {}
|
|
416
|
+
try { clearASTCache(); } catch {}
|
|
404
417
|
// pendingGrouped webhook buffers
|
|
405
418
|
for (const [scope, group] of pendingGrouped) {
|
|
406
419
|
clearTimeout(group.timer);
|
|
@@ -567,6 +580,13 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
|
|
|
567
580
|
// External healthcheck (Healthchecks.io) — sends /start ping now, heartbeat every 10 min
|
|
568
581
|
const healthcheck = startHealthcheck();
|
|
569
582
|
|
|
583
|
+
// OOM fix: convert legacy detections.json / temporal-detections.json into
|
|
584
|
+
// append-only JSONL on first boot after upgrade. Idempotent and safe to call
|
|
585
|
+
// every boot (skips when JSONL already exists).
|
|
586
|
+
try { runStateMigrations(); } catch (err) {
|
|
587
|
+
console.error(`[MONITOR] runStateMigrations failed: ${err.message}`);
|
|
588
|
+
}
|
|
589
|
+
|
|
570
590
|
const state = loadState(stats);
|
|
571
591
|
loadDailyStats(stats, dailyAlerts); // Restore counters from previous run (survives restarts)
|
|
572
592
|
console.log(`[MONITOR] State loaded — npm last: ${state.npmLastPackage || 'none'}, pypi last: ${state.pypiLastPackage || 'none'}, npm seq: ${state.npmLastSeq || 'none'}`);
|
package/src/monitor/state.js
CHANGED
|
@@ -11,16 +11,26 @@ const { sanitizePackageName } = require('../shared/download.js');
|
|
|
11
11
|
|
|
12
12
|
const STATE_FILE = path.join(__dirname, '..', '..', 'data', 'monitor-state.json');
|
|
13
13
|
const ALERTS_FILE = path.join(__dirname, '..', '..', 'data', 'monitor-alerts.jsonl');
|
|
14
|
-
|
|
14
|
+
// Detections + temporal detections are append-only JSONL since the OOM fix.
|
|
15
|
+
// Legacy *.json files are migrated once at boot via runStateMigrations() and
|
|
16
|
+
// kept as *.json.migrated for forensic recovery (no longer read by the monitor).
|
|
17
|
+
const DETECTIONS_FILE = path.join(__dirname, '..', '..', 'data', 'detections.jsonl');
|
|
18
|
+
const DETECTIONS_FILE_LEGACY = path.join(__dirname, '..', '..', 'data', 'detections.json');
|
|
15
19
|
const SCAN_STATS_FILE = path.join(__dirname, '..', '..', 'data', 'scan-stats.json');
|
|
16
20
|
const LAST_DAILY_REPORT_FILE = path.join(__dirname, '..', '..', 'data', 'last-daily-report.json');
|
|
17
21
|
const DAILY_STATS_FILE = path.join(__dirname, '..', '..', 'data', 'daily-stats.json');
|
|
18
|
-
const TEMPORAL_DETECTIONS_FILE = path.join(__dirname, '..', '..', 'data', 'temporal-detections.
|
|
22
|
+
const TEMPORAL_DETECTIONS_FILE = path.join(__dirname, '..', '..', 'data', 'temporal-detections.jsonl');
|
|
23
|
+
const TEMPORAL_DETECTIONS_FILE_LEGACY = path.join(__dirname, '..', '..', 'data', 'temporal-detections.json');
|
|
19
24
|
|
|
20
25
|
// --- Alerts/detections persistence limits ---
|
|
21
26
|
const ALERTS_MAX_SIZE = 100 * 1024 * 1024; // 100MB rotation threshold (matches ml-training.jsonl)
|
|
22
|
-
const MAX_DETECTIONS = 10_000; // Cap detections
|
|
27
|
+
const MAX_DETECTIONS = 10_000; // Cap detections JSONL — older entries pruned at compaction
|
|
28
|
+
const MAX_TEMPORAL_DETECTIONS = 1000; // Cap temporal detections JSONL — pruned at compaction
|
|
23
29
|
const MAX_DAILY_ALERTS = 50_000; // Cap dailyAlerts array — prevents unbounded growth between daily resets
|
|
30
|
+
// Append count between automatic compactions. Compaction is O(file size) so we
|
|
31
|
+
// avoid running it on every append. With 350 detections/h on the VPS, a value
|
|
32
|
+
// of 100 means ~17 min between compactions, acceptable overhead for the fix.
|
|
33
|
+
const DETECTION_COMPACT_INTERVAL = 100;
|
|
24
34
|
|
|
25
35
|
// Local log persistence directories (parallel to Discord webhooks for offline analysis)
|
|
26
36
|
// Primary: logs/ relative to project root. Fallback: /tmp/ if primary is read-only (EROFS/EACCES).
|
|
@@ -92,6 +102,13 @@ let tarballCacheIndex = null;
|
|
|
92
102
|
let scansSinceLastPersist = 0;
|
|
93
103
|
let scansSinceLastMemoryPersist = 0;
|
|
94
104
|
|
|
105
|
+
// Detection JSONL state (OOM fix — see runStateMigrations).
|
|
106
|
+
// In-memory dedup Set replaces the previous "JSON.parse(file).some(...)" lookup
|
|
107
|
+
// that allocated ~15 MB of transient objects per appendDetection call.
|
|
108
|
+
let _detectionDedupSet = null; // Set<"package@version">, lazy-init from JSONL
|
|
109
|
+
let _detectionsAppendedSinceCompact = 0; // counter for lazy compaction trigger
|
|
110
|
+
let _temporalAppendedSinceCompact = 0;
|
|
111
|
+
|
|
95
112
|
// --- Mutable state getters/setters ---
|
|
96
113
|
|
|
97
114
|
function getScanMemoryCache() { return scanMemoryCache; }
|
|
@@ -439,7 +456,83 @@ function purgeTarballCache() {
|
|
|
439
456
|
}
|
|
440
457
|
}
|
|
441
458
|
|
|
442
|
-
// ---
|
|
459
|
+
// --- JSONL streaming helper (OOM fix — keeps memory bounded for large files) ---
|
|
460
|
+
|
|
461
|
+
/**
 * Iterate the entries of a JSONL file using fixed-size synchronous reads, so
 * the file is never held in memory as a whole.
 *
 * Lines are split on the raw newline byte (0x0a) before UTF-8 decoding. That
 * byte never occurs inside a multi-byte UTF-8 sequence, so a character that
 * straddles two read chunks is decoded intact instead of being turned into
 * U+FFFD replacement characters.
 *
 * Blank lines and lines that fail JSON.parse are skipped silently. A final
 * line without a trailing newline is still delivered. Exceptions thrown by
 * the callback propagate to the caller (after the descriptor is closed),
 * for every line including the last one.
 *
 * @param {string} filePath - Path to the JSONL file; a missing file is a no-op.
 * @param {(entry:any) => boolean|void} callback - Called once per parsed line;
 *   return `false` to stop iteration early.
 */
function _iterateJsonlSync(filePath, callback) {
  if (!fs.existsSync(filePath)) return;
  const BUF_SIZE = 64 * 1024;
  const NEWLINE = 0x0a;
  const fd = fs.openSync(filePath, 'r');
  const buf = Buffer.alloc(BUF_SIZE);
  // Bytes of the current, not yet newline-terminated line.
  let pending = Buffer.alloc(0);

  // Parse one line and hand it to the callback. Returns false only when the
  // callback asked to stop.
  const emit = (line) => {
    if (!line.trim()) return true;
    let entry;
    try { entry = JSON.parse(line); } catch { return true; }
    return callback(entry) !== false;
  };

  try {
    while (true) {
      const bytesRead = fs.readSync(fd, buf, 0, BUF_SIZE, null);
      if (bytesRead === 0) break;
      const chunk = buf.subarray(0, bytesRead);
      const lastNewline = chunk.lastIndexOf(NEWLINE);
      if (lastNewline === -1) {
        // No line boundary in this chunk: keep accumulating raw bytes.
        pending = Buffer.concat([pending, chunk]);
        continue;
      }
      const complete = Buffer.concat([pending, chunk.subarray(0, lastNewline)]).toString('utf8');
      // Copy the tail: `buf` is overwritten by the next readSync.
      pending = Buffer.from(chunk.subarray(lastNewline + 1));
      for (const line of complete.split('\n')) {
        if (!emit(line)) return;
      }
    }
    if (pending.length > 0) emit(pending.toString('utf8'));
  } finally {
    fs.closeSync(fd);
  }
}
|
|
502
|
+
|
|
503
|
+
/**
 * Count the lines of a file without decoding or parsing them: every newline
 * byte (0x0a) counts as one line, plus one more when the file is non-empty
 * and its last byte is not a newline. Blank lines are therefore counted too.
 *
 * The "unterminated last line" case is decided from the last byte actually
 * read, so no extra stat call is needed after the descriptor is closed.
 *
 * @param {string} filePath - Path to the file; a missing file counts as 0.
 * @returns {number} Number of lines in the file.
 */
function _countJsonlLines(filePath) {
  if (!fs.existsSync(filePath)) return 0;
  const BUF_SIZE = 64 * 1024;
  const NEWLINE = 0x0a;
  const fd = fs.openSync(filePath, 'r');
  const buf = Buffer.alloc(BUF_SIZE);
  let count = 0;
  // Starts as NEWLINE so an empty file (no bytes read) adds no extra line.
  let lastByte = NEWLINE;
  try {
    let bytesRead;
    while ((bytesRead = fs.readSync(fd, buf, 0, BUF_SIZE, null)) > 0) {
      for (let i = 0; i < bytesRead; i++) {
        if (buf[i] === NEWLINE) count++;
      }
      lastByte = buf[bytesRead - 1];
    }
  } finally {
    fs.closeSync(fd);
  }
  return lastByte === NEWLINE ? count : count + 1;
}
|
|
534
|
+
|
|
535
|
+
// --- Temporal detections (append-only JSONL since OOM fix) ---
|
|
443
536
|
|
|
444
537
|
/**
|
|
445
538
|
* Trim temporal findings to essential fields only.
|
|
@@ -463,42 +556,79 @@ function trimTemporalFindings(findings) {
|
|
|
463
556
|
}
|
|
464
557
|
|
|
465
558
|
/**
 * Persist one temporal detection as a single line appended to the temporal
 * detections JSONL file. The data directory is created when missing, and the
 * findings are reduced through trimTemporalFindings before being written.
 *
 * Every DETECTION_COMPACT_INTERVAL successful appends the file is compacted.
 * Write failures never propagate: permission and disk-full errors are logged
 * as warnings, anything else as an error.
 *
 * @param {string} name - Package name
 * @param {string} version - Package version
 * @param {Array} findings - Temporal findings array (stored in trimmed form)
 */
function appendTemporalDetection(name, version, findings) {
  try {
    const dataDir = path.dirname(TEMPORAL_DETECTIONS_FILE);
    if (!fs.existsSync(dataDir)) {
      fs.mkdirSync(dataDir, { recursive: true });
    }

    const record = {
      name,
      version,
      findings: trimTemporalFindings(findings),
      timestamp: new Date().toISOString()
    };
    fs.appendFileSync(TEMPORAL_DETECTIONS_FILE, `${JSON.stringify(record)}\n`, 'utf8');

    _temporalAppendedSinceCompact += 1;
    if (_temporalAppendedSinceCompact < DETECTION_COMPACT_INTERVAL) return;

    // Interval reached: restart the counter, then trim the file back to its cap.
    _temporalAppendedSinceCompact = 0;
    _compactTemporalDetectionsJsonl();
  } catch (err) {
    switch (err.code) {
      case 'EROFS':
      case 'EACCES':
      case 'EPERM':
        console.warn(`[MONITOR] Permission denied writing temporal detection: ${err.code}`);
        break;
      case 'ENOSPC':
        console.warn('[MONITOR] WARNING: disk full (ENOSPC) — cannot persist temporal detection.');
        break;
      default:
        console.error(`[MONITOR] Failed to save temporal detection: ${err.message}`);
    }
  }
}
|
|
490
595
|
|
|
491
596
|
/**
 * Load temporal detections by streaming the JSONL file line by line.
 *
 * The file is only trimmed at compaction time, so between compactions it can
 * hold more than MAX_TEMPORAL_DETECTIONS lines. The cap is therefore applied
 * here as well: when the file is over the cap only the newest entries are
 * returned. Read errors are swallowed and yield whatever was collected.
 *
 * @returns {Array} Temporal detection entries, oldest first, at most
 *   MAX_TEMPORAL_DETECTIONS of them
 */
function loadTemporalDetections() {
  const detections = [];
  try {
    _iterateJsonlSync(TEMPORAL_DETECTIONS_FILE, (entry) => { detections.push(entry); });
  } catch { /* best-effort read: return what was collected so far */ }
  if (detections.length > MAX_TEMPORAL_DETECTIONS) {
    // Entries are appended chronologically, so the tail is the newest data.
    return detections.slice(-MAX_TEMPORAL_DETECTIONS);
  }
  return detections;
}
|
|
607
|
+
|
|
608
|
+
/**
 * Compact the temporal detections JSONL file down to the newest
 * MAX_TEMPORAL_DETECTIONS parseable entries. No-op when the raw line count is
 * already within the cap.
 *
 * The entries to keep are selected from the parsed stream itself (a ring
 * buffer of the last MAX_TEMPORAL_DETECTIONS serialized entries) rather than
 * from the raw line count. Blank or corrupt lines are skipped by the JSONL
 * iterator, so deriving a "lines to drop" number from the raw count would
 * discard that many extra valid entries.
 *
 * The new content is written to a `.tmp` sibling and renamed over the
 * original; the temp file is removed if anything fails. Errors are logged and
 * never thrown.
 */
function _compactTemporalDetectionsJsonl() {
  const tmpFile = TEMPORAL_DETECTIONS_FILE + '.tmp';
  try {
    // Cheap byte-level pre-check: parseable entries can never outnumber raw lines.
    const rawLines = _countJsonlLines(TEMPORAL_DETECTIONS_FILE);
    if (rawLines <= MAX_TEMPORAL_DETECTIONS) return;

    // Ring buffer: slot (seen % cap) always holds the newest entry, so memory
    // stays bounded by the cap however large the file is.
    const ring = new Array(MAX_TEMPORAL_DETECTIONS);
    let seen = 0;
    _iterateJsonlSync(TEMPORAL_DETECTIONS_FILE, (entry) => {
      ring[seen % MAX_TEMPORAL_DETECTIONS] = JSON.stringify(entry);
      seen++;
    });

    let kept;
    if (seen <= MAX_TEMPORAL_DETECTIONS) {
      kept = ring.slice(0, seen);
    } else {
      // The oldest surviving entry sits right after the newest one.
      const start = seen % MAX_TEMPORAL_DETECTIONS;
      kept = ring.slice(start).concat(ring.slice(0, start));
    }

    fs.writeFileSync(tmpFile, kept.length ? kept.join('\n') + '\n' : '', 'utf8');
    fs.renameSync(tmpFile, TEMPORAL_DETECTIONS_FILE);
    console.log(`[MONITOR] COMPACT temporal-detections: ${rawLines} -> ${kept.length} entries`);
  } catch (err) {
    try { fs.unlinkSync(tmpFile); } catch { /* temp file was never created or is already gone */ }
    console.error(`[MONITOR] Temporal detections compaction failed: ${err.message}`);
  }
}
|
|
503
633
|
|
|
504
634
|
// --- State persistence ---
|
|
@@ -580,17 +710,46 @@ function appendAlert(alert) {
|
|
|
580
710
|
}
|
|
581
711
|
}
|
|
582
712
|
|
|
583
|
-
// --- Detection time logging ---
|
|
713
|
+
// --- Detection time logging (append-only JSONL since OOM fix) ---
|
|
714
|
+
|
|
715
|
+
/**
 * Populate the in-memory dedup Set on first use. Does nothing when the Set
 * already exists. Otherwise the detections JSONL file is streamed once and a
 * "package@version" key is recorded for every entry that has both fields.
 * Read failures are swallowed; the Set then holds whatever was recorded
 * before the failure.
 */
function _initDetectionDedupSet() {
  if (_detectionDedupSet === null) {
    const keys = new Set();
    // Publish the Set before reading so a failed read still leaves it initialized.
    _detectionDedupSet = keys;

    const remember = (record) => {
      if (!record || !record.package || !record.version) return;
      keys.add(`${record.package}@${record.version}`);
    };

    try {
      _iterateJsonlSync(DETECTIONS_FILE, remember);
    } catch {
      /* ignore — keep the keys gathered so far */
    }
  }
}
|
|
731
|
+
|
|
732
|
+
/**
 * Return the module-level detection bookkeeping to its initial state: both
 * append counters go back to zero and the dedup Set is discarded, so the next
 * _initDetectionDedupSet call rebuilds it from disk. Exported so tests can
 * isolate cases; runStateMigrations also calls it.
 */
function _resetDetectionState() {
  _temporalAppendedSinceCompact = 0;
  _detectionsAppendedSinceCompact = 0;
  _detectionDedupSet = null;
}
|
|
584
741
|
|
|
742
|
+
/**
 * Read every detection from the JSONL file via the streaming iterator and
 * wrap the result as `{ detections: [...] }`. Read errors are swallowed: the
 * entries gathered up to that point (possibly none) are returned.
 *
 * @returns {{detections: Array}} Parsed entries in file order
 */
function loadDetections() {
  const collected = [];
  const collect = (entry) => { collected.push(entry); };
  try {
    _iterateJsonlSync(DETECTIONS_FILE, collect);
  } catch {
    /* ignore */
  }
  return { detections: collected };
}
|
|
595
754
|
|
|
596
755
|
function appendDetection(name, version, ecosystem, findings, severity) {
|
|
@@ -599,12 +758,11 @@ function appendDetection(name, version, ecosystem, findings, severity) {
|
|
|
599
758
|
if (!fs.existsSync(dir)) {
|
|
600
759
|
fs.mkdirSync(dir, { recursive: true });
|
|
601
760
|
}
|
|
602
|
-
|
|
761
|
+
_initDetectionDedupSet();
|
|
603
762
|
const key = `${name}@${version}`;
|
|
604
|
-
if (
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
data.detections.push({
|
|
763
|
+
if (_detectionDedupSet.has(key)) return; // dedup
|
|
764
|
+
|
|
765
|
+
const entry = {
|
|
608
766
|
package: name,
|
|
609
767
|
version,
|
|
610
768
|
ecosystem,
|
|
@@ -613,44 +771,100 @@ function appendDetection(name, version, ecosystem, findings, severity) {
|
|
|
613
771
|
severity,
|
|
614
772
|
advisory_at: null,
|
|
615
773
|
lead_time_hours: null
|
|
616
|
-
}
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
774
|
+
};
|
|
775
|
+
fs.appendFileSync(DETECTIONS_FILE, JSON.stringify(entry) + '\n', 'utf8');
|
|
776
|
+
_detectionDedupSet.add(key);
|
|
777
|
+
|
|
778
|
+
_detectionsAppendedSinceCompact++;
|
|
779
|
+
if (_detectionsAppendedSinceCompact >= DETECTION_COMPACT_INTERVAL) {
|
|
780
|
+
_detectionsAppendedSinceCompact = 0;
|
|
781
|
+
_compactDetectionsJsonl();
|
|
620
782
|
}
|
|
621
|
-
atomicWriteFileSync(DETECTIONS_FILE, JSON.stringify(data, null, 2));
|
|
622
783
|
} catch (err) {
|
|
784
|
+
if (err.code === 'EROFS' || err.code === 'EACCES' || err.code === 'EPERM') {
|
|
785
|
+
console.warn(`[MONITOR] Permission denied writing detection: ${err.code}`);
|
|
786
|
+
return;
|
|
787
|
+
}
|
|
788
|
+
if (err.code === 'ENOSPC') {
|
|
789
|
+
console.warn('[MONITOR] WARNING: disk full (ENOSPC) — cannot persist detection.');
|
|
790
|
+
return;
|
|
791
|
+
}
|
|
623
792
|
console.error(`[MONITOR] Failed to save detection: ${err.message}`);
|
|
624
793
|
}
|
|
625
794
|
}
|
|
626
795
|
|
|
796
|
+
/**
 * Compute detection statistics in one streaming pass over the JSONL file.
 * Only counters and running min/max/sum are retained; no entry and no
 * per-entry array outlives its callback invocation.
 *
 * Lines whose JSON value is not an object (for example a bare `null` or a
 * number) are skipped. Reading a property off `null` would otherwise throw
 * inside the callback and silently cut the statistics short.
 *
 * @returns {{total:number, bySeverity:Object, byEcosystem:Object,
 *   leadTime:({count:number, avg:number, min:number, max:number}|null)}}
 *   `leadTime` is null when no entry has both `advisory_at` and a non-null
 *   `lead_time_hours`.
 */
function getDetectionStats() {
  let total = 0;
  const bySeverity = {};
  const byEcosystem = {};
  let leadCount = 0;
  let leadSum = 0;
  let leadMin;
  let leadMax;

  try {
    _iterateJsonlSync(DETECTIONS_FILE, (d) => {
      if (d === null || typeof d !== 'object') return;
      total++;
      if (d.severity) bySeverity[d.severity] = (bySeverity[d.severity] || 0) + 1;
      if (d.ecosystem) byEcosystem[d.ecosystem] = (byEcosystem[d.ecosystem] || 0) + 1;
      if (d.advisory_at && d.lead_time_hours != null) {
        const hours = d.lead_time_hours;
        if (leadCount === 0) {
          // First sample seeds both extremes.
          leadMin = hours;
          leadMax = hours;
        } else {
          if (hours < leadMin) leadMin = hours;
          if (hours > leadMax) leadMax = hours;
        }
        leadSum += hours;
        leadCount++;
      }
    });
  } catch { /* fallthrough — return whatever we accumulated */ }

  let leadTime = null;
  if (leadCount > 0) {
    leadTime = {
      count: leadCount,
      avg: leadSum / leadCount,
      min: leadMin,
      max: leadMax
    };
  }

  return { total, bySeverity, byEcosystem, leadTime };
}
|
|
653
837
|
|
|
838
|
+
/**
 * Compact the detections JSONL file down to the newest MAX_DETECTIONS
 * parseable entries and replace the in-memory dedup Set with the keys of the
 * kept entries. No-op when the raw line count is already within the cap.
 *
 * The entries to keep are selected from the parsed stream itself (a ring
 * buffer of the last MAX_DETECTIONS entries) rather than from the raw line
 * count. Blank or corrupt lines are skipped by the JSONL iterator, so
 * deriving a "lines to drop" number from the raw count would discard that
 * many extra valid entries.
 *
 * The new content is written to a `.tmp` sibling and renamed over the
 * original; the temp file is removed if anything fails. Errors are logged and
 * never thrown, and the dedup Set is only swapped after a successful rename.
 */
function _compactDetectionsJsonl() {
  const tmpFile = DETECTIONS_FILE + '.tmp';
  try {
    // Cheap byte-level pre-check: parseable entries can never outnumber raw lines.
    const rawLines = _countJsonlLines(DETECTIONS_FILE);
    if (rawLines <= MAX_DETECTIONS) return;

    // Ring buffer of { line, key }: slot (seen % cap) always holds the newest
    // entry, so memory stays bounded by the cap however large the file is.
    const ring = new Array(MAX_DETECTIONS);
    let seen = 0;
    _iterateJsonlSync(DETECTIONS_FILE, (entry) => {
      const hasKey = entry && entry.package && entry.version;
      ring[seen % MAX_DETECTIONS] = {
        line: JSON.stringify(entry),
        key: hasKey ? `${entry.package}@${entry.version}` : null
      };
      seen++;
    });

    let survivors;
    if (seen <= MAX_DETECTIONS) {
      survivors = ring.slice(0, seen);
    } else {
      // The oldest surviving entry sits right after the newest one.
      const start = seen % MAX_DETECTIONS;
      survivors = ring.slice(start).concat(ring.slice(0, start));
    }

    const kept = [];
    const newDedup = new Set();
    for (const { line, key } of survivors) {
      kept.push(line);
      if (key !== null) newDedup.add(key);
    }

    fs.writeFileSync(tmpFile, kept.length ? kept.join('\n') + '\n' : '', 'utf8');
    fs.renameSync(tmpFile, DETECTIONS_FILE);
    _detectionDedupSet = newDedup;
    console.log(`[MONITOR] COMPACT detections: ${rawLines} -> ${kept.length} entries`);
  } catch (err) {
    try { fs.unlinkSync(tmpFile); } catch { /* temp file was never created or is already gone */ }
    console.error(`[MONITOR] Detections compaction failed: ${err.message}`);
  }
}
|
|
867
|
+
|
|
654
868
|
// --- Scan stats (FP rate tracking) ---
|
|
655
869
|
|
|
656
870
|
function loadScanStats() {
|
|
@@ -851,6 +1065,88 @@ function getParisDateString() {
|
|
|
851
1065
|
|
|
852
1066
|
// --- Raw state loader (CLI report helpers) ---
|
|
853
1067
|
|
|
1068
|
+
// --- JSONL migration (one-shot, idempotent) ---
|
|
1069
|
+
|
|
1070
|
+
/**
 * Convert a legacy JSON detections file into the JSONL format (one
 * JSON-serialized entry per line).
 *
 * Skips, returning `migrated: false`, when the legacy file is missing, when
 * the JSONL target already exists, when the legacy file cannot be read or
 * parsed, or when `extractEntries` does not return an array. In all of those
 * cases the legacy file is left untouched.
 *
 * The target is written to a `.tmp` sibling and renamed into place. Once that
 * rename has succeeded the migration is reported as done, even if the legacy
 * file cannot afterwards be renamed to `<legacyFile>.migrated`. The data now
 * lives in the target, later calls skip because the target exists, and the
 * caller still needs the entry count to enforce size caps. A failed legacy
 * rename is only logged as a warning.
 *
 * @param {object} opts
 * @param {string} opts.legacyFile - Path to the legacy `*.json` file
 * @param {string} opts.targetFile - Path to the destination `*.jsonl` file
 * @param {(parsed:any) => any[]|null} opts.extractEntries - Returns the array of
 *   entries from the parsed JSON, or null if the file shape is unexpected.
 * @param {string} opts.label - Short label used in log messages
 * @returns {{migrated:boolean, entries:number}}
 */
function _migrateJsonToJsonl({ legacyFile, targetFile, extractEntries, label }) {
  if (!fs.existsSync(legacyFile)) return { migrated: false, entries: 0 };
  if (fs.existsSync(targetFile)) {
    // JSONL already in use. Leave the legacy file alone if it's still there
    // (operator may want to inspect it). Renaming it could surprise scripts.
    return { migrated: false, entries: 0 };
  }

  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(legacyFile, 'utf8'));
  } catch (err) {
    console.warn(`[MONITOR] MIGRATION ${label}: legacy file unreadable (${err.message}) — leaving in place`);
    return { migrated: false, entries: 0 };
  }

  const entries = extractEntries(parsed);
  if (!Array.isArray(entries)) {
    console.warn(`[MONITOR] MIGRATION ${label}: unexpected legacy shape — leaving in place`);
    return { migrated: false, entries: 0 };
  }

  // Phase 1: materialize the JSONL target via temp file + rename.
  const tmpFile = targetFile + '.tmp';
  try {
    const dir = path.dirname(targetFile);
    if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
    const lines = entries.map(e => JSON.stringify(e));
    fs.writeFileSync(tmpFile, lines.length ? lines.join('\n') + '\n' : '', 'utf8');
    fs.renameSync(tmpFile, targetFile);
  } catch (err) {
    try { fs.unlinkSync(tmpFile); } catch { /* ignore */ }
    console.error(`[MONITOR] MIGRATION ${label} failed: ${err.message}`);
    return { migrated: false, entries: 0 };
  }

  // Phase 2: retire the legacy file. Failure here must not undo the result.
  try {
    fs.renameSync(legacyFile, legacyFile + '.migrated');
    console.log(`[MONITOR] MIGRATION ${label}: ${entries.length} entries -> ${path.basename(targetFile)} (legacy kept as ${path.basename(legacyFile)}.migrated)`);
  } catch (err) {
    console.warn(`[MONITOR] MIGRATION ${label}: ${entries.length} entries -> ${path.basename(targetFile)}, but legacy file could not be renamed (${err.message})`);
  }
  return { migrated: true, entries: entries.length };
}
|
|
1120
|
+
|
|
1121
|
+
/**
 * Startup entry point for the JSON -> JSONL state migrations.
 *
 * Clears the in-memory detection bookkeeping, then migrates the legacy
 * detections file followed by the legacy temporal detections file. Whenever a
 * migration actually ran and brought in more entries than the matching cap,
 * that file is compacted right away rather than after the next
 * DETECTION_COMPACT_INTERVAL appends.
 */
function runStateMigrations() {
  // Drop counters and the dedup Set so the first append re-reads from disk.
  _resetDetectionState();

  const detectionsResult = _migrateJsonToJsonl({
    legacyFile: DETECTIONS_FILE_LEGACY,
    targetFile: DETECTIONS_FILE,
    extractEntries(parsed) {
      // Legacy shape: { detections: [...] }
      if (parsed && Array.isArray(parsed.detections)) return parsed.detections;
      return null;
    },
    label: 'detections'
  });
  const detectionsOverCap = detectionsResult.migrated && detectionsResult.entries > MAX_DETECTIONS;
  if (detectionsOverCap) {
    _compactDetectionsJsonl();
  }

  const temporalResult = _migrateJsonToJsonl({
    legacyFile: TEMPORAL_DETECTIONS_FILE_LEGACY,
    targetFile: TEMPORAL_DETECTIONS_FILE,
    extractEntries(parsed) {
      // Legacy shape: a bare top-level array
      if (Array.isArray(parsed)) return parsed;
      return null;
    },
    label: 'temporal-detections'
  });
  const temporalOverCap = temporalResult.migrated && temporalResult.entries > MAX_TEMPORAL_DETECTIONS;
  if (temporalOverCap) {
    _compactTemporalDetectionsJsonl();
  }
}
|
|
1149
|
+
|
|
854
1150
|
/**
|
|
855
1151
|
* Read raw state file (without restoring into stats).
|
|
856
1152
|
*/
|
|
@@ -868,10 +1164,12 @@ module.exports = {
|
|
|
868
1164
|
STATE_FILE,
|
|
869
1165
|
ALERTS_FILE,
|
|
870
1166
|
DETECTIONS_FILE,
|
|
1167
|
+
DETECTIONS_FILE_LEGACY,
|
|
871
1168
|
SCAN_STATS_FILE,
|
|
872
1169
|
LAST_DAILY_REPORT_FILE,
|
|
873
1170
|
DAILY_STATS_FILE,
|
|
874
1171
|
TEMPORAL_DETECTIONS_FILE,
|
|
1172
|
+
TEMPORAL_DETECTIONS_FILE_LEGACY,
|
|
875
1173
|
PRIMARY_DAILY_REPORTS_DIR,
|
|
876
1174
|
PRIMARY_ALERTS_DIR,
|
|
877
1175
|
FALLBACK_DAILY_REPORTS_DIR,
|
|
@@ -894,7 +1192,9 @@ module.exports = {
|
|
|
894
1192
|
DAILY_STATS_PERSIST_INTERVAL,
|
|
895
1193
|
ALERTS_MAX_SIZE,
|
|
896
1194
|
MAX_DETECTIONS,
|
|
1195
|
+
MAX_TEMPORAL_DETECTIONS,
|
|
897
1196
|
MAX_DAILY_ALERTS,
|
|
1197
|
+
DETECTION_COMPACT_INTERVAL,
|
|
898
1198
|
|
|
899
1199
|
// Mutable state getters/setters
|
|
900
1200
|
getScanMemoryCache,
|
|
@@ -929,6 +1229,13 @@ module.exports = {
|
|
|
929
1229
|
loadDetections,
|
|
930
1230
|
appendDetection,
|
|
931
1231
|
getDetectionStats,
|
|
1232
|
+
runStateMigrations,
|
|
1233
|
+
// Internal — exported for tests and for the daemon hourly housekeeping.
|
|
1234
|
+
_compactDetectionsJsonl,
|
|
1235
|
+
_compactTemporalDetectionsJsonl,
|
|
1236
|
+
_resetDetectionState,
|
|
1237
|
+
_iterateJsonlSync,
|
|
1238
|
+
_countJsonlLines,
|
|
932
1239
|
loadScanStats,
|
|
933
1240
|
updateScanStats,
|
|
934
1241
|
loadDailyStats,
|