muaddib-scanner 2.11.95 → 2.11.97
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
package/src/monitor/queue.js
CHANGED
|
@@ -19,6 +19,7 @@ const { loadCachedIOCs } = require('../ioc/updater.js');
|
|
|
19
19
|
const { scanPackageJson } = require('../scanner/package.js');
|
|
20
20
|
const { scanShellScripts } = require('../scanner/shell.js');
|
|
21
21
|
const { buildTrainingRecord } = require('../ml/feature-extractor.js');
|
|
22
|
+
const { appendWorkerMem } = require('./worker-mem.js');
|
|
22
23
|
const { appendRecord: appendTrainingRecord, relabelRecords } = require('../ml/jsonl-writer.js');
|
|
23
24
|
|
|
24
25
|
// From ./state.js
|
|
@@ -426,6 +427,17 @@ function runScanInWorker(extractedDir, timeoutMs, scanContext = null, signal = n
|
|
|
426
427
|
const _sc = scanContext || {};
|
|
427
428
|
_liveWorkers.set(worker, { name: _sc.name, version: _sc.version, ecosystem: _sc.ecosystem });
|
|
428
429
|
|
|
430
|
+
// Off-heap attribution (worker-mem.jsonl, gated MUADDIB_WORKER_MEM=1):
|
|
431
|
+
// process RSS around each worker's lifetime. tid captured now — after
|
|
432
|
+
// 'exit' worker.threadId becomes -1.
|
|
433
|
+
const _wmTid = worker.threadId;
|
|
434
|
+
const _wmSpawnedAt = Date.now();
|
|
435
|
+
appendWorkerMem({
|
|
436
|
+
ev: 'spawn', tid: _wmTid,
|
|
437
|
+
name: _sc.name, version: _sc.version, ecosystem: _sc.ecosystem,
|
|
438
|
+
rss: process.memoryUsage().rss
|
|
439
|
+
});
|
|
440
|
+
|
|
429
441
|
let settled = false;
|
|
430
442
|
let timer = null;
|
|
431
443
|
const done = (fn) => {
|
|
@@ -462,9 +474,19 @@ function runScanInWorker(extractedDir, timeoutMs, scanContext = null, signal = n
|
|
|
462
474
|
|
|
463
475
|
worker.on('error', (err) => done(() => reject(err)));
|
|
464
476
|
|
|
465
|
-
worker.on('exit', (code) =>
|
|
466
|
-
|
|
467
|
-
|
|
477
|
+
worker.on('exit', (code) => {
|
|
478
|
+
// 'exit' fires exactly once per worker (even after terminate/error), so
|
|
479
|
+
// it is the one reliable place to close the spawn/exit RSS pair.
|
|
480
|
+
appendWorkerMem({
|
|
481
|
+
ev: 'exit', tid: _wmTid,
|
|
482
|
+
name: _sc.name, version: _sc.version, code,
|
|
483
|
+
durMs: Date.now() - _wmSpawnedAt,
|
|
484
|
+
rss: process.memoryUsage().rss
|
|
485
|
+
});
|
|
486
|
+
done(() => {
|
|
487
|
+
if (code !== 0) reject(new Error(`Worker exited with code ${code}`));
|
|
488
|
+
});
|
|
489
|
+
});
|
|
468
490
|
});
|
|
469
491
|
}
|
|
470
492
|
|
package/src/monitor/spill.js
CHANGED
|
@@ -83,9 +83,24 @@ function _writeEntries(file, entries) {
|
|
|
83
83
|
fs.renameSync(tmp, file);
|
|
84
84
|
}
|
|
85
85
|
|
|
86
|
+
// Conservative upper bound on a spilled line's byte size (the SPILL_FIELDS
|
|
87
|
+
// record + ts + newline run ~150-250 bytes). Used for the O(1) stat-gated
|
|
88
|
+
// compaction trigger below — overestimating only makes compaction fire a bit
|
|
89
|
+
// late, never early, so the file ceiling stays ~MUADDIB_SPILL_MAX.
|
|
90
|
+
const SPILL_LINE_BYTES_EST = 256;
|
|
91
|
+
|
|
86
92
|
/**
|
|
87
93
|
* Append evicted queue items to the backlog. Never throws; on write failure the
|
|
88
94
|
* caller's fallback is the pre-spill behavior (drop, ledgered).
|
|
95
|
+
*
|
|
96
|
+
* HOT PATH — runs INSIDE the EMERGENCY memory handler (evictFromScanQueueBulk),
|
|
97
|
+
* so it MUST be append-only and allocation-free beyond the write buffer. The
|
|
98
|
+
* 2026-06-11 freeze was caused by calling _compactBacklog (which reads + parses
|
|
99
|
+
* the WHOLE backlog) on every spill: a large allocation during a reclaim stall
|
|
100
|
+
* that wedged the handler before it could free RSS. Compaction now fires ONLY
|
|
101
|
+
* when a cheap statSync shows the file is genuinely near the cap (normally
|
|
102
|
+
* never during an EMERGENCY — the backlog is far below the byte budget there),
|
|
103
|
+
* and the calm-time drain also keeps it bounded.
|
|
89
104
|
* @param {Array<object>} items evicted scan-queue items
|
|
90
105
|
* @returns {number} how many items were actually persisted
|
|
91
106
|
*/
|
|
@@ -107,7 +122,11 @@ function spillItems(items) {
|
|
|
107
122
|
written++;
|
|
108
123
|
}
|
|
109
124
|
if (buf) fs.appendFileSync(file, buf, 'utf8');
|
|
110
|
-
|
|
125
|
+
// O(1) stat-gated compaction: only read+rewrite the file when it is actually
|
|
126
|
+
// near the cap. NO whole-file read on the normal EMERGENCY spill path.
|
|
127
|
+
let size = 0;
|
|
128
|
+
try { size = fs.statSync(file).size; } catch { /* fresh file */ }
|
|
129
|
+
if (size > _maxEntries() * SPILL_LINE_BYTES_EST) _compactBacklog(file);
|
|
111
130
|
} catch {
|
|
112
131
|
return 0; // degrade to drop-with-ledger at the call site
|
|
113
132
|
}
|
|
package/src/monitor/worker-mem.js
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Per-worker memory instrumentation (off-heap RSS attribution, 2026-06).
|
|
5
|
+
*
|
|
6
|
+
* The EMERGENCY breaker fires on process RSS while the heap sits at ~15% —
|
|
7
|
+
* the driver is off-heap (malloc arenas + tarball Buffers) and mem-trend.jsonl
|
|
8
|
+
* only samples the whole process. This module attributes memory to individual
|
|
9
|
+
* scan workers / packages so the worker_threads → child_process decision can
|
|
10
|
+
* be made on data:
|
|
11
|
+
* H1: RSS stays high AFTER workers die → arenas never returned to the OS
|
|
12
|
+
* H2: RSS peaks only WHILE workers live → concurrent in-flight peak
|
|
13
|
+
*
|
|
14
|
+
* Producers:
|
|
15
|
+
* - queue.js (parent): ev:'spawn' / ev:'exit' around each scan worker,
|
|
16
|
+
* with process-wide RSS (delta attributable per package, noisy but
|
|
17
|
+
* aggregable over 24-48h).
|
|
18
|
+
* - scan-worker.js (worker): ev:'sample' every sampleIntervalMs() with the
|
|
19
|
+
* isolate-local heapUsed/external/arrayBuffers (rss there is process-wide).
|
|
20
|
+
*
|
|
21
|
+
* Same hot-path safety rules as spill.js (2026-06-11 prod-freeze lesson):
|
|
22
|
+
* append-only, stat-gated O(1) rotation, never read the file back, never throw.
|
|
23
|
+
* OFF unless MUADDIB_WORKER_MEM=1 (staged rollout, same pattern as
|
|
24
|
+
* MUADDIB_WORKER_MAX_OLD_MB) so tests and CLI runs never touch data/.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
const fs = require('fs');
|
|
28
|
+
const path = require('path');
|
|
29
|
+
|
|
30
|
+
const DEFAULT_FILE = path.join(__dirname, '..', '..', 'data', 'worker-mem.jsonl');
|
|
31
|
+
const DEFAULT_MAX_MB = 64; // rotate past 64MB (file + .1 = 128MB worst case, ~2.5 days at concurrency 8)
|
|
32
|
+
const DEFAULT_SAMPLE_MS = 10000; // per-worker isolate sample cadence
|
|
33
|
+
|
|
34
|
+
function workerMemEnabled() {
|
|
35
|
+
return process.env.MUADDIB_WORKER_MEM === '1';
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
function workerMemFile() {
|
|
39
|
+
return process.env.MUADDIB_WORKER_MEM_FILE || DEFAULT_FILE;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/** 0 = sampling disabled (instrumentation off, or explicit MUADDIB_WORKER_MEM_SAMPLE_MS=0). */
|
|
43
|
+
function sampleIntervalMs() {
|
|
44
|
+
if (!workerMemEnabled()) return 0;
|
|
45
|
+
const v = parseInt(process.env.MUADDIB_WORKER_MEM_SAMPLE_MS, 10);
|
|
46
|
+
if (Number.isFinite(v) && v >= 0) return v;
|
|
47
|
+
return DEFAULT_SAMPLE_MS;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Append one instrumentation entry (ts stamped here). Bounded resource
|
|
52
|
+
* (CLAUDE.md §2): stat-gated truncate-rotate, no read-back on the hot path.
|
|
53
|
+
* @returns {boolean} true if a line was written
|
|
54
|
+
*/
|
|
55
|
+
function appendWorkerMem(entry) {
|
|
56
|
+
if (!workerMemEnabled()) return false;
|
|
57
|
+
try {
|
|
58
|
+
const file = workerMemFile();
|
|
59
|
+
const maxMb = parseInt(process.env.MUADDIB_WORKER_MEM_MAX_MB, 10);
|
|
60
|
+
const maxBytes = (Number.isFinite(maxMb) && maxMb > 0 ? maxMb : DEFAULT_MAX_MB) * 1024 * 1024;
|
|
61
|
+
try {
|
|
62
|
+
const st = fs.statSync(file);
|
|
63
|
+
if (st.size > maxBytes) fs.renameSync(file, file + '.1');
|
|
64
|
+
} catch { /* no file yet — fine */ }
|
|
65
|
+
fs.appendFileSync(file, JSON.stringify({ ts: new Date().toISOString(), ...entry }) + '\n', 'utf8');
|
|
66
|
+
return true;
|
|
67
|
+
} catch { /* instrumentation must never crash the daemon or a worker */
|
|
68
|
+
return false;
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
module.exports = { appendWorkerMem, sampleIntervalMs, workerMemEnabled, workerMemFile };
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
* parentPort.postMessage({ type: 'error', message: string })
|
|
13
13
|
*/
|
|
14
14
|
|
|
15
|
-
const { parentPort, workerData } = require('worker_threads');
|
|
15
|
+
const { parentPort, workerData, threadId } = require('worker_threads');
|
|
16
16
|
|
|
17
17
|
if (!parentPort) {
|
|
18
18
|
// Not running as a worker — exit gracefully
|
|
@@ -20,17 +20,38 @@ if (!parentPort) {
|
|
|
20
20
|
}
|
|
21
21
|
|
|
22
22
|
const { run } = require('../index.js');
|
|
23
|
+
const { appendWorkerMem, sampleIntervalMs } = require('../monitor/worker-mem.js');
|
|
23
24
|
|
|
24
25
|
(async () => {
|
|
26
|
+
// Off-heap attribution samples (worker-mem.jsonl): heapUsed/external/
|
|
27
|
+
// arrayBuffers are isolate-local here, rss is process-wide. The samples MUST
|
|
28
|
+
// NOT go through parentPort — the parent settles the scan promise on the
|
|
29
|
+
// first message it receives (queue.js done()), so a sample message would
|
|
30
|
+
// hang the scan forever. unref() so the timer never keeps the worker alive.
|
|
31
|
+
const scanContext = workerData.scanContext || {};
|
|
32
|
+
const everyMs = sampleIntervalMs();
|
|
33
|
+
let sampler = null;
|
|
34
|
+
if (everyMs > 0) {
|
|
35
|
+
sampler = setInterval(() => {
|
|
36
|
+
const m = process.memoryUsage();
|
|
37
|
+
appendWorkerMem({
|
|
38
|
+
ev: 'sample', tid: threadId,
|
|
39
|
+
name: scanContext.name, version: scanContext.version,
|
|
40
|
+
heapUsed: m.heapUsed, external: m.external, arrayBuffers: m.arrayBuffers, rss: m.rss
|
|
41
|
+
});
|
|
42
|
+
}, everyMs);
|
|
43
|
+
sampler.unref();
|
|
44
|
+
}
|
|
25
45
|
try {
|
|
26
46
|
// scanContext (optional) carries monitor-side info that opt-in scanners need
|
|
27
47
|
// (e.g. trusted-dep-diff requires package name + version to query the registry).
|
|
28
48
|
// It is spread INTO the pipeline options, but `_capture: true` always wins so
|
|
29
49
|
// the worker keeps returning the result object — never prints.
|
|
30
|
-
const scanContext = workerData.scanContext || {};
|
|
31
50
|
const result = await run(workerData.extractedDir, { ...scanContext, _capture: true });
|
|
32
51
|
parentPort.postMessage({ type: 'result', data: result });
|
|
33
52
|
} catch (err) {
|
|
34
53
|
parentPort.postMessage({ type: 'error', message: err.message || String(err) });
|
|
54
|
+
} finally {
|
|
55
|
+
if (sampler) clearInterval(sampler);
|
|
35
56
|
}
|
|
36
57
|
})();
|