muaddib-scanner 2.11.98 → 2.11.99
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Heavy-lane semaphore (C2, 2026-06-11) — bound the daemon's RSS by limiting
|
|
5
|
+
* how many MEMORY-heavy static scans run concurrently.
|
|
6
|
+
*
|
|
7
|
+
* Measured (worker-mem.jsonl, n=461 workers): per-worker isolate heap peaks
|
|
8
|
+
* are BIMODAL — p50 = 12MB, but 12.6% of scans jump straight to 0.9-2.1GB
|
|
9
|
+
* (giant minified JS bundles; the AST cache accumulates across every parsable
|
|
10
|
+
* file, executor.js only skips files > getMaxFileSize() individually). With
|
|
11
|
+
* 8 concurrent workers a handful of heavies coincide → process RSS > the
|
|
12
|
+
* 8.5GB breaker → EMERGENCY. The heavies are identifiable BEFORE the worker
|
|
13
|
+
* spawns (total parsable-JS bytes on disk), so instead of killing them (a
|
|
14
|
+
* 768MB worker cap would cost 12% of coverage) we serialize them: at most
|
|
15
|
+
* MUADDIB_HEAVY_SCAN_MAX run at once, lights are NEVER blocked.
|
|
16
|
+
* Worst-case RSS ≈ baseline 2GB + 2×2GB heavies + N×12MB lights ≈ 5-6GB.
|
|
17
|
+
*
|
|
18
|
+
* Same {active, queue[]} semaphore pattern as src/shared/http-limiter.js and
|
|
19
|
+
* the sandbox slots (src/sandbox/index.js), plus two extensions those never
|
|
20
|
+
* needed: an abort-aware acquire and a wait-timeout. Both MUST remove their
|
|
21
|
+
* waiter from the queue on the way out — a release would otherwise hand the
|
|
22
|
+
* slot to a dead waiter and leak it permanently.
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
// Max number of HEAVY_LANE_WAIT_TIMEOUT requeues before an item's final pass
|
|
26
|
+
// runs without the wait bound (abort-aware only, still bounded by the outer
|
|
27
|
+
// SCAN_TIMEOUT_MS). Guarantees an item cannot loop in the queue forever.
|
|
28
|
+
const HEAVY_REQUEUE_MAX = 3;
|
|
29
|
+
|
|
30
|
+
// Env knobs (read at call time so tests can flip them around resetHeavyLane()):
|
|
31
|
+
// - MUADDIB_HEAVY_SCAN_MAX: concurrent heavy scans (default 2, 0 = lane off)
|
|
32
|
+
// - MUADDIB_HEAVY_SCAN_BYTES: heavy threshold on total parsable-JS bytes.
|
|
33
|
+
// Default 3 MiB — the measured distribution has a HOLE between light
|
|
34
|
+
// (≤12MB heap ⇔ <~1MB JS) and heavy (≥512MB heap ⇔ ≥~8MB JS); 3 MiB sits
|
|
35
|
+
// in the hole with ~3× margin both ways. A false-heavy costs a short wait;
|
|
36
|
+
// a false-light risks an EMERGENCY — hence the deliberately low default.
|
|
37
|
+
// - MUADDIB_HEAVY_WAIT_MAX_MS: wait bound before requeue (default 120s —
|
|
38
|
+
// ~2.5 slot services of 45s, leaves >150s of the 300s scan budget).
|
|
39
|
+
/**
 * Concurrency limit of the heavy lane, read from MUADDIB_HEAVY_SCAN_MAX on
 * every call so the value can be flipped at runtime (e.g. by tests).
 *
 * The variable must be a plain non-negative decimal integer (surrounding
 * whitespace and a leading "+" are tolerated). Anything else — unset, empty,
 * negative, fractional ("0.5"), exponent notation ("1e3") or suffixed ("4x")
 * — falls back to the default of 2. A bare parseInt would accept the numeric
 * prefix of such strings: "0.5" would silently become 0 and turn the lane off.
 *
 * @returns {number} max concurrent heavy scans; 0 disables the lane.
 */
function heavyScanMax() {
  const fallback = 2;
  const raw = String(process.env.MUADDIB_HEAVY_SCAN_MAX || '').trim();
  if (!/^\+?\d+$/.test(raw)) return fallback;
  const parsed = Number.parseInt(raw, 10);
  return Number.isSafeInteger(parsed) ? parsed : fallback;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Heavy-lane classification threshold in bytes, read from
 * MUADDIB_HEAVY_SCAN_BYTES on every call.
 *
 * The variable must be a plain positive decimal integer (surrounding
 * whitespace and a leading "+" are tolerated). Anything else — unset, empty,
 * zero, negative, fractional, exponent notation ("1e7") or suffixed ("3MB")
 * — falls back to the default of 3 MiB. A bare parseInt would read "3MB" as a
 * 3-byte threshold and classify practically every package as heavy.
 *
 * @returns {number} threshold on total parsable-JS bytes.
 */
function heavyScanBytesThreshold() {
  const fallback = 3 * 1024 * 1024;
  const raw = String(process.env.MUADDIB_HEAVY_SCAN_BYTES || '').trim();
  if (!/^\+?\d+$/.test(raw)) return fallback;
  const parsed = Number.parseInt(raw, 10);
  return Number.isSafeInteger(parsed) && parsed > 0 ? parsed : fallback;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Wait bound (milliseconds) for a heavy-lane acquire, read from
 * MUADDIB_HEAVY_WAIT_MAX_MS on every call.
 *
 * The variable must be a plain non-negative decimal integer (surrounding
 * whitespace and a leading "+" are tolerated). Anything else — unset, empty,
 * negative, fractional, exponent notation ("1e5") or suffixed ("120s") —
 * falls back to the default of 120000. A bare parseInt would read "1e5" as a
 * 1 ms bound, making every saturated acquire time out at once.
 *
 * @returns {number} wait bound in milliseconds (0 is accepted as-is).
 */
function heavyWaitMaxMs() {
  const fallback = 120000;
  const raw = String(process.env.MUADDIB_HEAVY_WAIT_MAX_MS || '').trim();
  if (!/^\+?\d+$/.test(raw)) return fallback;
  const parsed = Number.parseInt(raw, 10);
  return Number.isSafeInteger(parsed) ? parsed : fallback;
}
|
|
53
|
+
|
|
54
|
+
// Module-wide heavy-lane state: `active` counts the slots currently held and
// `queue` holds the wake-up callbacks of acquirers waiting for one, oldest first.
const _lane = { active: 0, queue: [] };
|
|
55
|
+
|
|
56
|
+
/**
 * Decide whether a measured package belongs in the heavy lane. No I/O.
 *
 * - no measurement at all (null / undefined) → light;
 * - a `truncated` measurement (the bounded walk hit its depth/file caps) →
 *   heavy regardless of the byte count — a package that cannot be measured is
 *   treated defensively;
 * - otherwise heavy when `totalJsBytes` (a missing value counts as 0) reaches
 *   the threshold.
 *
 * @param {{totalJsBytes: number, truncated: boolean}|null} weight
 * @param {number} [thresholdBytes] - defaults to heavyScanBytesThreshold()
 * @returns {boolean} true for the heavy lane, false for the light one
 */
function isHeavyScan(weight, thresholdBytes = heavyScanBytesThreshold()) {
  if (weight) {
    return weight.truncated ? true : (weight.totalJsBytes || 0) >= thresholdBytes;
  }
  return false;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Acquire a heavy-lane slot.
 *
 * Resolves `true` once the caller holds a slot (hand it back with
 * releaseHeavySlot), or `false` immediately when the lane is disabled
 * (MUADDIB_HEAVY_SCAN_MAX=0) — nothing is held then, so nothing must be
 * released. While every slot is taken the caller waits in a FIFO queue.
 * The immediate paths (lane disabled, free slot) do not consult `signal`.
 *
 * @param {Object} [opts]
 * @param {AbortSignal} [opts.signal] - outer scan abort; a queued wait rejects
 *   with err.code='ABORT_ERR'.
 * @param {number} [opts.maxWaitMs] - bound on the queued wait; 0/absent/
 *   non-finite = unbounded. On expiry rejects with
 *   err.code='HEAVY_LANE_WAIT_TIMEOUT' (the caller requeues). Values above the
 *   largest delay a Node timer supports are clamped to that maximum.
 * @returns {Promise<boolean>}
 */
function acquireHeavySlot(opts = {}) {
  // Node coerces any setTimeout delay above this to 1ms, which would turn a
  // very large wait bound into an immediate timeout.
  const MAX_TIMER_DELAY_MS = 2147483647;

  const max = heavyScanMax();
  if (max === 0) return Promise.resolve(false);
  if (_lane.active < max) {
    _lane.active++;
    return Promise.resolve(true);
  }

  // `opts = {}` only covers undefined; also tolerate an explicit null.
  const { signal, maxWaitMs } = opts || {};
  return new Promise((resolve, reject) => {
    let timer = null;

    // Detach the wait-timeout timer and the abort listener, whichever exist.
    const cleanup = () => {
      if (timer) { clearTimeout(timer); timer = null; }
      if (signal) { try { signal.removeEventListener('abort', onAbort); } catch { /* not added */ } }
    };

    // Invoked by the release path once it has shifted this waiter off the queue.
    const waiter = () => {
      cleanup();
      resolve(true); // slot transferred by releaseHeavySlot (active unchanged)
    };

    // Leaving the queue WITHOUT being woken: splice the waiter out, or the
    // next release hands the slot to this dead waiter and leaks it.
    const bail = (err) => {
      const i = _lane.queue.indexOf(waiter);
      if (i === -1) return; // already woken — the release path owns the slot
      _lane.queue.splice(i, 1);
      cleanup();
      reject(err);
    };

    const onAbort = () => {
      const err = new Error('Heavy-lane wait aborted (outer scan timeout)');
      err.code = 'ABORT_ERR';
      bail(err);
    };

    // Push BEFORE wiring abort/timeout: bail() rejects only when it finds the
    // waiter in the queue, so a pre-aborted signal handled before the push
    // would hit that guard and leave the promise pending forever.
    _lane.queue.push(waiter);

    if (signal) {
      if (signal.aborted) { onAbort(); return; }
      signal.addEventListener('abort', onAbort, { once: true });
    }

    if (Number.isFinite(maxWaitMs) && maxWaitMs > 0) {
      // Deliberately NOT unref'd: a pending acquire is active work and must
      // keep the process alive.
      timer = setTimeout(() => {
        const err = new Error(`Heavy-lane slot not acquired within ${maxWaitMs}ms`);
        err.code = 'HEAVY_LANE_WAIT_TIMEOUT';
        bail(err);
      }, Math.min(maxWaitMs, MAX_TIMER_DELAY_MS));
    }
  });
}
|
|
132
|
+
|
|
133
|
+
/**
 * Give back a heavy-lane slot.
 *
 * With waiters queued, the oldest one is woken and inherits the slot, so the
 * active count stays as it is. With nobody waiting, the active count drops by
 * one, never going below zero.
 */
function releaseHeavySlot() {
  if (_lane.queue.length === 0) {
    if (_lane.active > 0) _lane.active -= 1;
    return;
  }
  const wakeOldest = _lane.queue.shift();
  wakeOldest(); // slot handed straight to this waiter
}
|
|
141
|
+
|
|
142
|
+
/**
 * Snapshot of the heavy lane.
 *
 * @returns {{active: number, waiting: number, max: number}} slots held,
 *   queued waiters, and the configured limit as heavyScanMax() reports it at
 *   the time of the call.
 */
function getHeavyLaneState() {
  const { active, queue } = _lane;
  const waiting = queue.length;
  return { active, waiting, max: heavyScanMax() };
}
|
|
145
|
+
|
|
146
|
+
/**
 * Test helper — same role as resetSandboxLimiter in src/sandbox/index.js.
 *
 * Empties the waiter queue in place and zeroes the active count. Waiters
 * dropped this way are neither resolved nor rejected, so call it only when no
 * acquire is still pending.
 */
function resetHeavyLane() {
  _lane.queue.splice(0, _lane.queue.length);
  _lane.active = 0;
}
|
|
151
|
+
|
|
152
|
+
module.exports = {
|
|
153
|
+
acquireHeavySlot,
|
|
154
|
+
releaseHeavySlot,
|
|
155
|
+
isHeavyScan,
|
|
156
|
+
getHeavyLaneState,
|
|
157
|
+
resetHeavyLane,
|
|
158
|
+
heavyScanBytesThreshold,
|
|
159
|
+
heavyWaitMaxMs,
|
|
160
|
+
HEAVY_REQUEUE_MAX
|
|
161
|
+
};
|
package/src/monitor/queue.js
CHANGED
|
@@ -13,13 +13,14 @@ const { Worker } = require('worker_threads');
|
|
|
13
13
|
const { runSandbox, tryAcquireSandboxSlot } = require('../sandbox/index.js');
|
|
14
14
|
const { sendWebhook } = require('../webhook.js');
|
|
15
15
|
const { downloadToFile, extractArchive, sanitizePackageName } = require('../shared/download.js');
|
|
16
|
-
const { MAX_TARBALL_SIZE } = require('../shared/constants.js');
|
|
16
|
+
const { MAX_TARBALL_SIZE, getMaxFileSize } = require('../shared/constants.js');
|
|
17
17
|
const { acquireRegistrySlot, releaseRegistrySlot } = require('../shared/http-limiter.js');
|
|
18
18
|
const { loadCachedIOCs } = require('../ioc/updater.js');
|
|
19
19
|
const { scanPackageJson } = require('../scanner/package.js');
|
|
20
20
|
const { scanShellScripts } = require('../scanner/shell.js');
|
|
21
21
|
const { buildTrainingRecord } = require('../ml/feature-extractor.js');
|
|
22
22
|
const { appendWorkerMem } = require('./worker-mem.js');
|
|
23
|
+
const { acquireHeavySlot, releaseHeavySlot, isHeavyScan, getHeavyLaneState, heavyWaitMaxMs, HEAVY_REQUEUE_MAX } = require('./heavy-lane.js');
|
|
23
24
|
const { appendRecord: appendTrainingRecord, relabelRecords } = require('../ml/jsonl-writer.js');
|
|
24
25
|
|
|
25
26
|
// From ./state.js
|
|
@@ -305,6 +306,60 @@ function countPackageFiles(dir) {
|
|
|
305
306
|
return { fileCountTotal, hasTests };
|
|
306
307
|
}
|
|
307
308
|
|
|
309
|
+
// C2 heavy-lane measurement bounds. Distinct from countPackageFiles (whose
|
|
310
|
+
// depth cap of 5 is an ML-feature contract — do not touch it).
|
|
311
|
+
const JS_WEIGHT_MAX_DEPTH = 8;
|
|
312
|
+
const JS_WEIGHT_MAX_FILES = 2000;
|
|
313
|
+
const JS_WEIGHT_FILE_PATTERN = /\.(?:[cm]?js|[jt]sx?)$/i;
|
|
314
|
+
|
|
315
|
+
/**
 * Measure how much parsable JS a package carries — the signal the heavy-lane
 * classifier works from.
 *
 * Sums the on-disk size of every file whose name matches
 * JS_WEIGHT_FILE_PATTERN, leaving out files larger than getMaxFileSize()
 * (per the note in the walk: the executor skips those, so they never reach the
 * AST). Directories listed in ML_EXCLUDED_DIRS are not entered. Directories
 * that cannot be listed and files that cannot be stat'ed are skipped silently.
 *
 * Registry metadata such as unpackedSize is deliberately not used: it can be
 * absent, so the weight is taken from the extracted files instead.
 *
 * The walk is bounded. Going deeper than JS_WEIGHT_MAX_DEPTH or meeting more
 * than JS_WEIGHT_MAX_FILES matching files stops it and reports
 * `truncated: true` (which isHeavyScan treats as heavy); the byte counts then
 * cover only what was visited before the stop.
 *
 * @param {string} dir - extracted package directory
 * @returns {{ totalJsBytes: number, maxJsFileBytes: number, truncated: boolean }}
 */
function measureJsWeight(dir) {
  const perFileCap = getMaxFileSize();
  const tally = { totalJsBytes: 0, maxJsFileBytes: 0, truncated: false };
  let jsFilesSeen = 0;

  // Directory listing, or null when the directory cannot be read.
  const listEntries = (folder) => {
    try {
      return fs.readdirSync(folder, { withFileTypes: true });
    } catch {
      return null;
    }
  };

  // File size in bytes, or null when the file cannot be stat'ed.
  const sizeOf = (filePath) => {
    try {
      return fs.statSync(filePath).size;
    } catch {
      return null;
    }
  };

  const visit = (folder, depth) => {
    if (tally.truncated) return;
    if (depth > JS_WEIGHT_MAX_DEPTH) {
      tally.truncated = true;
      return;
    }
    const entries = listEntries(folder);
    if (entries === null) return;

    for (const entry of entries) {
      if (tally.truncated) break;
      const entryPath = path.join(folder, entry.name);

      if (entry.isDirectory()) {
        if (!ML_EXCLUDED_DIRS.has(entry.name)) visit(entryPath, depth + 1);
        continue;
      }
      if (!entry.isFile() || !JS_WEIGHT_FILE_PATTERN.test(entry.name)) continue;

      // Every matching file counts toward the cap, even one skipped below.
      jsFilesSeen += 1;
      if (jsFilesSeen > JS_WEIGHT_MAX_FILES) {
        tally.truncated = true;
        break;
      }

      const size = sizeOf(entryPath);
      if (size === null || size > perFileCap) continue; // executor skips these — they never reach the AST
      tally.totalJsBytes += size;
      tally.maxJsFileBytes = Math.max(tally.maxJsFileBytes, size);
    }
  };

  visit(dir, 0);
  return tally;
}
|
|
362
|
+
|
|
308
363
|
/**
|
|
309
364
|
* Pure classifier: is this a prebuilt native-binary platform shard (the kind that
|
|
310
365
|
* hangs the sandbox install and always times out INCONCLUSIVE)? No I/O — the parsed
|
|
@@ -435,6 +490,7 @@ function runScanInWorker(extractedDir, timeoutMs, scanContext = null, signal = n
|
|
|
435
490
|
appendWorkerMem({
|
|
436
491
|
ev: 'spawn', tid: _wmTid,
|
|
437
492
|
name: _sc.name, version: _sc.version, ecosystem: _sc.ecosystem,
|
|
493
|
+
lane: _sc._lane, jsBytes: _sc._jsBytes,
|
|
438
494
|
rss: process.memoryUsage().rss
|
|
439
495
|
});
|
|
440
496
|
|
|
@@ -645,6 +701,18 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
645
701
|
// ML Phase 2a: Count JS files and detect test presence for enriched features
|
|
646
702
|
const { fileCountTotal, hasTests } = countPackageFiles(extractedDir);
|
|
647
703
|
|
|
704
|
+
// C2 heavy-lane classification (see heavy-lane.js header): measured on
|
|
705
|
+
// disk, after extraction — registry metadata is not trustworthy here.
|
|
706
|
+
// Measurement failure falls back to the compressed tarball size
|
|
707
|
+
// (conservative: never silently far under the real JS weight).
|
|
708
|
+
let jsWeight;
|
|
709
|
+
try {
|
|
710
|
+
jsWeight = measureJsWeight(extractedDir);
|
|
711
|
+
} catch {
|
|
712
|
+
jsWeight = { totalJsBytes: fileSize, maxJsFileBytes: 0, truncated: false };
|
|
713
|
+
}
|
|
714
|
+
const lane = isHeavyScan(jsWeight) ? 'heavy' : 'light';
|
|
715
|
+
|
|
648
716
|
// Hoisted before the worker spawn (per-worker 429-storm fix): fetch the npm
|
|
649
717
|
// registry metadata ONCE on the main thread. The shared http-limiter coordinates
|
|
650
718
|
// it and the temporal cache is warm (npm-registry.js reads it first), so only
|
|
@@ -663,6 +731,28 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
663
731
|
}
|
|
664
732
|
}
|
|
665
733
|
|
|
734
|
+
// C2 heavy-lane: serialize the memory-heavy scans. Acquired AFTER the
|
|
735
|
+
// registry fetch above (never hold the slot during network I/O); released
|
|
736
|
+
// in the finally right after the static scan — the slot covers ONLY the
|
|
737
|
+
// worker's lifetime (≤ STATIC_SCAN_TIMEOUT_MS), not the sandbox (which
|
|
738
|
+
// has its own semaphore and runs outside the daemon's heap).
|
|
739
|
+
let heavySlotHeld = false;
|
|
740
|
+
if (lane === 'heavy') {
|
|
741
|
+
stats.heavyScans = (stats.heavyScans || 0) + 1;
|
|
742
|
+
const laneState = getHeavyLaneState();
|
|
743
|
+
if (laneState.max > 0 && laneState.active >= laneState.max) {
|
|
744
|
+
stats.heavyLaneWaits = (stats.heavyLaneWaits || 0) + 1;
|
|
745
|
+
console.log(`[MONITOR] HEAVY_LANE: ${name}@${version} waiting for a slot (${(jsWeight.totalJsBytes / 1024 / 1024).toFixed(1)}MB JS, active=${laneState.active}, waiting=${laneState.waiting})`);
|
|
746
|
+
}
|
|
747
|
+
// After HEAVY_REQUEUE_MAX requeues the final pass waits unbounded
|
|
748
|
+
// (abort-aware only, still under the outer SCAN_TIMEOUT_MS) so an item
|
|
749
|
+
// cannot loop in the queue forever.
|
|
750
|
+
const lastPass = (meta._heavyRetries || 0) >= HEAVY_REQUEUE_MAX;
|
|
751
|
+
const waitStart = Date.now();
|
|
752
|
+
heavySlotHeld = await acquireHeavySlot({ signal, maxWaitMs: lastPass ? 0 : heavyWaitMaxMs() });
|
|
753
|
+
stats.heavyLaneWaitMsTotal = (stats.heavyLaneWaitMsTotal || 0) + (Date.now() - waitStart);
|
|
754
|
+
}
|
|
755
|
+
|
|
666
756
|
let result;
|
|
667
757
|
try {
|
|
668
758
|
// scanContext: feeds monitor-side info (name/version/ecosystem) and the
|
|
@@ -679,7 +769,12 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
679
769
|
// Stage 2: set by processQueueItem when MUADDIB_TRIAGE_MODE=enforce.
|
|
680
770
|
// Defaults to 'full' so any CLI/test caller that bypasses triage gets
|
|
681
771
|
// the full 20-scanner pipeline (unchanged behaviour).
|
|
682
|
-
scanMode: (meta && meta.scanMode) || 'full'
|
|
772
|
+
scanMode: (meta && meta.scanMode) || 'full',
|
|
773
|
+
// C2 observability: lane + JS weight flow into the worker-mem spawn
|
|
774
|
+
// event (runScanInWorker) so lane×heap-peak cross-checks are possible
|
|
775
|
+
// post-rollout (hard criterion: zero 'light' scans peaking >512MB).
|
|
776
|
+
_lane: lane,
|
|
777
|
+
_jsBytes: jsWeight.totalJsBytes
|
|
683
778
|
};
|
|
684
779
|
// Hand the main-thread-fetched metadata to the worker so its processor skips
|
|
685
780
|
// the per-worker getPackageMetadata fetch (429-storm fix). npm only; the key
|
|
@@ -705,6 +800,10 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
705
800
|
return { sandboxResult: null, staticClean: false };
|
|
706
801
|
}
|
|
707
802
|
throw staticErr;
|
|
803
|
+
} finally {
|
|
804
|
+
// Single release point — success, static timeout, EMERGENCY terminate
|
|
805
|
+
// and abort all funnel through here exactly once (heavySlotHeld guard).
|
|
806
|
+
if (heavySlotHeld) { releaseHeavySlot(); heavySlotHeld = false; }
|
|
708
807
|
}
|
|
709
808
|
|
|
710
809
|
// Phase 3 signal — agent-supply-chain lens. Pure observability, no scoring impact.
|
|
@@ -1285,6 +1384,11 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
1285
1384
|
}
|
|
1286
1385
|
}
|
|
1287
1386
|
} catch (err) {
|
|
1387
|
+
// C2 heavy-lane: a wait-timeout is NOT a scan failure — processQueueItem
|
|
1388
|
+
// requeues the item (bounded by HEAVY_REQUEUE_MAX). Re-throw BEFORE any
|
|
1389
|
+
// error accounting: this catch otherwise swallows everything into the
|
|
1390
|
+
// 'scan_error' ledger path and the requeue would never happen.
|
|
1391
|
+
if (err && err.code === 'HEAVY_LANE_WAIT_TIMEOUT') throw err;
|
|
1288
1392
|
recordError(err, stats);
|
|
1289
1393
|
stats.scanned++;
|
|
1290
1394
|
stats.totalTimeMs += Date.now() - startTime;
|
|
@@ -1376,6 +1480,22 @@ async function processQueueItem(item, stats, dailyAlerts, recentlyScanned, downl
|
|
|
1376
1480
|
})
|
|
1377
1481
|
]);
|
|
1378
1482
|
} catch (err) {
|
|
1483
|
+
// C2 heavy-lane: the bounded wait expired while the heavy slots were
|
|
1484
|
+
// saturated (typical under a spill-drain burst). Not a failure — put the
|
|
1485
|
+
// item back at the queue tail (natural backoff) up to HEAVY_REQUEUE_MAX
|
|
1486
|
+
// passes; scanPackage runs the final pass without the wait bound. Note:
|
|
1487
|
+
// _heavyRetries does not survive a spill (spillItems strips non-re-enqueue
|
|
1488
|
+
// fields) — acceptable, the spill drain runs in calm windows anyway.
|
|
1489
|
+
if (err && err.code === 'HEAVY_LANE_WAIT_TIMEOUT') {
|
|
1490
|
+
const decision = computeHeavyRequeue(item);
|
|
1491
|
+
if (decision.requeue) {
|
|
1492
|
+
stats.heavyLaneRequeues = (stats.heavyLaneRequeues || 0) + 1;
|
|
1493
|
+
console.log(`[MONITOR] HEAVY_LANE: requeued ${item.name}@${item.version || '?'} (wait-timeout pass ${decision.retries}/${HEAVY_REQUEUE_MAX})`);
|
|
1494
|
+
enqueueScan(scanQueue, item, stats);
|
|
1495
|
+
return;
|
|
1496
|
+
}
|
|
1497
|
+
// Safety net — should be unreachable (the last pass waits unbounded).
|
|
1498
|
+
}
|
|
1379
1499
|
recordError(err, stats);
|
|
1380
1500
|
console.error(`[MONITOR] Queue error for ${item.name}: ${err.message}`);
|
|
1381
1501
|
// IOC fallback: if scan failed for a known malicious package, send P1 alert.
|
|
@@ -1450,6 +1570,18 @@ function computeWorkersToSpawn(targetConcurrency, activeWorkers, queueLength) {
|
|
|
1450
1570
|
return Math.max(0, Math.min(targetConcurrency - activeWorkers, queueLength));
|
|
1451
1571
|
}
|
|
1452
1572
|
|
|
1573
|
+
/**
 * Requeue decision for an item whose heavy-lane wait timed out.
 *
 * Increments item._heavyRetries in place (a missing counter starts at 0) and
 * reports whether the item should go back on the queue: yes while the
 * incremented counter is still at or below HEAVY_REQUEUE_MAX, no once it has
 * gone past it.
 *
 * @param {Object} item - queue item; its _heavyRetries field is mutated
 * @returns {{requeue: boolean, retries: number}} the decision and the
 *   incremented counter
 */
function computeHeavyRequeue(item) {
  const previous = item._heavyRetries || 0;
  const retries = previous + 1;
  item._heavyRetries = retries;
  const requeue = retries <= HEAVY_REQUEUE_MAX;
  return { requeue, retries };
}
|
|
1584
|
+
|
|
1453
1585
|
// ── RSS-aware worker admission (P1 OOM durable fix) ──
|
|
1454
1586
|
// The pressure breaker is reactive: it stops spawning at HIGH, but the workers already in
|
|
1455
1587
|
// flight overshoot RSS by ~2GB (each isolate + gVisor sandbox ~0.55GB, draining up to
|
|
@@ -1795,7 +1927,10 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
|
|
|
1795
1927
|
registryScripts: item.registryScripts || null,
|
|
1796
1928
|
_cacheTrigger: item._cacheTrigger || null,
|
|
1797
1929
|
fastTrack: item.fastTrack || false,
|
|
1798
|
-
scanMode: effectiveScanMode
|
|
1930
|
+
scanMode: effectiveScanMode,
|
|
1931
|
+
// C2 heavy-lane: pass count set by computeHeavyRequeue — at
|
|
1932
|
+
// HEAVY_REQUEUE_MAX the final pass waits for its slot unbounded.
|
|
1933
|
+
_heavyRetries: item._heavyRetries || 0
|
|
1799
1934
|
}, stats, dailyAlerts, recentlyScanned, downloadsCache, scanQueue, sandboxAvailable, signal);
|
|
1800
1935
|
const sandboxResult = scanResult && scanResult.sandboxResult;
|
|
1801
1936
|
const staticClean = scanResult && scanResult.staticClean;
|
|
@@ -1917,6 +2052,8 @@ module.exports = {
|
|
|
1917
2052
|
isBundledToolingOnly,
|
|
1918
2053
|
recordTrainingSample,
|
|
1919
2054
|
countPackageFiles,
|
|
2055
|
+
measureJsWeight,
|
|
2056
|
+
computeHeavyRequeue,
|
|
1920
2057
|
classifyNativeShard,
|
|
1921
2058
|
shouldSkipSandbox,
|
|
1922
2059
|
runScanInWorker,
|