muaddib-scanner 2.11.63 → 2.11.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.63",
3
+ "version": "2.11.65",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-06-05T21:19:52.384Z",
3
+ "timestamp": "2026-06-06T16:38:57.648Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -8,7 +8,7 @@ const { setVerboseMode, isSandboxEnabled, isCanaryEnabled, isLlmDetectiveEnabled
8
8
  const { loadState, saveState, loadDailyStats, saveDailyStats, purgeTarballCache, getParisHour, atomicWriteFileSync, saveNpmSeq, ALERTS_FILE, runStateMigrations, loadRecentlyScanned, saveRecentlyScanned } = require('./state.js');
9
9
  const { isTemporalEnabled, isTemporalAstEnabled, isTemporalPublishEnabled, isTemporalMaintainerEnabled } = require('./temporal.js');
10
10
  const { pendingGrouped, flushScopeGroup, sendDailyReport, DAILY_REPORT_HOUR, alertedPackageRules, ALERTED_PACKAGES_MAX: MAX_ALERTED_PACKAGES } = require('./webhook.js');
11
- const { poll } = require('./ingestion.js');
11
+ const { poll, getPollBackoffMs } = require('./ingestion.js');
12
12
  const { ensureWorkers, drainWorkers, getTargetConcurrency, setTargetConcurrency, getActiveWorkers, terminateAllWorkers } = require('./queue.js');
13
13
  const { computeTarget, ADJUST_INTERVAL_MS, BASE_CONCURRENCY } = require('./adaptive-concurrency.js');
14
14
  const { startHealthcheck } = require('./healthcheck.js');
@@ -31,6 +31,120 @@ const QUEUE_STATE_MAX_AGE_MS = 24 * 60 * 60 * 1000; // 24h expiry
31
31
  const MAX_QUEUE_PERSIST_SIZE = 200_000; // Don't persist if queue > 200K items (OOM guard)
32
32
  const MAX_RESTORE_QUEUE_SIZE = 100_000; // Cap restored queue at 100K items
33
33
 
34
+ // ─── Poll-loop watchdog ───
35
+ // The decoupled poll runs on a setInterval guarded by a `pollInProgress` flag.
36
+ // If a poll cycle's awaited promise never settles (e.g. an HTTP response whose
37
+ // body trickles forever, so the socket-inactivity timeout in httpsGet never
38
+ // fires and it only resolves on 'end'), `pollInProgress` would stay `true` and
39
+ // every subsequent tick would silently early-return — wedging ingestion at 0
40
+ // scanned until a manual `systemctl restart`. The watchdog bounds every cycle
41
+ // so the flag is ALWAYS released; shouldSkipPoll() adds a stale-flag backstop
42
+ // for any future hang path that bypasses runPollCycle().
43
+ const POLL_WATCHDOG_MS = Math.max(60_000, parseInt(process.env.MUADDIB_POLL_WATCHDOG_MS, 10) || 300_000);
44
+
45
/**
 * Run ONE poll cycle bounded by a watchdog so the caller's pollInProgress flag
 * can never stay stuck.
 *
 * The poll promise is raced against a timer. If the timer wins, the returned
 * promise REJECTS (does not resolve) with a 'poll watchdog' error, so a caller
 * that wraps this in try/catch/finally can log it and release its flag. The
 * timer is cleared on every settle path, so a fast poll leaves no dangling
 * timer. Losing the race does not cancel the underlying pollFn call; it only
 * releases the caller.
 *
 * @param {*} state - forwarded to pollFn unchanged.
 * @param {*} scanQueue - forwarded to pollFn unchanged.
 * @param {*} stats - forwarded to pollFn unchanged.
 * @param {number} [watchdogMs=POLL_WATCHDOG_MS] - upper bound for one cycle, in ms.
 * @param {Function} [pollFn=poll] - injectable for tests; defaults to the real poll().
 * @returns {Promise<void>}
 */
async function runPollCycle(state, scanQueue, stats, watchdogMs = POLL_WATCHDOG_MS, pollFn = poll) {
  // setTimeout keeps its delay in a signed 32-bit integer: anything larger is
  // replaced by 1ms, which would make the watchdog fire immediately and reject
  // every cycle. Clamp so an oversized setting means "very long", not "instant".
  const MAX_TIMER_DELAY_MS = 2 ** 31 - 1;
  const boundMs = Math.min(watchdogMs, MAX_TIMER_DELAY_MS);

  let timer;
  const watchdog = new Promise((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(`poll watchdog: poll exceeded ${Math.round(boundMs / 1000)}s`)),
      boundMs
    );
  });
  try {
    await Promise.race([pollFn(state, scanQueue, stats), watchdog]);
  } finally {
    clearTimeout(timer);
  }
}
68
+
69
/**
 * Decide whether the poll scheduler should skip this tick, and whether the
 * pollInProgress flag is stale enough to force-reset. Pure — unit-testable
 * without timers.
 *
 * - No cycle in flight: run (skip=false, forceReset=false).
 * - Cycle in flight for at most watchdogMs + interval: skip this tick.
 * - Otherwise the flag is treated as stale: forceReset=true, skip=false.
 *
 * "Otherwise" deliberately includes the case where the elapsed time or the
 * limit is NaN (e.g. pollStartedAt was never recorded): an unknown age must
 * not be able to keep the scheduler skipping forever.
 *
 * @param {boolean} pollInProgress - whether a cycle is currently marked in flight.
 * @param {number} pollStartedAt - epoch ms at which that cycle started.
 * @param {number} now - current epoch ms.
 * @param {number} watchdogMs - per-cycle watchdog bound, in ms.
 * @param {number} interval - scheduler tick interval, in ms.
 * @returns {{skip: boolean, forceReset: boolean}}
 */
function shouldSkipPoll(pollInProgress, pollStartedAt, now, watchdogMs, interval) {
  if (!pollInProgress) return { skip: false, forceReset: false };

  const inFlightMs = now - pollStartedAt;
  const staleAfterMs = watchdogMs + interval;

  // Phrased as "provably within budget ⇒ skip" so that any NaN comparison
  // falls through to the force-reset branch instead of skipping.
  if (inFlightMs <= staleAfterMs) return { skip: true, forceReset: false };
  return { skip: false, forceReset: true };
}
82
+
83
+ // ─── Heap diagnostics (restart root-cause) ───
84
+ // mem-trend shows the main-thread heap balloons to 6-7GB in the worker-starved
85
+ // regime while documented structures sum to <1GB — i.e. ~5GB+ unaccounted. These
86
+ // helpers localise it: a cheap always-on heap-spaces line (retention vs churn)
87
+ // plus an OPT-IN, disk-guarded, one-shot v8 heap snapshot for dominator-tree
88
+ // analysis. Snapshot is OFF unless MUADDIB_HEAPSNAPSHOT_MB is set (writing a
89
+ // multi-GB snapshot blocks the event loop ~10-60s — must be deliberate).
90
+ const HEAPSNAPSHOT_MB = parseInt(process.env.MUADDIB_HEAPSNAPSHOT_MB, 10) || 0; // 0 = disabled
91
+ const HEAPSNAPSHOT_MIN_FREE_GB = Math.max(1, parseInt(process.env.MUADDIB_HEAPSNAPSHOT_MIN_FREE_GB, 10) || 12);
92
+ const HEAPSNAPSHOT_DIR = process.env.MUADDIB_HEAPSNAPSHOT_DIR || path.join(__dirname, '..', '..', 'data');
93
+ let heapSnapshotTaken = false;
94
+
95
/**
 * Pure decision: write a heap snapshot now? Separated from the I/O so it is
 * unit-testable without producing a multi-GB file.
 *
 * Checks run in order; the first one that fails determines `reason`:
 * 'disabled' → 'already-taken' → 'below-threshold' → 'low-disk(<free><<min>GB)'.
 *
 * @param {number} heapUsedMB - current heap usage in MB.
 * @param {number} thresholdMB - snapshot threshold in MB; falsy or <= 0 disables snapshots.
 * @param {boolean} alreadyTaken - whether the one-shot snapshot was already taken.
 * @param {number} freeGB - free disk space in GB (Infinity is accepted and passes the guard).
 * @param {number} minFreeGB - minimum free disk space required, in GB.
 * @returns {{take: boolean, reason: string}}
 */
function shouldSnapshot(heapUsedMB, thresholdMB, alreadyTaken, freeGB, minFreeGB) {
  if (!thresholdMB || thresholdMB <= 0) return { take: false, reason: 'disabled' };
  if (alreadyTaken) return { take: false, reason: 'already-taken' };
  // Negated >= (rather than <) so a NaN heap reading fails closed instead of
  // approving an event-loop-blocking snapshot on garbage input.
  if (!(heapUsedMB >= thresholdMB)) return { take: false, reason: 'below-threshold' };
  if (freeGB < minFreeGB) {
    // Floor, not round: rounding 11.6 up would log the contradictory "12<12GB".
    return { take: false, reason: `low-disk(${Math.floor(freeGB)}<${minFreeGB}GB)` };
  }
  return { take: true, reason: 'ok' };
}
107
+
108
/**
 * Render heap-space statistics as one compact line: `<space_name>=<used MB>`
 * pairs separated by single spaces, with the used size rounded to a whole
 * number of MB. A falsy argument yields an empty string. Pure — unit-testable.
 *
 * Reading the output: old_space high ⇒ retained objects (leak);
 * large_object_space high ⇒ big strings/arrays; new_space high ⇒ allocation churn.
 *
 * @param {Array<{space_name: string, space_used_size: number}>} stats -
 *   entries shaped like those of v8.getHeapSpaceStatistics().
 * @returns {string}
 */
function formatHeapSpaces(stats) {
  const spaces = stats || [];
  const describe = (space) => {
    const usedMB = (space.space_used_size / 1024 / 1024).toFixed(0);
    return `${space.space_name}=${usedMB}`;
  };
  return spaces.map(describe).join(' ');
}
118
+
119
/**
 * Free disk space, in GB, available on the filesystem containing `dir`
 * (available blocks × block size, as reported by fs.statfsSync).
 *
 * Any failure — including fs.statfsSync not existing on older Node — is
 * reported as Infinity so that callers never block on the disk check.
 *
 * @param {string} dir - path on the filesystem to inspect.
 * @returns {number} free space in GB, or Infinity when it cannot be determined.
 */
function getFreeDiskGB(dir) {
  const BYTES_PER_GB = 1024 ** 3;
  try {
    const { bavail, bsize } = fs.statfsSync(dir);
    return (bavail * bsize) / BYTES_PER_GB;
  } catch {
    return Infinity; // statfsSync unavailable (older Node) — don't block on disk
  }
}
127
+
128
/**
 * Opt-in, one-shot, disk-guarded heap snapshot. BLOCKS the event loop while
 * writing (≈ size of the live heap) — only fires when explicitly enabled via
 * HEAPSNAPSHOT_MB and the heap has reached that threshold.
 *
 * The target directory is created before the disk check: getFreeDiskGB()
 * reports Infinity for a path it cannot stat, so a missing directory would
 * otherwise bypass the free-space guard and then fail the write after the
 * one-shot flag had already been spent.
 *
 * @param {number} heapUsedMB - current heap usage in MB.
 * @returns {void}
 */
function maybeHeapSnapshot(heapUsedMB) {
  // Cheap pre-check so the common paths (disabled / already taken / below
  // threshold) do no I/O and log nothing.
  if (!HEAPSNAPSHOT_MB || heapSnapshotTaken || heapUsedMB < HEAPSNAPSHOT_MB) return;

  try {
    fs.mkdirSync(HEAPSNAPSHOT_DIR, { recursive: true });
  } catch (err) {
    // Spend the one-shot here too, so an unusable directory cannot make this
    // retry (and log) on every monitor tick.
    heapSnapshotTaken = true;
    console.error(`[MONITOR] HEAP-SNAPSHOT failed: ${err.message}`);
    return;
  }

  const decision = shouldSnapshot(heapUsedMB, HEAPSNAPSHOT_MB, heapSnapshotTaken, getFreeDiskGB(HEAPSNAPSHOT_DIR), HEAPSNAPSHOT_MIN_FREE_GB);
  if (!decision.take) {
    console.log(`[MONITOR] HEAP-SNAPSHOT skipped: ${decision.reason} (heap=${heapUsedMB}MB)`);
    return;
  }

  heapSnapshotTaken = true; // set BEFORE writing so a failed/slow write can't loop
  const file = path.join(HEAPSNAPSHOT_DIR, `heap-${new Date().toISOString().replace(/[:.]/g, '-')}.heapsnapshot`);
  try {
    console.log(`[MONITOR] HEAP-SNAPSHOT writing (heap=${heapUsedMB}MB) → ${file} — blocks the event loop`);
    v8.writeHeapSnapshot(file);
    console.log(`[MONITOR] HEAP-SNAPSHOT written → ${file} (scp it off + open in Chrome DevTools → dominator tree)`);
  } catch (err) {
    console.error(`[MONITOR] HEAP-SNAPSHOT failed: ${err.message}`);
  }
}
147
+
34
148
  // ─── Memory pressure circuit breaker ───
35
149
  // Graduated response based on V8 heap usage against heap_size_limit.
36
150
  // Threat model: when GC thrashing starts (>90% heap limit), throughput drops to 0
@@ -869,11 +983,27 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
869
983
  // Backpressure: poll() skips when queue >= 30K or memory pressure >= CRITICAL (90%).
870
984
  // Adaptive concurrency adjusts scan throughput to match ingestion rate.
871
985
  let pollInProgress = false;
986
+ let pollStartedAt = 0;
987
+ let backoffUntil = 0;
872
988
  pollIntervalHandle = setInterval(async () => {
873
- if (!running || pollInProgress) return;
989
+ if (!running) return;
990
+ // Backoff window after consecutive total-registry failures. Hoisted out of
991
+ // poll() (it used to `await sleep(backoff)` while holding pollInProgress, up
992
+ // to POLL_MAX_BACKOFF=16min) so the watchdog can stay sized to poll *work*.
993
+ if (Date.now() < backoffUntil) return;
994
+ // Skip if a cycle is already in flight, unless the flag is stale — a
995
+ // backstop for any hang path that bypasses runPollCycle()'s watchdog.
996
+ const { skip, forceReset } = shouldSkipPoll(pollInProgress, pollStartedAt, Date.now(), POLL_WATCHDOG_MS, POLL_INTERVAL);
997
+ if (forceReset) {
998
+ console.warn(`[MONITOR] Poll flag stuck for ${((Date.now() - pollStartedAt) / 1000).toFixed(0)}s — force-resetting`);
999
+ pollInProgress = false;
1000
+ } else if (skip) {
1001
+ return;
1002
+ }
874
1003
  pollInProgress = true;
1004
+ pollStartedAt = Date.now();
875
1005
  try {
876
- await poll(state, scanQueue, stats);
1006
+ await runPollCycle(state, scanQueue, stats);
877
1007
  // Atomicity: persist queue + seq together after each poll
878
1008
  persistQueue(scanQueue, state);
879
1009
  saveNpmSeq(state.npmLastSeq);
@@ -881,6 +1011,14 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
881
1011
  if (scanQueue.length > QUEUE_WARNING_THRESHOLD) {
882
1012
  console.log(`[MONITOR] WARNING: scan queue depth ${scanQueue.length} — processing may be lagging behind ingestion`);
883
1013
  }
1014
+ // Apply hoisted poll backoff (set after consecutive total-registry failures).
1015
+ const backoffMs = getPollBackoffMs();
1016
+ if (backoffMs > 0) {
1017
+ backoffUntil = Date.now() + backoffMs;
1018
+ console.log(`[MONITOR] Poll backoff: skipping ticks for ${(backoffMs / 1000).toFixed(0)}s after consecutive registry failures`);
1019
+ } else {
1020
+ backoffUntil = 0;
1021
+ }
884
1022
  } catch (err) {
885
1023
  console.error('[MONITOR] Poll error (interval):', err.message);
886
1024
  } finally {
@@ -934,6 +1072,11 @@ async function startMonitor(options, stats, dailyAlerts, recentlyScanned, downlo
934
1072
  // P1.0: persist the same sample as a time series for offline leak localisation.
935
1073
  appendMemTrend(currentMem, getActiveWorkers(), scanQueue.length);
936
1074
 
1075
+ // Heap diagnostics (restart root-cause): cheap heap-spaces breakdown
1076
+ // (retention vs churn) + opt-in one-shot snapshot at MUADDIB_HEAPSNAPSHOT_MB.
1077
+ console.log(`[MONITOR] HEAP-SPACES: ${formatHeapSpaces(v8.getHeapSpaceStatistics())}`);
1078
+ maybeHeapSnapshot(Number(heapUsedMB));
1079
+
937
1080
  // Graduated response at HIGH+
938
1081
  if (pressureLevel >= MEMORY_PRESSURE_LEVELS.HIGH) {
939
1082
  handleMemoryPressure(pressureLevel, heapRatio, recentlyScanned, downloadsCache, scanQueue);
@@ -995,6 +1138,11 @@ module.exports = {
995
1138
  recordRestart,
996
1139
  countRecentRestarts,
997
1140
  POLL_INTERVAL,
1141
+ POLL_WATCHDOG_MS,
1142
+ runPollCycle,
1143
+ shouldSkipPoll,
1144
+ shouldSnapshot,
1145
+ formatHeapSpaces,
998
1146
  PROCESS_LOOP_INTERVAL,
999
1147
  QUEUE_WARNING_THRESHOLD,
1000
1148
  QUEUE_PERSIST_INTERVAL,
@@ -38,6 +38,7 @@ const SELF_PACKAGE_NAME = require('../../package.json').name;
38
38
 
39
39
  const POLL_INTERVAL = 60_000;
40
40
  const POLL_MAX_BACKOFF = 960_000; // 16 minutes max backoff
41
+ const MAX_RESPONSE_BYTES = 64 * 1024 * 1024; // OOM guard: cap a single buffered HTTP (JSON/XML metadata) response at 64MB
41
42
 
42
43
  // --- Mutable state ---
43
44
  let consecutivePollErrors = 0;
@@ -48,7 +49,11 @@ let consecutivePollErrors = 0;
48
49
  // pollPyPIChangelog. Kept tiny on purpose — only network I/O lives here.
49
50
  const _deps = {
50
51
  httpsPost: null, // populated below once httpsPost is defined
51
- httpsGet: null // populated below; used by npm pollers so tests can stub
52
+ httpsGet: null, // populated below; used by npm pollers so tests can stub
53
+ // Low-level client (https.get / https.request). Routing through _deps lets a
54
+ // test inject a fake req/res to exercise the absolute-deadline timer without
55
+ // real TLS. Production always uses the real `https` module.
56
+ https
52
57
  };
53
58
 
54
59
  function getConsecutivePollErrors() {
@@ -59,36 +64,71 @@ function setConsecutivePollErrors(val) {
59
64
  consecutivePollErrors = val;
60
65
  }
61
66
 
62
- // --- Utility ---
63
-
64
- function sleep(ms) {
65
- return new Promise((resolve) => setTimeout(resolve, ms));
67
/**
 * How long (ms) the poll scheduler should wait before its next cycle, based on
 * the current count of consecutive total-registry failures.
 *
 * Zero or one failure ⇒ 0 (no backoff). From the second failure on, the wait
 * doubles per failure — POLL_INTERVAL * 2^(n-1) — and is capped at
 * POLL_MAX_BACKOFF. This only reads module state; it never sleeps itself.
 *
 * @returns {number}
 */
function getPollBackoffMs() {
  const failures = consecutivePollErrors;
  if (failures <= 1) return 0;
  const exponentialMs = POLL_INTERVAL * 2 ** (failures - 1);
  return Math.min(exponentialMs, POLL_MAX_BACKOFF);
}
67
80
 
68
81
  // --- HTTP helpers ---
69
82
 
70
/**
 * HTTPS GET that buffers the whole response body and resolves it as a UTF-8
 * string. Follows 301/302 redirects; any other non-2xx status rejects.
 *
 * Three independent bounds protect the caller:
 *  - `timeoutMs`: socket-INACTIVITY timeout (Node's `{ timeout }` option).
 *  - `deadlineMs`: absolute deadline for the WHOLE operation. Redirect hops
 *    inherit the time that is left rather than a fresh budget, so a redirect
 *    chain or loop cannot outlive the deadline.
 *  - MAX_RESPONSE_BYTES: cap on the buffered body size.
 *
 * The low-level client is taken from `_deps.https` so tests can inject a fake.
 *
 * @param {string} url - absolute URL to fetch.
 * @param {number} [timeoutMs=30000] - socket inactivity timeout in ms.
 * @param {number} [deadlineMs] - overall deadline in ms; defaults to
 *   max(2 × timeoutMs, 90000).
 * @returns {Promise<string>} response body decoded as UTF-8.
 */
function httpsGet(url, timeoutMs = 30_000, deadlineMs = Math.max(timeoutMs * 2, 90_000)) {
  return new Promise((resolve, reject) => {
    const startedAt = Date.now();
    let settled = false;
    let req;
    // Absolute deadline. Node's `{ timeout }` option is a socket-INACTIVITY
    // timeout, not an overall deadline: a response whose body trickles forever
    // (heartbeat/keep-alive bytes, or a long-poll feed that never sends 'end')
    // keeps the socket "active", so the inactivity timeout never fires and this
    // promise would never settle. The deadline bounds the WHOLE request+body
    // and destroys the socket so it can't leak.
    const deadline = setTimeout(() => {
      abort(new Error(`Overall deadline (${Math.round(deadlineMs / 1000)}s) exceeded for ${url}`));
    }, deadlineMs);
    // Single settle point: first outcome wins, and the deadline timer is
    // always cleared.
    const done = (err, value) => {
      if (settled) return;
      settled = true;
      clearTimeout(deadline);
      if (err) reject(err); else resolve(value);
    };
    // Settle first, then tear the socket down, so the outcome does not depend
    // on which stream ends up emitting the resulting 'error' event.
    const abort = (err) => {
      done(err);
      if (req) req.destroy(err);
    };
    try {
      req = _deps.https.get(url, { timeout: timeoutMs }, (res) => {
        if (res.statusCode === 301 || res.statusCode === 302) {
          res.resume();
          const location = res.headers.location;
          if (!location) return done(new Error(`Redirect without Location for ${url}`));
          // Location may be relative; resolve it against the current URL.
          let nextUrl;
          try {
            nextUrl = new URL(location, url).toString();
          } catch {
            return done(new Error(`Invalid redirect Location "${location}" for ${url}`));
          }
          // Hand the next hop only the time that is left, keeping the
          // deadline absolute across the whole redirect chain.
          const remainingMs = deadlineMs - (Date.now() - startedAt);
          if (remainingMs <= 0) {
            return done(new Error(`Overall deadline (${Math.round(deadlineMs / 1000)}s) exceeded for ${url}`));
          }
          settled = true;
          clearTimeout(deadline);
          return httpsGet(nextUrl, timeoutMs, remainingMs).then(resolve, reject);
        }
        if (res.statusCode < 200 || res.statusCode >= 300) {
          res.resume();
          return done(new Error(`HTTP ${res.statusCode} for ${url}`));
        }
        const chunks = [];
        let total = 0;
        res.on('data', (chunk) => {
          if (settled) return;
          total += chunk.length;
          if (total > MAX_RESPONSE_BYTES) {
            chunks.length = 0; // release what was buffered so far
            abort(new Error(`Response exceeded ${MAX_RESPONSE_BYTES} bytes for ${url}`));
            return;
          }
          chunks.push(chunk);
        });
        res.on('end', () => done(null, Buffer.concat(chunks).toString('utf8')));
        res.on('error', (err) => done(err));
      });
    } catch (err) {
      // A synchronous throw (e.g. an invalid URL) must still clear the
      // deadline timer instead of leaving it armed for deadlineMs.
      done(err);
      return;
    }
    req.on('error', (err) => done(err));
    req.on('timeout', () => {
      req.destroy(new Error(`Timeout for ${url}`));
    });
  });
}
@@ -97,7 +137,7 @@ function httpsGet(url, timeoutMs = 30_000) {
97
137
  * Minimal HTTPS POST. Used for PyPI XML-RPC; kept inside the ingestion module
98
138
  * (rather than pulled into shared/) because XML-RPC is its only consumer today.
99
139
  */
100
- function httpsPost(url, body, headers = {}, timeoutMs = 30_000) {
140
+ function httpsPost(url, body, headers = {}, timeoutMs = 30_000, deadlineMs = Math.max(timeoutMs * 2, 90_000)) {
101
141
  return new Promise((resolve, reject) => {
102
142
  const u = new URL(url);
103
143
  const options = {
@@ -112,20 +152,40 @@ function httpsPost(url, body, headers = {}, timeoutMs = 30_000) {
112
152
  ...headers
113
153
  }
114
154
  };
115
- const req = https.request(options, (res) => {
155
+ let settled = false;
156
+ let req;
157
+ // Absolute deadline — see httpsGet for the rationale (inactivity timeout is
158
+ // not an overall deadline; a trickling body would hang forever otherwise).
159
+ const deadline = setTimeout(() => {
160
+ if (req) req.destroy(new Error(`Overall deadline (${Math.round(deadlineMs / 1000)}s) exceeded for POST ${url}`));
161
+ }, deadlineMs);
162
+ const done = (err, value) => {
163
+ if (settled) return;
164
+ settled = true;
165
+ clearTimeout(deadline);
166
+ if (err) reject(err); else resolve(value);
167
+ };
168
+ req = _deps.https.request(options, (res) => {
116
169
  if (res.statusCode < 200 || res.statusCode >= 300) {
117
170
  res.resume();
118
- return reject(new Error(`HTTP ${res.statusCode} for POST ${url}`));
171
+ return done(new Error(`HTTP ${res.statusCode} for POST ${url}`));
119
172
  }
120
173
  const chunks = [];
121
- res.on('data', (chunk) => chunks.push(chunk));
122
- res.on('end', () => resolve(Buffer.concat(chunks).toString('utf8')));
123
- res.on('error', reject);
174
+ let total = 0;
175
+ res.on('data', (chunk) => {
176
+ total += chunk.length;
177
+ if (total > MAX_RESPONSE_BYTES) {
178
+ req.destroy(new Error(`Response exceeded ${MAX_RESPONSE_BYTES} bytes for POST ${url}`));
179
+ return;
180
+ }
181
+ chunks.push(chunk);
182
+ });
183
+ res.on('end', () => done(null, Buffer.concat(chunks).toString('utf8')));
184
+ res.on('error', (err) => done(err));
124
185
  });
125
- req.on('error', reject);
186
+ req.on('error', (err) => done(err));
126
187
  req.on('timeout', () => {
127
- req.destroy();
128
- reject(new Error(`Timeout for POST ${url}`));
188
+ req.destroy(new Error(`Timeout for POST ${url}`));
129
189
  });
130
190
  req.write(body);
131
191
  req.end();
@@ -1292,13 +1352,15 @@ async function poll(state, scanQueue, stats) {
1292
1352
  pollPyPI(state, scanQueue, stats)
1293
1353
  ]);
1294
1354
 
1295
- // Track consecutive poll failures for backoff
1355
+ // Track consecutive poll failures. The backoff WAIT is applied by the
1356
+ // scheduler (daemon.js, via getPollBackoffMs()), NOT here: sleeping inside
1357
+ // poll() used to hold pollInProgress for up to POLL_MAX_BACKOFF (16min),
1358
+ // stalling ingestion and forcing the poll watchdog to be sized above the
1359
+ // backoff. poll() stays sleep-free so the watchdog bounds poll *work* only.
1296
1360
  if (npmCount === -1 && pypiCount === -1) {
1297
1361
  consecutivePollErrors++;
1298
1362
  if (consecutivePollErrors > 1) {
1299
- const backoff = Math.min(POLL_INTERVAL * Math.pow(2, consecutivePollErrors - 1), POLL_MAX_BACKOFF);
1300
- console.log(`[MONITOR] Both registries failed (${consecutivePollErrors}x) — backing off ${(backoff / 1000).toFixed(0)}s`);
1301
- await sleep(backoff);
1363
+ console.log(`[MONITOR] Both registries failed (${consecutivePollErrors}x) scheduler will back off ${(getPollBackoffMs() / 1000).toFixed(0)}s`);
1302
1364
  }
1303
1365
  } else {
1304
1366
  consecutivePollErrors = 0;
@@ -1314,10 +1376,12 @@ module.exports = {
1314
1376
  SELF_PACKAGE_NAME,
1315
1377
  POLL_INTERVAL,
1316
1378
  POLL_MAX_BACKOFF,
1379
+ MAX_RESPONSE_BYTES,
1317
1380
 
1318
1381
  // Mutable state
1319
1382
  getConsecutivePollErrors,
1320
1383
  setConsecutivePollErrors,
1384
+ getPollBackoffMs,
1321
1385
 
1322
1386
  // HTTP helpers
1323
1387
  httpsGet,
@@ -13,6 +13,54 @@ const _inflightRequests = new Map(); // packageName → Promise
13
13
  const METADATA_CACHE_TTL = 5 * 60 * 1000; // 5 minutes
14
14
  const NEGATIVE_CACHE_TTL = 60 * 1000; // 60 seconds for failed fetches
15
15
  const METADATA_CACHE_MAX = 200;
16
// Heap-leak fix: how many of the newest versions keep their FULL body in the cached
// packument. Consumers (lifecycle/ast diff, maintainer-change) only diff the latest 2.
const META_KEEP_VERSIONS = Math.max(2, parseInt(process.env.MUADDIB_META_KEEP_VERSIONS, 10) || 3);

/**
 * Shrink a full npm packument to what _metadataCache consumers actually need, so the
 * cache never retains tens-of-MB packuments (packages with thousands of versions).
 *
 * Kept: every root field except the ones listed below, the FULL `time` map, and the
 * FULL bodies of the newest META_KEEP_VERSIONS versions by publish date (plus the
 * version `dist-tags.latest` points at). Every other version is replaced by a truthy
 * placeholder (1) so existence checks (`if (!versions[v]) continue`) and version
 * counts stay correct without the bulk. Dropped: `readme`, `readmeFilename`,
 * `_attachments`.
 *
 * The input is not mutated. Anything that is not an object with an object `versions`
 * field is returned unchanged.
 *
 * The packument is registry-supplied data, so keys are handled defensively: only own
 * properties of `versions` count as versions, and the slim map is built with
 * Object.fromEntries so that a key such as "__proto__" is copied as a plain own
 * property instead of rewriting the map's prototype.
 *
 * @param {object} parsed - full registry packument
 * @returns {object} slimmed packument
 */
function projectPackument(parsed) {
  if (!parsed || typeof parsed !== 'object' || !parsed.versions || typeof parsed.versions !== 'object') {
    return parsed;
  }
  const versions = parsed.versions;
  const time = (parsed.time && typeof parsed.time === 'object') ? parsed.time : {};
  const hasVersion = (v) => Object.prototype.hasOwnProperty.call(versions, v) && Boolean(versions[v]);

  // Newest META_KEEP_VERSIONS versions by publish date. Unparseable dates are
  // ranked oldest so the ordering stays deterministic.
  const dated = [];
  for (const [v, t] of Object.entries(time)) {
    if (v === 'created' || v === 'modified') continue;
    if (!hasVersion(v)) continue;
    const ts = new Date(t).getTime();
    dated.push([v, Number.isNaN(ts) ? -Infinity : ts]);
  }
  dated.sort((a, b) => {
    if (a[1] === b[1]) return 0; // ties keep their original order (stable sort)
    return b[1] > a[1] ? 1 : -1;
  });
  const keep = new Set(dated.slice(0, META_KEEP_VERSIONS).map((entry) => entry[0]));
  const distTags = parsed['dist-tags'];
  if (distTags && distTags.latest && hasVersion(distTags.latest)) keep.add(distTags.latest);

  const slimVersions = Object.fromEntries(
    Object.keys(versions).map((v) => [v, keep.has(v) ? versions[v] : 1]) // 1 = truthy placeholder
  );

  const slim = { ...parsed, versions: slimVersions };
  delete slim.readme;
  delete slim.readmeFilename;
  delete slim._attachments;
  return slim;
}
16
64
 
17
65
  const LIFECYCLE_SCRIPTS = [
18
66
  'preinstall',
@@ -99,14 +147,19 @@ function _fetchPackageMetadataHttp(packageName) {
99
147
  if (destroyed) return;
100
148
  try {
101
149
  const parsed = JSON.parse(data);
150
+ // Heap-leak fix: project to essentials BEFORE caching. A full packument can be
151
+ // tens of MB (packages with thousands of versions); retaining it whole bloated
152
+ // old_space → OOM restarts. Resolve the slim copy too so the full `parsed` is
153
+ // freed immediately (consumers only need time + the latest few version bodies).
154
+ const slim = projectPackument(parsed);
102
155
  // Store in cache on successful fetch
103
156
  if (_metadataCache.size >= METADATA_CACHE_MAX) {
104
157
  // Evict oldest entry
105
158
  const oldestKey = _metadataCache.keys().next().value;
106
159
  _metadataCache.delete(oldestKey);
107
160
  }
108
- _metadataCache.set(packageName, { data: parsed, fetchedAt: Date.now() });
109
- resolve(parsed);
161
+ _metadataCache.set(packageName, { data: slim, fetchedAt: Date.now() });
162
+ resolve(slim);
110
163
  } catch (e) {
111
164
  reject(new Error(`Invalid JSON from registry for ${packageName}: ${e.message}`));
112
165
  }
@@ -333,6 +386,7 @@ async function detectSuddenLifecycleChange(packageName) {
333
386
  module.exports = {
334
387
  fetchPackageMetadata,
335
388
  clearMetadataCache,
389
+ projectPackument,
336
390
  getLifecycleScripts,
337
391
  compareLifecycleScripts,
338
392
  getLatestVersions,