muaddib-scanner 2.11.97 → 2.11.98

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.97",
3
+ "version": "2.11.98",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-06-11T15:01:04.220Z",
3
+ "timestamp": "2026-06-11T15:36:15.399Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -1017,6 +1017,12 @@ async function pollNpm(state, scanQueue, stats) {
1017
1017
 
1018
1018
  const PYPI_USER_AGENT = `${SELF_PACKAGE_NAME} (security-monitor; +https://github.com/DNSZLSK/muaddib)`;
1019
1019
 
1020
+ // A normal 15-min poll is a few dozen events; a changelog_since_serial batch
1021
+ // caps around 50K. A batch this large means we are far behind — worth one
1022
+ // extra changelog_last_serial call to measure the GLOBAL lag (see the global
1023
+ // catch-up protection in pollPyPIChangelog).
1024
+ const PYPI_CATCHUP_PROBE_MIN_EVENTS = 10000;
1025
+
1020
1026
  /**
1021
1027
  * Build an XML-RPC methodCall envelope. PyPI accepts only <int> and <string>
1022
1028
  * params for the methods we use (changelog_last_serial, changelog_since_serial),
@@ -1188,6 +1194,38 @@ async function pollPyPIChangelog(state, scanQueue, stats) {
1188
1194
  return 0;
1189
1195
  }
1190
1196
 
1197
+ // GLOBAL catch-up protection (2026-06-11 incident): the per-batch gap
1198
+ // below is bounded by one changelog_since_serial response (~50K events,
1199
+ // observed 33-43K), so it can NEVER exceed PYPI_CATCHUP_MAX (100K) — a
1200
+ // poller resumed from an ancient serial (a test-fixture serial leaked
1201
+ // into prod state) replayed YEARS of history, ~15K ancient packages per
1202
+ // poll, without ever tripping the skip. A full batch is the tell: probe
1203
+ // the registry's current serial and skip to it when the global lag is
1204
+ // beyond the cap. Costs one extra XML-RPC call, and only on batches of at least PYPI_CATCHUP_PROBE_MIN_EVENTS events.
1205
+ if (events.length >= PYPI_CATCHUP_PROBE_MIN_EVENTS) {
1206
+ await acquireRegistrySlot();
1207
+ let curBody;
1208
+ try {
1209
+ curBody = await _deps.httpsPost(
1210
+ PYPI_XMLRPC_URL,
1211
+ buildXmlRpcCall('changelog_last_serial', []),
1212
+ { 'User-Agent': PYPI_USER_AGENT },
1213
+ 10_000
1214
+ );
1215
+ } finally {
1216
+ releaseRegistrySlot();
1217
+ }
1218
+ const currentSerial = parseXmlRpcInt(curBody);
1219
+ if (currentSerial != null && currentSerial - lastSerial > PYPI_CATCHUP_MAX) {
1220
+ console.warn(`[MONITOR] PyPI changelog globally behind (${currentSerial - lastSerial} serials) — skipping to current ${currentSerial}`);
1221
+ stats.pypiCatchupSkips = (stats.pypiCatchupSkips || 0) + 1;
1222
+ stats.pypiCatchupSkippedEvents = (stats.pypiCatchupSkippedEvents || 0) + (currentSerial - lastSerial);
1223
+ state.pypiLastSerial = currentSerial;
1224
+ savePypiSerial(currentSerial);
1225
+ return 0;
1226
+ }
1227
+ }
1228
+
1191
1229
  // Catch-up protection: if events span more than PYPI_CATCHUP_MAX serials,
1192
1230
  // skip to the latest serial to avoid an avalanche after long downtime.
1193
1231
  const lastEventSerial = events[events.length - 1].serial;
@@ -81,7 +81,10 @@ const ALERTS_LOG_DIR = resolveWritableDir(PRIMARY_ALERTS_DIR, FALLBACK_ALERTS_DI
81
81
 
82
82
  // --- npm seq constants ---
83
83
 
84
- const NPM_SEQ_FILE = path.join(__dirname, '..', '..', 'data', 'npm-seq.json');
84
+ // Env-overridable — same prod-state-pollution guard as PYPI_SERIAL_FILE below
85
+ // (the npm-seq roundtrip test used to unlink the REAL file).
86
+ const NPM_SEQ_FILE = process.env.MUADDIB_NPM_SEQ_FILE
87
+ || path.join(__dirname, '..', '..', 'data', 'npm-seq.json');
85
88
  const CHANGES_STREAM_URL = 'https://replicate.npmjs.com/registry/_changes';
86
89
  const CHANGES_LIMIT = 1000;
87
90
  const CHANGES_CATCHUP_MAX = 500000; // If behind by more than 500k seqs, skip to "now"
@@ -96,7 +99,13 @@ const CHANGES_CATCHUP_MAX = 500000; // If behind by more than 500k seqs, skip to
96
99
  // PYPI_CATCHUP_MAX is the staleness cap: if we are behind by more than this many
97
100
  // serials (≈ days of activity at ~30k events/day in 2026), skip to "now" rather
98
101
  // than fetch a monster batch. Mirrors CHANGES_CATCHUP_MAX for npm.
99
- const PYPI_SERIAL_FILE = path.join(__dirname, '..', '..', 'data', 'pypi-serial.json');
102
+ // Env-overridable (2026-06-11 incident): integration tests exercise the real
103
+ // pollPyPIChangelog with stubbed HTTP but the real savePypiSerial — a fixture
104
+ // serial (1002) leaked into prod state, and the next daemon boot replayed the
105
+ // PyPI changelog from 2011 (~15K ancient packages queued per poll). The test
106
+ // harness points this at a tmp file so NO test can touch prod state.
107
+ const PYPI_SERIAL_FILE = process.env.MUADDIB_PYPI_SERIAL_FILE
108
+ || path.join(__dirname, '..', '..', 'data', 'pypi-serial.json');
100
109
  const PYPI_XMLRPC_URL = 'https://pypi.org/pypi';
101
110
  const PYPI_CATCHUP_MAX = 100000;
102
111
 
@@ -32,14 +32,21 @@ const { appendWorkerMem, sampleIntervalMs } = require('../monitor/worker-mem.js'
32
32
  const everyMs = sampleIntervalMs();
33
33
  let sampler = null;
34
34
  if (everyMs > 0) {
35
- sampler = setInterval(() => {
35
+ const sampleNow = () => {
36
36
  const m = process.memoryUsage();
37
37
  appendWorkerMem({
38
38
  ev: 'sample', tid: threadId,
39
39
  name: scanContext.name, version: scanContext.version,
40
40
  heapUsed: m.heapUsed, external: m.external, arrayBuffers: m.arrayBuffers, rss: m.rss
41
41
  });
42
- }, everyMs);
42
+ };
43
+ // One immediate baseline sample, deterministically: a mostly-synchronous
44
+ // scan (small package, sync AST walks, microtask-only awaits) can starve
45
+ // the event loop for its whole lifetime, so the interval alone may never
46
+ // fire (bit CI on 2026-06-11). The baseline also gives the per-package
47
+ // delta a clean starting point.
48
+ sampleNow();
49
+ sampler = setInterval(sampleNow, everyMs);
43
50
  sampler.unref();
44
51
  }
45
52
  try {