muaddib-scanner 2.11.101 → 2.11.103

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.101",
3
+ "version": "2.11.103",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-06-11T18:54:05.745Z",
3
+ "timestamp": "2026-06-12T10:16:00.853Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -10,6 +10,30 @@
10
10
  * Without rate limiting, 10 concurrent slots × fast-completing requests = 100+ req/s
11
11
  * bursts that trigger npm 429 responses → exponential backoff → scan times 10s→90s.
12
12
  *
13
+ * Token grants are FIFO and only ever issued against a real token: a waiter that
14
+ * reaches the head of the queue during an exhausted second waits for the next
15
+ * refill instead of proceeding at zero. (The original implementation resolved
16
+ * waiters unconditionally after one refill window — under contention every
17
+ * queued caller passed at once and the effective send rate was the demand, not
18
+ * RATE_LIMIT_PER_SEC. Observed 2026-06-12: ~5 sustained 429/s at 12 workers,
19
+ * insensitive to MUADDIB_REGISTRY_RATE.)
20
+ *
21
+ * 429 backoff escalates: the first 429 pauses all token grants for
22
+ * BACKOFF_BASE_MS; each new 429 arriving after the previous pause expired
23
+ * doubles the pause (capped at BACKOFF_MAX_MS). 429s landing while a pause is
24
+ * already active (the rest of an in-flight burst) extend nothing and do not
25
+ * escalate — one escalation step per pause window. The escalation level resets
26
+ * after a quiet period (no 429 for 2× the last pause + 5× base). After a pause
27
+ * expires, the bucket restarts at half budget (slow start) before resuming the
28
+ * full per-second rate.
29
+ *
30
+ * Caveat: this module is per-thread state. Scan workers (worker_threads) each
31
+ * load their own instance, so the aggregate send rate across the daemon is up
32
+ * to (1 + N workers) × RATE_LIMIT_PER_SEC if every thread is registry-heavy.
33
+ * In practice the registry hot paths (ingestion, centralized metadata) run on
34
+ * the main thread; the escalating backoff is the feedback that bounds each
35
+ * thread when npm signals overload.
36
+ *
13
37
  * Consumers: queue.js (downloadToFile), temporal-analysis.js, npm-registry.js.
14
38
  * NOT covered: api.npmjs.org (different server), replicate.npmjs.com (CouchDB changes stream).
15
39
  */
@@ -19,6 +43,9 @@
19
43
  // Defaults preserve prior behavior (20 in-flight / 30 req/s).
20
44
  const REGISTRY_SEMAPHORE_MAX = Math.max(1, parseInt(process.env.MUADDIB_REGISTRY_CONCURRENCY, 10) || 20);
21
45
  const RATE_LIMIT_PER_SEC = Math.max(1, parseInt(process.env.MUADDIB_REGISTRY_RATE, 10) || 30);
46
+ // Backoff envs exist as test seams + emergency overrides; the defaults are the contract.
47
+ const BACKOFF_BASE_MS = Math.max(10, parseInt(process.env.MUADDIB_REGISTRY_BACKOFF_BASE_MS, 10) || 1000);
48
+ const BACKOFF_MAX_MS = Math.max(BACKOFF_BASE_MS, parseInt(process.env.MUADDIB_REGISTRY_BACKOFF_MAX_MS, 10) || 60_000);
22
49
 
23
50
  // --- Concurrency semaphore ---
24
51
 
@@ -47,13 +74,32 @@ function releaseRegistrySlot() {
47
74
 
48
75
  // --- Token bucket rate limiter ---
49
76
  // Refills RATE_LIMIT_PER_SEC tokens per second. Each request consumes 1 token.
50
- // If no tokens available, waits until the next refill.
77
+ // When the bucket is exhausted, the caller joins a FIFO queue and is resolved only when a refill
78
+ // actually hands it a token (one shared grant timer, not one timer per waiter).
51
79
 
52
80
  let _tokens = RATE_LIMIT_PER_SEC;
53
81
  let _lastRefill = Date.now();
82
+ const _rateWaiters = [];
83
+ let _grantTimer = null;
84
+
85
+ // 429 backoff escalation state
86
+ let _backoffCount = 0; // total 429s seen (logging, tests, getBackoffCount export)
87
+ let _consecutive429 = 0; // escalation level (one step per expired pause window)
88
+ let _pauseUntil = 0; // token grants suspended until this timestamp
89
+ let _lastPauseMs = 0;
90
+ let _last429At = 0;
54
91
 
55
92
  function _refillTokens() {
56
93
  const now = Date.now();
94
+ if (now < _pauseUntil) return; // backoff pause: no refills, no grants
95
+ if (_pauseUntil > _lastRefill) {
96
+ // First refill after a backoff pause: slow start at half budget so the
97
+ // recovery doesn't reopen with a full-rate burst against a server that
98
+ // just told us to back off.
99
+ _tokens = Math.max(1, Math.floor(RATE_LIMIT_PER_SEC / 2));
100
+ _lastRefill = now;
101
+ return;
102
+ }
57
103
  const elapsed = now - _lastRefill;
58
104
  if (elapsed >= 1000) {
59
105
  _tokens = Math.min(RATE_LIMIT_PER_SEC, _tokens + Math.floor(elapsed / 1000) * RATE_LIMIT_PER_SEC);
@@ -61,48 +107,105 @@ function _refillTokens() {
61
107
  }
62
108
  }
63
109
 
110
+ function _drainRateWaiters() {
111
+ _grantTimer = null;
112
+ _refillTokens();
113
+ while (_tokens > 0 && _rateWaiters.length > 0) {
114
+ _tokens--;
115
+ const grant = _rateWaiters.shift();
116
+ grant();
117
+ }
118
+ if (_rateWaiters.length > 0) _scheduleGrant();
119
+ }
120
+
121
+ function _scheduleGrant() {
122
+ if (_grantTimer) return;
123
+ const now = Date.now();
124
+ // Next grant opportunity: end of the backoff pause, or the next refill boundary.
125
+ const wakeAt = Math.max(_pauseUntil, _lastRefill + 1000);
126
+ _grantTimer = setTimeout(_drainRateWaiters, Math.max(10, wakeAt - now));
127
+ }
128
+
64
129
  function _acquireRateToken() {
65
130
  _refillTokens();
66
- if (_tokens > 0) {
131
+ // Fast path only when nobody is queued — a token freed by a refill belongs
132
+ // to the FIFO head, not to whoever calls next.
133
+ if (_tokens > 0 && _rateWaiters.length === 0) {
67
134
  _tokens--;
68
135
  return Promise.resolve();
69
136
  }
70
- // Wait until next refill
71
- const waitMs = 1000 - (Date.now() - _lastRefill);
72
137
  return new Promise(resolve => {
73
- setTimeout(() => {
74
- _refillTokens();
75
- _tokens = Math.max(0, _tokens - 1);
76
- resolve();
77
- }, Math.max(10, waitMs));
138
+ _rateWaiters.push(resolve);
139
+ _scheduleGrant();
78
140
  });
79
141
  }
80
142
 
81
143
  // --- 429 backoff helper ---
82
- // Call this when a 429 response is received. Drains all tokens to force
83
- // a ~1s pause on subsequent requests (token bucket naturally refills).
84
-
85
- let _backoffCount = 0;
144
+ // Call this when a 429 response is received. Suspends ALL token grants for an
145
+ // escalating pause so every in-flight caller backs off together.
86
146
 
87
147
  function signal429() {
88
- _tokens = 0;
89
- _lastRefill = Date.now() + 1000; // Force 1s pause
148
+ const now = Date.now();
90
149
  _backoffCount++;
91
- if (_backoffCount % 10 === 1) {
92
- console.warn(`[HTTP-LIMITER] 429 rate limited by npm registry (total: ${_backoffCount})`);
150
+ _tokens = 0;
151
+ if (now < _pauseUntil) {
152
+ // Remaining 429s from the same in-flight burst: the pause is already
153
+ // armed, don't escalate once per response.
154
+ _last429At = now;
155
+ return;
93
156
  }
157
+ // Quiet long enough since the last 429 → the incident is over, restart at base.
158
+ const quietResetMs = _lastPauseMs * 2 + BACKOFF_BASE_MS * 5;
159
+ if (_last429At && now - _last429At > quietResetMs) {
160
+ _consecutive429 = 0;
161
+ }
162
+ _consecutive429++;
163
+ const pause = Math.min(BACKOFF_MAX_MS, BACKOFF_BASE_MS * 2 ** (_consecutive429 - 1));
164
+ _lastPauseMs = pause;
165
+ _last429At = now;
166
+ _pauseUntil = now + pause;
167
+ if (_backoffCount % 10 === 1 || pause >= BACKOFF_BASE_MS * 8) {
168
+ console.warn(`[HTTP-LIMITER] 429 rate limited by npm registry (total: ${_backoffCount}, pause ${pause}ms, level ${_consecutive429})`);
169
+ }
170
+ // Re-aim the wake timer at the new pause end (no-op if none is pending).
171
+ if (_rateWaiters.length > 0) _scheduleGrant();
94
172
  }
95
173
 
96
174
  function getBackoffCount() {
97
175
  return _backoffCount;
98
176
  }
99
177
 
178
+ /** Observability seam (tests, debugging): never used for control flow. */
179
+ function getRateLimiterState() {
180
+ return {
181
+ tokens: _tokens,
182
+ pendingWaiters: _rateWaiters.length,
183
+ consecutive429: _consecutive429,
184
+ pauseRemainingMs: Math.max(0, _pauseUntil - Date.now()),
185
+ lastPauseMs: _lastPauseMs,
186
+ backoffCount: _backoffCount
187
+ };
188
+ }
189
+
100
190
  function resetLimiter() {
101
191
  _semaphore.active = 0;
102
192
  _semaphore.queue.length = 0;
103
193
  _tokens = RATE_LIMIT_PER_SEC;
104
194
  _lastRefill = Date.now();
105
195
  _backoffCount = 0;
196
+ _consecutive429 = 0;
197
+ _pauseUntil = 0;
198
+ _lastPauseMs = 0;
199
+ _last429At = 0;
200
+ if (_grantTimer) {
201
+ clearTimeout(_grantTimer);
202
+ _grantTimer = null;
203
+ }
204
+ // Release anything still queued so a reset between tests can never hang the suite.
205
+ while (_rateWaiters.length > 0) {
206
+ const grant = _rateWaiters.shift();
207
+ grant();
208
+ }
106
209
  }
107
210
 
108
211
  function getActiveSemaphore() {
@@ -116,6 +219,7 @@ module.exports = {
116
219
  releaseRegistrySlot,
117
220
  signal429,
118
221
  getBackoffCount,
222
+ getRateLimiterState,
119
223
  resetLimiter,
120
224
  getActiveSemaphore
121
225
  };