muaddib-scanner 2.10.20 → 2.10.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.10.20",
3
+ "version": "2.10.21",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,5 +1,6 @@
1
1
  const { NPM_PACKAGE_REGEX } = require('../shared/constants.js');
2
2
  const { debugLog } = require('../utils.js');
3
+ const { acquireRegistrySlot, releaseRegistrySlot } = require('../shared/http-limiter.js');
3
4
 
4
5
  const REGISTRY_URL = 'https://registry.npmjs.org';
5
6
  const DOWNLOADS_URL = 'https://api.npmjs.org/downloads/point/last-week';
@@ -94,7 +95,12 @@ async function getPackageMetadata(packageName) {
94
95
  }
95
96
  if (!meta) {
96
97
  const registryUrl = REGISTRY_URL + '/' + encodeURIComponent(packageName);
97
- meta = await fetchWithRetry(registryUrl);
98
+ await acquireRegistrySlot();
99
+ try {
100
+ meta = await fetchWithRetry(registryUrl);
101
+ } finally {
102
+ releaseRegistrySlot();
103
+ }
98
104
  }
99
105
  if (!meta) return null;
100
106
 
@@ -121,9 +127,16 @@ async function getPackageMetadata(packageName) {
121
127
  ? SEARCH_URL + '?text=maintainer:' + encodeURIComponent(maintainer) + '&size=1'
122
128
  : null;
123
129
 
130
/**
 * Fetch the maintainer search result while holding a shared registry slot.
 *
 * Resolves to null without touching the limiter when no author URL was built.
 * Otherwise a slot is acquired first and handed back in `finally`, so it is
 * released whether fetchWithRetry resolves or rejects.
 *
 * @returns {Promise<object|null>} whatever fetchWithRetry yields, or null
 */
async function fetchAuthorWithSlot() {
  if (authorUrl) {
    await acquireRegistrySlot();
    try {
      const authorResponse = await fetchWithRetry(authorUrl);
      return authorResponse;
    } finally {
      releaseRegistrySlot();
    }
  }
  return null;
}
136
+
124
137
  const [downloadsData, authorData] = await Promise.all([
125
- fetchWithRetry(downloadsUrl),
126
- authorUrl ? fetchWithRetry(authorUrl) : Promise.resolve(null)
138
+ fetchWithRetry(downloadsUrl), // api.npmjs.org — no semaphore needed
139
+ fetchAuthorWithSlot() // registry.npmjs.org — semaphore protected
127
140
  ]);
128
141
 
129
142
  const weeklyDownloads = downloadsData?.downloads ?? 0;
@@ -0,0 +1,54 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Centralized HTTP concurrency limiter for npm registry requests.
5
+ *
6
+ * With 16 monitor workers × 7+ HTTP requests/package, uncapped concurrency
7
+ * reaches 112+ simultaneous requests — well above npm's implicit rate limit.
8
+ * This module caps ALL registry.npmjs.org requests to a single semaphore
9
+ * so that no more than REGISTRY_SEMAPHORE_MAX requests are in-flight at once.
10
+ *
11
+ * Consumers: temporal-analysis.js, temporal-ast-diff.js, monitor.js (getNpmLatestTarball),
12
+ * npm-registry.js (fetchWithRetry to registry.npmjs.org).
13
+ * NOT covered: api.npmjs.org (different server), replicate.npmjs.com (CouchDB changes stream).
14
+ */
15
+
16
+ const REGISTRY_SEMAPHORE_MAX = 10;
17
+
18
+ const _semaphore = { active: 0, queue: [] };
19
+
20
/**
 * Reserve one slot of the shared registry semaphore.
 *
 * While fewer than REGISTRY_SEMAPHORE_MAX slots are taken, the active counter
 * is bumped and an already-resolved promise is returned. Otherwise the caller
 * is parked: its resolver is appended to the FIFO queue and the promise only
 * resolves once releaseRegistrySlot() hands a slot over.
 *
 * @returns {Promise<void>} resolves when the caller owns a slot
 */
function acquireRegistrySlot() {
  const hasFreeSlot = _semaphore.active < REGISTRY_SEMAPHORE_MAX;
  if (!hasFreeSlot) {
    // No capacity left: wait in line; the counter is not touched here because
    // the releasing caller transfers its slot directly to the waiter.
    return new Promise((wake) => {
      _semaphore.queue.push(wake);
    });
  }
  _semaphore.active += 1;
  return Promise.resolve();
}
29
+
30
/**
 * Give back a slot of the shared registry semaphore.
 *
 * If callers are queued, the oldest one is woken and inherits the slot, so the
 * active counter is left unchanged. Otherwise the counter is decremented.
 *
 * The decrement is clamped at zero: a release that arrives after
 * resetLimiter() has zeroed the counter (e.g. from a request that was still
 * in flight during the reset), or any other unmatched release, must not push
 * the counter negative — a negative count would let more than
 * REGISTRY_SEMAPHORE_MAX requests through afterwards.
 *
 * @returns {void}
 */
function releaseRegistrySlot() {
  if (_semaphore.queue.length > 0) {
    const next = _semaphore.queue.shift();
    next(); // Transfers slot to next waiter (active count stays the same)
    return;
  }
  if (_semaphore.active > 0) {
    _semaphore.active--;
  }
}
38
+
39
/**
 * Put the shared semaphore back into its initial, empty state.
 *
 * The queue array is emptied in place (the same array object is kept, so
 * references obtained through getActiveSemaphore() stay valid) and the active
 * counter is set to zero.
 *
 * Note: queued resolver callbacks are discarded without being invoked, so any
 * promise returned by acquireRegistrySlot() that was still waiting at reset
 * time never settles.
 *
 * @returns {void}
 */
function resetLimiter() {
  _semaphore.queue.splice(0, _semaphore.queue.length);
  _semaphore.active = 0;
}
43
+
44
/**
 * Expose the live semaphore state object.
 *
 * The returned value is the module's own state object, not a copy: it carries
 * the `active` counter and the `queue` array, and mutations made through it
 * affect the limiter directly.
 *
 * @returns {{active: number, queue: Function[]}} the shared semaphore state
 */
function getActiveSemaphore() {
  const liveState = _semaphore;
  return liveState;
}
47
+
48
+ module.exports = {
49
+ REGISTRY_SEMAPHORE_MAX,
50
+ acquireRegistrySlot,
51
+ releaseRegistrySlot,
52
+ resetLimiter,
53
+ getActiveSemaphore
54
+ };
@@ -1,4 +1,5 @@
1
1
  const https = require('https');
2
+ const { acquireRegistrySlot, releaseRegistrySlot, resetLimiter, getActiveSemaphore, REGISTRY_SEMAPHORE_MAX } = require('./shared/http-limiter.js');
2
3
 
3
4
  const REGISTRY_URL = 'https://registry.npmjs.org';
4
5
  const TIMEOUT_MS = 10_000;
@@ -13,31 +14,6 @@ const METADATA_CACHE_TTL = 5 * 60 * 1000; // 5 minutes
13
14
  const NEGATIVE_CACHE_TTL = 60 * 1000; // 60 seconds for failed fetches
14
15
  const METADATA_CACHE_MAX = 200;
15
16
 
16
- // HTTP semaphore: limits concurrent requests to npm registry to prevent throttling.
17
- // With 16 monitor workers × 7 requests/package, uncapped concurrency hits 112 simultaneous
18
- // requests — well above npm's implicit rate limit. Cap at 10 to stay under the threshold.
19
- const HTTP_SEMAPHORE_MAX = 10;
20
- const _httpSemaphore = { active: 0, queue: [] };
21
-
22
- function _acquireHttpSlot() {
23
- if (_httpSemaphore.active < HTTP_SEMAPHORE_MAX) {
24
- _httpSemaphore.active++;
25
- return Promise.resolve();
26
- }
27
- return new Promise(resolve => {
28
- _httpSemaphore.queue.push(resolve);
29
- });
30
- }
31
-
32
- function _releaseHttpSlot() {
33
- if (_httpSemaphore.queue.length > 0) {
34
- const next = _httpSemaphore.queue.shift();
35
- next(); // Transfers the slot to the next waiter (active count stays the same)
36
- } else {
37
- _httpSemaphore.active--;
38
- }
39
- }
40
-
41
17
  const LIFECYCLE_SCRIPTS = [
42
18
  'preinstall',
43
19
  'install',
@@ -52,10 +28,10 @@ const LIFECYCLE_SCRIPTS = [
52
28
  /**
53
29
  * Raw HTTP fetch — always hits the npm registry. Use fetchPackageMetadata() instead,
54
30
  * which adds caching, inflight dedup, and semaphore.
55
- * Acquires an HTTP semaphore slot before making the request.
31
+ * Acquires a shared HTTP semaphore slot before making the request.
56
32
  */
57
33
  async function _fetchPackageMetadataImpl(packageName) {
58
- await _acquireHttpSlot();
34
+ await acquireRegistrySlot();
59
35
  try {
60
36
  return await _fetchPackageMetadataHttp(packageName);
61
37
  } catch (err) {
@@ -67,7 +43,7 @@ async function _fetchPackageMetadataImpl(packageName) {
67
43
  _metadataCache.set(packageName, { data: null, error: true, fetchedAt: Date.now() });
68
44
  throw err;
69
45
  } finally {
70
- _releaseHttpSlot();
46
+ releaseRegistrySlot();
71
47
  }
72
48
  }
73
49
 
@@ -183,13 +159,12 @@ function fetchPackageMetadata(packageName) {
183
159
  }
184
160
 
185
161
  /**
186
- * Clear the metadata cache and reset semaphore. Exported for tests and monitor reset.
162
+ * Clear the metadata cache and reset shared semaphore. Exported for tests and monitor reset.
187
163
  */
188
164
  function clearMetadataCache() {
189
165
  _metadataCache.clear();
190
166
  _inflightRequests.clear();
191
- _httpSemaphore.active = 0;
192
- _httpSemaphore.queue.length = 0;
167
+ resetLimiter();
193
168
  }
194
169
 
195
170
  /**
@@ -365,9 +340,10 @@ module.exports = {
365
340
  // Exposed for tests only
366
341
  _metadataCache,
367
342
  _inflightRequests,
368
- _httpSemaphore,
369
343
  METADATA_CACHE_TTL,
370
344
  METADATA_CACHE_MAX,
371
345
  NEGATIVE_CACHE_TTL,
372
- HTTP_SEMAPHORE_MAX
346
+ // Re-export shared semaphore for backward compat with existing tests
347
+ _httpSemaphore: getActiveSemaphore(),
348
+ HTTP_SEMAPHORE_MAX: REGISTRY_SEMAPHORE_MAX
373
349
  };
@@ -7,6 +7,7 @@ const walk = require('acorn-walk');
7
7
  const { findJsFiles, forEachSafeFile, debugLog } = require('./utils.js');
8
8
  const { fetchPackageMetadata, getLatestVersions } = require('./temporal-analysis.js');
9
9
  const { downloadToFile, extractTarGz, sanitizePackageName } = require('./shared/download.js');
10
+ const { acquireRegistrySlot, releaseRegistrySlot } = require('./shared/http-limiter.js');
10
11
 
11
12
  const { MAX_FILE_SIZE, getMaxFileSize, ACORN_OPTIONS, safeParse } = require('./shared/constants.js');
12
13
 
@@ -36,11 +37,21 @@ const PATTERN_SEVERITY = {
36
37
 
37
38
  /**
38
39
  * Fetch version-specific metadata from npm registry.
40
+ * Acquires a shared HTTP semaphore slot to prevent registry throttling.
39
41
  * @param {string} packageName
40
42
  * @param {string} version
41
43
  * @returns {Promise<object>}
42
44
  */
43
- function fetchVersionMetadata(packageName, version) {
45
// Semaphore-guarded entry point: waits for a shared registry slot, delegates
// the actual HTTP work to _fetchVersionMetadataHttp, and always returns the
// slot afterwards — on success and on rejection alike.
async function fetchVersionMetadata(packageName, version) {
  await acquireRegistrySlot();
  let versionMetadata;
  try {
    versionMetadata = await _fetchVersionMetadataHttp(packageName, version);
  } finally {
    releaseRegistrySlot();
  }
  return versionMetadata;
}
53
+
54
+ function _fetchVersionMetadataHttp(packageName, version) {
44
55
  const encodedName = encodeURIComponent(packageName).replace('%40', '@');
45
56
  const url = `${REGISTRY_URL}/${encodedName}/${encodeURIComponent(version)}`;
46
57
  const urlObj = new URL(url);
@@ -99,7 +110,13 @@ async function fetchPackageTarball(packageName, version) {
99
110
  let extractedDir;
100
111
  try {
101
112
  const tgzPath = path.join(tmpDir, 'package.tar.gz');
102
- await downloadToFile(tarballUrl, tgzPath);
113
+ // Tarball downloads go through the shared semaphore (npm CDN)
114
+ await acquireRegistrySlot();
115
+ try {
116
+ await downloadToFile(tarballUrl, tgzPath);
117
+ } finally {
118
+ releaseRegistrySlot();
119
+ }
103
120
  extractedDir = extractTarGz(tgzPath, tmpDir);
104
121
  } catch (err) {
105
122
  try { fs.rmSync(tmpDir, { recursive: true, force: true }); } catch (e) { debugLog('tmpDir cleanup failed:', e.message); }