muaddib-scanner 2.10.19 → 2.10.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.10.19",
3
+ "version": "2.10.20",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -6,11 +6,38 @@ const MAX_RESPONSE_SIZE = 50 * 1024 * 1024; // 50MB (some packages have lots of
6
6
 
7
7
  // Metadata cache: avoids duplicate HTTP requests when multiple temporal modules
8
8
  // fetch the same package metadata within a short window (monitor pipeline).
9
- const _metadataCache = new Map(); // packageName → { data, fetchedAt }
9
+ // Entries with error=true are negative cache (shorter TTL) to avoid retry storms.
10
+ const _metadataCache = new Map(); // packageName → { data, fetchedAt, error? }
10
11
  const _inflightRequests = new Map(); // packageName → Promise
11
12
  const METADATA_CACHE_TTL = 5 * 60 * 1000; // 5 minutes
13
+ const NEGATIVE_CACHE_TTL = 60 * 1000; // 60 seconds for failed fetches
12
14
  const METADATA_CACHE_MAX = 200;
13
15
 
16
+ // HTTP semaphore: limits concurrent requests to npm registry to prevent throttling.
17
+ // With 16 monitor workers × 7 requests/package, uncapped concurrency hits 112 simultaneous
18
+ // requests — well above npm's implicit rate limit. Cap at 10 to stay under the threshold.
19
+ const HTTP_SEMAPHORE_MAX = 10;
20
+ const _httpSemaphore = { active: 0, queue: [] };
21
+
22
+ function _acquireHttpSlot() {
23
+ if (_httpSemaphore.active < HTTP_SEMAPHORE_MAX) {
24
+ _httpSemaphore.active++;
25
+ return Promise.resolve();
26
+ }
27
+ return new Promise(resolve => {
28
+ _httpSemaphore.queue.push(resolve);
29
+ });
30
+ }
31
+
32
+ function _releaseHttpSlot() {
33
+ if (_httpSemaphore.queue.length > 0) {
34
+ const next = _httpSemaphore.queue.shift();
35
+ next(); // Transfers the slot to the next waiter (active count stays the same)
36
+ } else {
37
+ _httpSemaphore.active--;
38
+ }
39
+ }
40
+
14
41
  const LIFECYCLE_SCRIPTS = [
15
42
  'preinstall',
16
43
  'install',
@@ -24,9 +51,30 @@ const LIFECYCLE_SCRIPTS = [
24
51
 
25
52
  /**
26
53
  * Raw HTTP fetch — always hits the npm registry. Use fetchPackageMetadata() instead,
27
- * which adds caching and inflight dedup.
54
+ * which adds caching and inflight dedup on top of this function's semaphore.
55
+ * Acquires an HTTP semaphore slot before making the request.
28
56
  */
29
- function _fetchPackageMetadataImpl(packageName) {
57
+ async function _fetchPackageMetadataImpl(packageName) {
58
+ await _acquireHttpSlot();
59
+ try {
60
+ return await _fetchPackageMetadataHttp(packageName);
61
+ } catch (err) {
62
+ // Negative cache: store failure for 60s to prevent retry storms
63
+ if (_metadataCache.size >= METADATA_CACHE_MAX) {
64
+ const oldestKey = _metadataCache.keys().next().value;
65
+ _metadataCache.delete(oldestKey);
66
+ }
67
+ _metadataCache.set(packageName, { data: null, error: true, fetchedAt: Date.now() });
68
+ throw err;
69
+ } finally {
70
+ _releaseHttpSlot();
71
+ }
72
+ }
73
+
74
+ /**
75
+ * Low-level HTTP request to npm registry. No caching, no semaphore.
76
+ */
77
+ function _fetchPackageMetadataHttp(packageName) {
30
78
  const encodedName = encodeURIComponent(packageName).replace('%40', '@');
31
79
  const url = `${REGISTRY_URL}/${encodedName}`;
32
80
  const urlObj = new URL(url);
@@ -103,16 +151,23 @@ function _fetchPackageMetadataImpl(packageName) {
103
151
  }
104
152
 
105
153
  /**
106
- * Fetch full package metadata from the npm registry with caching and inflight dedup.
107
- * Multiple callers requesting the same package within 5 minutes share one HTTP request.
154
+ * Fetch full package metadata from the npm registry with caching, inflight dedup,
155
+ * negative cache, and HTTP semaphore. Multiple callers requesting the same package
156
+ * within 5 minutes share one HTTP request. Failed fetches are cached for 60s.
108
157
  * @param {string} packageName - npm package name (scoped or unscoped)
109
158
  * @returns {Promise<object>} Full registry metadata (versions, time, maintainers, etc.)
110
159
  */
111
160
  function fetchPackageMetadata(packageName) {
112
- // Check cache first (TTL-based)
161
+ // Check cache first (TTL-based, positive + negative)
113
162
  const cached = _metadataCache.get(packageName);
114
- if (cached && (Date.now() - cached.fetchedAt) < METADATA_CACHE_TTL) {
115
- return Promise.resolve(cached.data);
163
+ if (cached) {
164
+ const ttl = cached.error ? NEGATIVE_CACHE_TTL : METADATA_CACHE_TTL;
165
+ if ((Date.now() - cached.fetchedAt) < ttl) {
166
+ if (cached.error) {
167
+ return Promise.reject(new Error(`Negative cache hit for ${packageName} (failed ${Math.round((Date.now() - cached.fetchedAt) / 1000)}s ago)`));
168
+ }
169
+ return Promise.resolve(cached.data);
170
+ }
116
171
  }
117
172
 
118
173
  // Dedup inflight requests — if the same package is already being fetched, reuse that Promise
@@ -128,11 +183,13 @@ function fetchPackageMetadata(packageName) {
128
183
  }
129
184
 
130
185
  /**
131
- * Clear the metadata cache. Exported for tests and monitor reset.
186
+ * Clear the metadata cache and reset semaphore. Exported for tests and monitor reset.
132
187
  */
133
188
  function clearMetadataCache() {
134
189
  _metadataCache.clear();
135
190
  _inflightRequests.clear();
191
+ _httpSemaphore.active = 0;
192
+ _httpSemaphore.queue.length = 0;
136
193
  }
137
194
 
138
195
  /**
@@ -308,6 +365,9 @@ module.exports = {
308
365
  // Exposed for tests only
309
366
  _metadataCache,
310
367
  _inflightRequests,
368
+ _httpSemaphore,
311
369
  METADATA_CACHE_TTL,
312
- METADATA_CACHE_MAX
370
+ METADATA_CACHE_MAX,
371
+ NEGATIVE_CACHE_TTL,
372
+ HTTP_SEMAPHORE_MAX
313
373
  };
package/src/utils.js CHANGED
@@ -28,8 +28,10 @@ let _filesCapped = false;
28
28
  * File content cache — read each file once, reused across all scanners in a single scan.
29
29
  * Key = absolute file path, Value = file content string.
30
30
  * Cleared between scans via clearFileListCache().
31
+ * Capped at 500 entries to prevent OOM during evaluate (200 packages sequential).
31
32
  */
32
33
  const _fileContentCache = new Map();
34
+ const _FILE_CONTENT_CACHE_MAX = 500;
33
35
 
34
36
  function setExtraExcludes(dirs, scanRoot) {
35
37
  _extraExcludedDirs = Array.isArray(dirs) ? dirs : [];
@@ -339,7 +341,8 @@ function forEachSafeFile(files, callback) {
339
341
  content = fs.readFileSync(file, 'utf8');
340
342
  } catch { continue; }
341
343
 
342
- // Cache for subsequent scanners
344
+ // Cache for subsequent scanners (evict all once the cap is reached to prevent OOM in evaluate loops)
345
+ if (_fileContentCache.size >= _FILE_CONTENT_CACHE_MAX) _fileContentCache.clear();
343
346
  _fileContentCache.set(file, content);
344
347
  callback(file, content);
345
348
  }