muaddib-scanner 2.10.18 → 2.10.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/scanner/npm-registry.js +17 -3
- package/src/temporal-analysis.js +119 -6
- package/src/utils.js +4 -1
package/package.json
CHANGED
|
@@ -79,9 +79,23 @@ async function getPackageMetadata(packageName) {
|
|
|
79
79
|
// Validate package name before building URL
|
|
80
80
|
if (!NPM_PACKAGE_REGEX.test(packageName)) return null;
|
|
81
81
|
|
|
82
|
-
// 1. Registry metadata
|
|
83
|
-
|
|
84
|
-
|
|
82
|
+
// 1. Registry metadata — read from temporal-analysis cache if warm (monitor pipeline
|
|
83
|
+
// pre-fetches metadata for temporal checks). Only reads the Map, never fires HTTP.
|
|
84
|
+
// Falls back to own fetchWithRetry (with retries + 429 handling) on cache miss.
|
|
85
|
+
let meta = null;
|
|
86
|
+
try {
|
|
87
|
+
const { _metadataCache, METADATA_CACHE_TTL } = require('../temporal-analysis.js');
|
|
88
|
+
const cached = _metadataCache.get(packageName);
|
|
89
|
+
if (cached && (Date.now() - cached.fetchedAt) < METADATA_CACHE_TTL) {
|
|
90
|
+
meta = cached.data;
|
|
91
|
+
}
|
|
92
|
+
} catch {
|
|
93
|
+
// temporal-analysis not available — fall through to fetchWithRetry
|
|
94
|
+
}
|
|
95
|
+
if (!meta) {
|
|
96
|
+
const registryUrl = REGISTRY_URL + '/' + encodeURIComponent(packageName);
|
|
97
|
+
meta = await fetchWithRetry(registryUrl);
|
|
98
|
+
}
|
|
85
99
|
if (!meta) return null;
|
|
86
100
|
|
|
87
101
|
const createdAt = meta.time?.created || null;
|
package/src/temporal-analysis.js
CHANGED
|
@@ -4,6 +4,40 @@ const REGISTRY_URL = 'https://registry.npmjs.org';
|
|
|
4
4
|
const TIMEOUT_MS = 10_000;
|
|
5
5
|
const MAX_RESPONSE_SIZE = 50 * 1024 * 1024; // 50MB (some packages have lots of versions)
|
|
6
6
|
|
|
7
|
+
// Shared registry-metadata cache for the temporal modules: when several of them
// ask for the same package within a short window (monitor pipeline), only one
// HTTP request is made. A failed fetch is remembered as an entry with error=true
// (negative cache) under a shorter TTL, so failures do not cause retry storms.
const METADATA_CACHE_MAX = 200; // maximum number of cached packages
const METADATA_CACHE_TTL = 300_000; // 5 minutes, in milliseconds
const NEGATIVE_CACHE_TTL = 60_000; // 60 seconds, in milliseconds (failed fetches)
const _inflightRequests = new Map(); // packageName → Promise
const _metadataCache = new Map(); // packageName → { data, fetchedAt, error? }
|
|
15
|
+
|
|
16
|
+
// Counting semaphore that caps simultaneous requests to the npm registry.
// 16 monitor workers × 7 requests per package would otherwise mean up to 112
// requests in flight at once, well above npm's implicit rate limit; 10 keeps
// the scanner under that threshold.
// `active` is the number of slots currently held; `queue` holds the resolver
// functions of callers waiting for a slot.
const _httpSemaphore = { queue: [], active: 0 };
const HTTP_SEMAPHORE_MAX = 10;
|
|
21
|
+
|
|
22
|
+
/**
 * Reserve one HTTP semaphore slot.
 * When fewer than HTTP_SEMAPHORE_MAX slots are held, the slot is taken
 * immediately. Otherwise the caller's resolver is appended to the wait queue
 * and the returned promise stays pending until that resolver is invoked.
 * @returns {Promise<void>} Resolves once the caller owns a slot.
 */
function _acquireHttpSlot() {
  const isSaturated = _httpSemaphore.active >= HTTP_SEMAPHORE_MAX;
  if (isSaturated) {
    // No free slot: park the resolver so a later release can wake this caller.
    return new Promise((wake) => {
      _httpSemaphore.queue.push(wake);
    });
  }
  _httpSemaphore.active += 1;
  return Promise.resolve();
}
|
|
31
|
+
|
|
32
|
+
/**
 * Give back one HTTP semaphore slot.
 * If a caller is waiting, the slot is handed directly to the oldest waiter and
 * the active count stays the same. Otherwise the active count is decremented.
 * The decrement is clamped at zero: a reset may zero the counter while requests
 * are still in flight, and their late releases must not drive it negative,
 * because a negative count would let later acquires exceed the cap.
 * @returns {void}
 */
function _releaseHttpSlot() {
  if (_httpSemaphore.queue.length > 0) {
    const next = _httpSemaphore.queue.shift();
    next(); // Transfers the slot to the next waiter (active count stays the same)
    return;
  }
  if (_httpSemaphore.active > 0) {
    _httpSemaphore.active--;
  }
}
|
|
40
|
+
|
|
7
41
|
const LIFECYCLE_SCRIPTS = [
|
|
8
42
|
'preinstall',
|
|
9
43
|
'install',
|
|
@@ -16,11 +50,31 @@ const LIFECYCLE_SCRIPTS = [
|
|
|
16
50
|
];
|
|
17
51
|
|
|
18
52
|
/**
 * Fetch package metadata over HTTP while holding an HTTP semaphore slot.
 * This always issues a registry request; callers should normally go through
 * fetchPackageMetadata(), which checks the cache and deduplicates in-flight
 * requests before delegating here.
 * On failure a negative cache entry ({ data: null, error: true, fetchedAt })
 * is recorded for the package and the original error is rethrown.
 * @param {string} packageName - npm package name
 * @returns {Promise<object>} Parsed registry metadata
 * @throws Rethrows whatever the underlying HTTP fetch rejected with
 */
async function _fetchPackageMetadataImpl(packageName) {
  await _acquireHttpSlot();
  try {
    return await _fetchPackageMetadataHttp(packageName);
  } catch (err) {
    // Negative cache: store failure for 60s to prevent retry storms.
    // Remove any previous entry for this package first. Map.set() on an
    // existing key keeps its old insertion position, so without the delete
    // the refreshed entry would stay first in line for "oldest" eviction,
    // and the size check below would evict an unrelated entry even though
    // the cache is not actually growing.
    _metadataCache.delete(packageName);
    if (_metadataCache.size >= METADATA_CACHE_MAX) {
      const oldestKey = _metadataCache.keys().next().value;
      _metadataCache.delete(oldestKey);
    }
    _metadataCache.set(packageName, { data: null, error: true, fetchedAt: Date.now() });
    throw err;
  } finally {
    // Always give the slot back, on success and on failure.
    _releaseHttpSlot();
  }
}
|
|
73
|
+
|
|
74
|
+
/**
|
|
75
|
+
* Low-level HTTP request to npm registry. No caching, no semaphore.
|
|
76
|
+
*/
|
|
77
|
+
function _fetchPackageMetadataHttp(packageName) {
|
|
24
78
|
const encodedName = encodeURIComponent(packageName).replace('%40', '@');
|
|
25
79
|
const url = `${REGISTRY_URL}/${encodedName}`;
|
|
26
80
|
const urlObj = new URL(url);
|
|
@@ -68,7 +122,15 @@ function fetchPackageMetadata(packageName) {
|
|
|
68
122
|
res.on('end', () => {
|
|
69
123
|
if (destroyed) return;
|
|
70
124
|
try {
|
|
71
|
-
|
|
125
|
+
const parsed = JSON.parse(data);
|
|
126
|
+
// Store in cache on successful fetch
|
|
127
|
+
if (_metadataCache.size >= METADATA_CACHE_MAX) {
|
|
128
|
+
// Evict oldest entry
|
|
129
|
+
const oldestKey = _metadataCache.keys().next().value;
|
|
130
|
+
_metadataCache.delete(oldestKey);
|
|
131
|
+
}
|
|
132
|
+
_metadataCache.set(packageName, { data: parsed, fetchedAt: Date.now() });
|
|
133
|
+
resolve(parsed);
|
|
72
134
|
} catch (e) {
|
|
73
135
|
reject(new Error(`Invalid JSON from registry for ${packageName}: ${e.message}`));
|
|
74
136
|
}
|
|
@@ -88,6 +150,48 @@ function fetchPackageMetadata(packageName) {
|
|
|
88
150
|
});
|
|
89
151
|
}
|
|
90
152
|
|
|
153
|
+
/**
 * Fetch full package metadata from the npm registry with caching, inflight dedup,
 * negative cache, and HTTP semaphore. Callers requesting the same package while a
 * cache entry is fresh (METADATA_CACHE_TTL) get the cached data; callers arriving
 * while a request is in flight share that request's promise. Failed fetches are
 * remembered for NEGATIVE_CACHE_TTL and rejected without a new request.
 * @param {string} packageName - npm package name (scoped or unscoped)
 * @returns {Promise<object>} Full registry metadata (versions, time, maintainers, etc.)
 *   Rejects on a negative cache hit or when the underlying fetch fails.
 */
function fetchPackageMetadata(packageName) {
  // Check cache first (TTL-based, positive + negative)
  const cached = _metadataCache.get(packageName);
  if (cached) {
    const ageMs = Date.now() - cached.fetchedAt;
    const ttl = cached.error ? NEGATIVE_CACHE_TTL : METADATA_CACHE_TTL;
    if (ageMs < ttl) {
      if (cached.error) {
        return Promise.reject(new Error(`Negative cache hit for ${packageName} (failed ${Math.round(ageMs / 1000)}s ago)`));
      }
      return Promise.resolve(cached.data);
    }
  }

  // Dedup inflight requests — if the same package is already being fetched, reuse that Promise
  const inflight = _inflightRequests.get(packageName);
  if (inflight) {
    return inflight;
  }

  const promise = _fetchPackageMetadataImpl(packageName).finally(() => {
    // Only remove our own entry. If the in-flight map was cleared and a newer
    // request for the same package was registered meanwhile, deleting by key
    // alone would drop that newer entry and break dedup for it.
    if (_inflightRequests.get(packageName) === promise) {
      _inflightRequests.delete(packageName);
    }
  });
  _inflightRequests.set(packageName, promise);
  return promise;
}
|
|
184
|
+
|
|
185
|
+
/**
 * Clear the metadata cache and reset semaphore. Exported for tests and monitor reset.
 * Callers still waiting for an HTTP slot are woken rather than discarded:
 * dropping their resolvers would leave their promises pending forever. Each
 * woken waiter now holds a slot, so the active count is set to the number of
 * waiters woken (0 when nobody was queued). Note that if more callers were
 * queued than the cap allows, they all proceed at once after the reset.
 * @returns {void}
 */
function clearMetadataCache() {
  _metadataCache.clear();
  _inflightRequests.clear();
  // splice(0) empties the queue in place, so the exported array keeps its identity.
  const waiters = _httpSemaphore.queue.splice(0);
  _httpSemaphore.active = waiters.length;
  for (const wake of waiters) {
    wake();
  }
}
|
|
194
|
+
|
|
91
195
|
/**
|
|
92
196
|
* Extract lifecycle scripts from a package.json object.
|
|
93
197
|
* @param {object} packageJson - A package.json object (or a version entry from registry metadata)
|
|
@@ -253,8 +357,17 @@ async function detectSuddenLifecycleChange(packageName) {
|
|
|
253
357
|
|
|
254
358
|
module.exports = {
|
|
255
359
|
fetchPackageMetadata,
|
|
360
|
+
clearMetadataCache,
|
|
256
361
|
getLifecycleScripts,
|
|
257
362
|
compareLifecycleScripts,
|
|
258
363
|
getLatestVersions,
|
|
259
|
-
detectSuddenLifecycleChange
|
|
364
|
+
detectSuddenLifecycleChange,
|
|
365
|
+
// Exposed for tests only
|
|
366
|
+
_metadataCache,
|
|
367
|
+
_inflightRequests,
|
|
368
|
+
_httpSemaphore,
|
|
369
|
+
METADATA_CACHE_TTL,
|
|
370
|
+
METADATA_CACHE_MAX,
|
|
371
|
+
NEGATIVE_CACHE_TTL,
|
|
372
|
+
HTTP_SEMAPHORE_MAX
|
|
260
373
|
};
|
package/src/utils.js
CHANGED
|
@@ -28,8 +28,10 @@ let _filesCapped = false;
|
|
|
28
28
|
* File content cache — read each file once, reused across all scanners in a single scan.
|
|
29
29
|
* Key = absolute file path, Value = file content string.
|
|
30
30
|
* Cleared between scans via clearFileListCache().
|
|
31
|
+
* Capped at 500 entries to prevent OOM during evaluate (200 packages sequential).
|
|
31
32
|
*/
|
|
32
33
|
const _fileContentCache = new Map();
|
|
34
|
+
const _FILE_CONTENT_CACHE_MAX = 500;
|
|
33
35
|
|
|
34
36
|
function setExtraExcludes(dirs, scanRoot) {
|
|
35
37
|
_extraExcludedDirs = Array.isArray(dirs) ? dirs : [];
|
|
@@ -339,7 +341,8 @@ function forEachSafeFile(files, callback) {
|
|
|
339
341
|
content = fs.readFileSync(file, 'utf8');
|
|
340
342
|
} catch { continue; }
|
|
341
343
|
|
|
342
|
-
// Cache for subsequent scanners
|
|
344
|
+
// Cache for subsequent scanners (evict all if over cap to prevent OOM in evaluate loops)
|
|
345
|
+
if (_fileContentCache.size >= _FILE_CONTENT_CACHE_MAX) _fileContentCache.clear();
|
|
343
346
|
_fileContentCache.set(file, content);
|
|
344
347
|
callback(file, content);
|
|
345
348
|
}
|