muaddib-scanner 2.10.17 → 2.10.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.10.17",
3
+ "version": "2.10.19",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
package/src/index.js CHANGED
@@ -27,7 +27,7 @@ const { buildModuleGraph, annotateTaintedExports, detectCrossFileFlows, annotate
27
27
  const { computeReachableFiles } = require('./scanner/reachability.js');
28
28
  const { runTemporalAnalyses } = require('./temporal-runner.js');
29
29
  const { formatOutput } = require('./output-formatter.js');
30
- const { setExtraExcludes, getExtraExcludes, Spinner, listInstalledPackages, clearFileListCache, debugLog } = require('./utils.js');
30
+ const { setExtraExcludes, getExtraExcludes, Spinner, listInstalledPackages, clearFileListCache, wasFilesCapped, debugLog } = require('./utils.js');
31
31
  const { SEVERITY_WEIGHTS, RISK_THRESHOLDS, MAX_RISK_SCORE, isPackageLevelThreat, computeGroupScore, applyFPReductions, applyCompoundBoosts, calculateRiskScore, applyConfigOverrides, resetConfigOverrides, getSeverityWeights } = require('./scoring.js');
32
32
  const { resolveConfig } = require('./config.js');
33
33
  const { buildIntentPairs } = require('./intent-graph.js');
@@ -480,6 +480,11 @@ async function run(targetPath, options = {}) {
480
480
  aiConfigThreats
481
481
  ] = scanResult;
482
482
 
483
+ // Emit warning if file count cap was hit
484
+ if (wasFilesCapped()) {
485
+ warnings.push('File count cap reached (500 files) — some files were not scanned. Root-level files were prioritized.');
486
+ }
487
+
483
488
  // Stop spinner now that scanning is complete
484
489
  if (spinner) {
485
490
  spinner.succeed(`[MUADDIB] Scanned ${targetPath}`);
@@ -0,0 +1,31 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Worker thread entry point for static analysis.
5
+ * Runs the full scan pipeline in an isolated V8 isolate.
6
+ * The parent can call worker.terminate() to kill synchronous code
7
+ * (V8::TerminateExecution) — this is the only way to enforce a real
8
+ * timeout on synchronous AST parsing and tree-walking.
9
+ *
10
+ * Communication:
11
+ * parentPort.postMessage({ type: 'result', data: scanResult })
12
+ * parentPort.postMessage({ type: 'error', message: string })
13
+ */
14
+
15
+ const { parentPort, workerData } = require('worker_threads');
16
+
17
+ if (!parentPort) {
18
+ // Not running as a worker — exit gracefully
19
+ process.exit(1);
20
+ }
21
+
22
+ const { run } = require('./index.js');
23
+
24
+ (async () => {
25
+ try {
26
+ const result = await run(workerData.extractedDir, { _capture: true });
27
+ parentPort.postMessage({ type: 'result', data: result });
28
+ } catch (err) {
29
+ parentPort.postMessage({ type: 'error', message: err.message || String(err) });
30
+ }
31
+ })();
@@ -79,9 +79,23 @@ async function getPackageMetadata(packageName) {
79
79
  // Validate package name before building URL
80
80
  if (!NPM_PACKAGE_REGEX.test(packageName)) return null;
81
81
 
82
- // 1. Registry metadata
83
- const registryUrl = REGISTRY_URL + '/' + encodeURIComponent(packageName);
84
- const meta = await fetchWithRetry(registryUrl);
82
+ // 1. Registry metadata — read from temporal-analysis cache if warm (monitor pipeline
83
+ // pre-fetches metadata for temporal checks). Only reads the Map, never fires HTTP.
84
+ // Falls back to own fetchWithRetry (with retries + 429 handling) on cache miss.
85
+ let meta = null;
86
+ try {
87
+ const { _metadataCache, METADATA_CACHE_TTL } = require('../temporal-analysis.js');
88
+ const cached = _metadataCache.get(packageName);
89
+ if (cached && (Date.now() - cached.fetchedAt) < METADATA_CACHE_TTL) {
90
+ meta = cached.data;
91
+ }
92
+ } catch {
93
+ // temporal-analysis not available — fall through to fetchWithRetry
94
+ }
95
+ if (!meta) {
96
+ const registryUrl = REGISTRY_URL + '/' + encodeURIComponent(packageName);
97
+ meta = await fetchWithRetry(registryUrl);
98
+ }
85
99
  if (!meta) return null;
86
100
 
87
101
  const createdAt = meta.time?.created || null;
@@ -2,8 +2,10 @@ const fs = require('fs');
2
2
  const path = require('path');
3
3
  const { findFiles, forEachSafeFile } = require('../utils.js');
4
4
 
5
- // node_modules NOT excluded: detect obfuscated code in dependencies
6
- const OBF_EXCLUDED_DIRS = ['.git', '.muaddib-cache'];
5
+ // node_modules NOT excluded: detect obfuscated code in dependencies.
6
+ // dist/build/out/output excluded: bundled output is always flagged as isPackageOutput (LOW)
7
+ // and costs significant processing time on large SDKs.
8
+ const OBF_EXCLUDED_DIRS = ['.git', '.muaddib-cache', 'dist', 'build', 'out', 'output'];
7
9
 
8
10
  function detectObfuscation(targetPath) {
9
11
  const threats = [];
@@ -100,22 +100,50 @@ const ACORN_OPTIONS = { ecmaVersion: 2024, sourceType: 'module', allowHashBang:
100
100
 
101
101
  const acorn = require('acorn');
102
102
 
103
+ /**
104
+ * AST parse cache — same content+options returns the same AST.
105
+ * Scanners do not mutate AST nodes (verified: only read comparisons).
106
+ * Cleared between scans via clearASTCache().
107
+ * Key = code.length + '|' + optionsKey + '|' + code.slice(0,128) + code.slice(-64)
108
+ * (length-prefixed partial key for fast Map lookup; collisions resolved by full WeakRef check)
109
+ */
110
+ const _astCache = new Map();
111
+ const _AST_CACHE_MAX = 600; // Max entries (one scan ≈ 500 files max)
112
+
103
113
  /**
104
114
  * Parse JS source with module-mode fallback to script-mode.
105
115
  * `const package = ...` is valid in script mode but reserved in module mode.
116
+ * Results are cached for reuse across scanners within the same scan.
106
117
  * Returns AST or null if both modes fail.
107
118
  */
108
119
  function safeParse(code, extraOptions = {}) {
120
+ // Build cache key: options signature + content fingerprint
121
+ const optKey = Object.keys(extraOptions).length === 0 ? '' : JSON.stringify(extraOptions);
122
+ const cacheKey = code.length + '|' + optKey + '|' + code.slice(0, 128) + code.slice(-64);
123
+
124
+ const cached = _astCache.get(cacheKey);
125
+ if (cached !== undefined) return cached;
126
+
109
127
  const opts = { ...ACORN_OPTIONS, ...extraOptions };
128
+ let ast = null;
110
129
  try {
111
- return acorn.parse(code, opts);
130
+ ast = acorn.parse(code, opts);
112
131
  } catch {
113
132
  try {
114
- return acorn.parse(code, { ...opts, sourceType: 'script' });
133
+ ast = acorn.parse(code, { ...opts, sourceType: 'script' });
115
134
  } catch {
116
- return null;
135
+ ast = null;
117
136
  }
118
137
  }
138
+
139
+ // Cache the result (including null for unparseable files)
140
+ if (_astCache.size >= _AST_CACHE_MAX) _astCache.clear();
141
+ _astCache.set(cacheKey, ast);
142
+ return ast;
143
+ }
144
+
145
+ function clearASTCache() { // Empties _astCache; utils.js calls this from clearFileListCache() so ASTs are not carried over between scans
146
+ _astCache.clear();
119
147
  }
120
148
 
121
- module.exports = { REHABILITATED_PACKAGES, NPM_PACKAGE_REGEX, MAX_TARBALL_SIZE, DOWNLOAD_TIMEOUT, MAX_FILE_SIZE, ACORN_OPTIONS, safeParse, getMaxFileSize, setMaxFileSize, resetMaxFileSize };
149
+ module.exports = { REHABILITATED_PACKAGES, NPM_PACKAGE_REGEX, MAX_TARBALL_SIZE, DOWNLOAD_TIMEOUT, MAX_FILE_SIZE, ACORN_OPTIONS, safeParse, clearASTCache, getMaxFileSize, setMaxFileSize, resetMaxFileSize };
@@ -4,6 +4,13 @@ const REGISTRY_URL = 'https://registry.npmjs.org';
4
4
  const TIMEOUT_MS = 10_000;
5
5
  const MAX_RESPONSE_SIZE = 50 * 1024 * 1024; // 50MB (some packages have lots of versions)
6
6
 
7
+ // Metadata cache: avoids duplicate HTTP requests when multiple temporal modules
8
+ // fetch the same package metadata within a short window (monitor pipeline).
9
+ const _metadataCache = new Map(); // packageName → { data, fetchedAt }
10
+ const _inflightRequests = new Map(); // packageName → Promise
11
+ const METADATA_CACHE_TTL = 5 * 60 * 1000; // 5 minutes
12
+ const METADATA_CACHE_MAX = 200;
13
+
7
14
  const LIFECYCLE_SCRIPTS = [
8
15
  'preinstall',
9
16
  'install',
@@ -16,11 +23,10 @@ const LIFECYCLE_SCRIPTS = [
16
23
  ];
17
24
 
18
25
  /**
19
- * Fetch full package metadata from the npm registry.
20
- * @param {string} packageName - npm package name (scoped or unscoped)
21
- * @returns {Promise<object>} Full registry metadata (versions, time, maintainers, etc.)
26
+ * Raw HTTP fetch: always hits the npm registry and, on success, stores the parsed
27
+ * response in _metadataCache. Callers should use fetchPackageMetadata() instead, which adds the cache lookup and inflight dedup.
22
28
  */
23
- function fetchPackageMetadata(packageName) {
29
+ function _fetchPackageMetadataImpl(packageName) {
24
30
  const encodedName = encodeURIComponent(packageName).replace('%40', '@');
25
31
  const url = `${REGISTRY_URL}/${encodedName}`;
26
32
  const urlObj = new URL(url);
@@ -68,7 +74,15 @@ function fetchPackageMetadata(packageName) {
68
74
  res.on('end', () => {
69
75
  if (destroyed) return;
70
76
  try {
71
- resolve(JSON.parse(data));
77
+ const parsed = JSON.parse(data);
78
+ // Store in cache on successful fetch
79
+ if (_metadataCache.size >= METADATA_CACHE_MAX) {
80
+ // Evict oldest entry
81
+ const oldestKey = _metadataCache.keys().next().value;
82
+ _metadataCache.delete(oldestKey);
83
+ }
84
+ _metadataCache.set(packageName, { data: parsed, fetchedAt: Date.now() });
85
+ resolve(parsed);
72
86
  } catch (e) {
73
87
  reject(new Error(`Invalid JSON from registry for ${packageName}: ${e.message}`));
74
88
  }
@@ -88,6 +102,39 @@ function fetchPackageMetadata(packageName) {
88
102
  });
89
103
  }
90
104
 
105
+ /**
106
+ * Fetch full package metadata from the npm registry with caching and inflight dedup.
107
+ * A cache entry younger than METADATA_CACHE_TTL is returned without any HTTP request; otherwise concurrent callers for the same package share one in-flight Promise.
108
+ * @param {string} packageName - npm package name (scoped or unscoped); also used verbatim as the cache and dedup key
109
+ * @returns {Promise<object>} Full registry metadata (versions, time, maintainers, etc.); rejects if the underlying _fetchPackageMetadataImpl() call rejects
110
+ */
111
+ function fetchPackageMetadata(packageName) {
112
+ // Serve from cache while the entry is younger than METADATA_CACHE_TTL; stale or missing entries fall through to a fetch
113
+ const cached = _metadataCache.get(packageName);
114
+ if (cached && (Date.now() - cached.fetchedAt) < METADATA_CACHE_TTL) {
115
+ return Promise.resolve(cached.data);
116
+ }
117
+
118
+ // Dedup inflight requests — if the same package is already being fetched, reuse that Promise
119
+ if (_inflightRequests.has(packageName)) {
120
+ return _inflightRequests.get(packageName);
121
+ }
122
+
123
+ const promise = _fetchPackageMetadataImpl(packageName).finally(() => { // finally runs on fulfilment and on rejection, so a failed fetch is not left registered as in-flight
124
+ _inflightRequests.delete(packageName);
125
+ });
126
+ _inflightRequests.set(packageName, promise);
127
+ return promise;
128
+ }
129
+
130
+ /**
131
+ * Clear the metadata cache and the in-flight request registry. Exported for tests and monitor reset. Only the Map entries are dropped here; no HTTP request is aborted by this function.
132
+ */
133
+ function clearMetadataCache() {
134
+ _metadataCache.clear();
135
+ _inflightRequests.clear();
136
+ }
137
+
91
138
  /**
92
139
  * Extract lifecycle scripts from a package.json object.
93
140
  * @param {object} packageJson - A package.json object (or a version entry from registry metadata)
@@ -253,8 +300,14 @@ async function detectSuddenLifecycleChange(packageName) {
253
300
 
254
301
  module.exports = {
255
302
  fetchPackageMetadata,
303
+ clearMetadataCache,
256
304
  getLifecycleScripts,
257
305
  compareLifecycleScripts,
258
306
  getLatestVersions,
259
- detectSuddenLifecycleChange
307
+ detectSuddenLifecycleChange,
308
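+ // Exposed for tests. NOTE(review): getPackageMetadata() elsewhere in this diff also reads _metadataCache and METADATA_CACHE_TTL via require('../temporal-analysis.js') — if that is this module, these are not test-only; confirm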
309
+ _metadataCache,
310
+ _inflightRequests,
311
+ METADATA_CACHE_TTL,
312
+ METADATA_CACHE_MAX
260
313
  };
package/src/utils.js CHANGED
@@ -1,12 +1,14 @@
1
1
  const fs = require('fs');
2
2
  const path = require('path');
3
- const { MAX_FILE_SIZE, getMaxFileSize } = require('./shared/constants.js');
3
+ const { MAX_FILE_SIZE, getMaxFileSize, clearASTCache } = require('./shared/constants.js');
4
4
 
5
5
  /**
6
6
  * Directories excluded from scanning.
7
- * Only skip dependency/VCS/cache dirs - never skip user source code.
7
+ * Skips dependency/VCS/cache dirs and bundled output (dist/build/out/output).
8
+ * Bundled output is minified, huge, and produces FPs without security value.
9
+ * The obfuscation scanner uses its own OBF_EXCLUDED_DIRS, which differs only in that it still scans node_modules; it skips dist/build/out/output too.
8
10
  */
9
- const EXCLUDED_DIRS = ['node_modules', '.git', '.muaddib-cache'];
11
+ const EXCLUDED_DIRS = ['node_modules', '.git', '.muaddib-cache', 'dist', 'build', 'out', 'output'];
10
12
 
11
13
  /**
12
14
  * Extra directories to exclude (set at runtime via --exclude flag).
@@ -20,6 +22,14 @@ let _scanRoot = '';
20
22
  * Cleared between scans via clearFileListCache().
21
23
  */
22
24
  const _fileListCache = new Map();
25
+ let _filesCapped = false;
26
+
27
+ /**
28
+ * File content cache — read each file once, reused across all scanners in a single scan.
29
+ * Key = absolute file path, Value = file content string.
30
+ * Cleared between scans via clearFileListCache().
31
+ */
32
+ const _fileContentCache = new Map();
23
33
 
24
34
  function setExtraExcludes(dirs, scanRoot) {
25
35
  _extraExcludedDirs = Array.isArray(dirs) ? dirs : [];
@@ -59,6 +69,13 @@ function isDevFile(relativePath) {
59
69
  return DEV_PATTERNS.some(pattern => pattern.test(relativePath));
60
70
  }
61
71
 
72
+ /**
73
+ * Maximum number of files to scan per package.
74
+ * Malware packages rarely have >50 JS files; 500 is a generous safety margin.
75
+ * Prevents large SDKs (1000+ files) from monopolizing scan time.
76
+ */
77
+ const MAX_SCAN_FILES = 500;
78
+
62
79
  /**
63
80
  * Generic recursive file finder with symlink protection and depth limit.
64
81
  * @param {string} dir - Starting directory
@@ -66,6 +83,7 @@ function isDevFile(relativePath) {
66
83
  * @param {string[]} [options.extensions=['.js']] - File extensions to match
67
84
  * @param {string[]} [options.excludedDirs=EXCLUDED_DIRS] - Dirs to skip
68
85
  * @param {number} [options.maxDepth=100] - Max recursion depth
86
+ * @param {number} [options.maxFiles=MAX_SCAN_FILES] - Max files to return (0=unlimited)
69
87
  * @param {string[]} [options.results=[]] - Accumulator (internal)
70
88
  * @param {Set} [options.visitedInodes=new Set()] - Symlink loop detection (note: inode tracking
71
89
  * is unreliable on Windows where stat.ino may be 0; maxDepth serves as fallback protection)
@@ -77,6 +95,7 @@ function findFiles(dir, options = {}) {
77
95
  extensions = ['.js'],
78
96
  excludedDirs = EXCLUDED_DIRS,
79
97
  maxDepth = 100,
98
+ maxFiles = MAX_SCAN_FILES,
80
99
  results = [],
81
100
  visitedInodes = new Set(),
82
101
  visitedPaths = new Set(),
@@ -90,6 +109,21 @@ function findFiles(dir, options = {}) {
90
109
  const cached = _fileListCache.get(cacheKey);
91
110
  if (cached) return [...cached]; // return copy to prevent mutation
92
111
  const result = _findFilesImpl(dir, { extensions, excludedDirs, maxDepth, results, visitedInodes, visitedPaths, depth });
112
+
113
+ // Apply file count cap: sort by depth (shallowest first) so root-level files
114
+ // (most likely to contain malicious entry points) are prioritized.
115
+ if (maxFiles > 0 && result.length > maxFiles) {
116
+ result.sort((a, b) => {
117
+ const depthA = a.split(path.sep).length;
118
+ const depthB = b.split(path.sep).length;
119
+ return depthA - depthB;
120
+ });
121
+ const capped = result.slice(0, maxFiles);
122
+ _fileListCache.set(cacheKey, [...capped]);
123
+ _filesCapped = true;
124
+ return capped;
125
+ }
126
+
93
127
  _fileListCache.set(cacheKey, [...result]);
94
128
  return result;
95
129
  }
@@ -188,6 +222,13 @@ function findJsFiles(dir, results = []) {
188
222
 
189
223
  function clearFileListCache() {
190
224
  _fileListCache.clear();
225
+ _fileContentCache.clear(); // file contents memoized by forEachSafeFile()
226
+ clearASTCache(); // parsed-AST cache imported from ./shared/constants.js
227
+ _filesCapped = false; // reset the flag reported by wasFilesCapped()
228
+ }
229
+
230
+ function wasFilesCapped() { // True once any findFiles() call has truncated its result to maxFiles since the last clearFileListCache()
231
+ return _filesCapped;
191
232
  }
192
233
 
193
234
  /**
@@ -278,9 +319,17 @@ class Spinner {
278
319
  /**
279
320
  * Iterates files with size guard and error handling.
280
321
  * Calls callback(file, content) for each readable file under MAX_FILE_SIZE.
322
+ * File contents are cached in _fileContentCache for reuse across scanners.
281
323
  */
282
324
  function forEachSafeFile(files, callback) {
283
325
  for (const file of files) {
326
+ // Check content cache first
327
+ const cached = _fileContentCache.get(file);
328
+ if (cached !== undefined) {
329
+ callback(file, cached);
330
+ continue;
331
+ }
332
+
284
333
  try {
285
334
  const stat = fs.statSync(file);
286
335
  if (stat.size > getMaxFileSize()) continue;
@@ -289,6 +338,9 @@ function forEachSafeFile(files, callback) {
289
338
  try {
290
339
  content = fs.readFileSync(file, 'utf8');
291
340
  } catch { continue; }
341
+
342
+ // Cache for subsequent scanners
343
+ _fileContentCache.set(file, content);
292
344
  callback(file, content);
293
345
  }
294
346
  }
@@ -332,11 +384,13 @@ function debugLog(...args) {
332
384
 
333
385
  module.exports = {
334
386
  EXCLUDED_DIRS,
387
+ MAX_SCAN_FILES,
335
388
  DEV_PATTERNS,
336
389
  isDevFile,
337
390
  findFiles,
338
391
  findJsFiles,
339
392
  clearFileListCache,
393
+ wasFilesCapped,
340
394
  escapeHtml,
341
395
  getCallName,
342
396
  Spinner,