muaddib-scanner 2.10.16 → 2.10.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.10.16",
3
+ "version": "2.10.18",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
package/src/index.js CHANGED
@@ -27,7 +27,7 @@ const { buildModuleGraph, annotateTaintedExports, detectCrossFileFlows, annotate
27
27
  const { computeReachableFiles } = require('./scanner/reachability.js');
28
28
  const { runTemporalAnalyses } = require('./temporal-runner.js');
29
29
  const { formatOutput } = require('./output-formatter.js');
30
- const { setExtraExcludes, getExtraExcludes, Spinner, listInstalledPackages, clearFileListCache, debugLog } = require('./utils.js');
30
+ const { setExtraExcludes, getExtraExcludes, Spinner, listInstalledPackages, clearFileListCache, wasFilesCapped, debugLog } = require('./utils.js');
31
31
  const { SEVERITY_WEIGHTS, RISK_THRESHOLDS, MAX_RISK_SCORE, isPackageLevelThreat, computeGroupScore, applyFPReductions, applyCompoundBoosts, calculateRiskScore, applyConfigOverrides, resetConfigOverrides, getSeverityWeights } = require('./scoring.js');
32
32
  const { resolveConfig } = require('./config.js');
33
33
  const { buildIntentPairs } = require('./intent-graph.js');
@@ -480,6 +480,11 @@ async function run(targetPath, options = {}) {
480
480
  aiConfigThreats
481
481
  ] = scanResult;
482
482
 
483
+ // Emit warning if file count cap was hit
484
+ if (wasFilesCapped()) {
485
+ warnings.push('File count cap reached (500 files) — some files were not scanned. Root-level files were prioritized.');
486
+ }
487
+
483
488
  // Stop spinner now that scanning is complete
484
489
  if (spinner) {
485
490
  spinner.succeed(`[MUADDIB] Scanned ${targetPath}`);
@@ -0,0 +1,31 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Worker thread entry point for static analysis.
5
+ * Runs the full scan pipeline in an isolated V8 isolate.
6
+ * The parent can call worker.terminate() to kill synchronous code
7
+ * (V8::TerminateExecution) — this is the only way to enforce a real
8
+ * timeout on synchronous AST parsing and tree-walking.
9
+ *
10
+ * Communication:
11
+ * parentPort.postMessage({ type: 'result', data: scanResult })
12
+ * parentPort.postMessage({ type: 'error', message: string })
13
+ */
14
+
15
+ const { parentPort, workerData } = require('worker_threads');
16
+
17
+ if (!parentPort) {
18
+ // Not running as a worker — exit gracefully
19
+ process.exit(1);
20
+ }
21
+
22
+ const { run } = require('./index.js');
23
+
24
+ (async () => {
25
+ try {
26
+ const result = await run(workerData.extractedDir, { _capture: true });
27
+ parentPort.postMessage({ type: 'result', data: result });
28
+ } catch (err) {
29
+ parentPort.postMessage({ type: 'error', message: err.message || String(err) });
30
+ }
31
+ })();
@@ -2,8 +2,10 @@ const fs = require('fs');
2
2
  const path = require('path');
3
3
  const { findFiles, forEachSafeFile } = require('../utils.js');
4
4
 
5
- // node_modules NOT excluded: detect obfuscated code in dependencies
6
- const OBF_EXCLUDED_DIRS = ['.git', '.muaddib-cache'];
5
+ // node_modules NOT excluded: detect obfuscated code in dependencies.
6
+ // dist/build/out/output excluded: bundled output is always flagged as isPackageOutput (LOW)
7
+ // and costs significant processing time on large SDKs.
8
+ const OBF_EXCLUDED_DIRS = ['.git', '.muaddib-cache', 'dist', 'build', 'out', 'output'];
7
9
 
8
10
  function detectObfuscation(targetPath) {
9
11
  const threats = [];
@@ -100,22 +100,50 @@ const ACORN_OPTIONS = { ecmaVersion: 2024, sourceType: 'module', allowHashBang:
100
100
 
101
101
  const acorn = require('acorn');
102
102
 
103
+ /**
104
+ * AST parse cache — same content+options returns the same AST.
105
+ * Scanners do not mutate AST nodes (verified: only read comparisons).
106
+ * Cleared between scans via clearASTCache().
107
+ * Key = code.length + '|' + optionsKey + '|' + code.slice(0,128) + code.slice(-64)
108
+ * (length-prefixed partial key for fast Map lookup; collisions resolved by full WeakRef check)
109
+ */
110
+ const _astCache = new Map();
111
+ const _AST_CACHE_MAX = 600; // Max entries (one scan ≈ 500 files max)
112
+
103
113
  /**
104
114
  * Parse JS source with module-mode fallback to script-mode.
105
115
  * `const package = ...` is valid in script mode but reserved in module mode.
116
+ * Results are cached for reuse across scanners within the same scan.
106
117
  * Returns AST or null if both modes fail.
107
118
  */
108
119
  function safeParse(code, extraOptions = {}) {
120
+ // Build cache key: options signature + content fingerprint
121
+ const optKey = Object.keys(extraOptions).length === 0 ? '' : JSON.stringify(extraOptions);
122
+ const cacheKey = code.length + '|' + optKey + '|' + code.slice(0, 128) + code.slice(-64);
123
+
124
+ const cached = _astCache.get(cacheKey);
125
+ if (cached !== undefined) return cached;
126
+
109
127
  const opts = { ...ACORN_OPTIONS, ...extraOptions };
128
+ let ast = null;
110
129
  try {
111
- return acorn.parse(code, opts);
130
+ ast = acorn.parse(code, opts);
112
131
  } catch {
113
132
  try {
114
- return acorn.parse(code, { ...opts, sourceType: 'script' });
133
+ ast = acorn.parse(code, { ...opts, sourceType: 'script' });
115
134
  } catch {
116
- return null;
135
+ ast = null;
117
136
  }
118
137
  }
138
+
139
+ // Cache the result (including null for unparseable files)
140
+ if (_astCache.size >= _AST_CACHE_MAX) _astCache.clear();
141
+ _astCache.set(cacheKey, ast);
142
+ return ast;
143
+ }
144
+
145
+ function clearASTCache() {
146
+ _astCache.clear();
119
147
  }
120
148
 
121
- module.exports = { REHABILITATED_PACKAGES, NPM_PACKAGE_REGEX, MAX_TARBALL_SIZE, DOWNLOAD_TIMEOUT, MAX_FILE_SIZE, ACORN_OPTIONS, safeParse, getMaxFileSize, setMaxFileSize, resetMaxFileSize };
149
+ module.exports = { REHABILITATED_PACKAGES, NPM_PACKAGE_REGEX, MAX_TARBALL_SIZE, DOWNLOAD_TIMEOUT, MAX_FILE_SIZE, ACORN_OPTIONS, safeParse, clearASTCache, getMaxFileSize, setMaxFileSize, resetMaxFileSize };
package/src/utils.js CHANGED
@@ -1,12 +1,14 @@
1
1
  const fs = require('fs');
2
2
  const path = require('path');
3
- const { MAX_FILE_SIZE, getMaxFileSize } = require('./shared/constants.js');
3
+ const { MAX_FILE_SIZE, getMaxFileSize, clearASTCache } = require('./shared/constants.js');
4
4
 
5
5
  /**
6
6
  * Directories excluded from scanning.
7
- * Only skip dependency/VCS/cache dirs - never skip user source code.
7
+ * Skips dependency/VCS/cache dirs and bundled output (dist/build/out).
8
+ * Bundled output is minified, huge, and produces FPs without security value.
9
+ * Obfuscation scanner uses its own OBF_EXCLUDED_DIRS to intentionally scan these.
8
10
  */
9
- const EXCLUDED_DIRS = ['node_modules', '.git', '.muaddib-cache'];
11
+ const EXCLUDED_DIRS = ['node_modules', '.git', '.muaddib-cache', 'dist', 'build', 'out', 'output'];
10
12
 
11
13
  /**
12
14
  * Extra directories to exclude (set at runtime via --exclude flag).
@@ -20,6 +22,14 @@ let _scanRoot = '';
20
22
  * Cleared between scans via clearFileListCache().
21
23
  */
22
24
  const _fileListCache = new Map();
25
+ let _filesCapped = false;
26
+
27
+ /**
28
+ * File content cache — read each file once, reused across all scanners in a single scan.
29
+ * Key = absolute file path, Value = file content string.
30
+ * Cleared between scans via clearFileListCache().
31
+ */
32
+ const _fileContentCache = new Map();
23
33
 
24
34
  function setExtraExcludes(dirs, scanRoot) {
25
35
  _extraExcludedDirs = Array.isArray(dirs) ? dirs : [];
@@ -59,6 +69,13 @@ function isDevFile(relativePath) {
59
69
  return DEV_PATTERNS.some(pattern => pattern.test(relativePath));
60
70
  }
61
71
 
72
+ /**
73
+ * Maximum number of files to scan per package.
74
+ * Malware packages rarely have >50 JS files; 500 is a generous safety margin.
75
+ * Prevents large SDKs (1000+ files) from monopolizing scan time.
76
+ */
77
+ const MAX_SCAN_FILES = 500;
78
+
62
79
  /**
63
80
  * Generic recursive file finder with symlink protection and depth limit.
64
81
  * @param {string} dir - Starting directory
@@ -66,6 +83,7 @@ function isDevFile(relativePath) {
66
83
  * @param {string[]} [options.extensions=['.js']] - File extensions to match
67
84
  * @param {string[]} [options.excludedDirs=EXCLUDED_DIRS] - Dirs to skip
68
85
  * @param {number} [options.maxDepth=100] - Max recursion depth
86
+ * @param {number} [options.maxFiles=MAX_SCAN_FILES] - Max files to return (0=unlimited)
69
87
  * @param {string[]} [options.results=[]] - Accumulator (internal)
70
88
  * @param {Set} [options.visitedInodes=new Set()] - Symlink loop detection (note: inode tracking
71
89
  * is unreliable on Windows where stat.ino may be 0; maxDepth serves as fallback protection)
@@ -77,6 +95,7 @@ function findFiles(dir, options = {}) {
77
95
  extensions = ['.js'],
78
96
  excludedDirs = EXCLUDED_DIRS,
79
97
  maxDepth = 100,
98
+ maxFiles = MAX_SCAN_FILES,
80
99
  results = [],
81
100
  visitedInodes = new Set(),
82
101
  visitedPaths = new Set(),
@@ -90,6 +109,21 @@ function findFiles(dir, options = {}) {
90
109
  const cached = _fileListCache.get(cacheKey);
91
110
  if (cached) return [...cached]; // return copy to prevent mutation
92
111
  const result = _findFilesImpl(dir, { extensions, excludedDirs, maxDepth, results, visitedInodes, visitedPaths, depth });
112
+
113
+ // Apply file count cap: sort by depth (shallowest first) so root-level files
114
+ // (most likely to contain malicious entry points) are prioritized.
115
+ if (maxFiles > 0 && result.length > maxFiles) {
116
+ result.sort((a, b) => {
117
+ const depthA = a.split(path.sep).length;
118
+ const depthB = b.split(path.sep).length;
119
+ return depthA - depthB;
120
+ });
121
+ const capped = result.slice(0, maxFiles);
122
+ _fileListCache.set(cacheKey, [...capped]);
123
+ _filesCapped = true;
124
+ return capped;
125
+ }
126
+
93
127
  _fileListCache.set(cacheKey, [...result]);
94
128
  return result;
95
129
  }
@@ -188,6 +222,13 @@ function findJsFiles(dir, results = []) {
188
222
 
189
223
  function clearFileListCache() {
190
224
  _fileListCache.clear();
225
+ _fileContentCache.clear();
226
+ clearASTCache();
227
+ _filesCapped = false;
228
+ }
229
+
230
+ function wasFilesCapped() {
231
+ return _filesCapped;
191
232
  }
192
233
 
193
234
  /**
@@ -278,9 +319,17 @@ class Spinner {
278
319
  /**
279
320
  * Iterates files with size guard and error handling.
280
321
  * Calls callback(file, content) for each readable file under MAX_FILE_SIZE.
322
+ * File contents are cached in _fileContentCache for reuse across scanners.
281
323
  */
282
324
  function forEachSafeFile(files, callback) {
283
325
  for (const file of files) {
326
+ // Check content cache first
327
+ const cached = _fileContentCache.get(file);
328
+ if (cached !== undefined) {
329
+ callback(file, cached);
330
+ continue;
331
+ }
332
+
284
333
  try {
285
334
  const stat = fs.statSync(file);
286
335
  if (stat.size > getMaxFileSize()) continue;
@@ -289,6 +338,9 @@ function forEachSafeFile(files, callback) {
289
338
  try {
290
339
  content = fs.readFileSync(file, 'utf8');
291
340
  } catch { continue; }
341
+
342
+ // Cache for subsequent scanners
343
+ _fileContentCache.set(file, content);
292
344
  callback(file, content);
293
345
  }
294
346
  }
@@ -332,11 +384,13 @@ function debugLog(...args) {
332
384
 
333
385
  module.exports = {
334
386
  EXCLUDED_DIRS,
387
+ MAX_SCAN_FILES,
335
388
  DEV_PATTERNS,
336
389
  isDevFile,
337
390
  findFiles,
338
391
  findJsFiles,
339
392
  clearFileListCache,
393
+ wasFilesCapped,
340
394
  escapeHtml,
341
395
  getCallName,
342
396
  Spinner,