muaddib-scanner 2.1.0 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/muaddib.js CHANGED
@@ -561,7 +561,10 @@ if (command === 'version' || command === '--version' || command === '-v') {
561
561
  } else {
562
562
  // Start full monitor
563
563
  const { startMonitor } = require('../src/monitor.js');
564
- startMonitor().catch(err => {
564
+ const monitorOpts = {
565
+ verbose: options.includes('--verbose')
566
+ };
567
+ startMonitor(monitorOpts).catch(err => {
565
568
  console.error('[ERROR]', err.message);
566
569
  process.exit(1);
567
570
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.1.0",
3
+ "version": "2.1.1",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
package/src/index.js CHANGED
@@ -723,15 +723,12 @@ async function run(targetPath, options = {}) {
723
723
  const countStr = t.count > 1 ? ` (x${t.count})` : '';
724
724
  console.log(` ${i + 1}. [${t.severity}] ${t.type}${countStr}`);
725
725
  console.log(` ${t.message}`);
726
- console.log(` File: ${t.file}\n`);
727
- });
728
-
729
- console.log('[RESPONSE] Recommendations:\n');
730
- deduped.forEach(t => {
726
+ console.log(` File: ${t.file}`);
731
727
  const playbook = getPlaybook(t.type);
732
728
  if (playbook) {
733
- console.log(` -> ${playbook}\n`);
729
+ console.log(` \u2192 ${playbook}`);
734
730
  }
731
+ console.log('');
735
732
  });
736
733
  }
737
734
 
@@ -1,10 +1,12 @@
1
1
  const https = require('https');
2
2
  const fs = require('fs');
3
3
  const path = require('path');
4
+ const os = require('os');
4
5
  const AdmZip = require('adm-zip');
5
6
 
6
7
  const IOC_FILE = path.join(__dirname, 'data/iocs.json');
7
8
  const COMPACT_IOC_FILE = path.join(__dirname, 'data/iocs-compact.json');
9
+ const HOME_IOC_FILE = path.join(os.homedir(), '.muaddib', 'data', 'iocs.json');
8
10
  const STATIC_IOCS_FILE = path.join(__dirname, '../../data/static-iocs.json');
9
11
  const { generateCompactIOCs } = require('./updater.js');
10
12
  const { Spinner } = require('../utils.js');
@@ -623,11 +625,11 @@ async function scrapeOSSFMaliciousPackages(knownIds) {
623
625
  return packages;
624
626
  }
625
627
 
626
- // Incremental: compare tree SHA
628
+ // Incremental: compare tree SHA (stored in ~/.muaddib/data/ to persist across npm updates)
627
629
  const treeSha = data.sha;
628
- const dataDir = path.join(__dirname, 'data');
629
- if (!fs.existsSync(dataDir)) fs.mkdirSync(dataDir, { recursive: true });
630
- const shaFile = path.join(dataDir, '.ossf-tree-sha');
630
+ const homeDir = path.dirname(HOME_IOC_FILE);
631
+ if (!fs.existsSync(homeDir)) fs.mkdirSync(homeDir, { recursive: true });
632
+ const shaFile = path.join(homeDir, '.ossf-tree-sha');
631
633
  let lastSha = null;
632
634
  try { lastSha = fs.readFileSync(shaFile, 'utf8').trim(); } catch {}
633
635
 
@@ -1004,9 +1006,18 @@ async function runScraper() {
1004
1006
  throw new Error(`Data directory is not writable: ${dataDir}`);
1005
1007
  }
1006
1008
 
1007
- // Load existing IOCs
1009
+ // Load existing IOCs (check ~/.muaddib/data/ first, then local)
1008
1010
  let existingIOCs = { packages: [], pypi_packages: [], hashes: [], markers: [], files: [] };
1009
- if (fs.existsSync(IOC_FILE)) {
1011
+ if (fs.existsSync(HOME_IOC_FILE)) {
1012
+ try {
1013
+ existingIOCs = JSON.parse(fs.readFileSync(HOME_IOC_FILE, 'utf8'));
1014
+ if (!existingIOCs.pypi_packages) existingIOCs.pypi_packages = [];
1015
+ console.log('[INFO] Loaded existing IOCs from ' + HOME_IOC_FILE);
1016
+ } catch {
1017
+ console.log('[WARN] Home IOCs file corrupted, trying local...');
1018
+ }
1019
+ }
1020
+ if (existingIOCs.packages.length === 0 && fs.existsSync(IOC_FILE)) {
1010
1021
  try {
1011
1022
  existingIOCs = JSON.parse(fs.readFileSync(IOC_FILE, 'utf8'));
1012
1023
  if (!existingIOCs.pypi_packages) existingIOCs.pypi_packages = [];
@@ -1186,7 +1197,21 @@ async function runScraper() {
1186
1197
  const tmpCompactFile = COMPACT_IOC_FILE + '.tmp';
1187
1198
  fs.writeFileSync(tmpCompactFile, JSON.stringify(compactIOCs));
1188
1199
  fs.renameSync(tmpCompactFile, COMPACT_IOC_FILE);
1189
- saveSpinner.succeed('Saved IOCs + compact format');
1200
+
1201
+ // Persist to ~/.muaddib/data/ (survives npm update)
1202
+ saveSpinner.update('Persisting to home directory...');
1203
+ const homeDir = path.dirname(HOME_IOC_FILE);
1204
+ if (!fs.existsSync(homeDir)) {
1205
+ fs.mkdirSync(homeDir, { recursive: true });
1206
+ }
1207
+ try {
1208
+ const tmpHomeFile = HOME_IOC_FILE + '.tmp';
1209
+ fs.writeFileSync(tmpHomeFile, JSON.stringify(existingIOCs, null, 2));
1210
+ fs.renameSync(tmpHomeFile, HOME_IOC_FILE);
1211
+ saveSpinner.succeed('Saved IOCs + compact format + home directory');
1212
+ } catch (e) {
1213
+ saveSpinner.succeed('Saved IOCs + compact format (home dir write failed: ' + e.message + ')');
1214
+ }
1190
1215
 
1191
1216
  // Display summary
1192
1217
  console.log('\n' + '='.repeat(60));
@@ -1,8 +1,9 @@
1
1
  const fs = require('fs');
2
2
  const path = require('path');
3
+ const os = require('os');
3
4
 
4
- const CACHE_PATH = path.join(__dirname, '../../.muaddib-cache');
5
- const CACHE_IOC_FILE = path.join(CACHE_PATH, 'iocs.json');
5
+ const HOME_DATA_PATH = path.join(os.homedir(), '.muaddib', 'data');
6
+ const CACHE_IOC_FILE = path.join(HOME_DATA_PATH, 'iocs.json');
6
7
  const LOCAL_IOC_FILE = path.join(__dirname, 'data/iocs.json');
7
8
  const LOCAL_COMPACT_FILE = path.join(__dirname, 'data/iocs-compact.json');
8
9
  const { loadYAMLIOCs } = require('./yaml-loader.js');
@@ -56,16 +57,16 @@ async function updateIOCs() {
56
57
  mergeIOCs(baseIOCs, githubIOCs);
57
58
  console.log(' +' + shaiHulud.packages.length + ' GenSecAI, +' + datadog.packages.length + ' DataDog');
58
59
 
59
- // Step 4: Merge and save to cache
60
- if (!fs.existsSync(CACHE_PATH)) {
61
- fs.mkdirSync(CACHE_PATH, { recursive: true });
60
+ // Step 4: Merge and save to cache (~/.muaddib/data/ — persists across npm updates)
61
+ if (!fs.existsSync(HOME_DATA_PATH)) {
62
+ fs.mkdirSync(HOME_DATA_PATH, { recursive: true });
62
63
  }
63
64
 
64
65
  // Verify write permission before attempting save (CROSS-001)
65
66
  try {
66
- fs.accessSync(CACHE_PATH, fs.constants.W_OK);
67
+ fs.accessSync(HOME_DATA_PATH, fs.constants.W_OK);
67
68
  } catch {
68
- console.log('[WARN] Cache directory is not writable: ' + CACHE_PATH);
69
+ console.log('[WARN] Cache directory is not writable: ' + HOME_DATA_PATH);
69
70
  console.log('[WARN] IOCs loaded in memory but not persisted to disk.');
70
71
  return { total: baseIOCs.packages.length, totalPyPI: (baseIOCs.pypi_packages || []).length };
71
72
  }
package/src/monitor.js CHANGED
@@ -2,7 +2,6 @@ const https = require('https');
2
2
  const fs = require('fs');
3
3
  const path = require('path');
4
4
  const os = require('os');
5
- const { execSync } = require('child_process');
6
5
  const { run } = require('./index.js');
7
6
  const { runSandbox, isDockerAvailable } = require('./sandbox.js');
8
7
  const { sendWebhook } = require('./webhook.js');
@@ -10,13 +9,14 @@ const { detectSuddenLifecycleChange } = require('./temporal-analysis.js');
10
9
  const { detectSuddenAstChanges } = require('./temporal-ast-diff.js');
11
10
  const { detectPublishAnomaly } = require('./publish-anomaly.js');
12
11
  const { detectMaintainerChange } = require('./maintainer-change.js');
12
+ const { downloadToFile, extractTarGz, sanitizePackageName } = require('./shared/download.js');
13
+ const { MAX_TARBALL_SIZE } = require('./shared/constants.js');
13
14
 
14
15
  const STATE_FILE = path.join(__dirname, '..', 'data', 'monitor-state.json');
15
16
  const ALERTS_FILE = path.join(__dirname, '..', 'data', 'monitor-alerts.json');
16
17
  const DETECTIONS_FILE = path.join(__dirname, '..', 'data', 'detections.json');
17
18
  const SCAN_STATS_FILE = path.join(__dirname, '..', 'data', 'scan-stats.json');
18
19
  const POLL_INTERVAL = 60_000;
19
- const MAX_TARBALL_SIZE = 50 * 1024 * 1024; // 50MB
20
20
  const SCAN_TIMEOUT_MS = 180_000; // 3 minutes per package
21
21
 
22
22
  // --- Stats counters ---
@@ -93,6 +93,35 @@ function hasHighOrCritical(result) {
93
93
  return result.summary.critical > 0 || result.summary.high > 0;
94
94
  }
95
95
 
96
// --- Verbose mode (--verbose sends ALL alerts including temporal/publish/maintainer) ---

// Module-level flag; switched on through setVerboseMode().
let verboseMode = false;

/**
 * Reports whether verbose webhook mode is active.
 *
 * Verbose mode is on when the flag was set through setVerboseMode(), or when
 * the MUADDIB_MONITOR_VERBOSE environment variable equals "true"
 * (case-insensitive, surrounding whitespace ignored).
 *
 * @returns {boolean} true when verbose mode is enabled
 */
function isVerboseMode() {
  if (verboseMode) return true;
  const env = process.env.MUADDIB_MONITOR_VERBOSE;
  // Trim so values such as " true" (common in .env files) are still honoured.
  return typeof env === 'string' && env.trim().toLowerCase() === 'true';
}

/**
 * Enables or disables verbose mode for this process.
 *
 * @param {*} value - Any value; coerced to a boolean
 */
function setVerboseMode(value) {
  verboseMode = Boolean(value);
}
109
+
110
// --- IOC match types (these are the only static-analysis types that warrant a webhook) ---

const IOC_MATCH_TYPES = new Set([
  'known_malicious_package',
  'known_malicious_hash',
  'pypi_malicious_package',
  'shai_hulud_marker',
  'shai_hulud_backdoor'
]);

/**
 * Tells whether a scan result contains at least one threat whose type is
 * listed in IOC_MATCH_TYPES.
 *
 * @param {{threats?: Array<{type?: string}>}|null|undefined} result - Scan result
 * @returns {boolean} true when any threat type is an IOC match type
 */
function hasIOCMatch(result) {
  // A missing or malformed threat list means "no match" rather than a crash.
  if (!result || !Array.isArray(result.threats)) return false;
  return result.threats.some(t => Boolean(t) && IOC_MATCH_TYPES.has(t.type));
}
124
+
96
125
  // --- Webhook alerting ---
97
126
 
98
127
  function getWebhookUrl() {
@@ -107,9 +136,10 @@ function shouldSendWebhook(result, sandboxResult) {
107
136
  return sandboxResult.score > 0;
108
137
  }
109
138
 
110
- // No sandbox — fall back to static analysis thresholds
111
- if (result.summary.critical > 0) return true;
112
- if (result.summary.high > 0 && computeRiskScore(result.summary) >= 25) return true;
139
+ // No sandbox — only send webhook for confirmed IOC matches
140
+ // (known_malicious_package, known_malicious_hash, pypi_malicious_package, etc.)
141
+ if (hasIOCMatch(result)) return true;
142
+
113
143
  return false;
114
144
  }
115
145
 
@@ -223,13 +253,17 @@ function buildTemporalWebhookEmbed(temporalResult) {
223
253
  }
224
254
 
225
255
  async function tryTemporalAlert(temporalResult) {
256
+ // Temporal anomalies are logged only — no webhook unless --verbose
257
+ console.log(`[MONITOR] ANOMALY (logged only): temporal lifecycle change for ${temporalResult.packageName}`);
258
+ if (!isVerboseMode()) return;
259
+
226
260
  const url = getWebhookUrl();
227
261
  if (!url) return;
228
262
 
229
263
  const payload = buildTemporalWebhookEmbed(temporalResult);
230
264
  try {
231
265
  await sendWebhook(url, payload, { rawPayload: true });
232
- console.log(`[MONITOR] Temporal webhook sent for ${temporalResult.packageName}`);
266
+ console.log(`[MONITOR] Temporal webhook sent for ${temporalResult.packageName} (verbose mode)`);
233
267
  } catch (err) {
234
268
  console.error(`[MONITOR] Temporal webhook failed for ${temporalResult.packageName}: ${err.message}`);
235
269
  }
@@ -276,13 +310,17 @@ function buildTemporalAstWebhookEmbed(astResult) {
276
310
  }
277
311
 
278
312
  async function tryTemporalAstAlert(astResult) {
313
+ // AST anomalies are logged only — no webhook unless --verbose
314
+ console.log(`[MONITOR] ANOMALY (logged only): AST change for ${astResult.packageName}`);
315
+ if (!isVerboseMode()) return;
316
+
279
317
  const url = getWebhookUrl();
280
318
  if (!url) return;
281
319
 
282
320
  const payload = buildTemporalAstWebhookEmbed(astResult);
283
321
  try {
284
322
  await sendWebhook(url, payload, { rawPayload: true });
285
- console.log(`[MONITOR] Temporal AST webhook sent for ${astResult.packageName}`);
323
+ console.log(`[MONITOR] Temporal AST webhook sent for ${astResult.packageName} (verbose mode)`);
286
324
  } catch (err) {
287
325
  console.error(`[MONITOR] Temporal AST webhook failed for ${astResult.packageName}: ${err.message}`);
288
326
  }
@@ -370,13 +408,17 @@ function buildPublishAnomalyWebhookEmbed(publishResult) {
370
408
  }
371
409
 
372
410
  async function tryTemporalPublishAlert(publishResult) {
411
+ // Publish anomalies are logged only — no webhook unless --verbose
412
+ console.log(`[MONITOR] ANOMALY (logged only): publish frequency for ${publishResult.packageName}`);
413
+ if (!isVerboseMode()) return;
414
+
373
415
  const url = getWebhookUrl();
374
416
  if (!url) return;
375
417
 
376
418
  const payload = buildPublishAnomalyWebhookEmbed(publishResult);
377
419
  try {
378
420
  await sendWebhook(url, payload, { rawPayload: true });
379
- console.log(`[MONITOR] Publish anomaly webhook sent for ${publishResult.packageName}`);
421
+ console.log(`[MONITOR] Publish anomaly webhook sent for ${publishResult.packageName} (verbose mode)`);
380
422
  } catch (err) {
381
423
  console.error(`[MONITOR] Publish anomaly webhook failed for ${publishResult.packageName}: ${err.message}`);
382
424
  }
@@ -467,13 +509,17 @@ function buildMaintainerChangeWebhookEmbed(maintainerResult) {
467
509
  }
468
510
 
469
511
  async function tryTemporalMaintainerAlert(maintainerResult) {
512
+ // Maintainer changes are logged only — no webhook unless --verbose
513
+ console.log(`[MONITOR] ANOMALY (logged only): maintainer change for ${maintainerResult.packageName}`);
514
+ if (!isVerboseMode()) return;
515
+
470
516
  const url = getWebhookUrl();
471
517
  if (!url) return;
472
518
 
473
519
  const payload = buildMaintainerChangeWebhookEmbed(maintainerResult);
474
520
  try {
475
521
  await sendWebhook(url, payload, { rawPayload: true });
476
- console.log(`[MONITOR] Maintainer change webhook sent for ${maintainerResult.packageName}`);
522
+ console.log(`[MONITOR] Maintainer change webhook sent for ${maintainerResult.packageName} (verbose mode)`);
477
523
  } catch (err) {
478
524
  console.error(`[MONITOR] Maintainer change webhook failed for ${maintainerResult.packageName}: ${err.message}`);
479
525
  }
@@ -618,81 +664,6 @@ function httpsGet(url, timeoutMs = 30_000) {
618
664
  });
619
665
  }
620
666
 
621
- // --- Download & extraction helpers ---
622
-
623
- function downloadToFile(url, destPath, timeoutMs = 30_000) {
624
- return new Promise((resolve, reject) => {
625
- const doRequest = (requestUrl) => {
626
- const req = https.get(requestUrl, { timeout: timeoutMs }, (res) => {
627
- if (res.statusCode === 301 || res.statusCode === 302) {
628
- res.resume();
629
- const location = res.headers.location;
630
- if (!location) return reject(new Error(`Redirect without Location for ${requestUrl}`));
631
- return doRequest(location);
632
- }
633
- if (res.statusCode < 200 || res.statusCode >= 300) {
634
- res.resume();
635
- return reject(new Error(`HTTP ${res.statusCode} for ${requestUrl}`));
636
- }
637
- const contentLength = parseInt(res.headers['content-length'], 10);
638
- if (contentLength && contentLength > MAX_TARBALL_SIZE) {
639
- res.resume();
640
- return reject(new Error(`Package too large: ${contentLength} bytes (max ${MAX_TARBALL_SIZE})`));
641
- }
642
- const fileStream = fs.createWriteStream(destPath);
643
- let downloadedBytes = 0;
644
- res.on('data', (chunk) => {
645
- downloadedBytes += chunk.length;
646
- if (downloadedBytes > MAX_TARBALL_SIZE) {
647
- res.destroy();
648
- fileStream.destroy();
649
- try { fs.unlinkSync(destPath); } catch {}
650
- reject(new Error(`Package too large: ${downloadedBytes}+ bytes (max ${MAX_TARBALL_SIZE})`));
651
- }
652
- });
653
- res.pipe(fileStream);
654
- fileStream.on('finish', () => resolve(downloadedBytes));
655
- fileStream.on('error', (err) => {
656
- try { fs.unlinkSync(destPath); } catch {}
657
- reject(err);
658
- });
659
- res.on('error', (err) => {
660
- fileStream.destroy();
661
- try { fs.unlinkSync(destPath); } catch {}
662
- reject(err);
663
- });
664
- });
665
- req.on('error', reject);
666
- req.on('timeout', () => {
667
- req.destroy();
668
- reject(new Error(`Timeout downloading ${requestUrl}`));
669
- });
670
- };
671
- doRequest(url);
672
- });
673
- }
674
-
675
- function extractTarGz(tgzPath, destDir) {
676
- // Use cwd + relative paths so C: never appears in tar arguments
677
- // (GNU tar treats C: as remote host, bsdtar doesn't support --force-local)
678
- const tgzDir = path.dirname(path.resolve(tgzPath));
679
- const tgzName = path.basename(tgzPath);
680
- const relDest = path.relative(tgzDir, path.resolve(destDir)) || '.';
681
- execSync(`tar xzf "${tgzName}" -C "${relDest}"`, { cwd: tgzDir, timeout: 60_000, stdio: 'pipe' });
682
- // npm tarballs extract into a package/ subdirectory; detect it
683
- const packageSubdir = path.join(destDir, 'package');
684
- if (fs.existsSync(packageSubdir) && fs.statSync(packageSubdir).isDirectory()) {
685
- return packageSubdir;
686
- }
687
- // Otherwise return destDir itself (PyPI sdists vary)
688
- const entries = fs.readdirSync(destDir);
689
- if (entries.length === 1) {
690
- const single = path.join(destDir, entries[0]);
691
- if (fs.statSync(single).isDirectory()) return single;
692
- }
693
- return destDir;
694
- }
695
-
696
667
  // --- Tarball URL helpers ---
697
668
 
698
669
  function getNpmTarballUrl(pkgData) {
@@ -702,7 +673,12 @@ function getNpmTarballUrl(pkgData) {
702
673
  async function getPyPITarballUrl(packageName) {
703
674
  const url = `https://pypi.org/pypi/${encodeURIComponent(packageName)}/json`;
704
675
  const body = await httpsGet(url);
705
- const data = JSON.parse(body);
676
+ let data;
677
+ try {
678
+ data = JSON.parse(body);
679
+ } catch (e) {
680
+ throw new Error(`Invalid JSON from PyPI for ${packageName}: ${e.message}`);
681
+ }
706
682
  const version = (data.info && data.info.version) || '';
707
683
  const urls = data.urls || [];
708
684
  // Prefer sdist (.tar.gz)
@@ -871,7 +847,7 @@ async function scanPackage(name, version, ecosystem, tarballUrl) {
871
847
  const startTime = Date.now();
872
848
  const tmpBase = path.join(os.tmpdir(), 'muaddib-monitor');
873
849
  if (!fs.existsSync(tmpBase)) fs.mkdirSync(tmpBase, { recursive: true });
874
- const tmpDir = fs.mkdtempSync(path.join(tmpBase, `${name.replace(/\//g, '_')}-`));
850
+ const tmpDir = fs.mkdtempSync(path.join(tmpBase, `${sanitizePackageName(name)}-`));
875
851
 
876
852
  try {
877
853
  const tgzPath = path.join(tmpDir, 'package.tar.gz');
@@ -1134,7 +1110,12 @@ function parseNpmRss(xml) {
1134
1110
  async function getNpmLatestTarball(packageName) {
1135
1111
  const url = `https://registry.npmjs.org/${encodeURIComponent(packageName)}/latest`;
1136
1112
  const body = await httpsGet(url);
1137
- const data = JSON.parse(body);
1113
+ let data;
1114
+ try {
1115
+ data = JSON.parse(body);
1116
+ } catch (e) {
1117
+ throw new Error(`Invalid JSON from npm registry for ${packageName}: ${e.message}`);
1118
+ }
1138
1119
  const version = data.version || '';
1139
1120
  const tarball = (data.dist && data.dist.tarball) || null;
1140
1121
  return { version, tarball };
@@ -1259,7 +1240,11 @@ async function pollPyPI(state) {
1259
1240
 
1260
1241
  // --- Main loop ---
1261
1242
 
1262
- async function startMonitor() {
1243
+ async function startMonitor(options) {
1244
+ if (options && options.verbose) {
1245
+ setVerboseMode(true);
1246
+ }
1247
+
1263
1248
  console.log(`
1264
1249
  ╔════════════════════════════════════════════╗
1265
1250
  ║ MUAD'DIB - Registry Monitor ║
@@ -1311,6 +1296,15 @@ async function startMonitor() {
1311
1296
  console.log('[MONITOR] Maintainer change analysis disabled (MUADDIB_MONITOR_TEMPORAL_MAINTAINER=false)');
1312
1297
  }
1313
1298
 
1299
+ // Webhook filtering mode
1300
+ if (isVerboseMode()) {
1301
+ console.log('[MONITOR] Verbose mode ON — ALL anomalies sent as webhooks (temporal, publish, maintainer, AST)');
1302
+ } else {
1303
+ console.log('[MONITOR] Strict webhook mode — only IOC matches, sandbox confirmations, and canary exfiltrations trigger webhooks');
1304
+ console.log('[MONITOR] Temporal/publish/maintainer anomalies are logged but NOT sent as webhooks');
1305
+ console.log('[MONITOR] Use --verbose to send all anomalies as webhooks');
1306
+ }
1307
+
1314
1308
  const state = loadState();
1315
1309
  console.log(`[MONITOR] State loaded — npm last: ${state.npmLastPackage || 'none'}, pypi last: ${state.pypiLastPackage || 'none'}`);
1316
1310
  console.log(`[MONITOR] Polling every ${POLL_INTERVAL / 1000}s. Ctrl+C to stop.\n`);
@@ -1538,6 +1532,10 @@ module.exports = {
1538
1532
  isCanaryEnabled,
1539
1533
  buildCanaryExfiltrationWebhookEmbed,
1540
1534
  isPublishAnomalyOnly,
1535
+ isVerboseMode,
1536
+ setVerboseMode,
1537
+ hasIOCMatch,
1538
+ IOC_MATCH_TYPES,
1541
1539
  DETECTIONS_FILE,
1542
1540
  appendDetection,
1543
1541
  loadDetections,
@@ -2,10 +2,7 @@ const fs = require('fs');
2
2
  const path = require('path');
3
3
  const { spawnSync } = require('child_process');
4
4
  const { loadCachedIOCs } = require('./ioc/updater.js');
5
- const { REHABILITATED_PACKAGES } = require('./shared/constants.js');
6
-
7
- // Regex to validate npm package names (prevents command injection)
8
- const NPM_PACKAGE_REGEX = /^(@[a-z0-9-~][a-z0-9-._~]*\/)?[a-z0-9-~][a-z0-9-._~]*$/;
5
+ const { REHABILITATED_PACKAGES, NPM_PACKAGE_REGEX } = require('./shared/constants.js');
9
6
 
10
7
  /**
11
8
  * Validates that a package name is safe (no command injection)
package/src/sandbox.js CHANGED
@@ -8,9 +8,10 @@ const {
8
8
  detectCanaryInOutput
9
9
  } = require('./canary-tokens.js');
10
10
 
11
+ const { NPM_PACKAGE_REGEX } = require('./shared/constants.js');
12
+
11
13
  const DOCKER_IMAGE = 'muaddib-sandbox';
12
14
  const CONTAINER_TIMEOUT = 120000; // 120 seconds
13
- const NPM_PACKAGE_REGEX = /^(@[a-z0-9-~][a-z0-9-._~]*\/)?[a-z0-9-~][a-z0-9-._~]*$/;
14
15
 
15
16
  // Domains excluded from network findings (false positives)
16
17
  const SAFE_DOMAINS = [
@@ -1,10 +1,11 @@
1
+ const { NPM_PACKAGE_REGEX } = require('../shared/constants.js');
2
+
1
3
  const REGISTRY_URL = 'https://registry.npmjs.org';
2
4
  const DOWNLOADS_URL = 'https://api.npmjs.org/downloads/point/last-week';
3
5
  const SEARCH_URL = 'https://registry.npmjs.org/-/v1/search';
4
6
 
5
7
  const REQUEST_TIMEOUT = 10000; // 10 seconds
6
8
  const MAX_RETRIES = 3;
7
- const NPM_NAME_REGEX = /^(@[a-z0-9-~][a-z0-9-._~]*\/)?[a-z0-9-~][a-z0-9-._~]*$/;
8
9
 
9
10
  /**
10
11
  * Create a timeout signal, with fallback for older Node versions.
@@ -75,7 +76,7 @@ async function fetchWithRetry(url) {
75
76
 
76
77
  async function getPackageMetadata(packageName) {
77
78
  // Validate package name before building URL
78
- if (!NPM_NAME_REGEX.test(packageName)) return null;
79
+ if (!NPM_PACKAGE_REGEX.test(packageName)) return null;
79
80
 
80
81
  // 1. Registry metadata
81
82
  const registryUrl = REGISTRY_URL + '/' + encodeURIComponent(packageName);
@@ -79,4 +79,11 @@ const REHABILITATED_PACKAGES = {
79
79
  }
80
80
  };
81
81
 
82
- module.exports = { REHABILITATED_PACKAGES };
82
+ // Regex to validate npm package names (prevents command injection)
83
+ const NPM_PACKAGE_REGEX = /^(@[a-z0-9-~][a-z0-9-._~]*\/)?[a-z0-9-~][a-z0-9-._~]*$/;
84
+
85
+ // Download/extraction limits
86
+ const MAX_TARBALL_SIZE = 50 * 1024 * 1024; // 50MB
87
+ const DOWNLOAD_TIMEOUT = 30_000; // 30 seconds
88
+
89
+ module.exports = { REHABILITATED_PACKAGES, NPM_PACKAGE_REGEX, MAX_TARBALL_SIZE, DOWNLOAD_TIMEOUT };
@@ -0,0 +1,171 @@
1
+ const https = require('https');
2
+ const fs = require('fs');
3
+ const path = require('path');
4
+ const { execFileSync } = require('child_process');
5
+ const { MAX_TARBALL_SIZE, DOWNLOAD_TIMEOUT } = require('./constants.js');
6
+
7
// Allowed redirect domains for tarball downloads (SSRF protection)
const ALLOWED_DOWNLOAD_DOMAINS = [
  'registry.npmjs.org',
  'registry.yarnpkg.com',
  'pypi.org',
  'files.pythonhosted.org'
];

// Private IP ranges — block redirects to internal networks.
// Patterns are matched against the bare address (IPv6 brackets removed).
const PRIVATE_IP_PATTERNS = [
  /^127\./,
  /^10\./,
  /^172\.(1[6-9]|2[0-9]|3[0-1])\./,
  /^192\.168\./,
  /^0\./,
  /^169\.254\./,
  /^::1$/,
  /^::ffff:127\./,
  // The WHATWG URL parser serializes IPv4-mapped addresses in hex (::ffff:7f00:1)
  /^::ffff:7f[0-9a-f]{2}:/i,
  // Unique local addresses: the whole fc00::/7 block (fc.. and fd..)
  /^f[cd][0-9a-f]{2}:/i,
  // Link-local addresses: the whole fe80::/10 block (fe80..febf)
  /^fe[89ab][0-9a-f]:/i
];

/**
 * Validates that a redirect URL is allowed (SSRF protection).
 * Only HTTPS to allowlisted domains (or their subdomains) is permitted.
 * @param {string} redirectUrl - The absolute redirect target URL
 * @returns {{allowed: boolean, error?: string}}
 */
function isAllowedDownloadRedirect(redirectUrl) {
  let urlObj;
  try {
    urlObj = new URL(redirectUrl);
  } catch {
    return { allowed: false, error: `Redirect blocked: invalid URL ${redirectUrl}` };
  }

  if (urlObj.protocol !== 'https:') {
    return { allowed: false, error: `Redirect blocked: non-HTTPS protocol ${urlObj.protocol}` };
  }

  // URL.hostname keeps the square brackets around IPv6 literals ("[::1]");
  // strip them so the IPv6 patterns above can actually match.
  const rawHost = urlObj.hostname.toLowerCase();
  const hostname = rawHost.startsWith('[') && rawHost.endsWith(']')
    ? rawHost.slice(1, -1)
    : rawHost;

  // Block private IP addresses
  if (PRIVATE_IP_PATTERNS.some(p => p.test(hostname))) {
    return { allowed: false, error: `Redirect blocked: private IP ${hostname}` };
  }

  const domainAllowed = ALLOWED_DOWNLOAD_DOMAINS.some(domain =>
    hostname === domain || hostname.endsWith('.' + domain)
  );
  if (!domainAllowed) {
    return { allowed: false, error: `Redirect blocked: domain ${hostname} not in allowlist` };
  }
  return { allowed: true };
}
57
+
58
/**
 * Download a file from an HTTPS URL to disk, with SSRF-safe redirect handling.
 *
 * Redirects (301/302/303/307/308) are followed only when the target passes
 * isAllowedDownloadRedirect(), and at most 5 of them are followed so a
 * redirect loop cannot keep the download running forever. The download is
 * rejected when the declared or actual size exceeds MAX_TARBALL_SIZE.
 *
 * @param {string} url - Source URL (must be HTTPS)
 * @param {string} destPath - Local file path to write to
 * @param {number} [timeoutMs] - Download timeout in ms (default: DOWNLOAD_TIMEOUT)
 * @returns {Promise<number>} Number of bytes downloaded
 */
function downloadToFile(url, destPath, timeoutMs = DOWNLOAD_TIMEOUT) {
  const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
  const MAX_REDIRECTS = 5;

  return new Promise((resolve, reject) => {
    const doRequest = (requestUrl, redirectsLeft) => {
      const req = https.get(requestUrl, { timeout: timeoutMs }, (res) => {
        if (REDIRECT_STATUSES.has(res.statusCode)) {
          res.resume();
          const location = res.headers.location;
          if (!location) return reject(new Error(`Redirect without Location for ${requestUrl}`));
          if (redirectsLeft <= 0) {
            return reject(new Error(`Too many redirects (max ${MAX_REDIRECTS}) for ${url}`));
          }
          // Resolve relative redirects against the request URL. A malformed
          // Location header must reject the promise: a throw here would escape
          // the response callback as an uncaught exception.
          let absoluteLocation;
          try {
            absoluteLocation = new URL(location, requestUrl).href;
          } catch {
            return reject(new Error(`Redirect blocked: invalid URL ${location}`));
          }
          const check = isAllowedDownloadRedirect(absoluteLocation);
          if (!check.allowed) {
            return reject(new Error(check.error));
          }
          return doRequest(absoluteLocation, redirectsLeft - 1);
        }
        if (res.statusCode < 200 || res.statusCode >= 300) {
          res.resume();
          return reject(new Error(`HTTP ${res.statusCode} for ${requestUrl}`));
        }
        // Fast path: refuse early when the server declares an oversized body.
        const contentLength = parseInt(res.headers['content-length'], 10);
        if (contentLength && contentLength > MAX_TARBALL_SIZE) {
          res.resume();
          return reject(new Error(`Package too large: ${contentLength} bytes (max ${MAX_TARBALL_SIZE})`));
        }
        const fileStream = fs.createWriteStream(destPath);
        let downloadedBytes = 0;
        // Content-Length may be absent or wrong, so the real size is enforced too.
        res.on('data', (chunk) => {
          downloadedBytes += chunk.length;
          if (downloadedBytes > MAX_TARBALL_SIZE) {
            res.destroy();
            fileStream.destroy();
            try { fs.unlinkSync(destPath); } catch {}
            reject(new Error(`Package too large: ${downloadedBytes}+ bytes (max ${MAX_TARBALL_SIZE})`));
          }
        });
        res.pipe(fileStream);
        fileStream.on('finish', () => resolve(downloadedBytes));
        fileStream.on('error', (err) => {
          try { fs.unlinkSync(destPath); } catch {}
          reject(err);
        });
        res.on('error', (err) => {
          fileStream.destroy();
          try { fs.unlinkSync(destPath); } catch {}
          reject(err);
        });
      });
      req.on('error', reject);
      req.on('timeout', () => {
        req.destroy();
        reject(new Error(`Timeout downloading ${requestUrl}`));
      });
    };
    doRequest(url, MAX_REDIRECTS);
  });
}
122
+
123
/**
 * Extract a .tar.gz to a directory. Returns the package root.
 * Uses execFileSync (no shell) to prevent command injection.
 *
 * The package root is chosen as: `<destDir>/package` when it is a directory,
 * else the single top-level entry when it is a directory, else destDir.
 * Symlinks are never accepted as the package root: the archive is untrusted,
 * and a symlink entry could otherwise point the caller outside destDir.
 *
 * @param {string} tgzPath - Path to the .tar.gz file
 * @param {string} destDir - Destination directory
 * @returns {string} Path to extracted package root
 */
function extractTarGz(tgzPath, destDir) {
  // Use cwd + relative paths so C: never appears in tar arguments
  // (GNU tar treats C: as remote host, bsdtar doesn't support --force-local)
  const tgzDir = path.dirname(path.resolve(tgzPath));
  const tgzName = path.basename(tgzPath);
  const relDest = path.relative(tgzDir, path.resolve(destDir)) || '.';
  execFileSync('tar', ['xzf', tgzName, '-C', relDest], { cwd: tgzDir, timeout: 60_000, stdio: 'pipe' });

  // lstat does not follow symlinks; missing or unreadable entries count as "no".
  const isRealDirectory = (candidate) => {
    try {
      return fs.lstatSync(candidate).isDirectory();
    } catch {
      return false;
    }
  };

  // npm tarballs extract into a package/ subdirectory; detect it
  const packageSubdir = path.join(destDir, 'package');
  if (isRealDirectory(packageSubdir)) {
    return packageSubdir;
  }

  // Otherwise return destDir itself (PyPI sdists vary)
  const entries = fs.readdirSync(destDir);
  if (entries.length === 1) {
    const single = path.join(destDir, entries[0]);
    if (isRealDirectory(single)) return single;
  }
  return destDir;
}
150
+
151
/**
 * Sanitize a package name for use in temporary directory names.
 * Strips @ symbols and ".." sequences, and replaces both forward slashes and
 * backslashes with underscores so the result contains no path separator on
 * any platform.
 * @param {string} packageName - Raw package name
 * @returns {string} Safe string for directory names ('' for null/undefined)
 */
function sanitizePackageName(packageName) {
  const raw = packageName == null ? '' : String(packageName);
  return raw
    // Strip '@' first: removing it later could glue two dots back into "..".
    .replace(/@/g, '')
    .replace(/\.\./g, '')
    // Backslash is a path separator on Windows, so treat it like '/'.
    .replace(/[\\/]/g, '_');
}
163
+
164
+ module.exports = {
165
+ downloadToFile,
166
+ extractTarGz,
167
+ sanitizePackageName,
168
+ isAllowedDownloadRedirect,
169
+ ALLOWED_DOWNLOAD_DOMAINS,
170
+ PRIVATE_IP_PATTERNS
171
+ };
@@ -2,16 +2,14 @@ const https = require('https');
2
2
  const fs = require('fs');
3
3
  const path = require('path');
4
4
  const os = require('os');
5
- const { execSync } = require('child_process');
6
5
  const acorn = require('acorn');
7
6
  const walk = require('acorn-walk');
8
7
  const { findJsFiles } = require('./utils.js');
9
8
  const { fetchPackageMetadata, getLatestVersions } = require('./temporal-analysis.js');
9
+ const { downloadToFile, extractTarGz, sanitizePackageName } = require('./shared/download.js');
10
10
 
11
11
  const REGISTRY_URL = 'https://registry.npmjs.org';
12
- const MAX_TARBALL_SIZE = 50 * 1024 * 1024; // 50MB
13
12
  const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10MB
14
- const DOWNLOAD_TIMEOUT = 30_000;
15
13
  const METADATA_TIMEOUT = 10_000;
16
14
 
17
15
  const SENSITIVE_PATHS = [
@@ -77,84 +75,6 @@ function fetchVersionMetadata(packageName, version) {
77
75
  });
78
76
  }
79
77
 
80
- /**
81
- * Download a file from HTTPS URL to disk.
82
- */
83
- function downloadToFile(url, destPath) {
84
- return new Promise((resolve, reject) => {
85
- const doRequest = (requestUrl) => {
86
- const req = https.get(requestUrl, { timeout: DOWNLOAD_TIMEOUT }, (res) => {
87
- if (res.statusCode === 301 || res.statusCode === 302) {
88
- res.resume();
89
- const location = res.headers.location;
90
- if (!location) return reject(new Error(`Redirect without Location for ${requestUrl}`));
91
- return doRequest(location);
92
- }
93
- if (res.statusCode < 200 || res.statusCode >= 300) {
94
- res.resume();
95
- return reject(new Error(`HTTP ${res.statusCode} for ${requestUrl}`));
96
- }
97
- const contentLength = parseInt(res.headers['content-length'], 10);
98
- if (contentLength && contentLength > MAX_TARBALL_SIZE) {
99
- res.resume();
100
- return reject(new Error(`Tarball too large: ${contentLength} bytes`));
101
- }
102
- const fileStream = fs.createWriteStream(destPath);
103
- let downloaded = 0;
104
- res.on('data', chunk => {
105
- downloaded += chunk.length;
106
- if (downloaded > MAX_TARBALL_SIZE) {
107
- res.destroy();
108
- fileStream.destroy();
109
- try { fs.unlinkSync(destPath); } catch {}
110
- reject(new Error(`Tarball too large: ${downloaded}+ bytes`));
111
- }
112
- });
113
- res.pipe(fileStream);
114
- fileStream.on('finish', () => resolve(downloaded));
115
- fileStream.on('error', err => {
116
- try { fs.unlinkSync(destPath); } catch {}
117
- reject(err);
118
- });
119
- res.on('error', err => {
120
- fileStream.destroy();
121
- try { fs.unlinkSync(destPath); } catch {}
122
- reject(err);
123
- });
124
- });
125
- req.on('error', reject);
126
- req.on('timeout', () => {
127
- req.destroy();
128
- reject(new Error(`Download timeout for ${requestUrl}`));
129
- });
130
- };
131
- doRequest(url);
132
- });
133
- }
134
-
135
- /**
136
- * Extract a .tar.gz to a directory. Returns the package root.
137
- */
138
- function extractTarGz(tgzPath, destDir) {
139
- // Use cwd + relative paths so C: never appears in tar arguments
140
- // (GNU tar treats C: as remote host, bsdtar doesn't support --force-local)
141
- const tgzDir = path.dirname(path.resolve(tgzPath));
142
- const tgzName = path.basename(tgzPath);
143
- const relDest = path.relative(tgzDir, path.resolve(destDir)) || '.';
144
- execSync(`tar xzf "${tgzName}" -C "${relDest}"`, { cwd: tgzDir, timeout: 60_000, stdio: 'pipe' });
145
- // npm tarballs extract into a package/ subdirectory
146
- const packageSubdir = path.join(destDir, 'package');
147
- if (fs.existsSync(packageSubdir) && fs.statSync(packageSubdir).isDirectory()) {
148
- return packageSubdir;
149
- }
150
- const entries = fs.readdirSync(destDir);
151
- if (entries.length === 1) {
152
- const single = path.join(destDir, entries[0]);
153
- if (fs.statSync(single).isDirectory()) return single;
154
- }
155
- return destDir;
156
- }
157
-
158
78
  // --- Core functions ---
159
79
 
160
80
  /**
@@ -170,7 +90,7 @@ async function fetchPackageTarball(packageName, version) {
170
90
  throw new Error(`No tarball URL found for ${packageName}@${version}`);
171
91
  }
172
92
 
173
- const safeName = packageName.replace(/\//g, '_').replace(/@/g, '');
93
+ const safeName = sanitizePackageName(packageName);
174
94
  const tmpBase = path.join(os.tmpdir(), 'muaddib-ast-diff');
175
95
  if (!fs.existsSync(tmpBase)) fs.mkdirSync(tmpBase, { recursive: true });
176
96
  const tmpDir = fs.mkdtempSync(path.join(tmpBase, `${safeName}-${version}-`));