muaddib-scanner 2.8.2 → 2.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.8.2",
3
+ "version": "2.8.5",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -15,6 +15,9 @@ const { NPM_PACKAGE_REGEX } = require('../shared/constants.js');
15
15
  // Version format validation (semver-like + wildcard)
16
16
  const VERSION_RE = /^(\*|0|[1-9]\d*(\.\d+){0,2}(-[\w.]+)?(\+[\w.]+)?)$/;
17
17
 
18
+ // Aggregated warning counter for noisy logs (reset per scraper run)
19
+ let _noVersionSkipCount = 0;
20
+
18
21
  /**
19
22
  * Validate an IOC package entry before insertion.
20
23
  * Returns true if valid, false if should be skipped.
@@ -462,7 +465,11 @@ function extractVersions(affected) {
462
465
  }
463
466
  }
464
467
 
465
- return versions.size > 0 ? [...versions] : ['*'];
468
+ if (versions.size === 0) {
469
+ _noVersionSkipCount++;
470
+ return [];
471
+ }
472
+ return [...versions];
466
473
  }
467
474
 
468
475
  /**
@@ -519,7 +526,8 @@ async function scrapeShaiHuludDetector() {
519
526
  // Extract packages — one IOC per version for correct matching
520
527
  const pkgList = data.packages || [];
521
528
  for (const pkg of pkgList) {
522
- const versions = pkg.affectedVersions || ['*'];
529
+ const versions = pkg.affectedVersions || [];
530
+ if (versions.length === 0) continue; // Skip packages with no version info — avoids false wildcard
523
531
  for (const ver of versions) {
524
532
  packages.push({
525
533
  id: `SHAI-HULUD-${pkg.name}-${ver}`,
@@ -588,10 +596,11 @@ async function scrapeDatadogIOCs() {
588
596
  ? versionsStr.split(',').map(v => v.trim()).filter(Boolean)
589
597
  : [versionsStr];
590
598
  for (const ver of versionList) {
599
+ if (!ver || ver === '*') continue; // Skip wildcard fallbacks — avoids false positive cascade
591
600
  packages.push({
592
601
  id: `DATADOG-${name}`,
593
602
  name: name,
594
- version: ver || '*',
603
+ version: ver,
595
604
  severity: 'critical',
596
605
  confidence: 'high',
597
606
  source: 'datadog-consolidated',
@@ -967,10 +976,11 @@ async function scrapeStaticIOCs() {
967
976
 
968
977
  // Socket.dev reports
969
978
  for (const pkg of staticIOCs.socket || []) {
979
+ if (!pkg.version) continue; // Skip entries without version — avoids wildcard cascade
970
980
  packages.push({
971
981
  id: `SOCKET-${pkg.name}`,
972
982
  name: pkg.name,
973
- version: pkg.version || '*',
983
+ version: pkg.version,
974
984
  severity: pkg.severity || 'critical',
975
985
  confidence: 'high',
976
986
  source: 'socket-dev',
@@ -983,10 +993,11 @@ async function scrapeStaticIOCs() {
983
993
 
984
994
  // Phylum Research
985
995
  for (const pkg of staticIOCs.phylum || []) {
996
+ if (!pkg.version) continue; // Skip entries without version — avoids wildcard cascade
986
997
  packages.push({
987
998
  id: `PHYLUM-${pkg.name}`,
988
999
  name: pkg.name,
989
- version: pkg.version || '*',
1000
+ version: pkg.version,
990
1001
  severity: pkg.severity || 'critical',
991
1002
  confidence: 'high',
992
1003
  source: 'phylum',
@@ -999,10 +1010,11 @@ async function scrapeStaticIOCs() {
999
1010
 
1000
1011
  // npm removed packages
1001
1012
  for (const pkg of staticIOCs.npmRemoved || []) {
1013
+ if (!pkg.version) continue; // Skip entries without version — avoids wildcard cascade
1002
1014
  packages.push({
1003
1015
  id: `NPM-REMOVED-${pkg.name}`,
1004
1016
  name: pkg.name,
1005
- version: pkg.version || '*',
1017
+ version: pkg.version,
1006
1018
  severity: 'critical',
1007
1019
  confidence: 'high',
1008
1020
  source: 'npm-removed',
@@ -1080,6 +1092,9 @@ async function runScraper() {
1080
1092
  console.log(' OSV + OSSF + GenSecAI + DataDog + Snyk');
1081
1093
  console.log('='.repeat(60) + '\n');
1082
1094
 
1095
+ // Reset aggregated warning counters
1096
+ _noVersionSkipCount = 0;
1097
+
1083
1098
  // Create data directory if needed
1084
1099
  const dataDir = path.dirname(IOC_FILE);
1085
1100
  if (!fs.existsSync(dataDir)) {
@@ -1143,6 +1158,11 @@ async function runScraper() {
1143
1158
  const snykPackages = results[5];
1144
1159
  const pypiPackages = results[6];
1145
1160
 
1161
+ // Log aggregated warnings
1162
+ if (_noVersionSkipCount > 0) {
1163
+ console.log('[SCRAPER] WARN: ' + _noVersionSkipCount + ' packages skipped (no version info, wildcard fallback avoided)');
1164
+ }
1165
+
1146
1166
  // Merge all scraped packages
1147
1167
  const allPackages = [
1148
1168
  ...osvResult.packages,
@@ -1380,12 +1400,17 @@ async function runScraper() {
1380
1400
  };
1381
1401
  }
1382
1402
 
1403
+ // Test helpers for aggregated warning counters
1404
+ function getNoVersionSkipCount() { return _noVersionSkipCount; }
1405
+ function resetNoVersionSkipCount() { _noVersionSkipCount = 0; }
1406
+
1383
1407
  module.exports = {
1384
1408
  runScraper, scrapeShaiHuludDetector, scrapeDatadogIOCs,
1385
1409
  // Pure utility functions (exported for testing)
1386
1410
  parseCSVLine, parseCSV, extractVersions, parseOSVEntry,
1387
1411
  createFreshness, isAllowedRedirect, loadStaticIOCs,
1388
1412
  validateIOCEntry,
1413
+ getNoVersionSkipCount, resetNoVersionSkipCount,
1389
1414
  CONFIDENCE_ORDER, ALLOWED_REDIRECT_DOMAINS,
1390
1415
  MAX_ENTRY_UNCOMPRESSED, MAX_TOTAL_UNCOMPRESSED
1391
1416
  };
@@ -2062,11 +2062,14 @@ function handlePostWalk(ctx) {
2062
2062
  // Wave 4: Download-execute-cleanup — https download + chmod executable + execSync + unlink
2063
2063
  // Exclude when all URLs in the file point to safe registries (npm, GitHub, nodejs.org)
2064
2064
  // B4: removed fetchOnlySafeDomains guard — compound requires fetch+chmod+exec, which is never legitimate
2065
+ // C10: If file also contains hash/checksum verification, downgrade to HIGH — real droppers
2066
+ // don't verify payload integrity; legitimate installers (esbuild, sharp) do.
2065
2067
  if (ctx.hasRemoteFetch && ctx.hasChmodExecutable && ctx.hasExecSyncCall) {
2066
2068
  ctx.threats.push({
2067
2069
  type: 'download_exec_binary',
2068
- severity: 'CRITICAL',
2069
- message: 'Download-execute pattern: remote fetch + chmod executable + execSync in same file. Binary dropper camouflaged as native addon build.',
2070
+ severity: ctx.hasHashVerification ? 'HIGH' : 'CRITICAL',
2071
+ message: 'Download-execute pattern: remote fetch + chmod executable + execSync in same file.' +
2072
+ (ctx.hasHashVerification ? ' Hash verification detected — likely legitimate binary installer.' : ' Binary dropper camouflaged as native addon build.'),
2070
2073
  file: ctx.relFile
2071
2074
  });
2072
2075
  }
@@ -2082,22 +2085,41 @@ function handlePostWalk(ctx) {
2082
2085
  });
2083
2086
  }
2084
2087
 
2085
- // WASM payload detection: WebAssembly.compile/instantiate + readFileSync/https in same file
2086
- // WASM host import objects can contain callback functions that read credentials and exfiltrate.
2087
- // This pattern is never legitimate in npm packages — WASM should use pure computation, not host I/O.
2088
+ // WASM payload detection: WebAssembly.compile/instantiate + network in same file
2089
+ // C5+C6: Only emit CRITICAL wasm_host_sink if a non-fetch network call or corroborating exfil signals exist
2090
+ // (env_access, sensitive_string, credential reads). WASM + fetch alone is likely
2091
+ // just WASM module loading via fetch() (standard pattern: fetch('mod.wasm').then(WebAssembly.instantiateStreaming))
2088
2092
  if (ctx.hasWasmLoad && ctx.hasNetworkCallInFile) {
2089
- ctx.threats.push({
2090
- type: 'wasm_host_sink',
2091
- severity: 'CRITICAL',
2092
- message: 'WebAssembly module with network-capable host imports. WASM can invoke host callbacks to exfiltrate data while hiding control flow.',
2093
- file: ctx.relFile
2094
- });
2093
+ // C5/C6: Distinguish fetch-for-WASM-loading from independent network channels
2094
+ // https.request, http.get, dns.resolve are NEVER used for WASM loading — they indicate
2095
+ // an independent network channel (e.g., WASM host callbacks for C2 exfiltration)
2096
+ const hasExfilSignal = ctx.threats.some(t =>
2097
+ t.file === ctx.relFile && (
2098
+ t.type === 'env_access' || t.type === 'sensitive_string' ||
2099
+ t.type === 'suspicious_dataflow' || t.type === 'credential_regex_harvest'
2100
+ )
2101
+ );
2102
+ if (ctx.hasNonFetchNetworkCall || hasExfilSignal) {
2103
+ ctx.threats.push({
2104
+ type: 'wasm_host_sink',
2105
+ severity: 'CRITICAL',
2106
+ message: 'WebAssembly module with network-capable host imports and credential/env access. WASM can invoke host callbacks to exfiltrate data while hiding control flow.',
2107
+ file: ctx.relFile
2108
+ });
2109
+ } else {
2110
+ // WASM + network but no credential/env signals → standalone MEDIUM (likely fetch for WASM loading)
2111
+ ctx.threats.push({
2112
+ type: 'wasm_standalone',
2113
+ severity: 'MEDIUM',
2114
+ message: 'WebAssembly module with network calls but no credential/env access signals. Likely WASM loading via fetch(). Verify .wasm file purpose.',
2115
+ file: ctx.relFile
2116
+ });
2117
+ }
2095
2118
  }
2096
2119
 
2097
2120
  // WASM standalone: WebAssembly.compile/instantiate WITHOUT network sinks.
2098
2121
  // Legitimate: crypto, image processing, codecs. Still warrants investigation
2099
2122
  // because WASM hides control flow from static analysis.
2100
- // Compound WASM + network → wasm_host_sink (CRITICAL) takes priority (mutually exclusive).
2101
2123
  if (ctx.hasWasmLoad && !ctx.hasNetworkCallInFile) {
2102
2124
  ctx.threats.push({
2103
2125
  type: 'wasm_standalone',
@@ -111,6 +111,8 @@ function analyzeFile(content, filePath, basePath) {
111
111
  hasEnvEnumeration: false, // Object.entries/keys/values(process.env)
112
112
  hasEnvHarvestPattern: /\b(KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|NPM|AWS|SSH|WEBHOOK)\b/.test(content),
113
113
  hasNetworkCallInFile: /\b(fetch|https?\.request|https?\.get|dns\.resolve)\b/.test(content),
114
+ // C5: Non-fetch network calls indicate independent network channel (NOT WASM loading)
115
+ hasNonFetchNetworkCall: /\bhttps?\.request\b|\bhttps?\.get\b|\bdns\.resolve\b/.test(content),
114
116
  // Credential regex harvesting: regex literals or new RegExp() whose PATTERN contains credential keywords
115
117
  // Must check that the keyword is inside the regex, not just anywhere in the file
116
118
  hasCredentialRegex: hasCredentialInsideRegex(content),
@@ -154,7 +156,11 @@ function analyzeFile(content, filePath, basePath) {
154
156
  // WASM payload detection: WebAssembly.compile/instantiate with host import sinks
155
157
  hasWasmLoad: /\bWebAssembly\s*\.\s*(compile|instantiate|compileStreaming|instantiateStreaming)\b/.test(content),
156
158
  hasWasmHostSink: false, // set in handleCallExpression when WASM import object contains network/fs sinks
157
- hasProxyTrap: false // set in handleNewExpression when Proxy has set/get/apply trap
159
+ hasProxyTrap: false, // set in handleNewExpression when Proxy has set/get/apply trap
160
+ // C10: Hash verification — legitimate binary installers verify checksums
161
+ // Requires BOTH createHash() call AND .digest() call — this avoids false positives from
162
+ // standalone mentions of 'sha256' or 'integrity' in comments/descriptions
163
+ hasHashVerification: /\bcreateHash\s*\(/.test(content) && /\.digest\s*\(/.test(content)
158
164
  };
159
165
 
160
166
  // Compute fetchOnlySafeDomains: check if ALL URLs in file point to known registries
@@ -809,6 +809,26 @@ function analyzeFile(content, filePath, basePath) {
809
809
  const allTelemetryOnly = sources.every(s => s.type === 'telemetry_read');
810
810
  if (allTelemetryOnly && severity === 'CRITICAL') severity = 'HIGH';
811
811
 
812
+ // C7: SDK pattern downgrade — if ALL env_read sources match SDK env→domain mappings,
813
+ // this is legitimate SDK usage (e.g., STRIPE_API_KEY → api.stripe.com). Cap at HIGH.
814
+ if (severity === 'CRITICAL') {
815
+ const envSources = sources.filter(s => s.type === 'env_read');
816
+ if (envSources.length > 0 && sources.every(s => s.type === 'env_read' || s.type === 'telemetry_read')) {
817
+ try {
818
+ const { isSDKPattern } = require('../intent-graph.js');
819
+ const fileContent = fs.readFileSync(filePath, 'utf8');
820
+ const allSDK = envSources.every(s => {
821
+ // Extract env var name from source name (e.g., "STRIPE_API_KEY" from "process.env.STRIPE_API_KEY")
822
+ const envVar = s.name.replace(/^process\.env\./, '').replace(/^process\.env\[['"]/, '').replace(/['"]\]$/, '');
823
+ return isSDKPattern(envVar, fileContent);
824
+ });
825
+ if (allSDK) severity = 'HIGH';
826
+ } catch {
827
+ // Intent graph not available — keep CRITICAL
828
+ }
829
+ }
830
+ }
831
+
812
832
  const sourceDesc = hasCommandOutput ? 'command output' : 'credentials read';
813
833
  threats.push({
814
834
  type: 'suspicious_dataflow',
@@ -188,11 +188,15 @@ async function scanPackageJson(targetPath) {
188
188
  }
189
189
 
190
190
  if (malicious) {
191
+ // C1: Include triggering dependency metadata for diagnostic
191
192
  threats.push({
192
193
  type: 'known_malicious_package',
193
194
  severity: 'CRITICAL',
194
195
  message: `Malicious dependency declared: ${depName}@${depVersion} (source: ${malicious.source || 'IOC'})`,
195
- file: 'package.json'
196
+ file: 'package.json',
197
+ matchedDep: depName,
198
+ matchedVersion: malicious.version,
199
+ iocSource: malicious.source || 'IOC'
196
200
  });
197
201
  }
198
202
  }
package/src/scoring.js CHANGED
@@ -340,6 +340,21 @@ function applyFPReductions(threats, reachableFiles, packageName, packageDeps) {
340
340
  t.severity = 'MEDIUM';
341
341
  t.mcpSdkDowngrade = true;
342
342
  }
343
+
344
+ // C12: AI SDK awareness — env_access on AI API keys is expected in SDK packages.
345
+ // Downgrade env_access HIGH → MEDIUM when @modelcontextprotocol/sdk, @anthropic/sdk,
346
+ // or openai is in dependencies AND the env var is an AI provider key.
347
+ // Does NOT affect compound detections (intent_credential_exfil stays CRITICAL).
348
+ if (t.type === 'env_access' && t.severity === 'HIGH' &&
349
+ packageDeps && typeof packageDeps === 'object') {
350
+ const hasAiSdk = packageDeps['@modelcontextprotocol/sdk'] ||
351
+ packageDeps['@anthropic/sdk'] ||
352
+ packageDeps['openai'];
353
+ if (hasAiSdk && /\b(ANTHROPIC_API_KEY|OPENAI_API_KEY|CLAUDE_API_KEY)\b/.test(t.message)) {
354
+ t.reductions.push({ rule: 'ai_sdk_env', from: 'HIGH', to: 'MEDIUM' });
355
+ t.severity = 'MEDIUM';
356
+ }
357
+ }
343
358
  }
344
359
  }
345
360