npm - muaddib-scanner - Versions diffs - 2.7.6 → 2.7.7 - Mend

muaddib-scanner 2.7.6 → 2.7.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "muaddib-scanner",
-  "version": "2.7.6",
+  "version": "2.7.7",
   "description": "Supply-chain threat detection & response for npm & PyPI/Python",
   "main": "src/index.js",
   "bin": {

package/src/index.js CHANGED Viewed

@@ -568,8 +568,9 @@ async function run(targetPath, options = {}) {
   // A malware package typically has 1-3 occurrences, not dozens.
   applyFPReductions(deduped, reachableFiles, packageName);
-  // Intent coherence analysis: detect source→sink pairs across files
-  const intentResult = buildIntentPairs(deduped);
+  // Intent coherence analysis: detect source→sink pairs within files
+  // Pass targetPath for destination-aware SDK pattern detection
+  const intentResult = buildIntentPairs(deduped, targetPath);
   // Add intent threats to deduped before enrichment so they get rules/playbooks
   if (intentResult.intentThreats) {
     for (const it of intentResult.intentThreats) {

package/src/intent-graph.js CHANGED Viewed

@@ -1,5 +1,8 @@
 'use strict';
+const fs = require('fs');
+const path = require('path');
 // ============================================
 // INTENT GRAPH — Intra-File Coherence Analysis
 // ============================================
@@ -14,6 +17,7 @@
 // 3. Sources = ONLY high-confidence credential access (NOT env_access, NOT suspicious_dataflow)
 // 4. Sinks = ONLY threats already identified by scanners (NO content-based scanning)
 // 5. No double-counting — suspicious_dataflow is already a compound detection
+// 6. Destination-aware: SDK patterns (env key matches API domain) are NOT exfiltration
 // ============================================
 // SOURCE CLASSIFICATION
@@ -32,6 +36,165 @@ const SOURCE_TYPES = {
 // Sensitive env var patterns — env_access referencing these is credential theft, not config
 const SENSITIVE_ENV_PATTERNS = /TOKEN|KEY|SECRET|PASSWORD|CREDENTIAL|API_KEY|AUTH/i;
+// ============================================
+// DESTINATION-AWARE SDK DETECTION
+// ============================================
+// Curated allowlist: when an env var matching the pattern is sent to a matching domain,
+// it is legitimate SDK usage, not credential exfiltration.
+// Safe-by-default: unknown env vars or unknown domains remain CRITICAL.
+const SDK_ENV_DOMAIN_MAP = [
+  { envPattern: /^AWS_/i, domains: ['amazonaws.com', 'aws.amazon.com'] },
+  { envPattern: /^AZURE_/i, domains: ['azure.com', 'microsoft.com'] },
+  { envPattern: /^GOOGLE_|^GCP_/i, domains: ['googleapis.com', 'google.com'] },
+  { envPattern: /^FIREBASE_/i, domains: ['firebase.com', 'googleapis.com'] },
+  { envPattern: /^SALESFORCE_/i, domains: ['salesforce.com', 'force.com'] },
+  { envPattern: /^SUPABASE_/i, domains: ['supabase.co', 'supabase.com'] },
+  { envPattern: /^MAILGUN_/i, domains: ['mailgun.net', 'mailgun.com'] },
+  { envPattern: /^STRIPE_/i, domains: ['stripe.com'] },
+  { envPattern: /^TWILIO_/i, domains: ['twilio.com'] },
+  { envPattern: /^SENDGRID_/i, domains: ['sendgrid.com', 'sendgrid.net'] },
+  { envPattern: /^DATADOG_/i, domains: ['datadoghq.com'] },
+  { envPattern: /^SENTRY_/i, domains: ['sentry.io'] },
+  { envPattern: /^SLACK_/i, domains: ['slack.com'] },
+  { envPattern: /^GITHUB_/i, domains: ['github.com', 'githubusercontent.com'] },
+  { envPattern: /^GITLAB_/i, domains: ['gitlab.com'] },
+  { envPattern: /^CLOUDFLARE_/i, domains: ['cloudflare.com'] },
+  { envPattern: /^OPENAI_/i, domains: ['openai.com'] },
+  { envPattern: /^ANTHROPIC_/i, domains: ['anthropic.com'] },
+  { envPattern: /^MONGODB_|^MONGO_/i, domains: ['mongodb.com', 'mongodb.net'] },
+  { envPattern: /^AUTH0_/i, domains: ['auth0.com'] },
+  { envPattern: /^HUBSPOT_/i, domains: ['hubspot.com', 'hubapi.com'] },
+  { envPattern: /^CONTENTFUL_/i, domains: ['contentful.com'] },
+];
+// Tokens stripped when extracting brand keyword from env var name
+const ENV_NOISE_TOKENS = new Set([
+  'API', 'KEY', 'SECRET', 'TOKEN', 'PASSWORD', 'CREDENTIAL',
+  'AUTH', 'ACCESS', 'PRIVATE', 'PUBLIC', 'CLIENT', 'ID', 'URL'
+]);
+// Suspicious tunneling/proxy domains — never considered legitimate SDK destinations
+const SUSPICIOUS_DOMAIN_PATTERNS = /ngrok|serveo|localtunnel|burpcollaborator|requestbin|pipedream|webhook\.site/i;
+// URL extraction regex (matches http/https URLs in source code)
+const URL_EXTRACT_RE = /https?:\/\/[a-zA-Z0-9\-._~:/?#[\]@!$&'()*+,;=%]+/g;
+// Hostname extraction from Node.js request options: hostname: 'domain.com' or host: 'domain.com'
+const HOSTNAME_OPTION_RE = /(?:hostname|host)\s*:\s*['"`]([a-zA-Z0-9\-._]+)['"`]/g;
+/**
+ * Extract env var name from an intent source threat message.
+ * Messages look like: "process.env.SALESFORCE_API_KEY", "env var MAILGUN_API_KEY accessed"
+ */
+function extractEnvVarFromMessage(sourceThreats) {
+  for (const t of sourceThreats) {
+    if (!t.message) continue;
+    // Match process.env.VAR_NAME pattern
+    const envMatch = t.message.match(/process\.env\.([A-Z_][A-Z0-9_]*)/i);
+    if (envMatch) return envMatch[1];
+    // Match standalone VAR_NAME patterns (e.g., "SALESFORCE_API_KEY")
+    const varMatch = t.message.match(/\b([A-Z][A-Z0-9]*(?:_[A-Z0-9]+)+)\b/);
+    if (varMatch) return varMatch[1];
+  }
+  return null;
+}
+/**
+ * Extract brand keyword from env var name by removing noise tokens.
+ * MAILGUN_API_KEY → MAILGUN, SALESFORCE_CLIENT_SECRET → SALESFORCE
+ */
+function extractBrandFromEnvVar(envVarName) {
+  const parts = envVarName.toUpperCase().split('_');
+  const brandParts = parts.filter(p => !ENV_NOISE_TOKENS.has(p) && p.length > 0);
+  return brandParts.length > 0 ? brandParts[0] : null;
+}
+/**
+ * Extract domain from a URL string.
+ * Returns the hostname (without port).
+ */
+function extractDomain(url) {
+  try {
+    const match = url.match(/^https?:\/\/([^/:?#]+)/i);
+    return match ? match[1].toLowerCase() : null;
+  } catch {
+    return null;
+  }
+}
+/**
+ * Check if a domain matches any of the expected SDK domains (suffix match).
+ * api.mailgun.net matches mailgun.net, sub.api.stripe.com matches stripe.com
+ */
+function domainMatchesSuffix(domain, expectedDomains) {
+  for (const expected of expectedDomains) {
+    if (domain === expected || domain.endsWith('.' + expected)) return true;
+  }
+  return false;
+}
+/**
+ * Check if an env var + file content represents a legitimate SDK pattern.
+ *
+ * Returns true ONLY if:
+ * 1. The env var matches a known SDK mapping (allowlist) OR heuristic brand match
+ * 2. ALL URLs in the file point to domains matching the expected SDK
+ * 3. No suspicious tunneling/proxy domains are present
+ *
+ * @param {string} envVarName - e.g., "SALESFORCE_API_KEY"
+ * @param {string} fileContent - source code of the file
+ * @returns {boolean} true if SDK pattern (should skip intent pair)
+ */
+function isSDKPattern(envVarName, fileContent) {
+  // Extract domains from full URLs (https://api.stripe.com/v1/charges)
+  const urls = fileContent.match(URL_EXTRACT_RE) || [];
+  const domains = urls.map(u => extractDomain(u)).filter(Boolean);
+  // Also extract hostnames from Node.js request options (hostname: 'api.stripe.com')
+  let hostnameMatch;
+  const hostnameRe = new RegExp(HOSTNAME_OPTION_RE.source, 'g');
+  while ((hostnameMatch = hostnameRe.exec(fileContent)) !== null) {
+    const hostname = hostnameMatch[1].toLowerCase();
+    if (hostname && !domains.includes(hostname)) {
+      domains.push(hostname);
+    }
+  }
+  // No URLs found — can't confirm SDK pattern, default to suspicious
+  if (domains.length === 0) return false;
+  // Check for suspicious tunneling domains — immediate fail
+  for (const domain of domains) {
+    if (SUSPICIOUS_DOMAIN_PATTERNS.test(domain)) return false;
+  }
+  // Check for raw IP addresses — immediate fail
+  for (const domain of domains) {
+    if (/^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/.test(domain)) return false;
+  }
+  // 1. Try curated allowlist first
+  for (const mapping of SDK_ENV_DOMAIN_MAP) {
+    if (mapping.envPattern.test(envVarName)) {
+      // All domains must match expected SDK domains
+      return domains.every(d => domainMatchesSuffix(d, mapping.domains));
+    }
+  }
+  // 2. Heuristic fallback: extract brand keyword and check domain labels
+  const brand = extractBrandFromEnvVar(envVarName);
+  if (!brand || brand.length < 3) return false; // Too short for reliable matching
+  const brandLower = brand.toLowerCase();
+  // Check if every domain has the brand as a whole label
+  // e.g., brand "ACME" matches "api.acme.com" (label "acme") but not "api.acmetech.com"
+  return domains.every(d => {
+    const labels = d.split('.');
+    return labels.some(label => label === brandLower);
+  });
+}
 // ============================================
 // SINK CLASSIFICATION (from existing threats only)
 // ============================================
@@ -149,9 +312,10 @@ function classifySink(threat) {
  * Cross-file detection is handled by module-graph.js (cross_file_dataflow).
  *
  * @param {Array} threats - deduplicated threat array
+ * @param {string} [targetPath] - root path for reading source files (SDK pattern detection)
  * @returns {Object} { pairs, intentScore, intentThreats }
  */
-function buildIntentPairs(threats) {
+function buildIntentPairs(threats, targetPath) {
   // Only consider MEDIUM+ threats. LOW severity means applyFPReductions already
   // determined this is noise (bundler artifact, dist/ file, count threshold exceeded).
   // Re-elevating LOW threats via intent pairing would undo FP reductions.
@@ -169,15 +333,23 @@ function buildIntentPairs(threats) {
   const pairs = [];
   let intentScore = 0;
+  // Cache file contents for SDK pattern checks (lazy, per file)
+  const fileContentCache = new Map();
   // Only pair sources and sinks within the SAME file
   for (const [file, fileThreats] of byFile) {
     const sources = [];
     const sinks = [];
+    // Track which threats are credential sources (for env var extraction)
+    const sourceThreats = [];
     for (const t of fileThreats) {
       const srcType = classifySource(t);
       const sinkType = classifySink(t);
-      if (srcType) sources.push(srcType);
+      if (srcType) {
+        sources.push(srcType);
+        sourceThreats.push(t);
+      }
       if (sinkType) sinks.push(sinkType);
     }
@@ -194,6 +366,30 @@ function buildIntentPairs(threats) {
         const pairKey = `${srcType}:${sinkType}:${file}`;
         if (pairSet.has(pairKey)) continue;
+        // Destination-aware SDK check: credential_read → network_external
+        // If the env var matches the API domain, this is legitimate SDK usage
+        if (srcType === 'credential_read' && sinkType === 'network_external' && targetPath) {
+          const envVarName = extractEnvVarFromMessage(sourceThreats);
+          if (envVarName) {
+            try {
+              let content = fileContentCache.get(file);
+              if (content === undefined) {
+                const filePath = path.join(targetPath, file);
+                content = fs.readFileSync(filePath, 'utf8');
+                fileContentCache.set(file, content);
+              }
+              if (isSDKPattern(envVarName, content)) {
+                // SDK pattern confirmed — skip this pair
+                pairSet.add(pairKey); // Mark as seen to avoid re-checking
+                continue;
+              }
+            } catch {
+              // File read error — default to suspicious (CRITICAL)
+            }
+          }
+        }
         pairSet.add(pairKey);
         pairs.push({
@@ -235,5 +431,9 @@ module.exports = {
   classifySource,
   classifySink,
   buildIntentPairs,
-  COHERENCE_MATRIX
+  COHERENCE_MATRIX,
+  isSDKPattern,
+  extractEnvVarFromMessage,
+  extractBrandFromEnvVar,
+  SDK_ENV_DOMAIN_MAP
 };