npm - muaddib-scanner - Versions diffs - 2.11.65 → 2.11.67 - Mend

muaddib-scanner 2.11.65 → 2.11.67

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/package.json +1 -1
package/{self-scan-v2.11.65.json → self-scan-v2.11.67.json} +1 -1
package/src/ioc/updater.js +42 -9
package/src/monitor/classify.js +3 -1
package/src/monitor/queue.js +17 -0
package/src/monitor/scan-queue.js +12 -1
package/src/monitor/state.js +113 -0
package/src/monitor/tarball-archive.js +67 -25
package/src/response/playbooks.js +4 -0
package/src/rules/index.js +13 -0
package/src/scanner/package.js +45 -0
package/src/scoring.js +3 -1

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "muaddib-scanner",
-  "version": "2.11.65",
+  "version": "2.11.67",
   "description": "Supply-chain threat detection & response for npm & PyPI/Python",
   "main": "src/index.js",
   "bin": {

package/{self-scan-v2.11.65.json → self-scan-v2.11.67.json} RENAMED

@@ -1,6 +1,6 @@
 {
   "target": "node_modules",
-  "timestamp": "2026-06-06T16:38:57.648Z",
+  "timestamp": "2026-06-07T12:54:23.816Z",
   "threats": [
     {
       "type": "string_mutation_obfuscation",

package/src/ioc/updater.js CHANGED

@@ -123,6 +123,11 @@ async function updateIOCs() {
   console.log('[4/4] Saved to cache: ' + CACHE_IOC_FILE);
   console.log('\n[OK] IOCs updated: ' + totalNpm + ' npm + ' + totalPyPI + ' PyPI packages');
+  // Fresh IOC files written — drop the in-process singleton so the next
+  // loadCachedIOCs() rebuilds from them (cross-process monitors pick the change
+  // up via the mtime/size source signature within SOURCE_CHECK_INTERVAL).
+  invalidateCache();
   return { total: totalNpm, totalPyPI: totalPyPI };
 }
@@ -202,16 +207,41 @@ function mergeIOCs(target, source) {
   return added;
 }
-// Cache to avoid reloading IOCs on each call
+// IOC store cache. The optimized store is large (~240K entries → hundreds of MB),
+// so it MUST be a stable singleton: rebuilding it duplicates that memory, and any
+// in-flight async scan (sandbox/deferred/network) that captured a prior copy pins
+// it — a periodic rebuild therefore accumulates copies. This was the monitor's
+// old_space → OOM leak: a heap snapshot showed 7+ live copies of the 421K-entry
+// Map retained via loadCachedIOCs closures + suspended Generators/Promises.
+// Fix: rebuild ONLY when a source file actually changes (mtime/size signature) or
+// on invalidateCache(); otherwise return the same object. The signature is
+// re-checked at most every SOURCE_CHECK_INTERVAL so the hot path (called per
+// scan/poll) does zero disk I/O.
+const IOCS_DIR = path.join(__dirname, '..', '..', 'iocs');
+const IOC_SOURCE_FILES = [
+  CACHE_IOC_FILE, LOCAL_IOC_FILE, LOCAL_COMPACT_FILE,
+  path.join(IOCS_DIR, 'packages.yaml'), path.join(IOCS_DIR, 'builtin.yaml'),
+  path.join(IOCS_DIR, 'hashes.yaml'), path.join(IOCS_DIR, 'string-iocs.yaml')
+];
+function iocSourcesSignature() {
+  let sig = '';
+  for (const f of IOC_SOURCE_FILES) { try { const s = fs.statSync(f); sig += s.mtimeMs + ':' + s.size + ';'; } catch { sig += '0;'; } }
+  return sig;
+}
 let cachedIOCsResult = null;
-let cachedIOCsTime = 0;
-const CACHE_TTL = 10000; // 10 seconds
+let cachedIOCsSig = null;
+let lastSourceCheck = 0;
+const SOURCE_CHECK_INTERVAL = 10000; // re-stat source files at most every 10s
 function loadCachedIOCs() {
-  // Return cache if still valid
   const now = Date.now();
-  if (cachedIOCsResult && (now - cachedIOCsTime) < CACHE_TTL) {
-    return cachedIOCsResult;
+  if (cachedIOCsResult) {
+    // Hot path: within the check window, return the singleton with no disk I/O.
+    if (now - lastSourceCheck < SOURCE_CHECK_INTERVAL) return cachedIOCsResult;
+    lastSourceCheck = now;
+    // Throttled freshness check: keep the singleton unless a source file changed.
+    if (iocSourcesSignature() === cachedIOCsSig) return cachedIOCsResult;
   }
   // Priority 1: YAML IOCs
@@ -279,9 +309,11 @@ function loadCachedIOCs() {
   // Create optimized structures for O(1) lookup
   const optimized = createOptimizedIOCs(merged);
-  // Store in cache
+  // Store as the shared singleton; record the source signature so we only rebuild
+  // when the IOC files actually change (see loadCachedIOCs header).
   cachedIOCsResult = optimized;
-  cachedIOCsTime = now;
+  cachedIOCsSig = iocSourcesSignature();
+  lastSourceCheck = now;
   return optimized;
 }
@@ -560,7 +592,8 @@ function expandCompactIOCs(compact) {
 function invalidateCache() {
   cachedIOCsResult = null;
-  cachedIOCsTime = 0;
+  cachedIOCsSig = null;
+  lastSourceCheck = 0;
 }
 /**

package/src/monitor/classify.js CHANGED

@@ -73,7 +73,9 @@ const HIGH_CONFIDENCE_MALICE_TYPES = new Set([
   // cap since the attack uses optionalDependencies + prepare hook (no direct lifecycle).
   'env_charcode_reconstruction',           // fromCharCode + process.env[computed] (TeamPCP credential stealer)
   'ide_hook_autoexec',                     // .claude/settings.json SessionStart hook, .vscode/tasks.json folderOpen (Shai-Hulud)
-  'workflow_secrets_dump'                  // toJSON(secrets) in GitHub Actions workflow (Shai-Hulud)
+  'workflow_secrets_dump',                  // toJSON(secrets) in GitHub Actions workflow (Shai-Hulud)
+  // Phantom Gyp 2026-06: binding.gyp command-substitution = install-time RCE, quasi-never legit in benign packages
+  'gyp_command_exec'
 ]);
 // Lifecycle compound types that indicate real malicious intent beyond a simple postinstall

package/src/monitor/queue.js CHANGED

@@ -26,6 +26,7 @@ const {
   cacheTarball,
   updateScanStats,
   appendDetection,
+  appendScanLedger,
   maybePersistDailyStats,
   appendTemporalDetection,
   tarballCacheKey,
@@ -221,6 +222,20 @@ function recordTrainingSample(result, params) {
       sandboxResult: params.sandboxResult || null
     });
     appendTrainingRecord(record);
+    // Phase 0a: per-scan coverage ledger — record this terminal outcome (best-effort;
+    // appendScanLedger swallows its own write errors and never throws).
+    appendScanLedger({
+      name: params.name,
+      version: params.version,
+      ecosystem: params.ecosystem,
+      outcome: params.label || 'clean',
+      score: (result.summary && typeof result.summary.riskScore === 'number') ? result.summary.riskScore : null,
+      tier: params.tier,
+      maxSeverity: result.summary ? result.summary.riskLevel : null,
+      types: [...new Set((result.threats || []).map(t => t.type))],
+      sandbox: params.sandboxResult ? 'run' : 'none',
+      source: 'scan'
+    });
   } catch (err) {
     // Non-fatal: ML export must never crash the monitor
     console.error(`[ML] Failed to record training sample for ${params.name}: ${err.message}`);
@@ -521,6 +536,7 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
             stats.totalTimeMs += Date.now() - startTime;
             stats.clean++;
             updateScanStats('clean');
+            appendScanLedger({ name, version, ecosystem, outcome: 'size_skip', score: 0, source: 'size_skip_quick_clean' });
             return;
           }
         } catch {
@@ -541,6 +557,7 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
             stats.totalTimeMs += Date.now() - startTime;
             stats.clean++;
             updateScanStats('clean');
+            appendScanLedger({ name, version, ecosystem, outcome: 'size_skip', score: 0, source: 'size_skip_extract_failed' });
             return;
           }
         }

package/src/monitor/scan-queue.js CHANGED

@@ -32,9 +32,20 @@ let _lastHardDropLog = 0;
 function enqueueScan(scanQueue, item, stats, max = MAX_SCAN_QUEUE) {
   let dropped = false;
   if (scanQueue.length >= max) {
-    scanQueue.shift(); // drop oldest
+    const evicted = scanQueue.shift(); // drop oldest
     dropped = true;
     if (stats) stats.queueHardDrops = (stats.queueHardDrops || 0) + 1;
+    // Phase 0a: record the dropped item so a coverage loss keeps an identity — answers
+    // "which versions were never scanned" (e.g. the Miasma 72s/96-version burst). Lazy
+    // require avoids any top-level coupling with state.js; best-effort, never throws.
+    try {
+      if (evicted && evicted.name) {
+        require('./state.js').appendScanLedger({
+          name: evicted.name, version: evicted.version, ecosystem: evicted.ecosystem,
+          outcome: 'dropped', source: 'queue_cap'
+        });
+      }
+    } catch { /* ledger is best-effort */ }
     const now = Date.now();
     if (now - _lastHardDropLog > HARD_DROP_LOG_INTERVAL_MS) {
       _lastHardDropLog = now;

package/src/monitor/state.js CHANGED

@@ -951,6 +951,114 @@ function _compactDetectionsJsonl() {
   }
 }
+// --- Per-scan ledger (Phase 0a: operational coverage observability) ---
+// Append-only record of EVERY package the monitor dequeues + its terminal outcome,
+// so we can distinguish never-scanned vs scanned-clean vs suspect vs dropped and
+// measure TRUE operational coverage (not just rule-TPR on the static corpus).
+// Mirrors the detections JSONL machinery (chunked iterate + periodic compaction).
+// Differences vs detections: (1) NO dedup — every scan event is a distinct record;
+// (2) higher cap + compaction interval since this logs every scan, not just findings.
+const SCAN_LEDGER_FILE = process.env.MUADDIB_SCAN_LEDGER_FILE || path.join(__dirname, '..', '..', 'data', 'scan-ledger.jsonl');
+const MAX_SCAN_LEDGER = (() => {
+  const raw = process.env.MUADDIB_SCAN_LEDGER_MAX;
+  const n = raw ? parseInt(raw, 10) : NaN;
+  return (Number.isFinite(n) && n >= 10 && n <= 5_000_000) ? n : 500_000;
+})();
+const SCAN_LEDGER_COMPACT_INTERVAL = 2000;
+let _scanLedgerAppendedSinceCompact = 0;
+// Terminal outcomes a dequeued package can reach. Unknown values normalize to 'clean'
+// so a typo at a call site can never crash the pipeline.
+const SCAN_LEDGER_OUTCOMES = new Set([
+  'clean', 'clean_low_signal', 'clean_tooling', 'suspect', 'ml_clean', 'llm_benign',
+  'sandbox_inconclusive', 'sandbox_unconfirmed', 'confirmed',
+  'static_timeout', 'size_skip', 'dropped'
+]);
+/**
+ * Append one per-scan ledger entry recording the terminal outcome of a dequeued
+ * package. Best-effort: NEVER throws (a ledger failure must not break scanning).
+ * No dedup — repeated scans of the same package are intentionally all recorded.
+ *
+ * @param {object} e
+ * @param {string}  e.name        package name (required)
+ * @param {string} [e.version]
+ * @param {string} [e.ecosystem]  'npm' | 'pypi' | ...
+ * @param {string} [e.outcome]    one of SCAN_LEDGER_OUTCOMES (default 'clean')
+ * @param {number} [e.score]      riskScore at the terminal decision
+ * @param {string} [e.tier]       suspect tier ('1a'|'1b'|2|3) if applicable
+ * @param {string} [e.maxSeverity]
+ * @param {string[]} [e.types]    threat types (capped to 12)
+ * @param {string} [e.sandbox]    'none' | 'run' | 'deferred' | 'skip'
+ * @param {boolean} [e.firstPublish]
+ * @param {string} [e.source]     where the record originated ('scan','queue_cap',...)
+ */
+function appendScanLedger(e) {
+  try {
+    if (!e || !e.name) return;
+    const dir = path.dirname(SCAN_LEDGER_FILE);
+    if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
+    const entry = {
+      ts: new Date().toISOString(),
+      name: e.name,
+      version: e.version || null,
+      ecosystem: e.ecosystem || null,
+      outcome: SCAN_LEDGER_OUTCOMES.has(e.outcome) ? e.outcome : 'clean',
+      score: (typeof e.score === 'number') ? e.score : null,
+      tier: (e.tier !== undefined && e.tier !== null) ? String(e.tier) : null,
+      maxSeverity: e.maxSeverity || null,
+      types: Array.isArray(e.types) ? e.types.slice(0, 12) : [],
+      sandbox: e.sandbox || 'none',
+      firstPublish: !!e.firstPublish,
+      source: e.source || 'scan'
+    };
+    fs.appendFileSync(SCAN_LEDGER_FILE, JSON.stringify(entry) + '\n', 'utf8');
+    _scanLedgerAppendedSinceCompact++;
+    if (_scanLedgerAppendedSinceCompact >= SCAN_LEDGER_COMPACT_INTERVAL) {
+      _scanLedgerAppendedSinceCompact = 0;
+      _compactScanLedgerJsonl();
+    }
+  } catch (err) {
+    if (err.code === 'EROFS' || err.code === 'EACCES' || err.code === 'EPERM') return;
+    if (err.code === 'ENOSPC') {
+      console.warn('[MONITOR] WARNING: disk full (ENOSPC) — cannot persist scan-ledger.');
+      return;
+    }
+    console.error(`[MONITOR] Failed to write scan-ledger: ${err.message}`);
+  }
+}
+/**
+ * Compact the scan-ledger JSONL: keep only the most recent MAX_SCAN_LEDGER entries.
+ * No-op when already under cap. Streams (never loads the whole file at once).
+ */
+function _compactScanLedgerJsonl() {
+  try {
+    const total = _countJsonlLines(SCAN_LEDGER_FILE);
+    if (total <= MAX_SCAN_LEDGER) return;
+    const toDrop = total - MAX_SCAN_LEDGER;
+    let skipped = 0;
+    const kept = [];
+    _iterateJsonlSync(SCAN_LEDGER_FILE, (entry) => {
+      if (skipped < toDrop) { skipped++; return; }
+      kept.push(JSON.stringify(entry));
+    });
+    const tmpFile = SCAN_LEDGER_FILE + '.tmp';
+    fs.writeFileSync(tmpFile, kept.length ? kept.join('\n') + '\n' : '', 'utf8');
+    fs.renameSync(tmpFile, SCAN_LEDGER_FILE);
+    console.log(`[MONITOR] COMPACT scan-ledger: ${total} -> ${kept.length} entries`);
+  } catch (err) {
+    console.error(`[MONITOR] Scan-ledger compaction failed: ${err.message}`);
+  }
+}
+/** Stream the scan-ledger into an array (tests + Phase 0b rollup). */
+function loadScanLedger() {
+  const entries = [];
+  try { _iterateJsonlSync(SCAN_LEDGER_FILE, (e) => { entries.push(e); }); } catch { /* ignore */ }
+  return entries;
+}
 // --- Scan stats (FP rate tracking) ---
 function loadScanStats() {
@@ -1420,6 +1528,8 @@ module.exports = {
   MAX_TEMPORAL_DETECTIONS,
   MAX_DAILY_ALERTS,
   DETECTION_COMPACT_INTERVAL,
+  SCAN_LEDGER_FILE,
+  MAX_SCAN_LEDGER,
   // Mutable state getters/setters
   getScanMemoryCache,
@@ -1456,6 +1566,9 @@ module.exports = {
   appendAlert,
   loadDetections,
   appendDetection,
+  appendScanLedger,
+  loadScanLedger,
+  _compactScanLedgerJsonl,
   getDetectionStats,
   runStateMigrations,
   // Internal — exported for tests and for the daemon hourly housekeeping.

package/src/monitor/tarball-archive.js CHANGED

@@ -45,6 +45,18 @@ function getMinFreeBytes() {
   return gb * 1024 * 1024 * 1024;
 }
+// Tarball download is gated on this score so the heavy .tgz is kept ONLY for
+// alert-threshold packages; the cheap JSON metadata is still written for every
+// suspect. Aligns with the webhook alert floor (20). Bounded to [0, 100], default 20.
+const DEFAULT_TGZ_MIN_SCORE = 20;
+function getArchiveTgzMinScore() {
+  const raw = process.env.MUADDIB_ARCHIVE_TGZ_MIN_SCORE;
+  if (raw === undefined || raw === '') return DEFAULT_TGZ_MIN_SCORE;
+  const n = parseInt(raw, 10);
+  if (!Number.isFinite(n) || n < 0 || n > 100) return DEFAULT_TGZ_MIN_SCORE;
+  return n;
+}
 function hasEnoughSpace(targetDir) {
   try {
     if (typeof fs.statfsSync !== 'function') return true; // Node <18.15 — fail-open
@@ -109,14 +121,20 @@ async function archiveSuspectTarball(packageName, version, tarballUrl, scanResul
   // Defense-in-depth: never archive packages that are statically clean.
   // Callers in the pipeline already gate on tier 1a/1b/2 classification, but a
-  // numeric score of 0 with no triggered rules is unambiguously CLEAN — those
-  // dominated archive volume in production.
+  // numeric score of 0 with no triggered rules is unambiguously CLEAN.
   const score = (scanResult && typeof scanResult.score === 'number') ? scanResult.score : 0;
   const rules = (scanResult && Array.isArray(scanResult.rulesTriggered)) ? scanResult.rulesTriggered : [];
   if (score === 0 && rules.length === 0) {
     return false;
   }
+  // Tarballs dominate archive volume (~439MB/day of .tgz vs ~3.6MB/day of JSON).
+  // Keep the cheap JSON metadata for EVERY suspect (audit trail + GT-promotion index),
+  // but download/retain the heavy .tgz ONLY for packages at/above the alert threshold
+  // (score >= MUADDIB_ARCHIVE_TGZ_MIN_SCORE, default 20 = webhook floor). This shrinks
+  // the archive from tens of GB to hundreds of MB without losing the record of what was seen.
+  const keepTarball = score >= getArchiveTgzMinScore();
   const dateStr = getArchiveDateString();
   const dayDir = path.join(ARCHIVE_DIR, dateStr);
   const safeName = sanitizeForFilename(packageName);
@@ -124,32 +142,55 @@ async function archiveSuspectTarball(packageName, version, tarballUrl, scanResul
   const tgzPath = path.join(dayDir, `${basename}.tgz`);
   const jsonPath = path.join(dayDir, `${basename}.json`);
-  // Dedup: skip if already archived
-  if (fs.existsSync(tgzPath)) {
-    return false;
-  }
+  // At/above the alert threshold: archive the full .tgz (existing behavior, unchanged).
+  // Below it: keep only the cheap JSON metadata (audit trail + GT-promotion index).
+  if (keepTarball) {
+    // Dedup: skip if already archived
+    if (fs.existsSync(tgzPath)) {
+      return false;
+    }
-  // Defense layer 3: skip if disk is nearly full, even if retention is well-configured.
-  // Prevents a burst of malicious campaigns from blowing past the 7-day budget
-  // before the 6h periodic cleanup tick can catch up.
-  if (!hasEnoughSpace(ARCHIVE_DIR)) {
-    console.warn(`[Archive] Skip ${packageName}@${version}: free space below ${DEFAULT_MIN_FREE_GB}GB threshold`);
-    return false;
-  }
+    // Disk-space gate: don't let a burst of suspects run the volume to 100% between
+    // the periodic cleanups. Guards the heavy .tgz download.
+    if (!hasEnoughSpace(ARCHIVE_DIR)) {
+      console.warn(`[Archive] Skip ${packageName}@${version}: free space below ${DEFAULT_MIN_FREE_GB}GB threshold`);
+      return false;
+    }
-  // Ensure day directory exists
-  fs.mkdirSync(dayDir, { recursive: true });
+    // Ensure day directory exists
+    fs.mkdirSync(dayDir, { recursive: true });
-  // Download with semaphore (shares concurrency with rest of pipeline)
-  await acquireRegistrySlot();
-  try {
-    await downloadToFile(tarballUrl, tgzPath, ARCHIVE_TIMEOUT_MS);
-  } finally {
-    releaseRegistrySlot();
+    // Download with semaphore (shares concurrency with rest of pipeline). Download
+    // errors propagate to the fire-and-forget .catch() in the caller (queue.js).
+    await acquireRegistrySlot();
+    try {
+      await downloadToFile(tarballUrl, tgzPath, ARCHIVE_TIMEOUT_MS);
+    } finally {
+      releaseRegistrySlot();
+    }
+    const tarballSha256 = sha256File(tgzPath);
+    const metadata = {
+      package: packageName,
+      version,
+      timestamp: new Date().toISOString(),
+      score: scanResult.score || 0,
+      priority: scanResult.priority || null,
+      rules_triggered: scanResult.rulesTriggered || [],
+      llm_verdict: scanResult.llmVerdict || null,
+      tarball_archived: true,
+      tarball_sha256: tarballSha256
+    };
+    fs.writeFileSync(jsonPath, JSON.stringify(metadata, null, 2));
+    return true;
   }
-  // Compute hash and write metadata
-  const tarballSha256 = sha256File(tgzPath);
+  // Below the alert threshold — record cheap JSON metadata only, skip the tarball.
+  // Dedup on the JSON record so re-scans of the same package@version don't rewrite it.
+  if (fs.existsSync(jsonPath)) {
+    return false;
+  }
+  fs.mkdirSync(dayDir, { recursive: true });
   const metadata = {
     package: packageName,
     version,
@@ -158,9 +199,9 @@ async function archiveSuspectTarball(packageName, version, tarballUrl, scanResul
     priority: scanResult.priority || null,
     rules_triggered: scanResult.rulesTriggered || [],
     llm_verdict: scanResult.llmVerdict || null,
-    tarball_sha256: tarballSha256
+    tarball_archived: false,
+    tarball_sha256: null
   };
   fs.writeFileSync(jsonPath, JSON.stringify(metadata, null, 2));
   return true;
 }
@@ -272,5 +313,6 @@ module.exports = {
   getArchiveDateString,
   getRetentionDays,
   getMinFreeBytes,
+  getArchiveTgzMinScore,
   parseArchiveDayDir
 };

package/src/response/playbooks.js CHANGED

@@ -1001,6 +1001,10 @@ const PLAYBOOKS = {
     'HAUTE: binding.gyp avec script lifecycle non-standard. Code natif compile a l\'installation. ' +
     'Verifier le contenu de binding.gyp et les sources C/C++. Installer avec --ignore-scripts si suspect.',
+  gyp_command_exec:
+    'CRITIQUE: binding.gyp utilise la command-substitution GYP <!(...) / <!@(...) — execution de code a l\'installation via node-gyp, sans script lifecycle (pattern Phantom Gyp). ' +
+    'Decoder la commande substituee. NE PAS installer : node-gyp l\'execute au build meme avec --ignore-scripts. Verifier la source officielle du package.',
   string_mutation_obfuscation:
     'HAUTE: Chaine de .replace() reconstruisant des noms d\'API dangereuses (leet-speak). ' +
     'Technique d\'evasion par substitution de caracteres. Decoder la chaine finale. Supprimer si malveillant.',

package/src/rules/index.js CHANGED

@@ -2949,6 +2949,19 @@ const RULES = {
     ],
     mitre: 'T1195.002'
   },
+  gyp_command_exec: {
+    id: 'MUADDIB-PKG-023',
+    name: 'GYP Command-Substitution Install Execution',
+    severity: 'CRITICAL',
+    confidence: 'high',
+    domain: 'malware',
+    description: 'binding.gyp utilise la command-substitution GYP <!(...) / <!@(...) — execution de code a l\'installation via node-gyp, sans script lifecycle package.json (pattern Phantom Gyp, juin 2026).',
+    references: [
+      'https://gyp.gsrc.io/docs/InputFormatReference.md',
+      'https://attack.mitre.org/techniques/T1195.002/'
+    ],
+    mitre: 'T1195.002'
+  },
   string_mutation_obfuscation: {
     id: 'MUADDIB-AST-074',
     name: 'String Mutation Obfuscation',

package/src/scanner/package.js CHANGED

@@ -252,6 +252,51 @@ async function scanPackageJson(targetPath) {
     // Check if binding.gyp references C/C++ source files
     const hasNativeSources = /\.(c|cc|cpp|cxx|h|hpp)\b/.test(gypContent);
+    // Phantom Gyp (June 2026): GYP command-substitution <!(...) / <!@(...) runs a command at
+    // *configure* time via `node-gyp`, which npm auto-runs on install whenever a binding.gyp is
+    // present — NO package.json lifecycle script required, so it slips past every lifecycle-gated
+    // check below. Distinct from <(...) / <@(...) (plain variable expansion, benign) which MUST
+    // NOT fire — the required `!` gates command execution.
+    //
+    // Legit native addons use <!(...) heavily for build-env queries — `node -p process.versions`,
+    // `node ./util/has_lib.js`, `pkg-config ... | sed`, `node -p "require('node-addon-api').include"`
+    // — and a build-helper `<!(node x.js)` is statically INDISTINGUISHABLE from a payload
+    // `<!(node index.js)`. To honor "FPR must never increase" we flag a command-sub ONLY when it
+    // carries a malice-specific marker, never the bare "runs a script" shape:
+    //   (1) GYP_DANGER — shell-level malice in the command line itself: the Phantom Gyp fake-source
+    //       trick (`; / && / | echo <name>.c`, returning a fabricated source so node-gyp doesn't
+    //       error), network fetch (curl/wget), pipe-to-shell (| sh, sh -c), eval/base64//dev/tcp,
+    //       char-code obfuscation (fromCharCode/atob);
+    //   (2) an inline interpreter payload — node|python|ruby|perl running -e/-c/-p/--eval/--print code
+    //       that reaches the NETWORK (require/import of https|http|net|dgram|dns|tls, optional node:
+    //       prefix; fetch; urllib/requests/httpx/http.client/urlopen; socket). Network at configure
+    //       time is never a legit build query. We deliberately do NOT key on child_process/exec/spawn
+    //       here — legit addons shell out to detect the toolchain (`node -e "...execSync('gcc
+    //       --version')..."`), which would FP; an exec of curl/wget is still caught by GYP_DANGER.
+    //       Catches `<!(node --eval require('node:https')...)`, `<!(python3 -c import requests)`.
+    // Honest limitation: this is a line-by-line SPEED-BUMP, not coverage. A bare `<!(node payload.js)`
+    // and any non-network inline payload are NOT flagged (indistinguishable from canvas/node-sass
+    // build helpers without false positives, FPR-first by design). Real closure needs a compound
+    // (configure-time sink × the run script's AST/dataflow verdict) — a separate effort.
+    const GYP_DANGER = /[;&|]\s*echo\s+[^|;&]*\.(?:c|cc|cpp|cxx|m|mm|cs)\b|\bcurl\b|\bwget\b|\|\s*(?:sh|bash|zsh)\b|\b(?:sh|bash|zsh)\s+-c\b|\beval\b|\bbase64\b|\/dev\/tcp|fromCharCode|\batob\b/i;
+    const GYP_INTERP = /\b(?:node|nodejs|python[0-9.]*|ruby|perl)\b[^|;&\n]{0,40}?\s--?(?:eval|print|e|c|p)\b/i;
+    const GYP_PAYLOAD_API = /(?:require|import)\s*\(\s*['"](?:node:)?(?:https?|net|dgram|dns|tls)['"]|\bfetch\s*\(|\burllib\b|\brequests\b|\bhttpx\b|http\.client|\burlopen\b|socket\.(?:socket|create_connection)/i;
+    let gypCommandExec = false;
+    const gypCmdSubRe = /<!@?\(([^\n]{0,400})/g;
+    let _gm;
+    while ((_gm = gypCmdSubRe.exec(gypContent)) !== null) {
+      const body = _gm[1];
+      if (GYP_DANGER.test(body) || (GYP_INTERP.test(body) && GYP_PAYLOAD_API.test(body))) { gypCommandExec = true; break; }
+    }
+    if (gypCommandExec) {
+      threats.push({
+        type: 'gyp_command_exec',
+        severity: 'CRITICAL',
+        message: `binding.gyp uses GYP command-substitution (<!(...) / <!@(...)) running a non-build command at install time via node-gyp, no lifecycle script required (Phantom Gyp pattern).`,
+        file: 'binding.gyp'
+      });
+    }
     if (hasShellActions) {
       threats.push({
         type: 'native_addon_install',

package/src/scoring.js CHANGED

@@ -130,7 +130,9 @@ const PACKAGE_LEVEL_TYPES = new Set([
   // audit DF-C1: emitted when MAX_GRAPH_NODES exceeded so cross-file blind spot is visible in scoring
   'large_package_graph_truncated',
   // audit MR-C1: informational signal that the scan target is a monorepo root (per-workspace scoring TBD)
-  'monorepo_detected'
+  'monorepo_detected',
+  // Phantom Gyp: binding.gyp command-substitution is a package-level (manifest) finding
+  'gyp_command_exec'
 ]);
 // ============================================