npm - @lateos/npm-scan - Versions diffs - 0.15.1 → 0.15.3 - Mend

@lateos/npm-scan 0.15.1 → 0.15.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/README.md +15 -2
package/backend/detectors/atk-002-obfusc.js +154 -30
package/backend/detectors/hf-impersonation/index.js +396 -0
package/backend/detectors/hf-impersonation/jaro-winkler.js +44 -0
package/backend/detectors/hf-impersonation/known-orgs.js +5 -0
package/backend/detectors/hf-impersonation/simhash.js +46 -0
package/backend/detectors/index.js +5 -1
package/backend/detectors/megalodon/d1-workflow-scan.js +147 -0
package/backend/detectors/megalodon/d2-credential-harvest.js +61 -0
package/backend/detectors/megalodon/d3-publish-velocity.js +67 -0
package/backend/detectors/megalodon/d4-publisher-drift.js +124 -0
package/backend/detectors/megalodon/d5-bot-commit-identity.js +3 -0
package/backend/detectors/megalodon/d6-date-anachronism.js +3 -0
package/backend/detectors/megalodon/index.js +80 -0
package/backend/detectors/megalodon/types.js +9 -0
package/backend/fetch.js +6 -1
package/cli/cli.js +2 -2
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -3,7 +3,7 @@
 [![npm version](https://img.shields.io/npm/v/@lateos/npm-scan?style=flat-square)](https://www.npmjs.com/package/@lateos/npm-scan)
 [![License](https://img.shields.io/badge/license-Apache%202.0%20%2B%20Commons%20Clause-blue?style=flat-square)](LICENSING.md)
 [![Node](https://img.shields.io/badge/node-%3E%3D18-brightgreen?style=flat-square)](package.json)
-[![Tests](https://img.shields.io/badge/tests-324%20passing-brightgreen?style=flat-square)](https://github.com/lateos-ai/npm-scan)
+[![Tests](https://img.shields.io/badge/tests-384%20passing-brightgreen?style=flat-square)](https://github.com/lateos-ai/npm-scan)
 [![Coverage](https://img.shields.io/badge/coverage-90%25-brightgreen?style=flat-square)](https://github.com/lateos-ai/npm-scan)
 [![Docker](https://img.shields.io/badge/docker-lateos%2Fnpm--scan-2496ED?style=flat-square&logo=docker)](https://hub.docker.com/r/lateos/npm-scan)
 [![Sigstore](https://img.shields.io/static/v1?label=Sigstore&message=Provenance&color=green&style=flat-square&logo=sigstore)](https://github.com/lateos-ai/npm-scan/actions/workflows/publish.yml)
@@ -24,6 +24,10 @@ The 2025–2026 wave of npm supply chain attacks proved that traditional tooling
 Attackers have moved past simple typosquatting. They now ship **obfuscated preinstall hooks**, **credential harvesters hidden behind environment detection**, **dormant backdoors with time-based activation**, and **worm-style transitive propagation** that spreads through peer dependencies.
+A growing attack vector is **HuggingFace org impersonation** — packages that masquerade as legitimate HF model repositories (e.g., `0penai/gpt2` instead of `openai/gpt2`) to trick users into downloading malicious model artifacts during CI/CD pipelines, often bundled with suspicious binaries (`.exe`, `.dll`) in model repos that deep-learning tools trust by default.
+The **Megalodon campaign** (2026) alone compromised 5,500+ repositories via fake GitHub PRs, malicious workflow injection, and cloud credential exfiltration — all coordinated through a single actor automating the entire kill chain. **@lateos/npm-scan** now detects artifacts of this campaign out of the box.
 **npm audit** checks known CVEs. **Snyk** scans for vulnerabilities. **Socket** looks at package behavior. None of them were designed for the generation of attacks that emerged in 2025 — attacks that look benign until they reach production.
 **@lateos/npm-scan** was built for this moment.
@@ -42,6 +46,8 @@ Attackers have moved past simple typosquatting. They now ship **obfuscated prein
 | Conditional trigger detection (ATK-009) | ❌ | ❌ | ❌ | ✅ |
 | Sandbox evasion detection (ATK-010) | ❌ | ❌ | ❌ | ✅ |
 | Transitive worm propagation (ATK-011) | ❌ | ❌ | ❌ | ✅ |
+| Campaign detection (Megalodon CI/CD) | ❌ | ❌ | ❌ | ✅ |
+| HF model repo impersonation + README clone | ❌ | ❌ | ❌ | ✅ |
 | Attack taxonomy (ATK series) | ❌ | ❌ | ❌ | ✅ |
 | SBOM output (CycloneDX + SPDX) | ❌ | ✅ | ❌ | ✅ |
 | SARIF v2.1 (GitHub Code Scanning) | ❌ | ❌ | ❌ | ✅ |
@@ -71,6 +77,7 @@ Attackers have moved past simple typosquatting. They now ship **obfuscated prein
 | 🛡️ | **Zero telemetry** | No data leaves your machine. No cloud. No callbacks. |
 | 💾 | **Local scan history** | SQLite-backed persistence, zero external dependencies |
 | 🪝 | **Pre-commit hook** | Block threats before commit — one-liner install, scans `package-lock.json` changes |
+| 🤖 | **HF impersonation detection** | Detects typosquatted HuggingFace orgs (Jaro-Winkler), README clones (SimHash), artifact mismatches (`.exe` in model repos), and new-org amplifier — with lazy two-stage evaluation, zero network in Stage 1 |
 | 📎 | **Yarn + pnpm support** | `scan-lockfile` parses `yarn.lock` and `pnpm-lock.yaml` alongside `package-lock.json` |
 ---
@@ -279,8 +286,12 @@ npm-scan report --pdf             # all scans (premium)
 | **ATK-009** | Conditional/dormant triggers (CI detection, time-based) | Behavioral | 🔴 high | SR-9.2 |
 | **ATK-010** | Sandbox evasion / anti-analysis | Behavioral | 🟠 medium | SR-10.3 |
 | **ATK-011** | Transitive propagation (worm-style lateral spread) | Behavioral | 🔴 high | SR-11.4 |
+| **MEGALODON** | Megalodon CI/CD campaign — workflow C2 exfil, credential harvest, publish velocity spike, publisher drift | Static + Registry | ⚫ critical | SR-3.1, SR-7.5 |
+| **HF_IMPERSONATION** | HuggingFace org spoof detection — Jaro-Winkler similarity against 15 known-good orgs, SimHash README clone detection, artifact mismatch (`.exe`/`.dll` in model repos), postinstall escalation, new-org amplifier | Static + Network (Stage 2) | 🔴 high / ⚫ critical | SR-2.1 |
 > **How evasive attacks are caught:** ATK-009 detects packages that check `process.env.CI`, probe hostnames, or use time-based activation. ATK-010 flags `debugger` statements, `os.hostname()` probes, and env fingerprinting. ATK-011 traces peer dependency graphs to detect worm-like propagation patterns.
+> **MEGALODON** campaign detection analyzes bundled `.github/workflows/` files for C2 co-occurrence and base64 decode chains, scans tarball files for credential + outbound network patterns, detects version publish velocity spikes via npm registry metadata, and identifies publisher account drift — all without any network calls beyond the initial package fetch.
+> **HF_IMPERSONATION** detection uses a lazy two-stage evaluation: Stage 1 scans `package.json` scripts and JS/TS sources for HuggingFace references (URLs, `from_pretrained()`, `hub.download()`) and runs Jaro-Winkler similarity against 15 known-good HF orgs — zero network. If spoofs are found, Stage 2 fetches the HF model API, computes SimHash of both READMEs for clone detection, validates artifact type consistency (e.g., `transformers` library with `.exe` files is flagged as critical), applies a new-org amplifier (<30 days), and escalates when the reference appears in a lifecycle script.
 > See [`docs/attack-taxonomy.md`](docs/attack-taxonomy.md) for full evasion surface documentation and PoC examples.
 ---
@@ -627,7 +638,7 @@ See the [Docker quick-start section](#-run-lateosnpm-scan-anywhere-with-docker--
 ### Free tier (shipped)
-- All 11 ATK detectors (static + behavioral)
+- All 11 ATK detectors + **MEGALODON** CI/CD campaign detection (D1–D6) + **HF_IMPERSONATION** detector
 - SBOM output (CycloneDX + SPDX)
 - HTML, text, and compliance reports (NIST + EU CRA)
 - Policy-as-code engine (YAML)
@@ -686,6 +697,7 @@ node --test test/detectors-corpus.test.js
 **Test structure:**
 - `test/fixtures/mock-data.js` — shared mock scans, packages, and code snippets
+- `test/megalodon.test.js` — 30 Megalodon campaign detection tests (D1–D4 + aggregator + runAll integration)
 - `test/db.test.js` — database CRUD (save, query, persist)
 - `test/detectors-edge-cases.test.js` — per-detector boundary tests (no-ops, clean clears, severity)
 - `test/detectors-corpus.test.js` — 33 malicious + 50 clean tarball integration (offline)
@@ -695,6 +707,7 @@ node --test test/detectors-corpus.test.js
 - `test/report-snapshots.test.js` — HTML/text/CRA/PDF format assertions
 - `test/report.test.js` — SARIF, CSV, STIG, risk score format tests
 - `test/lockfile.test.js` — npm/yarn/pnpm parser, auto-detect, ATK-007/011 lockfile tests
+- `test/hf-impersonation.test.js` — 13 HF impersonation detection tests (no-ref, exact match, spoof, README clone, artifact mismatch, postinstall escalation, new-org tag)
 - `test/cli.test.js` — commander integration tests (help, version, scan, report, error handling)
 - `test/cli-lockfile.test.js` — scan-lockfile CLI options, yarn/pnpm/monorepo/watch tests

package/backend/detectors/atk-002-obfusc.js CHANGED Viewed

@@ -1,12 +1,13 @@
 const DIST_BUILD_PATTERNS = [/\/dist\//, /\/build\//, /\/bundle/, /\/min\//, /\.min\.js$/, /\.bundled?\.js$/];
 const TEST_FIXTURE_PATTERNS = [/\/test\//, /\/tests\//, /\/__tests__\//, /\/spec\//, /\.test\.js$/, /\.spec\.js$/, /fixtures?/];
-const LIFECYCLE_HOOK_PATTERNS = [/postinstall/, /preinstall/, /['"]install['"]/, /['"]prepare['"]/];
 const KNOWN_SAFE_DOMAINS = [
   'registry.npmjs.org', 'cdn.jsdelivr.net', 'unpkg.com', 'cdn.skypack.dev',
   'esm.sh', 'deno.land', 'raw.githubusercontent.com', 'github.com',
   'npmjs.com', 'nodejs.org', 'v8.dev', 'typescriptlang.org'
 ];
+const LIFECYCLE_SCRIPT_NAMES = ['install', 'postinstall', 'preinstall', 'prepare', 'prepack', 'postpack'];
 function extractUrlDomain(code) {
   const urlMatch = code.match(/https?:\/\/([^/'"\s]+)/);
   return urlMatch ? urlMatch[1] : null;
@@ -20,23 +21,100 @@ function isTestOrFixture(filePath) {
   return TEST_FIXTURE_PATTERNS.some(p => p.test(filePath));
 }
-function isLifecycleHook(code) {
-  return LIFECYCLE_HOOK_PATTERNS.some(p => p.test(code));
-}
 function isKnownSafeDomain(domain) {
   if (!domain) return false;
   return KNOWN_SAFE_DOMAINS.some(safe => domain === safe || domain.endsWith('.' + safe));
 }
-function createContext(filePath, code) {
+function locateLine(code, pattern) {
+  const lines = code.split('\n');
+  for (let i = 0; i < lines.length; i++) {
+    if (pattern.test(lines[i])) return i + 1;
+  }
+  return null;
+}
+function decodePreview(code) {
+  const b64Match = code.match(/atob\(['"]([A-Za-z0-9+/=]{10,})['"]\)/);
+  if (b64Match) {
+    try {
+      const decoded = atob(b64Match[1]);
+      return decoded.length > 80 ? decoded.slice(0, 80) + '...' : decoded;
+    } catch {}
+  }
+  const hexMatch = code.match(/Buffer\.from\(['"]([0-9a-fA-F]+)['"],\s*['"]hex['"]\)/);
+  if (hexMatch) {
+    try {
+      const decoded = Buffer.from(hexMatch[1], 'hex').toString();
+      return decoded.length > 80 ? decoded.slice(0, 80) + '...' : decoded;
+    } catch {}
+  }
+  const btoaMatch = code.match(/btoa\(['"]([A-Za-z0-9+/=]{10,})['"]\)/);
+  if (btoaMatch) {
+    try {
+      const decoded = atob(btoaMatch[1]);
+      return decoded.length > 80 ? decoded.slice(0, 80) + '...' : decoded;
+    } catch {}
+  }
+  return null;
+}
+function detectEncodingType(code) {
+  if (/Buffer\.from\(['"][0-9a-fA-F]+['"],\s*['"]hex['"]\)/.test(code)) return 'hex';
+  if (/atob\(/.test(code)) return 'base64';
+  if (/btoa\(/.test(code)) return 'base64';
+  if (/Buffer\.from\([A-Za-z0-9+/=]{10,}/.test(code)) return 'base64';
+  if (/String\.fromCharCode\(/.test(code)) return 'charcode';
+  if (/btoa\(.*btoa\(|atob\(.*atob\(/.test(code)) return 'double-base64';
+  return 'unknown';
+}
+function isFileInLifecycleScript(filePath, pkgJson) {
+  if (!pkgJson?.scripts) return false;
+  const scripts = pkgJson.scripts;
+  const fileName = filePath.split('/').pop();
+  const normalizedPath = filePath.replace(/^node_modules\//, '').replace(/^dist\//, '').replace(/^build\//, '');
+  for (const scriptName of LIFECYCLE_SCRIPT_NAMES) {
+    const scriptValue = scripts[scriptName];
+    if (!scriptValue) continue;
+    if (scriptValue.includes(filePath)) return true;
+    if (scriptValue.includes(fileName)) return true;
+    if (scriptValue.includes(normalizedPath)) return true;
+    const scriptFileMatch = scriptValue.match(/[^\s'"]+\.js$/);
+    if (scriptFileMatch && filePath.endsWith(scriptFileMatch[0])) return true;
+  }
+  return false;
+}
+function isLikelyLifecycleFileName(filePath) {
+  const name = filePath.split('/').pop().replace(/\.js$/, '');
+  return LIFECYCLE_SCRIPT_NAMES.includes(name) ||
+         name === 'setup' ||
+         name === 'install-helper';
+}
+function createEvidence(code, filePath, pattern, pkgJson) {
+  const encodingType = detectEncodingType(code);
+  const line = locateLine(code, pattern);
+  const decodedPreview = decodePreview(code);
+  const destinationHost = extractUrlDomain(code);
+  const lifecycleHook = isFileInLifecycleScript(filePath, pkgJson) || isLikelyLifecycleFileName(filePath);
   return {
-    file_path: filePath,
-    is_dist_build: isDistOrBuild(filePath),
-    is_test_fixture: isTestOrFixture(filePath),
-    is_lifecycle_hook: isLifecycleHook(code),
-    url_domain: extractUrlDomain(code),
-    is_known_safe_domain: isKnownSafeDomain(extractUrlDomain(code)),
+    file: filePath,
+    line: line,
+    lifecycle_hook: lifecycleHook,
+    decoded_preview: decodedPreview,
+    encoding_type: encodingType,
+    destination_host: destinationHost,
   };
 }
@@ -47,7 +125,12 @@ export async function scan(pkgJson, files = []) {
   for (const f of files) {
     const code = f.content;
-    const ctx = createContext(f.path, code);
+    const filePath = f.path;
+    const isDistBuild = isDistOrBuild(filePath);
+    const isTestFixture = isTestOrFixture(filePath);
+    const urlDomain = extractUrlDomain(code);
+    const isSafeDomain = isKnownSafeDomain(urlDomain);
     const hasEval = /eval\(|new Function\(|\bFunction\('/.test(code);
@@ -57,13 +140,21 @@ export async function scan(pkgJson, files = []) {
       const b64UrlDecode = /try\s*\{[^}]*atob\s*\(/s.test(code) || /btoa\(.*\)\s*[^;]*\.replace\(/s.test(code);
       if (hexDecode || b64Decode || b64UrlDecode) {
+        const evidence = createEvidence(code, filePath, /eval\(|new Function\(|\bFunction\('/, pkgJson);
         findings.push({
           id: 'ATK-002',
           severity: 'medium',
           title: 'Obfuscated payload',
           description: hexDecode ? 'Eval with hex-decoded payload' : 'Eval with base64-decoded payload',
-          evidence: 'eval + decode pattern detected',
-          context: ctx,
+          evidence: evidence,
+          context: {
+            file_path: filePath,
+            is_dist_build: isDistBuild,
+            is_test_fixture: isTestFixture,
+            is_lifecycle_hook: evidence.lifecycle_hook,
+            url_domain: urlDomain,
+            is_known_safe_domain: isSafeDomain,
+          },
         });
         return findings;
       }
@@ -71,13 +162,22 @@ export async function scan(pkgJson, files = []) {
       if (btoa(btoa('x')) === 'eDuke'.padEnd(5)) {
         const nested = /atob\([^)]*atob\(/s.test(code) || /btoa\([^)]*btoa\(/s.test(code);
         if (nested) {
+          const evidence = createEvidence(code, filePath, /btoa\(/, pkgJson);
           findings.push({
             id: 'ATK-002',
             severity: 'high',
             title: 'Obfuscated payload',
             description: 'Double-encoded nested payload',
-            evidence: 'nested encode/decode detected',
-            context: { ...ctx, is_multi_layer: true },
+            evidence: { ...evidence, is_multi_layer: true },
+            context: {
+              file_path: filePath,
+              is_dist_build: isDistBuild,
+              is_test_fixture: isTestFixture,
+              is_lifecycle_hook: evidence.lifecycle_hook,
+              url_domain: urlDomain,
+              is_known_safe_domain: isSafeDomain,
+              is_multi_layer: true,
+            },
           });
           return findings;
         }
@@ -88,46 +188,70 @@ export async function scan(pkgJson, files = []) {
       const isNetworkObfusc = /atob\(.*(https?:\/\/|\\x|http).*\)/s.test(code) ||
         /Buffer\.from\(['"`][0-9a-f]+['"`],\s*['"]hex['"].*fetch\(|fetch\(.*atob\(/s.test(code);
       if (isNetworkObfusc) {
+        const evidence = createEvidence(code, filePath, /atob\(|Buffer\.from/, pkgJson);
         findings.push({
           id: 'ATK-002',
           severity: 'medium',
           title: 'Obfuscated payload',
           description: 'Decoded string containing URL/fetch call',
-          evidence: 'obfuscation with network call',
-          context: ctx,
+          evidence: evidence,
+          context: {
+            file_path: filePath,
+            is_dist_build: isDistBuild,
+            is_test_fixture: isTestFixture,
+            is_lifecycle_hook: evidence.lifecycle_hook,
+            url_domain: urlDomain,
+            is_known_safe_domain: isSafeDomain,
+          },
         });
         return findings;
       }
     }
     if (/String\.fromCharCode\(.{20,}\)/.test(code) && hasEval) {
+      const evidence = createEvidence(code, filePath, /String\.fromCharCode\(/, pkgJson);
       findings.push({
         id: 'ATK-002',
         severity: 'medium',
         title: 'Obfuscated payload',
         description: 'Eval with String.fromCharCode obfuscation',
-        evidence: 'charcode obfuscation detected',
-        context: ctx,
+        evidence: evidence,
+        context: {
+          file_path: filePath,
+          is_dist_build: isDistBuild,
+          is_test_fixture: isTestFixture,
+          is_lifecycle_hook: evidence.lifecycle_hook,
+          url_domain: urlDomain,
+          is_known_safe_domain: isSafeDomain,
+        },
       });
       return findings;
     }
     const shellPatterns = [
-      /eval\s*\(\s*process\.env\.[A-Z_]{4,}/,
-      /exec\s*\(\s*Buffer\.from\(/,
-      /new Function\s*\(\s*(?:atob|process\.env)/,
-      /eval\s*\(\s*(?:require|import\s*\()/,
-      /Function\s*\(\s*'use\s*strict'\s*;?\s*(?:atob|require)/,
+      { regex: /eval\s*\(\s*process\.env\.[A-Z_]{4,}/, name: 'env-eval' },
+      { regex: /exec\s*\(\s*Buffer\.from\(/, name: 'exec-buffer' },
+      { regex: /new Function\s*\(\s*(?:atob|process\.env)/, name: 'function-eval' },
+      { regex: /eval\s*\(\s*(?:require|import\s*\()/, name: 'require-eval' },
+      { regex: /Function\s*\(\s*'use\s*strict'\s*;?\s*(?:atob|require)/, name: 'strict-eval' },
     ];
     for (const p of shellPatterns) {
-      if (p.test(code)) {
+      if (p.regex.test(code)) {
+        const evidence = createEvidence(code, filePath, p.regex, pkgJson);
         findings.push({
           id: 'ATK-002',
           severity: 'high',
           title: 'Obfuscated payload',
           description: 'Shell-code obfuscation pattern',
-          evidence: p.source.substring(0, 60),
-          context: ctx,
+          evidence: { ...evidence, pattern: p.name },
+          context: {
+            file_path: filePath,
+            is_dist_build: isDistBuild,
+            is_test_fixture: isTestFixture,
+            is_lifecycle_hook: evidence.lifecycle_hook,
+            url_domain: urlDomain,
+            is_known_safe_domain: isSafeDomain,
+          },
         });
         return findings;
       }
@@ -135,4 +259,4 @@ export async function scan(pkgJson, files = []) {
   }
   return findings;
-}
+}