@lateos/npm-scan 0.15.1 → 0.15.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -3,7 +3,7 @@
3
3
  [![npm version](https://img.shields.io/npm/v/@lateos/npm-scan?style=flat-square)](https://www.npmjs.com/package/@lateos/npm-scan)
4
4
  [![License](https://img.shields.io/badge/license-Apache%202.0%20%2B%20Commons%20Clause-blue?style=flat-square)](LICENSING.md)
5
5
  [![Node](https://img.shields.io/badge/node-%3E%3D18-brightgreen?style=flat-square)](package.json)
6
- [![Tests](https://img.shields.io/badge/tests-324%20passing-brightgreen?style=flat-square)](https://github.com/lateos-ai/npm-scan)
6
+ [![Tests](https://img.shields.io/badge/tests-384%20passing-brightgreen?style=flat-square)](https://github.com/lateos-ai/npm-scan)
7
7
  [![Coverage](https://img.shields.io/badge/coverage-90%25-brightgreen?style=flat-square)](https://github.com/lateos-ai/npm-scan)
8
8
  [![Docker](https://img.shields.io/badge/docker-lateos%2Fnpm--scan-2496ED?style=flat-square&logo=docker)](https://hub.docker.com/r/lateos/npm-scan)
9
9
  [![Sigstore](https://img.shields.io/static/v1?label=Sigstore&message=Provenance&color=green&style=flat-square&logo=sigstore)](https://github.com/lateos-ai/npm-scan/actions/workflows/publish.yml)
@@ -24,6 +24,10 @@ The 2025–2026 wave of npm supply chain attacks proved that traditional tooling
24
24
 
25
25
  Attackers have moved past simple typosquatting. They now ship **obfuscated preinstall hooks**, **credential harvesters hidden behind environment detection**, **dormant backdoors with time-based activation**, and **worm-style transitive propagation** that spreads through peer dependencies.
26
26
 
27
+ A growing attack vector is **HuggingFace org impersonation** — packages that masquerade as legitimate HF model repositories (e.g., `0penai/gpt2` instead of `openai/gpt2`) to trick users into downloading malicious model artifacts during CI/CD pipelines, often bundled with suspicious binaries (`.exe`, `.dll`) in model repos that deep-learning tools trust by default.
28
+
29
+ The **Megalodon campaign** (2026) alone compromised 5,500+ repositories via fake GitHub PRs, malicious workflow injection, and cloud credential exfiltration — all coordinated by a single actor automating the entire kill chain. **@lateos/npm-scan** now detects artifacts of this campaign out of the box.
30
+
27
31
  **npm audit** checks known CVEs. **Snyk** scans for vulnerabilities. **Socket** looks at package behavior. None of them were designed for the generation of attacks that emerged in 2025 — attacks that look benign until they reach production.
28
32
 
29
33
  **@lateos/npm-scan** was built for this moment.
@@ -42,6 +46,8 @@ Attackers have moved past simple typosquatting. They now ship **obfuscated prein
42
46
  | Conditional trigger detection (ATK-009) | ❌ | ❌ | ❌ | ✅ |
43
47
  | Sandbox evasion detection (ATK-010) | ❌ | ❌ | ❌ | ✅ |
44
48
  | Transitive worm propagation (ATK-011) | ❌ | ❌ | ❌ | ✅ |
49
+ | Campaign detection (Megalodon CI/CD) | ❌ | ❌ | ❌ | ✅ |
50
+ | HF model repo impersonation + README clone | ❌ | ❌ | ❌ | ✅ |
45
51
  | Attack taxonomy (ATK series) | ❌ | ❌ | ❌ | ✅ |
46
52
  | SBOM output (CycloneDX + SPDX) | ❌ | ✅ | ❌ | ✅ |
47
53
  | SARIF v2.1 (GitHub Code Scanning) | ❌ | ❌ | ❌ | ✅ |
@@ -71,6 +77,7 @@ Attackers have moved past simple typosquatting. They now ship **obfuscated prein
71
77
  | 🛡️ | **Zero telemetry** | No data leaves your machine. No cloud. No callbacks. |
72
78
  | 💾 | **Local scan history** | SQLite-backed persistence, zero external dependencies |
73
79
  | 🪝 | **Pre-commit hook** | Block threats before commit — one-liner install, scans `package-lock.json` changes |
80
+ | 🤖 | **HF impersonation detection** | Detects typosquatted HuggingFace orgs (Jaro-Winkler), README clones (SimHash), artifact mismatches (`.exe` in model repos), and new-org amplifier — with lazy two-stage evaluation, zero network in Stage 1 |
74
81
  | 📎 | **Yarn + pnpm support** | `scan-lockfile` parses `yarn.lock` and `pnpm-lock.yaml` alongside `package-lock.json` |
75
82
 
76
83
  ---
@@ -279,8 +286,12 @@ npm-scan report --pdf # all scans (premium)
279
286
  | **ATK-009** | Conditional/dormant triggers (CI detection, time-based) | Behavioral | 🔴 high | SR-9.2 |
280
287
  | **ATK-010** | Sandbox evasion / anti-analysis | Behavioral | 🟠 medium | SR-10.3 |
281
288
  | **ATK-011** | Transitive propagation (worm-style lateral spread) | Behavioral | 🔴 high | SR-11.4 |
289
+ | **MEGALODON** | Megalodon CI/CD campaign — workflow C2 exfil, credential harvest, publish velocity spike, publisher drift | Static + Registry | ⚫ critical | SR-3.1, SR-7.5 |
290
+ | **HF_IMPERSONATION** | HuggingFace org spoof detection — Jaro-Winkler similarity against 15 known-good orgs, SimHash README clone detection, artifact mismatch (`.exe`/`.dll` in model repos), postinstall escalation, new-org amplifier | Static + Network (Stage 2) | 🔴 high / ⚫ critical | SR-2.1 |
282
291
 
283
292
  > **How evasive attacks are caught:** ATK-009 detects packages that check `process.env.CI`, probe hostnames, or use time-based activation. ATK-010 flags `debugger` statements, `os.hostname()` probes, and env fingerprinting. ATK-011 traces peer dependency graphs to detect worm-like propagation patterns.
293
+ > **MEGALODON** campaign detection analyzes bundled `.github/workflows/` files for C2 co-occurrence and base64 decode chains, scans tarball files for credential + outbound network patterns, detects version publish velocity spikes via npm registry metadata, and identifies publisher account drift — all without any network calls beyond the initial package fetch.
294
+ > **HF_IMPERSONATION** detection uses a lazy two-stage evaluation: Stage 1 scans `package.json` scripts and JS/TS sources for HuggingFace references (URLs, `from_pretrained()`, `hub.download()`) and runs Jaro-Winkler similarity against 15 known-good HF orgs — zero network. If spoofs are found, Stage 2 fetches the HF model API, computes SimHash of both READMEs for clone detection, validates artifact type consistency (e.g., `transformers` library with `.exe` files is flagged as critical), applies a new-org amplifier (<30 days), and escalates when the reference appears in a lifecycle script.
284
295
  > See [`docs/attack-taxonomy.md`](docs/attack-taxonomy.md) for full evasion surface documentation and PoC examples.
285
296
 
286
297
  ---
@@ -627,7 +638,7 @@ See the [Docker quick-start section](#-run-lateosnpm-scan-anywhere-with-docker--
627
638
 
628
639
  ### Free tier (shipped)
629
640
 
630
- - All 11 ATK detectors (static + behavioral)
641
+ - All 11 ATK detectors + **MEGALODON** CI/CD campaign detection (D1–D6) + **HF_IMPERSONATION** detector
631
642
  - SBOM output (CycloneDX + SPDX)
632
643
  - HTML, text, and compliance reports (NIST + EU CRA)
633
644
  - Policy-as-code engine (YAML)
@@ -686,6 +697,7 @@ node --test test/detectors-corpus.test.js
686
697
 
687
698
  **Test structure:**
688
699
  - `test/fixtures/mock-data.js` — shared mock scans, packages, and code snippets
700
+ - `test/megalodon.test.js` — 30 Megalodon campaign detection tests (D1–D4 + aggregator + runAll integration)
689
701
  - `test/db.test.js` — database CRUD (save, query, persist)
690
702
  - `test/detectors-edge-cases.test.js` — per-detector boundary tests (no-ops, clean clears, severity)
691
703
  - `test/detectors-corpus.test.js` — 33 malicious + 50 clean tarball integration (offline)
@@ -695,6 +707,7 @@ node --test test/detectors-corpus.test.js
695
707
  - `test/report-snapshots.test.js` — HTML/text/CRA/PDF format assertions
696
708
  - `test/report.test.js` — SARIF, CSV, STIG, risk score format tests
697
709
  - `test/lockfile.test.js` — npm/yarn/pnpm parser, auto-detect, ATK-007/011 lockfile tests
710
+ - `test/hf-impersonation.test.js` — 13 HF impersonation detection tests (no-ref, exact match, spoof, README clone, artifact mismatch, postinstall escalation, new-org tag)
698
711
  - `test/cli.test.js` — commander integration tests (help, version, scan, report, error handling)
699
712
  - `test/cli-lockfile.test.js` — scan-lockfile CLI options, yarn/pnpm/monorepo/watch tests
700
713
 
@@ -1,12 +1,13 @@
1
1
  const DIST_BUILD_PATTERNS = [/\/dist\//, /\/build\//, /\/bundle/, /\/min\//, /\.min\.js$/, /\.bundled?\.js$/];
2
2
  const TEST_FIXTURE_PATTERNS = [/\/test\//, /\/tests\//, /\/__tests__\//, /\/spec\//, /\.test\.js$/, /\.spec\.js$/, /fixtures?/];
3
- const LIFECYCLE_HOOK_PATTERNS = [/postinstall/, /preinstall/, /['"]install['"]/, /['"]prepare['"]/];
4
3
  const KNOWN_SAFE_DOMAINS = [
5
4
  'registry.npmjs.org', 'cdn.jsdelivr.net', 'unpkg.com', 'cdn.skypack.dev',
6
5
  'esm.sh', 'deno.land', 'raw.githubusercontent.com', 'github.com',
7
6
  'npmjs.com', 'nodejs.org', 'v8.dev', 'typescriptlang.org'
8
7
  ];
9
8
 
9
+ const LIFECYCLE_SCRIPT_NAMES = ['install', 'postinstall', 'preinstall', 'prepare', 'prepack', 'postpack'];
10
+
10
11
  function extractUrlDomain(code) {
11
12
  const urlMatch = code.match(/https?:\/\/([^/'"\s]+)/);
12
13
  return urlMatch ? urlMatch[1] : null;
@@ -20,23 +21,100 @@ function isTestOrFixture(filePath) {
20
21
  return TEST_FIXTURE_PATTERNS.some(p => p.test(filePath));
21
22
  }
22
23
 
23
- function isLifecycleHook(code) {
24
- return LIFECYCLE_HOOK_PATTERNS.some(p => p.test(code));
25
- }
26
-
27
24
  function isKnownSafeDomain(domain) {
28
25
  if (!domain) return false;
29
26
  return KNOWN_SAFE_DOMAINS.some(safe => domain === safe || domain.endsWith('.' + safe));
30
27
  }
31
28
 
32
- function createContext(filePath, code) {
29
/**
 * Find the 1-based number of the first line of `code` that matches `pattern`.
 *
 * Each line is tested on its own, so a pattern that needs to span a line
 * break never matches here. The pattern is cloned and its `lastIndex` is
 * reset before every test: `RegExp.prototype.test` is stateful for global
 * (`g`) and sticky (`y`) patterns, and a regex the caller has already used
 * would otherwise start searching part-way into a line and miss the match.
 * The caller's regex object is left untouched.
 *
 * @param {string} code - Source text to search.
 * @param {RegExp} pattern - Pattern applied to each individual line.
 * @returns {number|null} 1-based line number of the first matching line,
 *   or null when nothing matches or `code` is not a string.
 */
function locateLine(code, pattern) {
  if (typeof code !== 'string') return null;

  const matcher = new RegExp(pattern.source, pattern.flags);
  // Split on LF or CRLF so a trailing '\r' cannot defeat `$`-anchored patterns.
  const lines = code.split(/\r?\n/);

  for (let i = 0; i < lines.length; i++) {
    matcher.lastIndex = 0;
    if (matcher.test(lines[i])) return i + 1;
  }
  return null;
}
36
+
37
+ function decodePreview(code) {
38
+ const b64Match = code.match(/atob\(['"]([A-Za-z0-9+/=]{10,})['"]\)/);
39
+ if (b64Match) {
40
+ try {
41
+ const decoded = atob(b64Match[1]);
42
+ return decoded.length > 80 ? decoded.slice(0, 80) + '...' : decoded;
43
+ } catch {}
44
+ }
45
+
46
+ const hexMatch = code.match(/Buffer\.from\(['"]([0-9a-fA-F]+)['"],\s*['"]hex['"]\)/);
47
+ if (hexMatch) {
48
+ try {
49
+ const decoded = Buffer.from(hexMatch[1], 'hex').toString();
50
+ return decoded.length > 80 ? decoded.slice(0, 80) + '...' : decoded;
51
+ } catch {}
52
+ }
53
+
54
+ const btoaMatch = code.match(/btoa\(['"]([A-Za-z0-9+/=]{10,})['"]\)/);
55
+ if (btoaMatch) {
56
+ try {
57
+ const decoded = atob(btoaMatch[1]);
58
+ return decoded.length > 80 ? decoded.slice(0, 80) + '...' : decoded;
59
+ } catch {}
60
+ }
61
+
62
+ return null;
63
+ }
64
+
65
+ function detectEncodingType(code) {
66
+ if (/Buffer\.from\(['"][0-9a-fA-F]+['"],\s*['"]hex['"]\)/.test(code)) return 'hex';
67
+ if (/atob\(/.test(code)) return 'base64';
68
+ if (/btoa\(/.test(code)) return 'base64';
69
+ if (/Buffer\.from\([A-Za-z0-9+/=]{10,}/.test(code)) return 'base64';
70
+ if (/String\.fromCharCode\(/.test(code)) return 'charcode';
71
+ if (/btoa\(.*btoa\(|atob\(.*atob\(/.test(code)) return 'double-base64';
72
+ return 'unknown';
73
+ }
74
+
75
+ function isFileInLifecycleScript(filePath, pkgJson) {
76
+ if (!pkgJson?.scripts) return false;
77
+
78
+ const scripts = pkgJson.scripts;
79
+ const fileName = filePath.split('/').pop();
80
+ const normalizedPath = filePath.replace(/^node_modules\//, '').replace(/^dist\//, '').replace(/^build\//, '');
81
+
82
+ for (const scriptName of LIFECYCLE_SCRIPT_NAMES) {
83
+ const scriptValue = scripts[scriptName];
84
+ if (!scriptValue) continue;
85
+
86
+ if (scriptValue.includes(filePath)) return true;
87
+ if (scriptValue.includes(fileName)) return true;
88
+ if (scriptValue.includes(normalizedPath)) return true;
89
+
90
+ const scriptFileMatch = scriptValue.match(/[^\s'"]+\.js$/);
91
+ if (scriptFileMatch && filePath.endsWith(scriptFileMatch[0])) return true;
92
+ }
93
+
94
+ return false;
95
+ }
96
+
97
/**
 * Heuristic: does the file's base name look like an install-time script?
 *
 * The last path segment is taken, a trailing `.js`, `.cjs` or `.mjs`
 * extension is removed, and the remainder is compared with the lifecycle
 * script names in LIFECYCLE_SCRIPT_NAMES plus the extra names `setup` and
 * `install-helper`.
 *
 * @param {string} filePath - Path of the file inside the package.
 * @returns {boolean} True when the base name matches one of those names;
 *   false otherwise, including for non-string input.
 */
function isLikelyLifecycleFileName(filePath) {
  if (typeof filePath !== 'string') return false;

  const baseName = filePath.split('/').pop().replace(/\.[cm]?js$/, '');
  return (
    LIFECYCLE_SCRIPT_NAMES.includes(baseName) ||
    baseName === 'setup' ||
    baseName === 'install-helper'
  );
}
103
+
104
+ function createEvidence(code, filePath, pattern, pkgJson) {
105
+ const encodingType = detectEncodingType(code);
106
+ const line = locateLine(code, pattern);
107
+ const decodedPreview = decodePreview(code);
108
+ const destinationHost = extractUrlDomain(code);
109
+ const lifecycleHook = isFileInLifecycleScript(filePath, pkgJson) || isLikelyLifecycleFileName(filePath);
110
+
33
111
  return {
34
- file_path: filePath,
35
- is_dist_build: isDistOrBuild(filePath),
36
- is_test_fixture: isTestOrFixture(filePath),
37
- is_lifecycle_hook: isLifecycleHook(code),
38
- url_domain: extractUrlDomain(code),
39
- is_known_safe_domain: isKnownSafeDomain(extractUrlDomain(code)),
112
+ file: filePath,
113
+ line: line,
114
+ lifecycle_hook: lifecycleHook,
115
+ decoded_preview: decodedPreview,
116
+ encoding_type: encodingType,
117
+ destination_host: destinationHost,
40
118
  };
41
119
  }
42
120
 
@@ -47,7 +125,12 @@ export async function scan(pkgJson, files = []) {
47
125
 
48
126
  for (const f of files) {
49
127
  const code = f.content;
50
- const ctx = createContext(f.path, code);
128
+ const filePath = f.path;
129
+
130
+ const isDistBuild = isDistOrBuild(filePath);
131
+ const isTestFixture = isTestOrFixture(filePath);
132
+ const urlDomain = extractUrlDomain(code);
133
+ const isSafeDomain = isKnownSafeDomain(urlDomain);
51
134
 
52
135
  const hasEval = /eval\(|new Function\(|\bFunction\('/.test(code);
53
136
 
@@ -57,13 +140,21 @@ export async function scan(pkgJson, files = []) {
57
140
  const b64UrlDecode = /try\s*\{[^}]*atob\s*\(/s.test(code) || /btoa\(.*\)\s*[^;]*\.replace\(/s.test(code);
58
141
 
59
142
  if (hexDecode || b64Decode || b64UrlDecode) {
143
+ const evidence = createEvidence(code, filePath, /eval\(|new Function\(|\bFunction\('/, pkgJson);
60
144
  findings.push({
61
145
  id: 'ATK-002',
62
146
  severity: 'medium',
63
147
  title: 'Obfuscated payload',
64
148
  description: hexDecode ? 'Eval with hex-decoded payload' : 'Eval with base64-decoded payload',
65
- evidence: 'eval + decode pattern detected',
66
- context: ctx,
149
+ evidence: evidence,
150
+ context: {
151
+ file_path: filePath,
152
+ is_dist_build: isDistBuild,
153
+ is_test_fixture: isTestFixture,
154
+ is_lifecycle_hook: evidence.lifecycle_hook,
155
+ url_domain: urlDomain,
156
+ is_known_safe_domain: isSafeDomain,
157
+ },
67
158
  });
68
159
  return findings;
69
160
  }
@@ -71,13 +162,22 @@ export async function scan(pkgJson, files = []) {
71
162
  if (btoa(btoa('x')) === 'eDuke'.padEnd(5)) {
72
163
  const nested = /atob\([^)]*atob\(/s.test(code) || /btoa\([^)]*btoa\(/s.test(code);
73
164
  if (nested) {
165
+ const evidence = createEvidence(code, filePath, /btoa\(/, pkgJson);
74
166
  findings.push({
75
167
  id: 'ATK-002',
76
168
  severity: 'high',
77
169
  title: 'Obfuscated payload',
78
170
  description: 'Double-encoded nested payload',
79
- evidence: 'nested encode/decode detected',
80
- context: { ...ctx, is_multi_layer: true },
171
+ evidence: { ...evidence, is_multi_layer: true },
172
+ context: {
173
+ file_path: filePath,
174
+ is_dist_build: isDistBuild,
175
+ is_test_fixture: isTestFixture,
176
+ is_lifecycle_hook: evidence.lifecycle_hook,
177
+ url_domain: urlDomain,
178
+ is_known_safe_domain: isSafeDomain,
179
+ is_multi_layer: true,
180
+ },
81
181
  });
82
182
  return findings;
83
183
  }
@@ -88,46 +188,70 @@ export async function scan(pkgJson, files = []) {
88
188
  const isNetworkObfusc = /atob\(.*(https?:\/\/|\\x|http).*\)/s.test(code) ||
89
189
  /Buffer\.from\(['"`][0-9a-f]+['"`],\s*['"]hex['"].*fetch\(|fetch\(.*atob\(/s.test(code);
90
190
  if (isNetworkObfusc) {
191
+ const evidence = createEvidence(code, filePath, /atob\(|Buffer\.from/, pkgJson);
91
192
  findings.push({
92
193
  id: 'ATK-002',
93
194
  severity: 'medium',
94
195
  title: 'Obfuscated payload',
95
196
  description: 'Decoded string containing URL/fetch call',
96
- evidence: 'obfuscation with network call',
97
- context: ctx,
197
+ evidence: evidence,
198
+ context: {
199
+ file_path: filePath,
200
+ is_dist_build: isDistBuild,
201
+ is_test_fixture: isTestFixture,
202
+ is_lifecycle_hook: evidence.lifecycle_hook,
203
+ url_domain: urlDomain,
204
+ is_known_safe_domain: isSafeDomain,
205
+ },
98
206
  });
99
207
  return findings;
100
208
  }
101
209
  }
102
210
 
103
211
  if (/String\.fromCharCode\(.{20,}\)/.test(code) && hasEval) {
212
+ const evidence = createEvidence(code, filePath, /String\.fromCharCode\(/, pkgJson);
104
213
  findings.push({
105
214
  id: 'ATK-002',
106
215
  severity: 'medium',
107
216
  title: 'Obfuscated payload',
108
217
  description: 'Eval with String.fromCharCode obfuscation',
109
- evidence: 'charcode obfuscation detected',
110
- context: ctx,
218
+ evidence: evidence,
219
+ context: {
220
+ file_path: filePath,
221
+ is_dist_build: isDistBuild,
222
+ is_test_fixture: isTestFixture,
223
+ is_lifecycle_hook: evidence.lifecycle_hook,
224
+ url_domain: urlDomain,
225
+ is_known_safe_domain: isSafeDomain,
226
+ },
111
227
  });
112
228
  return findings;
113
229
  }
114
230
 
115
231
  const shellPatterns = [
116
- /eval\s*\(\s*process\.env\.[A-Z_]{4,}/,
117
- /exec\s*\(\s*Buffer\.from\(/,
118
- /new Function\s*\(\s*(?:atob|process\.env)/,
119
- /eval\s*\(\s*(?:require|import\s*\()/,
120
- /Function\s*\(\s*'use\s*strict'\s*;?\s*(?:atob|require)/,
232
+ { regex: /eval\s*\(\s*process\.env\.[A-Z_]{4,}/, name: 'env-eval' },
233
+ { regex: /exec\s*\(\s*Buffer\.from\(/, name: 'exec-buffer' },
234
+ { regex: /new Function\s*\(\s*(?:atob|process\.env)/, name: 'function-eval' },
235
+ { regex: /eval\s*\(\s*(?:require|import\s*\()/, name: 'require-eval' },
236
+ { regex: /Function\s*\(\s*'use\s*strict'\s*;?\s*(?:atob|require)/, name: 'strict-eval' },
121
237
  ];
122
238
  for (const p of shellPatterns) {
123
- if (p.test(code)) {
239
+ if (p.regex.test(code)) {
240
+ const evidence = createEvidence(code, filePath, p.regex, pkgJson);
124
241
  findings.push({
125
242
  id: 'ATK-002',
126
243
  severity: 'high',
127
244
  title: 'Obfuscated payload',
128
245
  description: 'Shell-code obfuscation pattern',
129
- evidence: p.source.substring(0, 60),
130
- context: ctx,
246
+ evidence: { ...evidence, pattern: p.name },
247
+ context: {
248
+ file_path: filePath,
249
+ is_dist_build: isDistBuild,
250
+ is_test_fixture: isTestFixture,
251
+ is_lifecycle_hook: evidence.lifecycle_hook,
252
+ url_domain: urlDomain,
253
+ is_known_safe_domain: isSafeDomain,
254
+ },
131
255
  });
132
256
  return findings;
133
257
  }
@@ -135,4 +259,4 @@ export async function scan(pkgJson, files = []) {
135
259
  }
136
260
 
137
261
  return findings;
138
- }
262
+ }