@lateos/npm-scan 0.15.2 → 0.15.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -2
- package/backend/detectors/hf-impersonation/index.js +396 -0
- package/backend/detectors/hf-impersonation/jaro-winkler.js +44 -0
- package/backend/detectors/hf-impersonation/known-orgs.js +5 -0
- package/backend/detectors/hf-impersonation/simhash.js +46 -0
- package/backend/detectors/index.js +2 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
[](https://www.npmjs.com/package/@lateos/npm-scan)
|
|
4
4
|
[](LICENSING.md)
|
|
5
5
|
[](package.json)
|
|
6
|
-
[](https://github.com/lateos-ai/npm-scan)
|
|
7
7
|
[](https://github.com/lateos-ai/npm-scan)
|
|
8
8
|
[](https://hub.docker.com/r/lateos/npm-scan)
|
|
9
9
|
[](https://github.com/lateos-ai/npm-scan/actions/workflows/publish.yml)
|
|
@@ -24,6 +24,8 @@ The 2025–2026 wave of npm supply chain attacks proved that traditional tooling
|
|
|
24
24
|
|
|
25
25
|
Attackers have moved past simple typosquatting. They now ship **obfuscated preinstall hooks**, **credential harvesters hidden behind environment detection**, **dormant backdoors with time-based activation**, and **worm-style transitive propagation** that spreads through peer dependencies.
|
|
26
26
|
|
|
27
|
+
A growing attack vector is **HuggingFace org impersonation** — packages that masquerade as legitimate HF model repositories (e.g., `0penai/gpt2` instead of `openai/gpt2`) to trick users into downloading malicious model artifacts during CI/CD pipelines, often bundled with suspicious binaries (`.exe`, `.dll`) in model repos that deep-learning tools trust by default.
|
|
28
|
+
|
|
27
29
|
The **Megalodon campaign** (2026) alone compromised 5,500+ repositories via fake GitHub PRs, malicious workflow injection, and cloud credential exfiltration — all coordinated through a single actor automating the entire kill chain. **@lateos/npm-scan** now detects artifacts of this campaign out of the box.
|
|
28
30
|
|
|
29
31
|
**npm audit** checks known CVEs. **Snyk** scans for vulnerabilities. **Socket** looks at package behavior. None of them were designed for the generation of attacks that emerged in 2025 — attacks that look benign until they reach production.
|
|
@@ -45,6 +47,7 @@ The **Megalodon campaign** (2026) alone compromised 5,500+ repositories via fake
|
|
|
45
47
|
| Sandbox evasion detection (ATK-010) | ❌ | ❌ | ❌ | ✅ |
|
|
46
48
|
| Transitive worm propagation (ATK-011) | ❌ | ❌ | ❌ | ✅ |
|
|
47
49
|
| Campaign detection (Megalodon CI/CD) | ❌ | ❌ | ❌ | ✅ |
|
|
50
|
+
| HF model repo impersonation + README clone | ❌ | ❌ | ❌ | ✅ |
|
|
48
51
|
| Attack taxonomy (ATK series) | ❌ | ❌ | ❌ | ✅ |
|
|
49
52
|
| SBOM output (CycloneDX + SPDX) | ❌ | ✅ | ❌ | ✅ |
|
|
50
53
|
| SARIF v2.1 (GitHub Code Scanning) | ❌ | ❌ | ❌ | ✅ |
|
|
@@ -74,6 +77,7 @@ The **Megalodon campaign** (2026) alone compromised 5,500+ repositories via fake
|
|
|
74
77
|
| 🛡️ | **Zero telemetry** | No data leaves your machine. No cloud. No callbacks. |
|
|
75
78
|
| 💾 | **Local scan history** | SQLite-backed persistence, zero external dependencies |
|
|
76
79
|
| 🪝 | **Pre-commit hook** | Block threats before commit — one-liner install, scans `package-lock.json` changes |
|
|
80
|
+
| 🤖 | **HF impersonation detection** | Detects typosquatted HuggingFace orgs (Jaro-Winkler), README clones (SimHash), artifact mismatches (`.exe` in model repos), and new-org amplifier — with lazy two-stage evaluation, zero network in Stage 1 |
|
|
77
81
|
| 📎 | **Yarn + pnpm support** | `scan-lockfile` parses `yarn.lock` and `pnpm-lock.yaml` alongside `package-lock.json` |
|
|
78
82
|
|
|
79
83
|
---
|
|
@@ -283,9 +287,11 @@ npm-scan report --pdf # all scans (premium)
|
|
|
283
287
|
| **ATK-010** | Sandbox evasion / anti-analysis | Behavioral | 🟠 medium | SR-10.3 |
|
|
284
288
|
| **ATK-011** | Transitive propagation (worm-style lateral spread) | Behavioral | 🔴 high | SR-11.4 |
|
|
285
289
|
| **MEGALODON** | Megalodon CI/CD campaign — workflow C2 exfil, credential harvest, publish velocity spike, publisher drift | Static + Registry | ⚫ critical | SR-3.1, SR-7.5 |
|
|
290
|
+
| **HF_IMPERSONATION** | HuggingFace org spoof detection — Jaro-Winkler similarity against 15 known-good orgs, SimHash README clone detection, artifact mismatch (`.exe`/`.dll` in model repos), postinstall escalation, new-org amplifier | Static + Network (Stage 2) | 🔴 high / ⚫ critical | SR-2.1 |
|
|
286
291
|
|
|
287
292
|
> **How evasive attacks are caught:** ATK-009 detects packages that check `process.env.CI`, probe hostnames, or use time-based activation. ATK-010 flags `debugger` statements, `os.hostname()` probes, and env fingerprinting. ATK-011 traces peer dependency graphs to detect worm-like propagation patterns.
|
|
288
293
|
> **MEGALODON** campaign detection analyzes bundled `.github/workflows/` files for C2 co-occurrence and base64 decode chains, scans tarball files for credential + outbound network patterns, detects version publish velocity spikes via npm registry metadata, and identifies publisher account drift — all without any network calls beyond the initial package fetch.
|
|
294
|
+
> **HF_IMPERSONATION** detection uses a lazy two-stage evaluation: Stage 1 scans `package.json` scripts and JS/TS sources for HuggingFace references (URLs, `from_pretrained()`, `hub.download()`) and runs Jaro-Winkler similarity against 15 known-good HF orgs — zero network. If spoofs are found, Stage 2 fetches the HF model API, computes SimHash of both READMEs for clone detection, validates artifact type consistency (e.g., `transformers` library with `.exe` files is flagged as critical), applies a new-org amplifier (<30 days), and escalates when the reference appears in a lifecycle script.
|
|
289
295
|
> See [`docs/attack-taxonomy.md`](docs/attack-taxonomy.md) for full evasion surface documentation and PoC examples.
|
|
290
296
|
|
|
291
297
|
---
|
|
@@ -632,7 +638,7 @@ See the [Docker quick-start section](#-run-lateosnpm-scan-anywhere-with-docker--
|
|
|
632
638
|
|
|
633
639
|
### Free tier (shipped)
|
|
634
640
|
|
|
635
|
-
- All 11 ATK detectors + **MEGALODON** CI/CD campaign detection (D1–D6)
|
|
641
|
+
- All 11 ATK detectors + **MEGALODON** CI/CD campaign detection (D1–D6) + **HF_IMPERSONATION** detector
|
|
636
642
|
- SBOM output (CycloneDX + SPDX)
|
|
637
643
|
- HTML, text, and compliance reports (NIST + EU CRA)
|
|
638
644
|
- Policy-as-code engine (YAML)
|
|
@@ -701,6 +707,7 @@ node --test test/detectors-corpus.test.js
|
|
|
701
707
|
- `test/report-snapshots.test.js` — HTML/text/CRA/PDF format assertions
|
|
702
708
|
- `test/report.test.js` — SARIF, CSV, STIG, risk score format tests
|
|
703
709
|
- `test/lockfile.test.js` — npm/yarn/pnpm parser, auto-detect, ATK-007/011 lockfile tests
|
|
710
|
+
- `test/hf-impersonation.test.js` — 13 HF impersonation detection tests (no-ref, exact match, spoof, README clone, artifact mismatch, postinstall escalation, new-org tag)
|
|
704
711
|
- `test/cli.test.js` — commander integration tests (help, version, scan, report, error handling)
|
|
705
712
|
- `test/cli-lockfile.test.js` — scan-lockfile CLI options, yarn/pnpm/monorepo/watch tests
|
|
706
713
|
|
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
import { KNOWN_HF_ORGS } from './known-orgs.js';
|
|
2
|
+
import { jaroWinkler } from './jaro-winkler.js';
|
|
3
|
+
import { simhash, similarity as simhashSimilarity } from './simhash.js';
|
|
4
|
+
|
|
5
|
+
// Patterns that pull HuggingFace repo references out of scripts and sources.
// They are all /g regexes, so `lastIndex` is stateful: reset it to 0 before
// every exec() loop.
const HF_URL_PATTERN = /(?:huggingface\.co|hf\.co)\/([^\/\s"'>]+)\/([^\/\s"'>]+)/g;
const FROM_PRETRAINED_PATTERN = /from_pretrained\(\s*["']([^"']+\/[^"']+)["']/g;
const HUB_DOWNLOAD_SINGLE = /hub\.download\(\s*["']([^"']+\/[^"']+)["']/g;
const HUB_DOWNLOAD_DOUBLE = /hub\.download\(\s*["']([^"']+)["']\s*,\s*["']([^"']+)["']/g;

// package.json hooks that npm runs automatically while installing a package.
// `preinstall` runs before `install`/`postinstall`, so an HF fetch placed there
// must be escalated exactly like the other install-time hooks.
const LIFECYCLE_SCRIPTS = new Set(['preinstall', 'install', 'postinstall', 'prepare']);
const API_BASE = 'https://huggingface.co';

// Severity label -> numeric rank, and the inverse (rank -> label).
const SEVERITY_SCORE = { none: 0, low: 1, medium: 2, high: 3, critical: 4 };
const SEVERITY_LABELS = ['none', 'low', 'medium', 'high', 'critical'];

// Model libraries for which files matching SUSPICIOUS_EXTENSIONS are flagged.
const HF_ARTIFACT_LIBS = new Set(['transformers', 'diffusers', 'sentence-transformers', 'gguf', 'safetensors']);
const SUSPICIOUS_EXTENSIONS = /\.(exe|msi|bat|ps1|dll)$/i;

// In-memory response cache (url -> { data, fetchedAt }) with a one-hour TTL,
// plus the timestamp of the last outbound request used for throttling.
const _cache = new Map();
const CACHE_TTL = 3600 * 1000;
let _lastFetchTime = 0;
|
|
22
|
+
|
|
23
|
+
/**
 * Look up the numeric rank of a severity label in SEVERITY_SCORE.
 *
 * @param {string} sev - Severity label such as 'high'.
 * @returns {number} The rank, or 0 when the lookup yields a falsy value
 *   (unknown label, or the 'none' entry).
 */
function severityIndex(sev) {
  const rank = SEVERITY_SCORE[sev];
  return rank || 0;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Pick the more severe of two severity labels.
 *
 * @param {string} a - First severity label.
 * @param {string} b - Second severity label.
 * @returns {string} `a` when it ranks at least as high as `b`, otherwise `b`.
 */
function maxSeverity(a, b) {
  if (severityIndex(a) >= severityIndex(b)) {
    return a;
  }
  return b;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Promise-based delay.
 *
 * @param {number} ms - Delay passed straight to setTimeout.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
34
|
+
|
|
35
|
+
/**
 * GET a JSON document, using the module-level cache and a minimum spacing of
 * 100 ms between outbound requests.
 *
 * A 429 response is retried exactly once after waiting for the server's
 * Retry-After hint. Failures are best-effort: they are logged with
 * console.debug and reported as `null`, and are not cached.
 *
 * @param {string} url - Absolute URL to fetch.
 * @returns {Promise<any|null>} Parsed JSON body, or null on a non-OK status,
 *   network error, or JSON parse error.
 */
async function fetchWithCache(url) {
  const cached = _cache.get(url);
  if (cached && Date.now() - cached.fetchedAt < CACHE_TTL) {
    return cached.data;
  }

  // Throttle: keep at least 100 ms between consecutive requests.
  const elapsed = Date.now() - _lastFetchTime;
  if (elapsed < 100) {
    await sleep(100 - elapsed);
  }
  _lastFetchTime = Date.now();

  try {
    let res = await fetch(url);
    if (res.status === 429) {
      // Retry-After is either delta-seconds or an HTTP-date. parseInt() turns
      // the date form (or a missing header) into NaN, and sleep(NaN) fires
      // almost immediately, hammering an endpoint that just rate-limited us.
      // Fall back to 5 s for anything non-numeric or negative, and cap the
      // wait at 60 s so one header cannot stall the whole scan.
      const parsed = Number.parseInt(res.headers.get('Retry-After') ?? '', 10);
      const retryAfter = Number.isFinite(parsed) && parsed >= 0 ? Math.min(parsed, 60) : 5;
      await sleep(retryAfter * 1000);
      res = await fetch(url);
    }
    if (!res.ok) {
      console.debug(`HF API warning: ${url} returned ${res.status}`);
      return null;
    }
    const data = await res.json();
    _cache.set(url, { data, fetchedAt: Date.now() });
    return data;
  } catch (err) {
    // Deliberately best-effort: a network failure must not abort the scan.
    console.debug(`HF API warning: ${err.message}`);
    return null;
  }
}
|
|
66
|
+
|
|
67
|
+
/**
 * GET a text document (a README), using the module-level cache and a minimum
 * spacing of 100 ms between outbound requests.
 *
 * A 429 response is retried exactly once after waiting for the server's
 * Retry-After hint. Failures are best-effort and reported as `null`; they are
 * not cached.
 *
 * @param {string} url - Absolute URL to fetch.
 * @returns {Promise<string|null>} Response body text, or null on a non-OK
 *   status or network error.
 */
async function fetchReadme(url) {
  const cached = _cache.get(url);
  if (cached && Date.now() - cached.fetchedAt < CACHE_TTL) {
    return cached.data;
  }

  // Throttle: keep at least 100 ms between consecutive requests.
  const elapsed = Date.now() - _lastFetchTime;
  if (elapsed < 100) {
    await sleep(100 - elapsed);
  }
  _lastFetchTime = Date.now();

  try {
    let res = await fetch(url);
    if (res.status === 429) {
      // Retry-After may be an HTTP-date, which parseInt() maps to NaN; a NaN
      // delay would retry immediately. Use 5 s for non-numeric or negative
      // values and cap the wait at 60 s so the scan cannot stall on one header.
      const parsed = Number.parseInt(res.headers.get('Retry-After') ?? '', 10);
      const retryAfter = Number.isFinite(parsed) && parsed >= 0 ? Math.min(parsed, 60) : 5;
      await sleep(retryAfter * 1000);
      res = await fetch(url);
    }
    if (!res.ok) {
      return null;
    }
    const text = await res.text();
    _cache.set(url, { data: text, fetchedAt: Date.now() });
    return text;
  } catch (err) {
    // Deliberately best-effort: a network failure must not abort the scan.
    console.debug(`HF README warning: ${err.message}`);
    return null;
  }
}
|
|
98
|
+
|
|
99
|
+
/**
 * Find the entry of KNOWN_HF_ORGS most similar to an org name.
 *
 * Comparison is case-insensitive Jaro-Winkler. Only candidates scoring at
 * least 0.82 are considered; on a tie the earliest candidate wins.
 *
 * @param {*} spoofedOrg - Org name to compare (coerced with String()).
 * @returns {{org: (string|null), score: number}} Best match, or
 *   `{ org: null, score: 0 }` when nothing reaches the threshold.
 */
function findClosestOrg(spoofedOrg) {
  const needle = String(spoofedOrg).toLowerCase();
  let bestOrg = null;
  let bestScore = 0;

  for (const candidate of KNOWN_HF_ORGS) {
    const candidateScore = jaroWinkler(needle, candidate.toLowerCase());
    const isBetter = candidateScore >= 0.82 && candidateScore > bestScore;
    if (!isBetter) {
      continue;
    }
    bestOrg = candidate;
    bestScore = candidateScore;
  }

  return { org: bestOrg, score: bestScore };
}
|
|
110
|
+
|
|
111
|
+
/**
 * Collect HuggingFace repo references from package.json scripts and from
 * JS/TS source files.
 *
 * @param {object|null|undefined} pkgJson - Parsed package.json; only `scripts` is read.
 * @param {Iterable<{path?: string, content?: *}>|null|undefined} allFiles -
 *   Files to scan. Only paths ending in .js/.ts/.jsx/.tsx/.mjs/.cjs with
 *   string content are examined.
 * @returns {{tuples: Set<string>, postinstallFetchFlag: boolean}} The
 *   references found, plus a flag that is true when at least one reference
 *   appears in a script whose hook name is in LIFECYCLE_SCRIPTS.
 */
function extractHFTuples(pkgJson, allFiles) {
  const tuples = new Set();

  // Each pattern is paired with the function that turns a match into a tuple.
  const extractors = [
    [HF_URL_PATTERN, (m) => `${m[1]}/${m[2]}`],
    [FROM_PRETRAINED_PATTERN, (m) => m[1]],
    [HUB_DOWNLOAD_SINGLE, (m) => m[1]],
    // The two-argument pattern targets hub.download("org", "repo"). It also
    // matches hub.download("org/repo", "file.bin"); joining those would invent
    // a bogus "org/repo/file.bin" reference (a duplicate finding and a failing
    // Stage 2 lookup), so a first argument that already contains "/" is taken
    // as the repo id on its own.
    [HUB_DOWNLOAD_DOUBLE, (m) => (m[1].includes('/') ? m[1] : `${m[1]}/${m[2]}`)],
  ];

  // Runs every pattern over `text`; returns true when anything matched.
  const collect = (text) => {
    let found = false;
    for (const [pattern, toTuple] of extractors) {
      // The patterns are shared /g regexes, so lastIndex must be reset.
      pattern.lastIndex = 0;
      let m;
      while ((m = pattern.exec(text)) !== null) {
        tuples.add(toTuple(m));
        found = true;
      }
    }
    return found;
  };

  let postinstallFetchFlag = false;
  for (const [hook, script] of Object.entries(pkgJson?.scripts || {})) {
    if (typeof script !== 'string') continue;
    if (collect(script) && LIFECYCLE_SCRIPTS.has(hook)) {
      postinstallFetchFlag = true;
    }
  }

  if (allFiles) {
    for (const file of allFiles) {
      if (!file.path?.match(/\.(js|ts|jsx|tsx|mjs|cjs)$/i)) continue;
      collect(typeof file.content === 'string' ? file.content : '');
    }
  }

  return { tuples, postinstallFetchFlag };
}
|
|
182
|
+
|
|
183
|
+
/**
 * Build the HF_ORG_SPOOF finding for a repo whose org resembles a known org.
 *
 * @param {string} referencedRepo - Repo reference as found in the package.
 * @param {string} org - Org part of the reference.
 * @param {{org: string}} canonicalOrg - Matched known org (only `.org` is read).
 * @param {number} similarityScore - Similarity score, shown to 3 decimals.
 * @param {boolean} postinstallFetchFlag - Accepted but not used by this function.
 * @param {string[]} [tags] - Tags to attach; the same array is stored on the
 *   finding. Defaults to an empty array.
 * @param {object} [hfMeta] - Optional metadata, attached only when truthy.
 * @returns {object} The finding; `evidence` is a JSON string of the key fields.
 */
function buildHFOrgSpoofFinding(referencedRepo, org, canonicalOrg, similarityScore, postinstallFetchFlag, tags, hfMeta) {
  const canonicalName = canonicalOrg.org;
  const tagList = tags || [];

  const description = `Repository "${referencedRepo}" references org "${org}" which is similar to known HF org "${canonicalName}" (similarity: ${similarityScore.toFixed(3)})`;
  const evidence = JSON.stringify({
    referencedRepo,
    canonicalOrg: canonicalName,
    similarityScore,
    tags: tagList,
  });

  const finding = {
    id: 'HF_ORG_SPOOF',
    severity: 'high',
    title: 'HuggingFace org impersonation',
    description,
    evidence,
    referencedRepo,
    canonicalOrg: canonicalName,
    similarityScore,
    tags: tagList,
    ipiClass: 'SUPPLY_CHAIN',
  };

  if (!hfMeta) {
    return finding;
  }
  finding.hfMeta = hfMeta;
  return finding;
}
|
|
206
|
+
|
|
207
|
+
/**
 * Stage 2 (network) checks for repos already flagged as org spoofs.
 *
 * For every entry of `orgsToCheck` this fetches the model and user records
 * from the HuggingFace API and:
 *   - tags the repo NEW_ORG when the user record's `dateCreated` is less than
 *     30 days old (also recording `hfMeta` = { orgAgeDays, repoDownloads });
 *   - emits HF_README_CLONE when the spoofed and canonical READMEs have a
 *     SimHash similarity of at least 0.9;
 *   - emits HF_ARTIFACT_MISMATCH when the model declares a library listed in
 *     HF_ARTIFACT_LIBS but has a file matching SUSPICIOUS_EXTENSIONS.
 * When `postinstallFetchFlag` is set, every finding that carries a
 * `referencedRepo` is raised to 'critical' and tagged POSTINSTALL_ESCALATED.
 *
 * Objects in `spoofFindings` are mutated in place (tags, hfMeta, severity).
 *
 * @param {object[]} spoofFindings - Stage 1 findings.
 * @param {Array<[string, {org: string, canonicalOrg: {org: string}}]>} orgsToCheck -
 *   [referencedRepo, details] pairs produced by Stage 1.
 * @param {boolean} postinstallFetchFlag - Whether an HF reference appeared in
 *   a lifecycle script.
 * @returns {Promise<object[]>} Findings newly created by this stage.
 */
async function runStage2(spoofFindings, orgsToCheck, postinstallFetchFlag) {
  const newFindings = [];

  // Repo ids are extracted from scanned, untrusted package content and end up
  // in request URLs. Accept only plain name segments so that "..", "?", "#"
  // or similar cannot redirect the request to another path on the API host.
  const isSafeSegment = (segment) =>
    /^[\w.-]+$/.test(segment) && segment !== '.' && segment !== '..';

  // Adds each tag to finding.tags once, creating the array when needed.
  const addTags = (finding, extraTags) => {
    if (extraTags.length === 0) return;
    if (!finding.tags) finding.tags = [];
    for (const tag of extraTags) {
      if (!finding.tags.includes(tag)) finding.tags.push(tag);
    }
  };

  for (const [referencedRepo, { org, canonicalOrg }] of orgsToCheck) {
    // A URL-derived reference may carry a query string or fragment; drop it
    // before building request paths.
    const repoPath = String(referencedRepo).split(/[?#]/, 1)[0];
    const segments = repoPath.split('/');
    if (segments.length < 2 || !segments.every(isSafeSegment) || !isSafeSegment(String(org))) {
      // Skipping the lookups yields no Stage 2 tags or findings for this repo;
      // the Stage 1 finding is kept.
      console.debug(`HF Stage 2 skipped: "${referencedRepo}" is not a valid repo id`);
      continue;
    }

    const repoName = segments[1];
    const canonicalRepo = `${canonicalOrg.org}/${repoName}`;
    const tags = [];
    let hfMeta = null;

    const spoofedModel = await fetchWithCache(`${API_BASE}/api/models/${repoPath}`);
    const userData = await fetchWithCache(`${API_BASE}/api/users/${org}`);

    // Org age check for NEW_ORG tag
    if (userData?.dateCreated) {
      const created = new Date(userData.dateCreated);
      const ageDays = (Date.now() - created.getTime()) / (1000 * 60 * 60 * 24);
      hfMeta = {
        orgAgeDays: Math.round(ageDays),
        repoDownloads: spoofedModel?.downloads ?? 0,
      };
      if (ageDays < 30) {
        tags.push('NEW_ORG');
      }
    }

    // README clone check
    if (canonicalOrg.org !== org) {
      const readmeSpoof = await fetchReadme(`${API_BASE}/${repoPath}/resolve/main/README.md`);
      const readmeCanonical = await fetchReadme(`${API_BASE}/${canonicalRepo}/resolve/main/README.md`);

      if (readmeSpoof && readmeCanonical) {
        const simScore = simhashSimilarity(simhash(readmeSpoof), simhash(readmeCanonical));

        if (simScore >= 0.9) {
          const readmeFinding = {
            id: 'HF_README_CLONE',
            severity: 'high',
            title: 'HuggingFace README clone',
            description: `README of "${referencedRepo}" is highly similar (${(simScore * 100).toFixed(1)}%) to canonical org "${canonicalRepo}"`,
            evidence: JSON.stringify({
              referencedRepo,
              canonicalOrg: canonicalOrg.org,
              similarityScore: simScore,
              tags: [],
            }),
            referencedRepo,
            canonicalOrg: canonicalOrg.org,
            similarityScore: simScore,
            tags: [],
            ipiClass: 'SUPPLY_CHAIN',
          };
          if (hfMeta) readmeFinding.hfMeta = hfMeta;
          newFindings.push(readmeFinding);
        }
      }
    }

    // Artifact mismatch check
    const libName = spoofedModel?.cardData?.library_name;
    if (libName && spoofedModel?.siblings && HF_ARTIFACT_LIBS.has(libName)) {
      for (const sibling of spoofedModel.siblings) {
        const fn = sibling.rfilename || '';
        if (!SUSPICIOUS_EXTENSIONS.test(fn)) continue;

        const artifactFinding = {
          id: 'HF_ARTIFACT_MISMATCH',
          severity: 'critical',
          title: 'HF artifact mismatch — suspicious binary in model repo',
          description: `Model "${referencedRepo}" declares library "${libName}" but contains suspicious file "${fn}"`,
          evidence: JSON.stringify({
            referencedRepo,
            artifactConflict: { declaredType: libName, suspiciousFilename: fn },
            tags: [],
          }),
          referencedRepo,
          artifactConflict: { declaredType: libName, suspiciousFilename: fn },
          tags: [],
          ipiClass: 'SUPPLY_CHAIN',
        };
        if (hfMeta) artifactFinding.hfMeta = hfMeta;
        newFindings.push(artifactFinding);
        // One finding per repo is enough; stop at the first suspicious file.
        break;
      }
    }

    // Propagate this repo's tags and metadata to every finding about it.
    for (const sf of spoofFindings) {
      if (sf.referencedRepo !== referencedRepo) continue;
      addTags(sf, tags);
      if (hfMeta) {
        sf.hfMeta = hfMeta;
      }
    }
    for (const nf of newFindings) {
      if (nf.referencedRepo === referencedRepo) {
        addTags(nf, tags);
      }
    }
  }

  // POSTINSTALL_FETCH escalation
  if (postinstallFetchFlag) {
    for (const f of [...spoofFindings, ...newFindings]) {
      if (!f.referencedRepo) continue;
      if (severityIndex(f.severity) < severityIndex('critical')) {
        f.severity = 'critical';
      }
      if (!f.tags) f.tags = [];
      if (!f.tags.includes('POSTINSTALL_ESCALATED')) {
        f.tags.push('POSTINSTALL_ESCALATED');
      }
    }
  }

  return newFindings;
}
|
|
346
|
+
|
|
347
|
+
/**
 * Detector entry point: find HuggingFace org impersonation in a package.
 *
 * Stage 1 is local only: it extracts HF repo references and compares each org
 * against the known-org list. Stage 2 (network) runs only when Stage 1 found
 * at least one spoofed org.
 *
 * @param {object} pkgJson - Parsed package.json.
 * @param {Array} [files=[]] - Files to scan when `allFiles` is not given.
 * @param {*} [registryMeta=null] - Accepted but not used by this function.
 * @param {Array|null} [allFiles=null] - Preferred file list when provided.
 * @returns {Promise<object[]>} Stage 1 findings followed by Stage 2 findings;
 *   empty when nothing is referenced or nothing looks spoofed.
 */
export async function scan(pkgJson, files = [], registryMeta = null, allFiles = null) {
  const { tuples, postinstallFetchFlag } = extractHFTuples(pkgJson, allFiles || files);
  if (tuples.size === 0) {
    return [];
  }

  // Stage 1: org spoof detection (local only)
  const spoofFindings = [];
  const orgsToCheck = []; // [referencedRepo, { org, canonicalOrg, similarityScore, finding }]

  for (const tuple of tuples) {
    const [org, ...rest] = tuple.split('/');
    if (rest.length === 0) continue;

    const canonicalOrg = findClosestOrg(org);
    if (!canonicalOrg.org) continue;
    // An exact (case-insensitive) match is the real org, not a spoof.
    const isExactMatch = org.toLowerCase() === canonicalOrg.org.toLowerCase();
    if (isExactMatch) continue;

    const similarityScore = canonicalOrg.score;
    const finding = buildHFOrgSpoofFinding(tuple, org, canonicalOrg, similarityScore, postinstallFetchFlag, []);
    spoofFindings.push(finding);
    orgsToCheck.push([tuple, { org, canonicalOrg, similarityScore, finding }]);
  }

  if (spoofFindings.length === 0) {
    return [];
  }

  // Stage 2: network checks
  const stage2Findings = await runStage2(spoofFindings, orgsToCheck, postinstallFetchFlag);
  const allFindings = [...spoofFindings, ...stage2Findings];

  // Stage 2 may have added tags after `evidence` was serialised; rewrite the
  // evidence JSON so its `tags` mirror the finding's final tag list.
  for (const f of allFindings) {
    const hasTags = f.tags && f.tags.length > 0;
    if (!hasTags) continue;
    try {
      const ev = JSON.parse(f.evidence);
      ev.tags = [...f.tags];
      f.evidence = JSON.stringify(ev);
    } catch {
      // evidence wasn't JSON, leave as-is
    }
  }

  return allFindings;
}
|
|
390
|
+
|
|
391
|
+
/**
 * Reset module state: zero the last-request timestamp used for throttling and
 * empty the response cache.
 */
export function clearCache() {
  _lastFetchTime = 0;
  _cache.clear();
}
|
|
395
|
+
|
|
396
|
+
export { KNOWN_HF_ORGS, jaroWinkler, simhash, simhashSimilarity };
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
 * Jaro-Winkler similarity of two strings, compared by UTF-16 code unit.
 *
 * The common-prefix bonus uses a scaling factor of 0.1 over at most four
 * leading characters.
 *
 * @param {string} s1 - First string.
 * @param {string} s2 - Second string.
 * @returns {number} Similarity: 1 for identical inputs, 0 when either string
 *   is empty or no characters match.
 */
export function jaroWinkler(s1, s2) {
  if (s1 === s2) return 1;

  const lenA = s1.length;
  const lenB = s2.length;
  if (lenA === 0 || lenB === 0) return 0;

  // Characters count as matching only within this distance of each other.
  const reach = Math.floor(Math.max(lenA, lenB) / 2) - 1;
  const usedA = Array.from({ length: lenA }, () => false);
  const usedB = Array.from({ length: lenB }, () => false);
  let matchCount = 0;

  for (let i = 0; i < lenA; i += 1) {
    const lo = Math.max(0, i - reach);
    const hi = Math.min(lenB, i + reach + 1);
    // Advance to the first unused position in s2 holding the same character.
    let j = lo;
    while (j < hi && (usedB[j] || s1[i] !== s2[j])) {
      j += 1;
    }
    if (j < hi) {
      usedA[i] = true;
      usedB[j] = true;
      matchCount += 1;
    }
  }

  if (matchCount === 0) return 0;

  // Matched characters of s2 in order, for pairwise comparison with s1's.
  const matchedB = [];
  for (let j = 0; j < lenB; j += 1) {
    if (usedB[j]) matchedB.push(s2[j]);
  }

  let transpositions = 0;
  let cursor = 0;
  for (let i = 0; i < lenA; i += 1) {
    if (!usedA[i]) continue;
    if (s1[i] !== matchedB[cursor]) transpositions += 1;
    cursor += 1;
  }

  const jaro = (matchCount / lenA + matchCount / lenB + (matchCount - transpositions / 2) / matchCount) / 3;

  const prefixCap = Math.min(4, lenA, lenB);
  let prefixLen = 0;
  while (prefixLen < prefixCap && s1[prefixLen] === s2[prefixLen]) {
    prefixLen += 1;
  }

  return jaro + prefixLen * 0.1 * (1 - jaro);
}
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
 * Hash a string to an unsigned 32-bit integer: start from 5381 and, for each
 * UTF-16 code unit, compute `hash * 33 + codeUnit` wrapped to 32 bits.
 *
 * @param {string} str - Text to hash.
 * @returns {number} Integer in the range [0, 2^32).
 */
function hashToken(str) {
  let acc = 5381;
  let pos = 0;
  while (pos < str.length) {
    // `| 0` wraps to a signed 32-bit integer after every step.
    acc = (acc * 33 + str.charCodeAt(pos)) | 0;
    pos += 1;
  }
  // Reinterpret the signed result as unsigned.
  return acc >>> 0;
}
|
|
9
|
+
|
|
10
|
+
/**
 * Compute a SimHash fingerprint of a text.
 *
 * The text is lower-cased and split on whitespace; every token votes +1 or -1
 * on each of 64 bit positions according to its hash, and a fingerprint bit is
 * set where the vote total is positive.
 *
 * NOTE(review): hashToken() returns a 32-bit value and `>>` uses its shift
 * count modulo 32, so positions 32-63 repeat positions 0-31; the fingerprint
 * carries 32 independent bits.
 *
 * @param {string} text - Input text.
 * @returns {bigint} Fingerprint as a non-negative BigInt (0n for text with no
 *   tokens).
 */
export function simhash(text) {
  const weights = new Array(64).fill(0);

  for (const token of text.toLowerCase().split(/\s+/)) {
    if (!token) continue;
    const h = hashToken(token);
    for (let bit = 0; bit < 64; bit += 1) {
      weights[bit] += ((h >> bit) & 1) ? 1 : -1;
    }
  }

  return weights.reduce(
    (fingerprint, weight, bit) => (weight > 0 ? fingerprint | (1n << BigInt(bit)) : fingerprint),
    0n,
  );
}
|
|
33
|
+
|
|
34
|
+
/**
 * Count the bit positions in which two BigInt fingerprints differ.
 *
 * @param {bigint} a - First fingerprint.
 * @param {bigint} b - Second fingerprint.
 * @returns {number} Number of set bits in `a ^ b`; 0 when the XOR is not
 *   positive.
 */
export function hammingDistance(a, b) {
  let diff = a ^ b;
  let bits = 0;
  while (diff > 0n) {
    // Clearing the lowest set bit each round counts the set bits.
    diff &= diff - 1n;
    bits += 1;
  }
  return bits;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Similarity of two fingerprints: one minus the Hamming distance divided by 64.
 *
 * @param {bigint} a - First fingerprint.
 * @param {bigint} b - Second fingerprint.
 * @returns {number} 1 when the fingerprints are identical, lower as more bits differ.
 */
export function similarity(a, b) {
  const differingBits = hammingDistance(a, b);
  return 1 - differingBits / 64;
}
|
|
@@ -10,6 +10,7 @@ import * as atk009 from './atk-009-dormant-trigger.js';
|
|
|
10
10
|
import * as atk010 from './atk-010-sandbox-evasion.js';
|
|
11
11
|
import * as atk011 from './atk-011-transitive-prop.js';
|
|
12
12
|
import { scanAll as megalodonScan } from './megalodon/index.js';
|
|
13
|
+
import { scan as hfScan } from './hf-impersonation/index.js';
|
|
13
14
|
|
|
14
15
|
export async function runAll(pkgJson, files = [], registryMeta = null, allFiles = null) {
|
|
15
16
|
const findings = [];
|
|
@@ -25,5 +26,6 @@ export async function runAll(pkgJson, files = [], registryMeta = null, allFiles
|
|
|
25
26
|
findings.push(...await atk010.scan(pkgJson, files));
|
|
26
27
|
findings.push(...await atk011.scan(pkgJson, files));
|
|
27
28
|
findings.push(...await megalodonScan(pkgJson, allFiles || files, registryMeta));
|
|
29
|
+
findings.push(...await hfScan(pkgJson, files, registryMeta, allFiles || files));
|
|
28
30
|
return findings.sort((a, b) => b.severity.localeCompare(a.severity));
|
|
29
31
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lateos/npm-scan",
|
|
3
|
-
"version": "0.15.2",
|
|
3
|
+
"version": "0.15.3",
|
|
4
4
|
"description": "Modern npm supply chain security scanner — detects obfuscated payloads, credential stealers, conditional triggers, sandbox evasion, and worm-like propagation. 11 attack types, SBOM, NIST/EU CRA compliance reporting.",
|
|
5
5
|
"main": "backend/index.js",
|
|
6
6
|
"bin": {
|