muaddib-scanner 2.11.73 → 2.11.75
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/{self-scan-v2.11.73.json → self-scan-v2.11.75.json} +1 -1
- package/src/ioc/ghsa-poller.js +26 -12
- package/src/monitor/classify.js +21 -14
- package/src/monitor/ingestion.js +25 -1
- package/src/monitor/queue.js +4 -0
- package/src/monitor/webhook.js +52 -27
- package/src/scanner/typosquat.js +77 -1
package/package.json
CHANGED
package/src/ioc/ghsa-poller.js
CHANGED
|
@@ -31,7 +31,7 @@ const path = require('path');
|
|
|
31
31
|
const https = require('https');
|
|
32
32
|
|
|
33
33
|
const GHSA_API_HOST = 'api.github.com';
|
|
34
|
-
const GHSA_ECOSYSTEMS = ['npm', 'pypi'];
|
|
34
|
+
const GHSA_ECOSYSTEMS = ['npm', 'pypi', 'crates'];
|
|
35
35
|
const GHSA_CURSOR_FILE = process.env.MUADDIB_GHSA_CURSOR_FILE ||
|
|
36
36
|
path.join(__dirname, '..', '..', 'data', 'ghsa-cursor.json');
|
|
37
37
|
const GHSA_MALWARE_FILE = process.env.MUADDIB_GHSA_MALWARE_FILE ||
|
|
@@ -84,9 +84,10 @@ function _httpGetJson(pathName, { token, httpImpl = https, timeoutMs = 20_000 }
|
|
|
84
84
|
*/
|
|
85
85
|
async function _defaultFetch(ecosystem, opts = {}) {
|
|
86
86
|
const token = opts.token || process.env.GITHUB_TOKEN || process.env.GH_TOKEN || null;
|
|
87
|
-
// GHSA names the Python ecosystem "pip" (not "pypi")
|
|
88
|
-
//
|
|
89
|
-
|
|
87
|
+
// GHSA names the Python ecosystem "pip" (not "pypi") and Rust "rust" (we call it
|
|
88
|
+
// "crates") in BOTH the query and the response; querying ecosystem=pypi returns HTTP
|
|
89
|
+
// 422. Map our internal name to GHSA's for the query.
|
|
90
|
+
const apiEco = ecosystem === 'pypi' ? 'pip' : ecosystem === 'crates' ? 'rust' : ecosystem;
|
|
90
91
|
const p = `/advisories?type=malware&ecosystem=${encodeURIComponent(apiEco)}&per_page=100&sort=updated&direction=desc`;
|
|
91
92
|
const { status, json } = await _httpGetJson(p, { token, httpImpl: opts.httpImpl });
|
|
92
93
|
if (status !== 200 || !Array.isArray(json)) {
|
|
@@ -112,7 +113,7 @@ function _nextLink(linkHeader) {
|
|
|
112
113
|
async function fetchAllGhsaMalware(ecosystem, opts = {}) {
|
|
113
114
|
const token = opts.token || process.env.GITHUB_TOKEN || process.env.GH_TOKEN || null;
|
|
114
115
|
const maxPages = Number.isFinite(opts.maxPages) ? opts.maxPages : 30;
|
|
115
|
-
const apiEco = ecosystem === 'pypi' ? 'pip' : ecosystem;
|
|
116
|
+
const apiEco = ecosystem === 'pypi' ? 'pip' : ecosystem === 'crates' ? 'rust' : ecosystem;
|
|
116
117
|
let pathName = `/advisories?type=malware&ecosystem=${encodeURIComponent(apiEco)}&per_page=100&sort=published&direction=desc`;
|
|
117
118
|
const rows = [];
|
|
118
119
|
for (let page = 0; page < maxPages && pathName; page++) {
|
|
@@ -141,6 +142,7 @@ function parseAdvisory(adv, ecosystems = GHSA_ECOSYSTEMS) {
|
|
|
141
142
|
if (!pkg || !pkg.name || !pkg.ecosystem) continue;
|
|
142
143
|
let eco = String(pkg.ecosystem).toLowerCase();
|
|
143
144
|
if (eco === 'pip') eco = 'pypi'; // normalize GHSA's "pip" to our internal "pypi"
|
|
145
|
+
else if (eco === 'rust') eco = 'crates'; // normalize GHSA's "rust" to our internal "crates"
|
|
144
146
|
if (ecosystems && !ecosystems.includes(eco)) continue;
|
|
145
147
|
out.push({
|
|
146
148
|
ghsa_id: adv.ghsa_id,
|
|
@@ -210,17 +212,29 @@ function _maybeCompactMalware(file) {
|
|
|
210
212
|
function buildGhsaPreAlertEmbed(row) {
|
|
211
213
|
const link = row.ecosystem === 'pypi'
|
|
212
214
|
? `https://pypi.org/project/${encodeURIComponent(row.name)}/`
|
|
213
|
-
:
|
|
215
|
+
: row.ecosystem === 'crates'
|
|
216
|
+
? `https://crates.io/crates/${encodeURIComponent(row.name)}`
|
|
217
|
+
: `https://www.npmjs.com/package/${encodeURIComponent(row.name)}`;
|
|
218
|
+
const fields = [
|
|
219
|
+
{ name: 'Package', value: `[${row.ecosystem}/${row.name}](${link})`, inline: true },
|
|
220
|
+
{ name: 'Range', value: String(row.versionRange || '*'), inline: true },
|
|
221
|
+
{ name: 'Advisory', value: `[${row.ghsa_id}](https://github.com/advisories/${row.ghsa_id})`, inline: true },
|
|
222
|
+
{ name: 'Source', value: 'GitHub Advisory DB (type=malware) — active poller', inline: false }
|
|
223
|
+
];
|
|
224
|
+
// crates enrichment: flag if the malicious crate name typosquats a popular crate.
|
|
225
|
+
// Lazy require keeps the poller light; findCratesTyposquatMatch is pure.
|
|
226
|
+
if (row.ecosystem === 'crates') {
|
|
227
|
+
try {
|
|
228
|
+
const { findCratesTyposquatMatch } = require('../scanner/typosquat.js');
|
|
229
|
+
const m = findCratesTyposquatMatch(row.name);
|
|
230
|
+
if (m) fields.push({ name: 'Typosquat', value: `looks like \`${m.original}\` (distance ${m.distance})`, inline: true });
|
|
231
|
+
} catch { /* enrichment is best-effort */ }
|
|
232
|
+
}
|
|
214
233
|
return {
|
|
215
234
|
embeds: [{
|
|
216
235
|
title: '⚠️ GHSA PRE-ALERT — Fresh Malware Advisory',
|
|
217
236
|
color: 0xe74c3c,
|
|
218
|
-
fields
|
|
219
|
-
{ name: 'Package', value: `[${row.ecosystem}/${row.name}](${link})`, inline: true },
|
|
220
|
-
{ name: 'Range', value: String(row.versionRange || '*'), inline: true },
|
|
221
|
-
{ name: 'Advisory', value: `[${row.ghsa_id}](https://github.com/advisories/${row.ghsa_id})`, inline: true },
|
|
222
|
-
{ name: 'Source', value: 'GitHub Advisory DB (type=malware) — active poller', inline: false }
|
|
223
|
-
],
|
|
237
|
+
fields,
|
|
224
238
|
footer: { text: `MUAD'DIB GHSA Pre-Alert | ${new Date().toISOString().replace('T', ' ').replace(/\.\d+Z$/, ' UTC')}` },
|
|
225
239
|
timestamp: new Date().toISOString()
|
|
226
240
|
}]
|
package/src/monitor/classify.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const { levenshteinDistance } = require('../scanner/typosquat.js');
|
|
3
|
+
const { levenshteinDistance, findPyPITyposquatMatch } = require('../scanner/typosquat.js');
|
|
4
4
|
const { loadCachedIOCs } = require('../ioc/updater.js');
|
|
5
5
|
|
|
6
6
|
// --- Popular npm names (used for quick typosquat check) ---
|
|
@@ -351,32 +351,39 @@ function quickTyposquatCheck(name) {
|
|
|
351
351
|
* Layer 3: Determine if a package should be cached and at what retention level.
|
|
352
352
|
* @param {string} name - Package name
|
|
353
353
|
* @param {Object|null} docMeta - Metadata from extractTarballFromDoc
|
|
354
|
-
* @param {Object|null} doc - Full CouchDB doc
|
|
354
|
+
* @param {Object|null} doc - Full CouchDB doc (npm; carries `versions` for first-publish)
|
|
355
|
+
* @param {Object} [opts] - Non-npm ecosystem hints:
|
|
356
|
+
* { ecosystem?: 'npm'|'pypi', versionCount?: number }. PyPI has no packument at
|
|
357
|
+
* ingest time, so the version count comes from preResolvePyPIBatch via opts.
|
|
355
358
|
* @returns {{ shouldCache: boolean, reason: string, retentionDays: number }}
|
|
356
359
|
*/
|
|
357
|
-
function evaluateCacheTrigger(name, docMeta, doc) {
|
|
358
|
-
|
|
360
|
+
function evaluateCacheTrigger(name, docMeta, doc, opts = {}) {
|
|
361
|
+
const ecosystem = opts.ecosystem || 'npm';
|
|
362
|
+
|
|
363
|
+
// Trigger 1: IOC match -- 30-day retention. PyPI IOCs are namespaced "pypi:<name>".
|
|
359
364
|
try {
|
|
360
365
|
const iocs = loadCachedIOCs();
|
|
361
|
-
|
|
362
|
-
|
|
366
|
+
const inSet = (s) => s && (s.has(name) || (ecosystem === 'pypi' && s.has(`pypi:${name}`)));
|
|
367
|
+
if (inSet(iocs.wildcardPackages) || inSet(iocs.packagesMap)) {
|
|
363
368
|
return { shouldCache: true, reason: 'ioc_match', retentionDays: TARBALL_CACHE_HIGH_RISK_RETENTION_DAYS };
|
|
364
369
|
}
|
|
365
370
|
} catch { /* non-fatal */ }
|
|
366
371
|
|
|
367
|
-
// Trigger 2: Typosquat signal -- 7-day retention
|
|
372
|
+
// Trigger 2: Typosquat signal -- 7-day retention (ecosystem-specific popular list)
|
|
368
373
|
try {
|
|
369
|
-
|
|
374
|
+
const typo = ecosystem === 'pypi' ? !!findPyPITyposquatMatch(name) : quickTyposquatCheck(name);
|
|
375
|
+
if (typo) {
|
|
370
376
|
return { shouldCache: true, reason: 'typosquat_signal', retentionDays: TARBALL_CACHE_DEFAULT_RETENTION_DAYS };
|
|
371
377
|
}
|
|
372
378
|
} catch { /* non-fatal */ }
|
|
373
379
|
|
|
374
|
-
// Trigger 3: First publish (single version
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
+
// Trigger 3: First publish (single version) -- 7-day retention.
|
|
381
|
+
// npm: count from the CouchDB doc; pypi: count passed via opts.versionCount.
|
|
382
|
+
const versionCount = ecosystem === 'pypi'
|
|
383
|
+
? (Number.isFinite(opts.versionCount) ? opts.versionCount : null)
|
|
384
|
+
: (doc && doc.versions ? Object.keys(doc.versions).length : null);
|
|
385
|
+
if (versionCount === 1) {
|
|
386
|
+
return { shouldCache: true, reason: 'first_publish', retentionDays: TARBALL_CACHE_DEFAULT_RETENTION_DAYS };
|
|
380
387
|
}
|
|
381
388
|
|
|
382
389
|
return { shouldCache: false, reason: '', retentionDays: 0 };
|
package/src/monitor/ingestion.js
CHANGED
|
@@ -618,6 +618,16 @@ async function preResolvePyPIBatch(items, stats, scanQueue) {
|
|
|
618
618
|
age_days: pypiInfo.age_days,
|
|
619
619
|
version_count: pypiInfo.version_count,
|
|
620
620
|
};
|
|
621
|
+
// First-publish parity with npm: derive the cache trigger + flag from the
|
|
622
|
+
// version count (PyPI has no packument at ingest, so the count comes from
|
|
623
|
+
// the registry fetch above). Feeds tarball retention, the scan-ledger
|
|
624
|
+
// firstPublish field, and Phase 2b protected eviction. The first-publish
|
|
625
|
+
// *sandbox* stays npm-only (runSandbox can't pip-install) — gated in queue.js.
|
|
626
|
+
const trig = evaluateCacheTrigger(item.name, null, null, {
|
|
627
|
+
ecosystem: 'pypi', versionCount: pypiInfo.version_count
|
|
628
|
+
});
|
|
629
|
+
item._cacheTrigger = trig.shouldCache ? trig : null;
|
|
630
|
+
item.firstPublish = trig.reason === 'first_publish';
|
|
621
631
|
resolved++;
|
|
622
632
|
} else {
|
|
623
633
|
failed++;
|
|
@@ -1186,12 +1196,26 @@ async function pollPyPIChangelog(state, scanQueue, stats) {
|
|
|
1186
1196
|
if (isKnownIOC) {
|
|
1187
1197
|
console.log(`[MONITOR] IOC PRE-ALERT (pypi): ${ev.name} — known malicious package`);
|
|
1188
1198
|
stats.iocPreAlerts = (stats.iocPreAlerts || 0) + 1;
|
|
1189
|
-
sendIOCPreAlert(ev.name).catch(err => {
|
|
1199
|
+
sendIOCPreAlert(ev.name, ev.version, 'pypi').catch(err => {
|
|
1190
1200
|
console.error(`[MONITOR] IOC pre-alert webhook failed for ${ev.name}: ${err.message}`);
|
|
1191
1201
|
});
|
|
1192
1202
|
}
|
|
1193
1203
|
} catch { /* IOC load failure is non-fatal */ }
|
|
1194
1204
|
|
|
1205
|
+
// Campaign pre-alert (mirror of the npm Layer 1b): fire on name-pattern
|
|
1206
|
+
// matches when the package isn't already a known IOC. Campaigns can target
|
|
1207
|
+
// PyPI too; matchCampaignPattern is a pure name match, ecosystem-agnostic.
|
|
1208
|
+
if (!isKnownIOC) {
|
|
1209
|
+
const campaign = matchCampaignPattern(ev.name);
|
|
1210
|
+
if (campaign) {
|
|
1211
|
+
console.log(`[MONITOR] CAMPAIGN PRE-ALERT (pypi): ${ev.name} — matches ${campaign}`);
|
|
1212
|
+
stats.campaignPreAlerts = (stats.campaignPreAlerts || 0) + 1;
|
|
1213
|
+
sendCampaignPreAlert(ev.name, campaign, 'pypi').catch(err => {
|
|
1214
|
+
console.error(`[MONITOR] campaign pre-alert webhook failed for ${ev.name}: ${err.message}`);
|
|
1215
|
+
});
|
|
1216
|
+
}
|
|
1217
|
+
}
|
|
1218
|
+
|
|
1195
1219
|
newItems.push({
|
|
1196
1220
|
name: ev.name,
|
|
1197
1221
|
version: ev.version,
|
package/src/monitor/queue.js
CHANGED
|
@@ -651,7 +651,11 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
|
|
|
651
651
|
|
|
652
652
|
// First-publish sandbox priority: sandbox even with 0 static findings
|
|
653
653
|
// if the package is from a new/unknown maintainer without a linked repository.
|
|
654
|
+
// First-publish sandbox is npm-only: runSandbox does `npm install <name>` and
|
|
655
|
+
// cannot install PyPI sdists/wheels. PyPI first-publish items still carry the
|
|
656
|
+
// flag + cache trigger + ledger firstPublish (Phase 2a) but skip the sandbox.
|
|
654
657
|
const firstPublishSandbox = isFirstPublish &&
|
|
658
|
+
ecosystem === 'npm' &&
|
|
655
659
|
FIRST_PUBLISH_SANDBOX_ENABLED &&
|
|
656
660
|
isFirstPublishHighRisk(cacheTrigger, npmRegistryMeta) &&
|
|
657
661
|
isSandboxEnabled() && sandboxAvailable &&
|
package/src/monitor/webhook.js
CHANGED
|
@@ -150,25 +150,30 @@ function buildMonitorWebhookPayload(name, version, ecosystem, result, sandboxRes
|
|
|
150
150
|
}
|
|
151
151
|
|
|
152
152
|
/**
|
|
153
|
-
*
|
|
154
|
-
*
|
|
155
|
-
*
|
|
156
|
-
* @param {string} name - Package name matching IOC database
|
|
157
|
-
* @param {string} [version] - Version if known (from CouchDB doc)
|
|
153
|
+
* Build the registry web link for a package, ecosystem-aware. Mirrors the link
|
|
154
|
+
* logic in ghsa-poller.js so pre-alerts point at the correct registry instead of
|
|
155
|
+
* always npmjs.com (PyPI IOC pre-alerts previously mislinked to npm).
|
|
158
156
|
*/
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
if (
|
|
157
|
+
function registryLink(ecosystem, name) {
|
|
158
|
+
if (ecosystem === 'pypi') return `https://pypi.org/project/${encodeURIComponent(name)}/`;
|
|
159
|
+
if (ecosystem === 'crates') return `https://crates.io/crates/${encodeURIComponent(name)}`;
|
|
160
|
+
return `https://www.npmjs.com/package/${encodeURIComponent(name)}`;
|
|
161
|
+
}
|
|
162
162
|
|
|
163
|
-
|
|
163
|
+
/**
|
|
164
|
+
* Layer 1: Build the IOC pre-alert embed (pure \u2014 no network). Exported for tests.
|
|
165
|
+
* @param {string} name - Package name matching IOC database
|
|
166
|
+
* @param {string} [version] - Version if known
|
|
167
|
+
* @param {string} [ecosystem='npm'] - 'npm' | 'pypi' (link target)
|
|
168
|
+
*/
|
|
169
|
+
function buildIOCPreAlertEmbed(name, version, ecosystem = 'npm') {
|
|
164
170
|
const versionStr = version ? `@${version}` : '';
|
|
165
|
-
|
|
166
|
-
const payload = {
|
|
171
|
+
return {
|
|
167
172
|
embeds: [{
|
|
168
173
|
title: '\u26a0\ufe0f IOC PRE-ALERT \u2014 Known Malicious Package',
|
|
169
174
|
color: 0xe74c3c,
|
|
170
175
|
fields: [
|
|
171
|
-
{ name: 'Package', value: `[${name}${versionStr}](${
|
|
176
|
+
{ name: 'Package', value: `[${ecosystem}/${name}${versionStr}](${registryLink(ecosystem, name)})`, inline: true },
|
|
172
177
|
{ name: 'Source', value: 'IOC Database Match', inline: true },
|
|
173
178
|
{ name: 'Detection', value: 'Changes stream pre-scan', inline: true },
|
|
174
179
|
{ name: 'Status', value: 'Full scan queued \u2014 this is an early warning. Package may be unpublished before scan completes.', inline: false }
|
|
@@ -179,31 +184,35 @@ async function sendIOCPreAlert(name, version) {
|
|
|
179
184
|
timestamp: new Date().toISOString()
|
|
180
185
|
}]
|
|
181
186
|
};
|
|
182
|
-
|
|
183
|
-
await sendWebhook(url, payload, { rawPayload: true });
|
|
184
187
|
}
|
|
185
188
|
|
|
186
189
|
/**
|
|
187
|
-
* Layer
|
|
188
|
-
*
|
|
189
|
-
*
|
|
190
|
-
*
|
|
191
|
-
*
|
|
192
|
-
* @param {string}
|
|
193
|
-
* @param {string} campaign - Short campaign label (e.g. 'did-NNNN')
|
|
190
|
+
* Layer 1: Send immediate IOC pre-alert webhook when a known malicious package
|
|
191
|
+
* appears in the changes stream, BEFORE tarball download. Safety net for packages
|
|
192
|
+
* that get unpublished before scanning completes.
|
|
193
|
+
* @param {string} name - Package name matching IOC database
|
|
194
|
+
* @param {string} [version] - Version if known (from CouchDB doc)
|
|
195
|
+
* @param {string} [ecosystem='npm'] - 'npm' | 'pypi'
|
|
194
196
|
*/
|
|
195
|
-
async function
|
|
197
|
+
async function sendIOCPreAlert(name, version, ecosystem = 'npm') {
|
|
196
198
|
const url = getWebhookUrl();
|
|
197
199
|
if (!url) return;
|
|
200
|
+
await sendWebhook(url, buildIOCPreAlertEmbed(name, version, ecosystem), { rawPayload: true });
|
|
201
|
+
}
|
|
198
202
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
203
|
+
/**
|
|
204
|
+
* Layer 1b: Build the campaign pre-alert embed (pure \u2014 no network). Exported for tests.
|
|
205
|
+
* @param {string} name - Package name that matched the campaign pattern
|
|
206
|
+
* @param {string} campaign - Short campaign label (e.g. 'did-NNNN')
|
|
207
|
+
* @param {string} [ecosystem='npm'] - 'npm' | 'pypi' (link target)
|
|
208
|
+
*/
|
|
209
|
+
function buildCampaignPreAlertEmbed(name, campaign, ecosystem = 'npm') {
|
|
210
|
+
return {
|
|
202
211
|
embeds: [{
|
|
203
212
|
title: '\u26a0\ufe0f CAMPAIGN PRE-ALERT \u2014 Suspected Active Campaign',
|
|
204
213
|
color: 0xe67e22,
|
|
205
214
|
fields: [
|
|
206
|
-
{ name: 'Package', value: `[${name}](${
|
|
215
|
+
{ name: 'Package', value: `[${ecosystem}/${name}](${registryLink(ecosystem, name)})`, inline: true },
|
|
207
216
|
{ name: 'Source', value: `Name pattern: ${campaign}`, inline: true },
|
|
208
217
|
{ name: 'Detection', value: 'Changes stream pre-scan', inline: true },
|
|
209
218
|
{ name: 'Status', value: 'Suspected campaign publication \u2014 not yet confirmed malicious. Full scan queued; treat as suspect until verdict lands.', inline: false }
|
|
@@ -214,8 +223,21 @@ async function sendCampaignPreAlert(name, campaign) {
|
|
|
214
223
|
timestamp: new Date().toISOString()
|
|
215
224
|
}]
|
|
216
225
|
};
|
|
226
|
+
}
|
|
217
227
|
|
|
218
|
-
|
|
228
|
+
/**
|
|
229
|
+
* Layer 1b: Send a campaign pre-alert webhook when a package name matches an
|
|
230
|
+
* active-campaign pattern (e.g. `did-NNNN`). Fires BEFORE tarball download \u2014 IOC
|
|
231
|
+
* lists lag the campaign by hours to days, so name-pattern watch is the only
|
|
232
|
+
* real-time signal while the campaign is in flight.
|
|
233
|
+
* @param {string} name - Package name that matched the campaign pattern
|
|
234
|
+
* @param {string} campaign - Short campaign label (e.g. 'did-NNNN')
|
|
235
|
+
* @param {string} [ecosystem='npm'] - 'npm' | 'pypi'
|
|
236
|
+
*/
|
|
237
|
+
async function sendCampaignPreAlert(name, campaign, ecosystem = 'npm') {
|
|
238
|
+
const url = getWebhookUrl();
|
|
239
|
+
if (!url) return;
|
|
240
|
+
await sendWebhook(url, buildCampaignPreAlertEmbed(name, campaign, ecosystem), { rawPayload: true });
|
|
219
241
|
}
|
|
220
242
|
|
|
221
243
|
/**
|
|
@@ -1372,7 +1394,10 @@ module.exports = {
|
|
|
1372
1394
|
getWebhookThreshold,
|
|
1373
1395
|
shouldSendWebhook,
|
|
1374
1396
|
buildMonitorWebhookPayload,
|
|
1397
|
+
registryLink,
|
|
1398
|
+
buildIOCPreAlertEmbed,
|
|
1375
1399
|
sendIOCPreAlert,
|
|
1400
|
+
buildCampaignPreAlertEmbed,
|
|
1376
1401
|
sendCampaignPreAlert,
|
|
1377
1402
|
matchVersionedIOC,
|
|
1378
1403
|
computeRiskLevel,
|
package/src/scanner/typosquat.js
CHANGED
|
@@ -764,4 +764,80 @@ function findPyPITyposquatMatch(name) {
|
|
|
764
764
|
return null;
|
|
765
765
|
}
|
|
766
766
|
|
|
767
|
-
|
|
767
|
+
// ============================================
|
|
768
|
+
// crates.io (Rust) TYPOSQUATTING — Phase 4
|
|
769
|
+
// ============================================
|
|
770
|
+
// Pre-alert enrichment ONLY: flags when an incoming crate name (from the GHSA rust
|
|
771
|
+
// malware feed) typosquats a popular crate. No crates ingestion / build.rs / scan-time
|
|
772
|
+
// Cargo parsing (non-goal). Mirrors the PyPI block above.
|
|
773
|
+
|
|
774
|
+
// Top crates.io packages by downloads (typosquat targets). Hardcoded snapshot.
|
|
775
|
+
const POPULAR_CRATES = [
|
|
776
|
+
'serde', 'serde_json', 'serde_derive', 'serde_yaml', 'syn', 'quote', 'proc-macro2',
|
|
777
|
+
'libc', 'rand', 'rand_core', 'log', 'cfg-if', 'bitflags', 'itertools', 'once_cell',
|
|
778
|
+
'lazy_static', 'regex', 'regex-syntax', 'aho-corasick', 'base64', 'num-traits',
|
|
779
|
+
'unicode-ident', 'tokio', 'tokio-util', 'futures', 'futures-util', 'bytes',
|
|
780
|
+
'hashbrown', 'smallvec', 'parking_lot', 'anyhow', 'thiserror', 'indexmap', 'memchr',
|
|
781
|
+
'chrono', 'semver', 'getrandom', 'clap', 'time', 'uuid', 'hyper', 'reqwest',
|
|
782
|
+
'async-trait', 'tracing', 'tracing-core', 'tracing-subscriber', 'url',
|
|
783
|
+
'percent-encoding', 'idna', 'socket2', 'httparse', 'tower', 'rayon', 'num_cpus',
|
|
784
|
+
'either', 'toml', 'winapi', 'windows-sys', 'env_logger', 'generic-array', 'digest',
|
|
785
|
+
'sha2', 'typenum', 'subtle', 'rustls', 'ring', 'openssl', 'flate2', 'miniz_oxide',
|
|
786
|
+
'crc32fast', 'walkdir', 'tempfile', 'dirs', 'nix', 'backtrace', 'scopeguard',
|
|
787
|
+
'pin-project', 'pin-project-lite', 'slab', 'lock_api', 'crossbeam-utils',
|
|
788
|
+
'crossbeam-channel', 'crossbeam-epoch', 'ahash', 'fnv', 'mio', 'h2', 'http'
|
|
789
|
+
];
|
|
790
|
+
|
|
791
|
+
// crates.io treats '-' and '_' as equivalent and is case-insensitive for name
|
|
792
|
+
// uniqueness; normalize the same way for typosquat comparison.
|
|
793
|
+
function normalizeCrate(name) {
|
|
794
|
+
return name.toLowerCase().replace(/[-_]+/g, '-');
|
|
795
|
+
}
|
|
796
|
+
|
|
797
|
+
const POPULAR_CRATES_NORMALIZED = POPULAR_CRATES.map(normalizeCrate);
|
|
798
|
+
const POPULAR_CRATES_SET = new Set(POPULAR_CRATES_NORMALIZED);
|
|
799
|
+
|
|
800
|
+
// Legitimate crates within edit-distance of a popular crate but not squats.
|
|
801
|
+
const CRATES_WHITELIST = new Set([
|
|
802
|
+
'mime', // distance 1 from 'time' — both real & popular
|
|
803
|
+
'rand-chacha', // rand ecosystem sibling (normalized)
|
|
804
|
+
'serde-with', // serde ecosystem sibling
|
|
805
|
+
'futures-core',
|
|
806
|
+
]);
|
|
807
|
+
|
|
808
|
+
const MIN_CRATE_LENGTH = 4;
|
|
809
|
+
|
|
810
|
+
/**
|
|
811
|
+
* Find a crates.io typosquat match (Levenshtein over the popular-crate list).
|
|
812
|
+
* Pure + IOC-independent. Used by the GHSA rust pre-alert to enrich the embed.
|
|
813
|
+
*
|
|
814
|
+
* @param {string} name - crate name
|
|
815
|
+
* @returns {{original: string, type: string, distance: number}|null}
|
|
816
|
+
*/
|
|
817
|
+
function findCratesTyposquatMatch(name) {
|
|
818
|
+
if (typeof name !== 'string' || !name) return null;
|
|
819
|
+
const normalized = normalizeCrate(name);
|
|
820
|
+
|
|
821
|
+
if (POPULAR_CRATES_SET.has(normalized)) return null; // it IS a popular crate
|
|
822
|
+
if (CRATES_WHITELIST.has(normalized)) return null;
|
|
823
|
+
if (normalized.length < MIN_CRATE_LENGTH) return null;
|
|
824
|
+
|
|
825
|
+
for (let i = 0; i < POPULAR_CRATES.length; i++) {
|
|
826
|
+
const popularNorm = POPULAR_CRATES_NORMALIZED[i];
|
|
827
|
+
const popular = POPULAR_CRATES[i];
|
|
828
|
+
if (normalized === popularNorm) continue;
|
|
829
|
+
if (popularNorm.length < MIN_CRATE_LENGTH) continue;
|
|
830
|
+
if (Math.abs(normalized.length - popularNorm.length) > 2) continue;
|
|
831
|
+
|
|
832
|
+
const distance = levenshteinDistance(normalized, popularNorm);
|
|
833
|
+
if (distance === 1) {
|
|
834
|
+
return { original: popular, type: detectTyposquatType(normalized, popularNorm), distance };
|
|
835
|
+
}
|
|
836
|
+
if (distance === 2 && popularNorm.length >= 5) {
|
|
837
|
+
return { original: popular, type: detectTyposquatType(normalized, popularNorm), distance };
|
|
838
|
+
}
|
|
839
|
+
}
|
|
840
|
+
return null;
|
|
841
|
+
}
|
|
842
|
+
|
|
843
|
+
module.exports = { scanTyposquatting, levenshteinDistance, clearMetadataCache, findPyPITyposquatMatch, findCratesTyposquatMatch, findTyposquatMatch };
|