muaddib-scanner 2.11.74 → 2.11.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/{self-scan-v2.11.74.json → self-scan-v2.11.76.json} +1 -1
- package/src/ioc/ghsa-poller.js +26 -12
- package/src/monitor/ingestion.js +12 -5
- package/src/monitor/queue.js +36 -0
- package/src/monitor/scan-queue.js +32 -6
- package/src/monitor/webhook.js +39 -0
- package/src/scanner/typosquat.js +77 -1
package/package.json
CHANGED
package/src/ioc/ghsa-poller.js
CHANGED
|
@@ -31,7 +31,7 @@ const path = require('path');
|
|
|
31
31
|
const https = require('https');
|
|
32
32
|
|
|
33
33
|
const GHSA_API_HOST = 'api.github.com';
|
|
34
|
-
const GHSA_ECOSYSTEMS = ['npm', 'pypi'];
|
|
34
|
+
const GHSA_ECOSYSTEMS = ['npm', 'pypi', 'crates'];
|
|
35
35
|
const GHSA_CURSOR_FILE = process.env.MUADDIB_GHSA_CURSOR_FILE ||
|
|
36
36
|
path.join(__dirname, '..', '..', 'data', 'ghsa-cursor.json');
|
|
37
37
|
const GHSA_MALWARE_FILE = process.env.MUADDIB_GHSA_MALWARE_FILE ||
|
|
@@ -84,9 +84,10 @@ function _httpGetJson(pathName, { token, httpImpl = https, timeoutMs = 20_000 }
|
|
|
84
84
|
*/
|
|
85
85
|
async function _defaultFetch(ecosystem, opts = {}) {
|
|
86
86
|
const token = opts.token || process.env.GITHUB_TOKEN || process.env.GH_TOKEN || null;
|
|
87
|
-
// GHSA names the Python ecosystem "pip" (not "pypi")
|
|
88
|
-
//
|
|
89
|
-
|
|
87
|
+
// GHSA names the Python ecosystem "pip" (not "pypi") and Rust "rust" (we call it
|
|
88
|
+
// "crates") in BOTH the query and the response; querying ecosystem=pypi returns HTTP
|
|
89
|
+
// 422. Map our internal name to GHSA's for the query.
|
|
90
|
+
const apiEco = ecosystem === 'pypi' ? 'pip' : ecosystem === 'crates' ? 'rust' : ecosystem;
|
|
90
91
|
const p = `/advisories?type=malware&ecosystem=${encodeURIComponent(apiEco)}&per_page=100&sort=updated&direction=desc`;
|
|
91
92
|
const { status, json } = await _httpGetJson(p, { token, httpImpl: opts.httpImpl });
|
|
92
93
|
if (status !== 200 || !Array.isArray(json)) {
|
|
@@ -112,7 +113,7 @@ function _nextLink(linkHeader) {
|
|
|
112
113
|
async function fetchAllGhsaMalware(ecosystem, opts = {}) {
|
|
113
114
|
const token = opts.token || process.env.GITHUB_TOKEN || process.env.GH_TOKEN || null;
|
|
114
115
|
const maxPages = Number.isFinite(opts.maxPages) ? opts.maxPages : 30;
|
|
115
|
-
const apiEco = ecosystem === 'pypi' ? 'pip' : ecosystem;
|
|
116
|
+
const apiEco = ecosystem === 'pypi' ? 'pip' : ecosystem === 'crates' ? 'rust' : ecosystem;
|
|
116
117
|
let pathName = `/advisories?type=malware&ecosystem=${encodeURIComponent(apiEco)}&per_page=100&sort=published&direction=desc`;
|
|
117
118
|
const rows = [];
|
|
118
119
|
for (let page = 0; page < maxPages && pathName; page++) {
|
|
@@ -141,6 +142,7 @@ function parseAdvisory(adv, ecosystems = GHSA_ECOSYSTEMS) {
|
|
|
141
142
|
if (!pkg || !pkg.name || !pkg.ecosystem) continue;
|
|
142
143
|
let eco = String(pkg.ecosystem).toLowerCase();
|
|
143
144
|
if (eco === 'pip') eco = 'pypi'; // normalize GHSA's "pip" to our internal "pypi"
|
|
145
|
+
else if (eco === 'rust') eco = 'crates'; // normalize GHSA's "rust" to our internal "crates"
|
|
144
146
|
if (ecosystems && !ecosystems.includes(eco)) continue;
|
|
145
147
|
out.push({
|
|
146
148
|
ghsa_id: adv.ghsa_id,
|
|
@@ -210,17 +212,29 @@ function _maybeCompactMalware(file) {
|
|
|
210
212
|
function buildGhsaPreAlertEmbed(row) {
|
|
211
213
|
const link = row.ecosystem === 'pypi'
|
|
212
214
|
? `https://pypi.org/project/${encodeURIComponent(row.name)}/`
|
|
213
|
-
: `https://www.npmjs.com/package/${encodeURIComponent(row.name)}`;
|
|
215
|
+
: row.ecosystem === 'crates'
|
|
216
|
+
? `https://crates.io/crates/${encodeURIComponent(row.name)}`
|
|
217
|
+
: `https://www.npmjs.com/package/${encodeURIComponent(row.name)}`;
|
|
218
|
+
const fields = [
|
|
219
|
+
{ name: 'Package', value: `[${row.ecosystem}/${row.name}](${link})`, inline: true },
|
|
220
|
+
{ name: 'Range', value: String(row.versionRange || '*'), inline: true },
|
|
221
|
+
{ name: 'Advisory', value: `[${row.ghsa_id}](https://github.com/advisories/${row.ghsa_id})`, inline: true },
|
|
222
|
+
{ name: 'Source', value: 'GitHub Advisory DB (type=malware) — active poller', inline: false }
|
|
223
|
+
];
|
|
224
|
+
// crates enrichment: flag if the malicious crate name typosquats a popular crate.
|
|
225
|
+
// Lazy require keeps the poller light; findCratesTyposquatMatch is pure.
|
|
226
|
+
if (row.ecosystem === 'crates') {
|
|
227
|
+
try {
|
|
228
|
+
const { findCratesTyposquatMatch } = require('../scanner/typosquat.js');
|
|
229
|
+
const m = findCratesTyposquatMatch(row.name);
|
|
230
|
+
if (m) fields.push({ name: 'Typosquat', value: `looks like \`${m.original}\` (distance ${m.distance})`, inline: true });
|
|
231
|
+
} catch { /* enrichment is best-effort */ }
|
|
232
|
+
}
|
|
214
233
|
return {
|
|
215
234
|
embeds: [{
|
|
216
235
|
title: '⚠️ GHSA PRE-ALERT — Fresh Malware Advisory',
|
|
217
236
|
color: 0xe74c3c,
|
|
218
|
-
fields: [
|
|
219
|
-
{ name: 'Package', value: `[${row.ecosystem}/${row.name}](${link})`, inline: true },
|
|
220
|
-
{ name: 'Range', value: String(row.versionRange || '*'), inline: true },
|
|
221
|
-
{ name: 'Advisory', value: `[${row.ghsa_id}](https://github.com/advisories/${row.ghsa_id})`, inline: true },
|
|
222
|
-
{ name: 'Source', value: 'GitHub Advisory DB (type=malware) — active poller', inline: false }
|
|
223
|
-
],
|
|
237
|
+
fields,
|
|
224
238
|
footer: { text: `MUAD'DIB GHSA Pre-Alert | ${new Date().toISOString().replace('T', ' ').replace(/\.\d+Z$/, ' UTC')}` },
|
|
225
239
|
timestamp: new Date().toISOString()
|
|
226
240
|
}]
|
package/src/monitor/ingestion.js
CHANGED
|
@@ -370,7 +370,8 @@ const RECENT_PUBLISH_MAX = 5;
|
|
|
370
370
|
* @returns {Object|null} - {
|
|
371
371
|
* version, tarball, unpackedSize, scripts, homepage, description,
|
|
372
372
|
* latestTagVersion, // dist-tags.latest (may differ from `version` under ATO)
|
|
373
|
-
* recentVersions: [{ version, tarball, unpackedSize, scripts }, ...]
|
|
373
|
+
* recentVersions: [{ version, tarball, unpackedSize, scripts }, ...], // capped at maxRecent
|
|
374
|
+
* recentWindowCount, // TRUE (uncapped) count of versions in the window (Phase 2b burst)
|
|
374
375
|
* } or null if no usable version found
|
|
375
376
|
*/
|
|
376
377
|
function selectMostRecentVersion(packument, options = {}) {
|
|
@@ -419,14 +420,19 @@ function selectMostRecentVersion(packument, options = {}) {
|
|
|
419
420
|
recentVersions: [],
|
|
420
421
|
};
|
|
421
422
|
|
|
422
|
-
// Burst extras: other versions published within the recent window, excluding
|
|
423
|
-
//
|
|
424
|
-
//
|
|
423
|
+
// Burst extras: other versions published within the recent window, excluding the
|
|
424
|
+
// most-recent one. The enqueue list is bounded by maxRecent, but recentWindowCount is
|
|
425
|
+
// the TRUE (uncapped) number of versions in the window — Phase 2b burst detection uses it
|
|
426
|
+
// so a 96-version Miasma burst is distinguishable from a legit 3-5 patch-release day (the
|
|
427
|
+
// capped list alone tops out at maxRecent+1 and can't tell them apart).
|
|
428
|
+
result.recentWindowCount = 1; // includes the most-recent version itself
|
|
425
429
|
if (versionTimes.length > 1) {
|
|
426
430
|
const cutoff = versionTimes[0][1] - recentWindowMs;
|
|
427
|
-
for (let i = 1; i < versionTimes.length
|
|
431
|
+
for (let i = 1; i < versionTimes.length; i++) {
|
|
428
432
|
const [v, ts] = versionTimes[i];
|
|
429
433
|
if (ts < cutoff) break; // sorted desc, so once we cross the cutoff we're done
|
|
434
|
+
result.recentWindowCount++;
|
|
435
|
+
if (result.recentVersions.length >= maxRecent) continue; // enqueue list capped; count continues
|
|
430
436
|
const vData = versions[v];
|
|
431
437
|
if (!vData) continue;
|
|
432
438
|
result.recentVersions.push({
|
|
@@ -819,6 +825,7 @@ async function pollNpmChanges(state, scanQueue, stats) {
|
|
|
819
825
|
unpackedSize: docMeta ? docMeta.unpackedSize : 0,
|
|
820
826
|
registryScripts: docMeta ? docMeta.scripts : null,
|
|
821
827
|
_cacheTrigger: cacheTrigger.shouldCache ? cacheTrigger : null,
|
|
828
|
+
firstPublish: cacheTrigger.shouldCache && cacheTrigger.reason === 'first_publish',
|
|
822
829
|
isIOCMatch: isKnownIOC
|
|
823
830
|
});
|
|
824
831
|
queued++;
|
package/src/monitor/queue.js
CHANGED
|
@@ -57,6 +57,7 @@ const {
|
|
|
57
57
|
buildAlertData,
|
|
58
58
|
persistAlert,
|
|
59
59
|
sendIOCPreAlert,
|
|
60
|
+
sendBurstPreAlert,
|
|
60
61
|
matchVersionedIOC,
|
|
61
62
|
buildCanaryExfiltrationWebhookEmbed,
|
|
62
63
|
getWebhookUrl,
|
|
@@ -130,6 +131,21 @@ const RECENTLY_SCANNED_MAX = 50_000; // FIFO cap for the dedup Set (P0c — boun
|
|
|
130
131
|
const FIRST_PUBLISH_SANDBOX_MAX_QUEUE = parseInt(process.env.MUADDIB_FIRST_PUBLISH_SANDBOX_MAX_QUEUE, 10) || 10;
|
|
131
132
|
const FIRST_PUBLISH_SANDBOX_ENABLED = process.env.MUADDIB_FIRST_PUBLISH_SANDBOX !== '0';
|
|
132
133
|
|
|
134
|
+
// Phase 2b: burst (Miasma) pre-alert. A burst = >= this many versions of ONE name in the
|
|
135
|
+
// recent-publish window (the TRUE uncapped count, selectMostRecentVersion.recentWindowCount).
|
|
136
|
+
// Default 10: detection is PER-NAME, so legit multi-PLATFORM publishers (different names,
|
|
137
|
+
// e.g. @opencode-ai/cli-*-* binaries) are never caught; legit same-name release days rarely
|
|
138
|
+
// reach 10; Miasma's 96-in-72s clears it easily. Per-name + deduped + non-scoring (Discord
|
|
139
|
+
// heads-up only, no FPR impact). Env-tunable up if a feed proves noisy.
|
|
140
|
+
const BURST_PREALERT_MIN_VERSIONS = (() => {
|
|
141
|
+
const n = parseInt(process.env.MUADDIB_BURST_MIN_VERSIONS, 10);
|
|
142
|
+
return Number.isFinite(n) && n >= 2 ? n : 10;
|
|
143
|
+
})();
|
|
144
|
+
// Dedup burst pings: one per name per process window (bounded — cleared at the cap so it
|
|
145
|
+
// can never grow without limit, CLAUDE.md §2).
|
|
146
|
+
const _burstAlerted = new Set();
|
|
147
|
+
const BURST_ALERTED_MAX = 20_000;
|
|
148
|
+
|
|
133
149
|
// Stage 3 — sandbox gate. Static-score threshold below which T1b/T2 packages
|
|
134
150
|
// are NOT sandboxed (static result alone is authoritative). Tightens the prior
|
|
135
151
|
// "T1b sandbox if score >= 25 or queue < 20" to remove low-signal sandbox runs
|
|
@@ -1429,6 +1445,25 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
|
|
|
1429
1445
|
// only scan whichever version happened to be the most recent at resolution
|
|
1430
1446
|
// time, racing the publish stream.
|
|
1431
1447
|
const recents = Array.isArray(npmInfo.recentVersions) ? npmInfo.recentVersions : [];
|
|
1448
|
+
// Phase 2b: burst = TRUE count of versions of this name in the recent window
|
|
1449
|
+
// (uncapped recentWindowCount), NOT the capped extras list — so a 96-version Miasma
|
|
1450
|
+
// burst is distinguishable from a legit multi-version day. At/above the threshold,
|
|
1451
|
+
// flag the item (protects it + its extras from queue-cap eviction) and fire ONE
|
|
1452
|
+
// burst pre-alert per name (deduped, bounded).
|
|
1453
|
+
const burstCount = Number.isFinite(npmInfo.recentWindowCount) ? npmInfo.recentWindowCount : (recents.length + 1);
|
|
1454
|
+
const isBurst = burstCount >= BURST_PREALERT_MIN_VERSIONS;
|
|
1455
|
+
if (isBurst) {
|
|
1456
|
+
item.isBurst = true;
|
|
1457
|
+
if (!_burstAlerted.has(item.name)) {
|
|
1458
|
+
if (_burstAlerted.size >= BURST_ALERTED_MAX) _burstAlerted.clear();
|
|
1459
|
+
_burstAlerted.add(item.name);
|
|
1460
|
+
stats.burstPreAlerts = (stats.burstPreAlerts || 0) + 1;
|
|
1461
|
+
console.log(`[MONITOR] BURST PRE-ALERT: ${item.name} — ${burstCount} versions in the recent window`);
|
|
1462
|
+
sendBurstPreAlert(item.name, burstCount, item.ecosystem).catch(err => {
|
|
1463
|
+
console.error(`[MONITOR] burst pre-alert webhook failed for ${item.name}: ${err.message}`);
|
|
1464
|
+
});
|
|
1465
|
+
}
|
|
1466
|
+
}
|
|
1432
1467
|
for (const recent of recents) {
|
|
1433
1468
|
if (!recent || !recent.tarball || !recent.version) continue;
|
|
1434
1469
|
const dedupeKey = `${item.name}@${recent.version}`;
|
|
@@ -1441,6 +1476,7 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
|
|
|
1441
1476
|
unpackedSize: recent.unpackedSize || 0,
|
|
1442
1477
|
registryScripts: recent.scripts || null,
|
|
1443
1478
|
atoSignal: item.atoSignal === true,
|
|
1479
|
+
isBurst,
|
|
1444
1480
|
isATOBurstExtra: true,
|
|
1445
1481
|
}, stats);
|
|
1446
1482
|
}
|
package/src/monitor/scan-queue.js
CHANGED
|
@@ -24,32 +24,58 @@ const MAX_SCAN_QUEUE = (() => {
|
|
|
24
24
|
const HARD_DROP_LOG_INTERVAL_MS = 10_000;
|
|
25
25
|
let _lastHardDropLog = 0;
|
|
26
26
|
|
|
27
|
+
// Phase 2b: classes we never want to drop blindly when the queue caps out — the
|
|
28
|
+
// specifically-targeted scans (known-malicious, burst/ATO, first-publish). Eviction drops
|
|
29
|
+
// the oldest UNPROTECTED item instead; only if a bounded head-window is entirely protected
|
|
30
|
+
// do we fall back to strict-oldest (still ledgered, with a distinct source).
|
|
31
|
+
function _isProtected(item) {
|
|
32
|
+
return !!(item && (item.isIOCMatch || item.isBurst || item.firstPublish || item.atoSignal || item.isATOBurstExtra));
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// How far from the head we scan for an unprotected victim. Protected items are a small
|
|
36
|
+
// fraction of the flood, so a victim is almost always found within a few slots; the bound
|
|
37
|
+
// keeps eviction O(window) under sustained overflow (CLAUDE.md §2 bounded resources).
|
|
38
|
+
const PROTECTED_EVICTION_SCAN_MAX = (() => {
|
|
39
|
+
const v = parseInt(process.env.MUADDIB_PROTECTED_EVICTION_SCAN_MAX, 10);
|
|
40
|
+
return Number.isFinite(v) && v > 0 ? v : 1024;
|
|
41
|
+
})();
|
|
42
|
+
|
|
27
43
|
/**
|
|
28
|
-
* Push an item onto the scan queue, enforcing the hard cap
|
|
29
|
-
*
|
|
30
|
-
*
|
|
44
|
+
* Push an item onto the scan queue, enforcing the hard cap when at capacity. Evicts the
|
|
45
|
+
* oldest UNPROTECTED item (within a bounded head-window), falling back to strict-oldest if
|
|
46
|
+
* that window is all-protected. `max` defaults to MAX_SCAN_QUEUE (overridable for tests).
|
|
47
|
+
* Returns true iff an item was dropped to make room.
|
|
31
48
|
*/
|
|
32
49
|
function enqueueScan(scanQueue, item, stats, max = MAX_SCAN_QUEUE) {
|
|
33
50
|
let dropped = false;
|
|
34
51
|
if (scanQueue.length >= max) {
|
|
35
|
-
|
|
52
|
+
// Victim = oldest unprotected item within the bounded head-window; else strict oldest.
|
|
53
|
+
let victimIdx = -1;
|
|
54
|
+
const scanLimit = Math.min(scanQueue.length, PROTECTED_EVICTION_SCAN_MAX);
|
|
55
|
+
for (let i = 0; i < scanLimit; i++) {
|
|
56
|
+
if (!_isProtected(scanQueue[i])) { victimIdx = i; break; }
|
|
57
|
+
}
|
|
58
|
+
const protectedFallback = victimIdx === -1;
|
|
59
|
+
const evicted = protectedFallback ? scanQueue.shift() : scanQueue.splice(victimIdx, 1)[0];
|
|
36
60
|
dropped = true;
|
|
37
61
|
if (stats) stats.queueHardDrops = (stats.queueHardDrops || 0) + 1;
|
|
38
62
|
// Phase 0a: record the dropped item so a coverage loss keeps an identity — answers
|
|
39
63
|
// "which versions were never scanned" (e.g. the Miasma 72s/96-version burst). Lazy
|
|
40
64
|
// require avoids any top-level coupling with state.js; best-effort, never throws.
|
|
65
|
+
// A dropped PROTECTED item (all-protected head-window) gets a distinct source so the
|
|
66
|
+
// rare case stays visible in the 0b ledger rollup.
|
|
41
67
|
try {
|
|
42
68
|
if (evicted && evicted.name) {
|
|
43
69
|
require('./state.js').appendScanLedger({
|
|
44
70
|
name: evicted.name, version: evicted.version, ecosystem: evicted.ecosystem,
|
|
45
|
-
outcome: 'dropped', source: 'queue_cap'
|
|
71
|
+
outcome: 'dropped', source: protectedFallback ? 'queue_cap_protected' : 'queue_cap'
|
|
46
72
|
});
|
|
47
73
|
}
|
|
48
74
|
} catch { /* ledger is best-effort */ }
|
|
49
75
|
const now = Date.now();
|
|
50
76
|
if (now - _lastHardDropLog > HARD_DROP_LOG_INTERVAL_MS) {
|
|
51
77
|
_lastHardDropLog = now;
|
|
52
|
-
console.warn(`[MONITOR] QUEUE_HARD_DROP: scan queue at cap ${max} — dropping oldest item(s) (total dropped this session: ${stats ? stats.queueHardDrops : '?'}). Ingestion is outrunning scanning.`);
|
|
78
|
+
console.warn(`[MONITOR] QUEUE_HARD_DROP: scan queue at cap ${max} — dropping ${protectedFallback ? 'OLDEST (head-window all protected)' : 'oldest unprotected'} item(s) (total dropped this session: ${stats ? stats.queueHardDrops : '?'}). Ingestion is outrunning scanning.`);
|
|
53
79
|
}
|
|
54
80
|
}
|
|
55
81
|
scanQueue.push(item);
|
package/src/monitor/webhook.js
CHANGED
|
@@ -240,6 +240,43 @@ async function sendCampaignPreAlert(name, campaign, ecosystem = 'npm') {
|
|
|
240
240
|
await sendWebhook(url, buildCampaignPreAlertEmbed(name, campaign, ecosystem), { rawPayload: true });
|
|
241
241
|
}
|
|
242
242
|
|
|
243
|
+
/**
|
|
244
|
+
* Layer 1c: Build the burst pre-alert embed (pure — no network). Exported for tests.
|
|
245
|
+
* Fires when ≥K versions of one package land in a short window (account-takeover /
|
|
246
|
+
* "Miasma" burst-publish). Amber to distinguish from IOC (red) and campaign (orange).
|
|
247
|
+
* @param {string} name - Package name
|
|
248
|
+
* @param {number} count - Number of versions seen in the burst window
|
|
249
|
+
* @param {string} [ecosystem='npm'] - 'npm' | 'pypi' | 'crates' (link target)
|
|
250
|
+
*/
|
|
251
|
+
function buildBurstPreAlertEmbed(name, count, ecosystem = 'npm') {
|
|
252
|
+
return {
|
|
253
|
+
embeds: [{
|
|
254
|
+
title: '⚠️ BURST PRE-ALERT — Rapid Multi-Version Publish',
|
|
255
|
+
color: 0xf39c12,
|
|
256
|
+
fields: [
|
|
257
|
+
{ name: 'Package', value: `[${ecosystem}/${name}](${registryLink(ecosystem, name)})`, inline: true },
|
|
258
|
+
{ name: 'Versions', value: `${count} in a short window`, inline: true },
|
|
259
|
+
{ name: 'Detection', value: 'Burst-publish (possible ATO / Miasma)', inline: true },
|
|
260
|
+
{ name: 'Status', value: 'Multiple versions published rapidly — every version queued for scan and protected from queue-cap eviction. Treat as suspect until verdicts land.', inline: false }
|
|
261
|
+
],
|
|
262
|
+
footer: {
|
|
263
|
+
text: `MUAD'DIB Burst Pre-Alert | ${new Date().toISOString().replace('T', ' ').replace(/\.\d+Z$/, ' UTC')}`
|
|
264
|
+
},
|
|
265
|
+
timestamp: new Date().toISOString()
|
|
266
|
+
}]
|
|
267
|
+
};
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
/**
|
|
271
|
+
* Layer 1c: Send a burst pre-alert webhook. Fire-and-forget; callers dedupe per
|
|
272
|
+
* name/window so a burst pings once, not once per version.
|
|
273
|
+
*/
|
|
274
|
+
async function sendBurstPreAlert(name, count, ecosystem = 'npm') {
|
|
275
|
+
const url = getWebhookUrl();
|
|
276
|
+
if (!url) return;
|
|
277
|
+
await sendWebhook(url, buildBurstPreAlertEmbed(name, count, ecosystem), { rawPayload: true });
|
|
278
|
+
}
|
|
279
|
+
|
|
243
280
|
/**
|
|
244
281
|
* Check if a specific package@version matches a versioned IOC entry.
|
|
245
282
|
* Returns the matching IOC entry or null.
|
|
@@ -1399,6 +1436,8 @@ module.exports = {
|
|
|
1399
1436
|
sendIOCPreAlert,
|
|
1400
1437
|
buildCampaignPreAlertEmbed,
|
|
1401
1438
|
sendCampaignPreAlert,
|
|
1439
|
+
buildBurstPreAlertEmbed,
|
|
1440
|
+
sendBurstPreAlert,
|
|
1402
1441
|
matchVersionedIOC,
|
|
1403
1442
|
computeRiskLevel,
|
|
1404
1443
|
computeRiskScore,
|
package/src/scanner/typosquat.js
CHANGED
|
@@ -764,4 +764,80 @@ function findPyPITyposquatMatch(name) {
|
|
|
764
764
|
return null;
|
|
765
765
|
}
|
|
766
766
|
|
|
767
|
-
|
|
767
|
+
// ============================================
|
|
768
|
+
// crates.io (Rust) TYPOSQUATTING — Phase 4
|
|
769
|
+
// ============================================
|
|
770
|
+
// Pre-alert enrichment ONLY: flags when an incoming crate name (from the GHSA rust
|
|
771
|
+
// malware feed) typosquats a popular crate. No crates ingestion / build.rs / scan-time
|
|
772
|
+
// Cargo parsing (non-goal). Mirrors the PyPI block above.
|
|
773
|
+
|
|
774
|
+
// Top crates.io packages by downloads (typosquat targets). Hardcoded snapshot.
|
|
775
|
+
const POPULAR_CRATES = [
|
|
776
|
+
'serde', 'serde_json', 'serde_derive', 'serde_yaml', 'syn', 'quote', 'proc-macro2',
|
|
777
|
+
'libc', 'rand', 'rand_core', 'log', 'cfg-if', 'bitflags', 'itertools', 'once_cell',
|
|
778
|
+
'lazy_static', 'regex', 'regex-syntax', 'aho-corasick', 'base64', 'num-traits',
|
|
779
|
+
'unicode-ident', 'tokio', 'tokio-util', 'futures', 'futures-util', 'bytes',
|
|
780
|
+
'hashbrown', 'smallvec', 'parking_lot', 'anyhow', 'thiserror', 'indexmap', 'memchr',
|
|
781
|
+
'chrono', 'semver', 'getrandom', 'clap', 'time', 'uuid', 'hyper', 'reqwest',
|
|
782
|
+
'async-trait', 'tracing', 'tracing-core', 'tracing-subscriber', 'url',
|
|
783
|
+
'percent-encoding', 'idna', 'socket2', 'httparse', 'tower', 'rayon', 'num_cpus',
|
|
784
|
+
'either', 'toml', 'winapi', 'windows-sys', 'env_logger', 'generic-array', 'digest',
|
|
785
|
+
'sha2', 'typenum', 'subtle', 'rustls', 'ring', 'openssl', 'flate2', 'miniz_oxide',
|
|
786
|
+
'crc32fast', 'walkdir', 'tempfile', 'dirs', 'nix', 'backtrace', 'scopeguard',
|
|
787
|
+
'pin-project', 'pin-project-lite', 'slab', 'lock_api', 'crossbeam-utils',
|
|
788
|
+
'crossbeam-channel', 'crossbeam-epoch', 'ahash', 'fnv', 'mio', 'h2', 'http'
|
|
789
|
+
];
|
|
790
|
+
|
|
791
|
+
// crates.io treats '-' and '_' as equivalent and is case-insensitive for name
|
|
792
|
+
// uniqueness; normalize the same way for typosquat comparison.
|
|
793
|
+
function normalizeCrate(name) {
|
|
794
|
+
return name.toLowerCase().replace(/[-_]+/g, '-');
|
|
795
|
+
}
|
|
796
|
+
|
|
797
|
+
const POPULAR_CRATES_NORMALIZED = POPULAR_CRATES.map(normalizeCrate);
|
|
798
|
+
const POPULAR_CRATES_SET = new Set(POPULAR_CRATES_NORMALIZED);
|
|
799
|
+
|
|
800
|
+
// Legitimate crates within edit-distance of a popular crate but not squats.
|
|
801
|
+
const CRATES_WHITELIST = new Set([
|
|
802
|
+
'mime', // distance 1 from 'time' — both real & popular
|
|
803
|
+
'rand-chacha', // rand ecosystem sibling (normalized)
|
|
804
|
+
'serde-with', // serde ecosystem sibling
|
|
805
|
+
'futures-core',
|
|
806
|
+
]);
|
|
807
|
+
|
|
808
|
+
const MIN_CRATE_LENGTH = 4;
|
|
809
|
+
|
|
810
|
+
/**
|
|
811
|
+
* Find a crates.io typosquat match (Levenshtein over the popular-crate list).
|
|
812
|
+
* Pure + IOC-independent. Used by the GHSA rust pre-alert to enrich the embed.
|
|
813
|
+
*
|
|
814
|
+
* @param {string} name - crate name
|
|
815
|
+
* @returns {{original: string, type: string, distance: number}|null}
|
|
816
|
+
*/
|
|
817
|
+
function findCratesTyposquatMatch(name) {
|
|
818
|
+
if (typeof name !== 'string' || !name) return null;
|
|
819
|
+
const normalized = normalizeCrate(name);
|
|
820
|
+
|
|
821
|
+
if (POPULAR_CRATES_SET.has(normalized)) return null; // it IS a popular crate
|
|
822
|
+
if (CRATES_WHITELIST.has(normalized)) return null;
|
|
823
|
+
if (normalized.length < MIN_CRATE_LENGTH) return null;
|
|
824
|
+
|
|
825
|
+
for (let i = 0; i < POPULAR_CRATES.length; i++) {
|
|
826
|
+
const popularNorm = POPULAR_CRATES_NORMALIZED[i];
|
|
827
|
+
const popular = POPULAR_CRATES[i];
|
|
828
|
+
if (normalized === popularNorm) continue;
|
|
829
|
+
if (popularNorm.length < MIN_CRATE_LENGTH) continue;
|
|
830
|
+
if (Math.abs(normalized.length - popularNorm.length) > 2) continue;
|
|
831
|
+
|
|
832
|
+
const distance = levenshteinDistance(normalized, popularNorm);
|
|
833
|
+
if (distance === 1) {
|
|
834
|
+
return { original: popular, type: detectTyposquatType(normalized, popularNorm), distance };
|
|
835
|
+
}
|
|
836
|
+
if (distance === 2 && popularNorm.length >= 5) {
|
|
837
|
+
return { original: popular, type: detectTyposquatType(normalized, popularNorm), distance };
|
|
838
|
+
}
|
|
839
|
+
}
|
|
840
|
+
return null;
|
|
841
|
+
}
|
|
842
|
+
|
|
843
|
+
module.exports = { scanTyposquatting, levenshteinDistance, clearMetadataCache, findPyPITyposquatMatch, findCratesTyposquatMatch, findTyposquatMatch };
|