muaddib-scanner 2.11.74 → 2.11.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.74",
3
+ "version": "2.11.76",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-06-07T18:51:19.187Z",
3
+ "timestamp": "2026-06-07T19:47:48.330Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -31,7 +31,7 @@ const path = require('path');
31
31
  const https = require('https');
32
32
 
33
33
  const GHSA_API_HOST = 'api.github.com';
34
- const GHSA_ECOSYSTEMS = ['npm', 'pypi'];
34
+ const GHSA_ECOSYSTEMS = ['npm', 'pypi', 'crates'];
35
35
  const GHSA_CURSOR_FILE = process.env.MUADDIB_GHSA_CURSOR_FILE ||
36
36
  path.join(__dirname, '..', '..', 'data', 'ghsa-cursor.json');
37
37
  const GHSA_MALWARE_FILE = process.env.MUADDIB_GHSA_MALWARE_FILE ||
@@ -84,9 +84,10 @@ function _httpGetJson(pathName, { token, httpImpl = https, timeoutMs = 20_000 }
84
84
  */
85
85
  async function _defaultFetch(ecosystem, opts = {}) {
86
86
  const token = opts.token || process.env.GITHUB_TOKEN || process.env.GH_TOKEN || null;
87
- // GHSA names the Python ecosystem "pip" (not "pypi") in BOTH the query and the response;
88
- // querying ecosystem=pypi returns HTTP 422. Map our internal name to GHSA's for the query.
89
- const apiEco = ecosystem === 'pypi' ? 'pip' : ecosystem;
87
+ // GHSA names the Python ecosystem "pip" (not "pypi") and Rust "rust" (we call it
88
+ // "crates") in BOTH the query and the response; querying ecosystem=pypi returns HTTP
89
+ // 422. Map our internal name to GHSA's for the query.
90
+ const apiEco = ecosystem === 'pypi' ? 'pip' : ecosystem === 'crates' ? 'rust' : ecosystem;
90
91
  const p = `/advisories?type=malware&ecosystem=${encodeURIComponent(apiEco)}&per_page=100&sort=updated&direction=desc`;
91
92
  const { status, json } = await _httpGetJson(p, { token, httpImpl: opts.httpImpl });
92
93
  if (status !== 200 || !Array.isArray(json)) {
@@ -112,7 +113,7 @@ function _nextLink(linkHeader) {
112
113
  async function fetchAllGhsaMalware(ecosystem, opts = {}) {
113
114
  const token = opts.token || process.env.GITHUB_TOKEN || process.env.GH_TOKEN || null;
114
115
  const maxPages = Number.isFinite(opts.maxPages) ? opts.maxPages : 30;
115
- const apiEco = ecosystem === 'pypi' ? 'pip' : ecosystem;
116
+ const apiEco = ecosystem === 'pypi' ? 'pip' : ecosystem === 'crates' ? 'rust' : ecosystem;
116
117
  let pathName = `/advisories?type=malware&ecosystem=${encodeURIComponent(apiEco)}&per_page=100&sort=published&direction=desc`;
117
118
  const rows = [];
118
119
  for (let page = 0; page < maxPages && pathName; page++) {
@@ -141,6 +142,7 @@ function parseAdvisory(adv, ecosystems = GHSA_ECOSYSTEMS) {
141
142
  if (!pkg || !pkg.name || !pkg.ecosystem) continue;
142
143
  let eco = String(pkg.ecosystem).toLowerCase();
143
144
  if (eco === 'pip') eco = 'pypi'; // normalize GHSA's "pip" to our internal "pypi"
145
+ else if (eco === 'rust') eco = 'crates'; // normalize GHSA's "rust" to our internal "crates"
144
146
  if (ecosystems && !ecosystems.includes(eco)) continue;
145
147
  out.push({
146
148
  ghsa_id: adv.ghsa_id,
@@ -210,17 +212,29 @@ function _maybeCompactMalware(file) {
210
212
  function buildGhsaPreAlertEmbed(row) {
211
213
  const link = row.ecosystem === 'pypi'
212
214
  ? `https://pypi.org/project/${encodeURIComponent(row.name)}/`
213
- : `https://www.npmjs.com/package/${encodeURIComponent(row.name)}`;
215
+ : row.ecosystem === 'crates'
216
+ ? `https://crates.io/crates/${encodeURIComponent(row.name)}`
217
+ : `https://www.npmjs.com/package/${encodeURIComponent(row.name)}`;
218
+ const fields = [
219
+ { name: 'Package', value: `[${row.ecosystem}/${row.name}](${link})`, inline: true },
220
+ { name: 'Range', value: String(row.versionRange || '*'), inline: true },
221
+ { name: 'Advisory', value: `[${row.ghsa_id}](https://github.com/advisories/${row.ghsa_id})`, inline: true },
222
+ { name: 'Source', value: 'GitHub Advisory DB (type=malware) — active poller', inline: false }
223
+ ];
224
+ // crates enrichment: flag if the malicious crate name typosquats a popular crate.
225
+ // Lazy require keeps the poller light; findCratesTyposquatMatch is pure.
226
+ if (row.ecosystem === 'crates') {
227
+ try {
228
+ const { findCratesTyposquatMatch } = require('../scanner/typosquat.js');
229
+ const m = findCratesTyposquatMatch(row.name);
230
+ if (m) fields.push({ name: 'Typosquat', value: `looks like \`${m.original}\` (distance ${m.distance})`, inline: true });
231
+ } catch { /* enrichment is best-effort */ }
232
+ }
214
233
  return {
215
234
  embeds: [{
216
235
  title: '⚠️ GHSA PRE-ALERT — Fresh Malware Advisory',
217
236
  color: 0xe74c3c,
218
- fields: [
219
- { name: 'Package', value: `[${row.ecosystem}/${row.name}](${link})`, inline: true },
220
- { name: 'Range', value: String(row.versionRange || '*'), inline: true },
221
- { name: 'Advisory', value: `[${row.ghsa_id}](https://github.com/advisories/${row.ghsa_id})`, inline: true },
222
- { name: 'Source', value: 'GitHub Advisory DB (type=malware) — active poller', inline: false }
223
- ],
237
+ fields,
224
238
  footer: { text: `MUAD'DIB GHSA Pre-Alert | ${new Date().toISOString().replace('T', ' ').replace(/\.\d+Z$/, ' UTC')}` },
225
239
  timestamp: new Date().toISOString()
226
240
  }]
@@ -370,7 +370,8 @@ const RECENT_PUBLISH_MAX = 5;
370
370
  * @returns {Object|null} - {
371
371
  * version, tarball, unpackedSize, scripts, homepage, description,
372
372
  * latestTagVersion, // dist-tags.latest (may differ from `version` under ATO)
373
- * recentVersions: [{ version, tarball, unpackedSize, scripts }, ...]
373
+ * recentVersions: [{ version, tarball, unpackedSize, scripts }, ...], // capped at maxRecent
374
+ * recentWindowCount, // TRUE (uncapped) count of versions in the window (Phase 2b burst)
374
375
  * } or null if no usable version found
375
376
  */
376
377
  function selectMostRecentVersion(packument, options = {}) {
@@ -419,14 +420,19 @@ function selectMostRecentVersion(packument, options = {}) {
419
420
  recentVersions: [],
420
421
  };
421
422
 
422
- // Burst extras: other versions published within the recent window, excluding
423
- // the most-recent one. Bounded by maxRecent. Each extra carries enough
424
- // metadata for the queue to enqueue it directly without re-fetching the packument.
423
+ // Burst extras: other versions published within the recent window, excluding the
424
+ // most-recent one. The enqueue list is bounded by maxRecent, but recentWindowCount is
425
+ // the TRUE (uncapped) number of versions in the window — Phase 2b burst detection uses it
426
+ // so a 96-version Miasma burst is distinguishable from a legit 3-5 patch-release day (the
427
+ // capped list alone tops out at maxRecent+1 and can't tell them apart).
428
+ result.recentWindowCount = 1; // includes the most-recent version itself
425
429
  if (versionTimes.length > 1) {
426
430
  const cutoff = versionTimes[0][1] - recentWindowMs;
427
- for (let i = 1; i < versionTimes.length && result.recentVersions.length < maxRecent; i++) {
431
+ for (let i = 1; i < versionTimes.length; i++) {
428
432
  const [v, ts] = versionTimes[i];
429
433
  if (ts < cutoff) break; // sorted desc, so once we cross the cutoff we're done
434
+ result.recentWindowCount++;
435
+ if (result.recentVersions.length >= maxRecent) continue; // enqueue list capped; count continues
430
436
  const vData = versions[v];
431
437
  if (!vData) continue;
432
438
  result.recentVersions.push({
@@ -819,6 +825,7 @@ async function pollNpmChanges(state, scanQueue, stats) {
819
825
  unpackedSize: docMeta ? docMeta.unpackedSize : 0,
820
826
  registryScripts: docMeta ? docMeta.scripts : null,
821
827
  _cacheTrigger: cacheTrigger.shouldCache ? cacheTrigger : null,
828
+ firstPublish: cacheTrigger.shouldCache && cacheTrigger.reason === 'first_publish',
822
829
  isIOCMatch: isKnownIOC
823
830
  });
824
831
  queued++;
@@ -57,6 +57,7 @@ const {
57
57
  buildAlertData,
58
58
  persistAlert,
59
59
  sendIOCPreAlert,
60
+ sendBurstPreAlert,
60
61
  matchVersionedIOC,
61
62
  buildCanaryExfiltrationWebhookEmbed,
62
63
  getWebhookUrl,
@@ -130,6 +131,21 @@ const RECENTLY_SCANNED_MAX = 50_000; // FIFO cap for the dedup Set (P0c — boun
130
131
  const FIRST_PUBLISH_SANDBOX_MAX_QUEUE = parseInt(process.env.MUADDIB_FIRST_PUBLISH_SANDBOX_MAX_QUEUE, 10) || 10;
131
132
  const FIRST_PUBLISH_SANDBOX_ENABLED = process.env.MUADDIB_FIRST_PUBLISH_SANDBOX !== '0';
132
133
 
134
+ // Phase 2b: burst (Miasma) pre-alert. A burst = >= this many versions of ONE name in the
135
+ // recent-publish window (the TRUE uncapped count, selectMostRecentVersion.recentWindowCount).
136
+ // Default 10: detection is PER-NAME, so legit multi-PLATFORM publishers (different names,
137
+ // e.g. @opencode-ai/cli-*-* binaries) are never caught; legit same-name release days rarely
138
+ // reach 10; Miasma's 96-in-72s clears it easily. Per-name + deduped + non-scoring (Discord
139
+ // heads-up only, no FPR impact). Env-tunable up if a feed proves noisy.
140
+ const BURST_PREALERT_MIN_VERSIONS = (() => {
141
+ const n = parseInt(process.env.MUADDIB_BURST_MIN_VERSIONS, 10);
142
+ return Number.isFinite(n) && n >= 2 ? n : 10;
143
+ })();
144
+ // Dedup burst pings: one per name per process window (bounded — cleared at the cap so it
145
+ // can never grow without limit, CLAUDE.md §2).
146
+ const _burstAlerted = new Set();
147
+ const BURST_ALERTED_MAX = 20_000;
148
+
133
149
  // Stage 3 — sandbox gate. Static-score threshold below which T1b/T2 packages
134
150
  // are NOT sandboxed (static result alone is authoritative). Tightens the prior
135
151
  // "T1b sandbox if score >= 25 or queue < 20" to remove low-signal sandbox runs
@@ -1429,6 +1445,25 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
1429
1445
  // only scan whichever version happened to be the most recent at resolution
1430
1446
  // time, racing the publish stream.
1431
1447
  const recents = Array.isArray(npmInfo.recentVersions) ? npmInfo.recentVersions : [];
1448
+ // Phase 2b: burst = TRUE count of versions of this name in the recent window
1449
+ // (uncapped recentWindowCount), NOT the capped extras list — so a 96-version Miasma
1450
+ // burst is distinguishable from a legit multi-version day. At/above the threshold,
1451
+ // flag the item (protects it + its extras from queue-cap eviction) and fire ONE
1452
+ // burst pre-alert per name (deduped, bounded).
1453
+ const burstCount = Number.isFinite(npmInfo.recentWindowCount) ? npmInfo.recentWindowCount : (recents.length + 1);
1454
+ const isBurst = burstCount >= BURST_PREALERT_MIN_VERSIONS;
1455
+ if (isBurst) {
1456
+ item.isBurst = true;
1457
+ if (!_burstAlerted.has(item.name)) {
1458
+ if (_burstAlerted.size >= BURST_ALERTED_MAX) _burstAlerted.clear();
1459
+ _burstAlerted.add(item.name);
1460
+ stats.burstPreAlerts = (stats.burstPreAlerts || 0) + 1;
1461
+ console.log(`[MONITOR] BURST PRE-ALERT: ${item.name} — ${burstCount} versions in the recent window`);
1462
+ sendBurstPreAlert(item.name, burstCount, item.ecosystem).catch(err => {
1463
+ console.error(`[MONITOR] burst pre-alert webhook failed for ${item.name}: ${err.message}`);
1464
+ });
1465
+ }
1466
+ }
1432
1467
  for (const recent of recents) {
1433
1468
  if (!recent || !recent.tarball || !recent.version) continue;
1434
1469
  const dedupeKey = `${item.name}@${recent.version}`;
@@ -1441,6 +1476,7 @@ async function resolveTarballAndScan(item, stats, dailyAlerts, recentlyScanned,
1441
1476
  unpackedSize: recent.unpackedSize || 0,
1442
1477
  registryScripts: recent.scripts || null,
1443
1478
  atoSignal: item.atoSignal === true,
1479
+ isBurst,
1444
1480
  isATOBurstExtra: true,
1445
1481
  }, stats);
1446
1482
  }
@@ -24,32 +24,58 @@ const MAX_SCAN_QUEUE = (() => {
24
24
  const HARD_DROP_LOG_INTERVAL_MS = 10_000;
25
25
  let _lastHardDropLog = 0;
26
26
 
27
+ // Phase 2b: classes we never want to drop blindly when the queue caps out — the
28
+ // specifically-targeted scans (known-malicious, burst/ATO, first-publish). Eviction drops
29
+ // the oldest UNPROTECTED item instead; only if a bounded head-window is entirely protected
30
+ // do we fall back to strict-oldest (still ledgered, with a distinct source).
31
+ function _isProtected(item) {
32
+ return !!(item && (item.isIOCMatch || item.isBurst || item.firstPublish || item.atoSignal || item.isATOBurstExtra));
33
+ }
34
+
35
+ // How far from the head we scan for an unprotected victim. Protected items are a small
36
+ // fraction of the flood, so a victim is almost always found within a few slots; the bound
37
+ // keeps eviction O(window) under sustained overflow (CLAUDE.md §2 bounded resources).
38
+ const PROTECTED_EVICTION_SCAN_MAX = (() => {
39
+ const v = parseInt(process.env.MUADDIB_PROTECTED_EVICTION_SCAN_MAX, 10);
40
+ return Number.isFinite(v) && v > 0 ? v : 1024;
41
+ })();
42
+
27
43
  /**
28
- * Push an item onto the scan queue, enforcing the hard cap by dropping the oldest item
29
- * when at capacity. `max` defaults to MAX_SCAN_QUEUE (overridable for tests). Returns
30
- * true iff an item was dropped to make room.
44
+ * Push an item onto the scan queue, enforcing the hard cap when at capacity. Evicts the
45
+ * oldest UNPROTECTED item (within a bounded head-window), falling back to strict-oldest if
46
+ * that window is all-protected. `max` defaults to MAX_SCAN_QUEUE (overridable for tests).
47
+ * Returns true iff an item was dropped to make room.
31
48
  */
32
49
  function enqueueScan(scanQueue, item, stats, max = MAX_SCAN_QUEUE) {
33
50
  let dropped = false;
34
51
  if (scanQueue.length >= max) {
35
- const evicted = scanQueue.shift(); // drop oldest
52
+ // Victim = oldest unprotected item within the bounded head-window; else strict oldest.
53
+ let victimIdx = -1;
54
+ const scanLimit = Math.min(scanQueue.length, PROTECTED_EVICTION_SCAN_MAX);
55
+ for (let i = 0; i < scanLimit; i++) {
56
+ if (!_isProtected(scanQueue[i])) { victimIdx = i; break; }
57
+ }
58
+ const protectedFallback = victimIdx === -1;
59
+ const evicted = protectedFallback ? scanQueue.shift() : scanQueue.splice(victimIdx, 1)[0];
36
60
  dropped = true;
37
61
  if (stats) stats.queueHardDrops = (stats.queueHardDrops || 0) + 1;
38
62
  // Phase 0a: record the dropped item so a coverage loss keeps an identity — answers
39
63
  // "which versions were never scanned" (e.g. the Miasma 72s/96-version burst). Lazy
40
64
  // require avoids any top-level coupling with state.js; best-effort, never throws.
65
+ // A dropped PROTECTED item (all-protected head-window) gets a distinct source so the
66
+ // rare case stays visible in the 0b ledger rollup.
41
67
  try {
42
68
  if (evicted && evicted.name) {
43
69
  require('./state.js').appendScanLedger({
44
70
  name: evicted.name, version: evicted.version, ecosystem: evicted.ecosystem,
45
- outcome: 'dropped', source: 'queue_cap'
71
+ outcome: 'dropped', source: protectedFallback ? 'queue_cap_protected' : 'queue_cap'
46
72
  });
47
73
  }
48
74
  } catch { /* ledger is best-effort */ }
49
75
  const now = Date.now();
50
76
  if (now - _lastHardDropLog > HARD_DROP_LOG_INTERVAL_MS) {
51
77
  _lastHardDropLog = now;
52
- console.warn(`[MONITOR] QUEUE_HARD_DROP: scan queue at cap ${max} — dropping oldest item(s) (total dropped this session: ${stats ? stats.queueHardDrops : '?'}). Ingestion is outrunning scanning.`);
78
+ console.warn(`[MONITOR] QUEUE_HARD_DROP: scan queue at cap ${max} — dropping ${protectedFallback ? 'OLDEST (head-window all protected)' : 'oldest unprotected'} item(s) (total dropped this session: ${stats ? stats.queueHardDrops : '?'}). Ingestion is outrunning scanning.`);
53
79
  }
54
80
  }
55
81
  scanQueue.push(item);
@@ -240,6 +240,43 @@ async function sendCampaignPreAlert(name, campaign, ecosystem = 'npm') {
240
240
  await sendWebhook(url, buildCampaignPreAlertEmbed(name, campaign, ecosystem), { rawPayload: true });
241
241
  }
242
242
 
243
+ /**
244
+ * Layer 1c: Build the burst pre-alert embed (pure — no network). Exported for tests.
245
+ * Fires when ≥K versions of one package land in a short window (account-takeover /
246
+ * "Miasma" burst-publish). Amber to distinguish from IOC (red) and campaign (orange).
247
+ * @param {string} name - Package name
248
+ * @param {number} count - Number of versions seen in the burst window
249
+ * @param {string} [ecosystem='npm'] - 'npm' | 'pypi' | 'crates' (link target)
250
+ */
251
+ function buildBurstPreAlertEmbed(name, count, ecosystem = 'npm') {
252
+ return {
253
+ embeds: [{
254
+ title: '⚠️ BURST PRE-ALERT — Rapid Multi-Version Publish',
255
+ color: 0xf39c12,
256
+ fields: [
257
+ { name: 'Package', value: `[${ecosystem}/${name}](${registryLink(ecosystem, name)})`, inline: true },
258
+ { name: 'Versions', value: `${count} in a short window`, inline: true },
259
+ { name: 'Detection', value: 'Burst-publish (possible ATO / Miasma)', inline: true },
260
+ { name: 'Status', value: 'Multiple versions published rapidly — every version queued for scan and protected from queue-cap eviction. Treat as suspect until verdicts land.', inline: false }
261
+ ],
262
+ footer: {
263
+ text: `MUAD'DIB Burst Pre-Alert | ${new Date().toISOString().replace('T', ' ').replace(/\.\d+Z$/, ' UTC')}`
264
+ },
265
+ timestamp: new Date().toISOString()
266
+ }]
267
+ };
268
+ }
269
+
270
+ /**
271
+ * Layer 1c: Send a burst pre-alert webhook. Fire-and-forget; callers dedupe per
272
+ * name/window so a burst pings once, not once per version.
273
+ */
274
+ async function sendBurstPreAlert(name, count, ecosystem = 'npm') {
275
+ const url = getWebhookUrl();
276
+ if (!url) return;
277
+ await sendWebhook(url, buildBurstPreAlertEmbed(name, count, ecosystem), { rawPayload: true });
278
+ }
279
+
243
280
  /**
244
281
  * Check if a specific package@version matches a versioned IOC entry.
245
282
  * Returns the matching IOC entry or null.
@@ -1399,6 +1436,8 @@ module.exports = {
1399
1436
  sendIOCPreAlert,
1400
1437
  buildCampaignPreAlertEmbed,
1401
1438
  sendCampaignPreAlert,
1439
+ buildBurstPreAlertEmbed,
1440
+ sendBurstPreAlert,
1402
1441
  matchVersionedIOC,
1403
1442
  computeRiskLevel,
1404
1443
  computeRiskScore,
@@ -764,4 +764,80 @@ function findPyPITyposquatMatch(name) {
764
764
  return null;
765
765
  }
766
766
 
767
- module.exports = { scanTyposquatting, levenshteinDistance, clearMetadataCache, findPyPITyposquatMatch, findTyposquatMatch };
767
+ // ============================================
768
+ // crates.io (Rust) TYPOSQUATTING — Phase 4
769
+ // ============================================
770
+ // Pre-alert enrichment ONLY: flags when an incoming crate name (from the GHSA rust
771
+ // malware feed) typosquats a popular crate. No crates ingestion / build.rs / scan-time
772
+ // Cargo parsing (non-goal). Mirrors the PyPI block above.
773
+
774
+ // Top crates.io packages by downloads (typosquat targets). Hardcoded snapshot.
775
+ const POPULAR_CRATES = [
776
+ 'serde', 'serde_json', 'serde_derive', 'serde_yaml', 'syn', 'quote', 'proc-macro2',
777
+ 'libc', 'rand', 'rand_core', 'log', 'cfg-if', 'bitflags', 'itertools', 'once_cell',
778
+ 'lazy_static', 'regex', 'regex-syntax', 'aho-corasick', 'base64', 'num-traits',
779
+ 'unicode-ident', 'tokio', 'tokio-util', 'futures', 'futures-util', 'bytes',
780
+ 'hashbrown', 'smallvec', 'parking_lot', 'anyhow', 'thiserror', 'indexmap', 'memchr',
781
+ 'chrono', 'semver', 'getrandom', 'clap', 'time', 'uuid', 'hyper', 'reqwest',
782
+ 'async-trait', 'tracing', 'tracing-core', 'tracing-subscriber', 'url',
783
+ 'percent-encoding', 'idna', 'socket2', 'httparse', 'tower', 'rayon', 'num_cpus',
784
+ 'either', 'toml', 'winapi', 'windows-sys', 'env_logger', 'generic-array', 'digest',
785
+ 'sha2', 'typenum', 'subtle', 'rustls', 'ring', 'openssl', 'flate2', 'miniz_oxide',
786
+ 'crc32fast', 'walkdir', 'tempfile', 'dirs', 'nix', 'backtrace', 'scopeguard',
787
+ 'pin-project', 'pin-project-lite', 'slab', 'lock_api', 'crossbeam-utils',
788
+ 'crossbeam-channel', 'crossbeam-epoch', 'ahash', 'fnv', 'mio', 'h2', 'http'
789
+ ];
790
+
791
+ // crates.io treats '-' and '_' as equivalent and is case-insensitive for name
792
+ // uniqueness; normalize the same way for typosquat comparison.
793
+ function normalizeCrate(name) {
794
+ return name.toLowerCase().replace(/[-_]+/g, '-');
795
+ }
796
+
797
+ const POPULAR_CRATES_NORMALIZED = POPULAR_CRATES.map(normalizeCrate);
798
+ const POPULAR_CRATES_SET = new Set(POPULAR_CRATES_NORMALIZED);
799
+
800
+ // Legitimate crates within edit-distance of a popular crate but not squats.
801
+ const CRATES_WHITELIST = new Set([
802
+ 'mime', // distance 1 from 'time' — both real & popular
803
+ 'rand-chacha', // rand ecosystem sibling (normalized)
804
+ 'serde-with', // serde ecosystem sibling
805
+ 'futures-core',
806
+ ]);
807
+
808
+ const MIN_CRATE_LENGTH = 4;
809
+
810
+ /**
811
+ * Find a crates.io typosquat match (Levenshtein over the popular-crate list).
812
+ * Pure + IOC-independent. Used by the GHSA rust pre-alert to enrich the embed.
813
+ *
814
+ * @param {string} name - crate name
815
+ * @returns {{original: string, type: string, distance: number}|null}
816
+ */
817
+ function findCratesTyposquatMatch(name) {
818
+ if (typeof name !== 'string' || !name) return null;
819
+ const normalized = normalizeCrate(name);
820
+
821
+ if (POPULAR_CRATES_SET.has(normalized)) return null; // it IS a popular crate
822
+ if (CRATES_WHITELIST.has(normalized)) return null;
823
+ if (normalized.length < MIN_CRATE_LENGTH) return null;
824
+
825
+ for (let i = 0; i < POPULAR_CRATES.length; i++) {
826
+ const popularNorm = POPULAR_CRATES_NORMALIZED[i];
827
+ const popular = POPULAR_CRATES[i];
828
+ if (normalized === popularNorm) continue;
829
+ if (popularNorm.length < MIN_CRATE_LENGTH) continue;
830
+ if (Math.abs(normalized.length - popularNorm.length) > 2) continue;
831
+
832
+ const distance = levenshteinDistance(normalized, popularNorm);
833
+ if (distance === 1) {
834
+ return { original: popular, type: detectTyposquatType(normalized, popularNorm), distance };
835
+ }
836
+ if (distance === 2 && popularNorm.length >= 5) {
837
+ return { original: popular, type: detectTyposquatType(normalized, popularNorm), distance };
838
+ }
839
+ }
840
+ return null;
841
+ }
842
+
843
+ module.exports = { scanTyposquatting, levenshteinDistance, clearMetadataCache, findPyPITyposquatMatch, findCratesTyposquatMatch, findTyposquatMatch };