muaddib-scanner 2.11.73 → 2.11.75

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "muaddib-scanner",
3
- "version": "2.11.73",
3
+ "version": "2.11.75",
4
4
  "description": "Supply-chain threat detection & response for npm & PyPI/Python",
5
5
  "main": "src/index.js",
6
6
  "bin": {
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "target": "node_modules",
3
- "timestamp": "2026-06-07T17:25:13.830Z",
3
+ "timestamp": "2026-06-07T19:18:50.434Z",
4
4
  "threats": [
5
5
  {
6
6
  "type": "string_mutation_obfuscation",
@@ -31,7 +31,7 @@ const path = require('path');
31
31
  const https = require('https');
32
32
 
33
33
  const GHSA_API_HOST = 'api.github.com';
34
- const GHSA_ECOSYSTEMS = ['npm', 'pypi'];
34
+ const GHSA_ECOSYSTEMS = ['npm', 'pypi', 'crates'];
35
35
  const GHSA_CURSOR_FILE = process.env.MUADDIB_GHSA_CURSOR_FILE ||
36
36
  path.join(__dirname, '..', '..', 'data', 'ghsa-cursor.json');
37
37
  const GHSA_MALWARE_FILE = process.env.MUADDIB_GHSA_MALWARE_FILE ||
@@ -84,9 +84,10 @@ function _httpGetJson(pathName, { token, httpImpl = https, timeoutMs = 20_000 }
84
84
  */
85
85
  async function _defaultFetch(ecosystem, opts = {}) {
86
86
  const token = opts.token || process.env.GITHUB_TOKEN || process.env.GH_TOKEN || null;
87
- // GHSA names the Python ecosystem "pip" (not "pypi") in BOTH the query and the response;
88
- // querying ecosystem=pypi returns HTTP 422. Map our internal name to GHSA's for the query.
89
- const apiEco = ecosystem === 'pypi' ? 'pip' : ecosystem;
87
+ // GHSA names the Python ecosystem "pip" (not "pypi") and Rust "rust" (we call it
88
+ // "crates") in BOTH the query and the response; querying ecosystem=pypi returns HTTP
89
+ // 422. Map our internal name to GHSA's for the query.
90
+ const apiEco = ecosystem === 'pypi' ? 'pip' : ecosystem === 'crates' ? 'rust' : ecosystem;
90
91
  const p = `/advisories?type=malware&ecosystem=${encodeURIComponent(apiEco)}&per_page=100&sort=updated&direction=desc`;
91
92
  const { status, json } = await _httpGetJson(p, { token, httpImpl: opts.httpImpl });
92
93
  if (status !== 200 || !Array.isArray(json)) {
@@ -112,7 +113,7 @@ function _nextLink(linkHeader) {
112
113
  async function fetchAllGhsaMalware(ecosystem, opts = {}) {
113
114
  const token = opts.token || process.env.GITHUB_TOKEN || process.env.GH_TOKEN || null;
114
115
  const maxPages = Number.isFinite(opts.maxPages) ? opts.maxPages : 30;
115
- const apiEco = ecosystem === 'pypi' ? 'pip' : ecosystem;
116
+ const apiEco = ecosystem === 'pypi' ? 'pip' : ecosystem === 'crates' ? 'rust' : ecosystem;
116
117
  let pathName = `/advisories?type=malware&ecosystem=${encodeURIComponent(apiEco)}&per_page=100&sort=published&direction=desc`;
117
118
  const rows = [];
118
119
  for (let page = 0; page < maxPages && pathName; page++) {
@@ -141,6 +142,7 @@ function parseAdvisory(adv, ecosystems = GHSA_ECOSYSTEMS) {
141
142
  if (!pkg || !pkg.name || !pkg.ecosystem) continue;
142
143
  let eco = String(pkg.ecosystem).toLowerCase();
143
144
  if (eco === 'pip') eco = 'pypi'; // normalize GHSA's "pip" to our internal "pypi"
145
+ else if (eco === 'rust') eco = 'crates'; // normalize GHSA's "rust" to our internal "crates"
144
146
  if (ecosystems && !ecosystems.includes(eco)) continue;
145
147
  out.push({
146
148
  ghsa_id: adv.ghsa_id,
@@ -210,17 +212,29 @@ function _maybeCompactMalware(file) {
210
212
  function buildGhsaPreAlertEmbed(row) {
211
213
  const link = row.ecosystem === 'pypi'
212
214
  ? `https://pypi.org/project/${encodeURIComponent(row.name)}/`
213
- : `https://www.npmjs.com/package/${encodeURIComponent(row.name)}`;
215
+ : row.ecosystem === 'crates'
216
+ ? `https://crates.io/crates/${encodeURIComponent(row.name)}`
217
+ : `https://www.npmjs.com/package/${encodeURIComponent(row.name)}`;
218
+ const fields = [
219
+ { name: 'Package', value: `[${row.ecosystem}/${row.name}](${link})`, inline: true },
220
+ { name: 'Range', value: String(row.versionRange || '*'), inline: true },
221
+ { name: 'Advisory', value: `[${row.ghsa_id}](https://github.com/advisories/${row.ghsa_id})`, inline: true },
222
+ { name: 'Source', value: 'GitHub Advisory DB (type=malware) — active poller', inline: false }
223
+ ];
224
+ // crates enrichment: flag if the malicious crate name typosquats a popular crate.
225
+ // Lazy require keeps the poller light; findCratesTyposquatMatch is pure.
226
+ if (row.ecosystem === 'crates') {
227
+ try {
228
+ const { findCratesTyposquatMatch } = require('../scanner/typosquat.js');
229
+ const m = findCratesTyposquatMatch(row.name);
230
+ if (m) fields.push({ name: 'Typosquat', value: `looks like \`${m.original}\` (distance ${m.distance})`, inline: true });
231
+ } catch { /* enrichment is best-effort */ }
232
+ }
214
233
  return {
215
234
  embeds: [{
216
235
  title: '⚠️ GHSA PRE-ALERT — Fresh Malware Advisory',
217
236
  color: 0xe74c3c,
218
- fields: [
219
- { name: 'Package', value: `[${row.ecosystem}/${row.name}](${link})`, inline: true },
220
- { name: 'Range', value: String(row.versionRange || '*'), inline: true },
221
- { name: 'Advisory', value: `[${row.ghsa_id}](https://github.com/advisories/${row.ghsa_id})`, inline: true },
222
- { name: 'Source', value: 'GitHub Advisory DB (type=malware) — active poller', inline: false }
223
- ],
237
+ fields,
224
238
  footer: { text: `MUAD'DIB GHSA Pre-Alert | ${new Date().toISOString().replace('T', ' ').replace(/\.\d+Z$/, ' UTC')}` },
225
239
  timestamp: new Date().toISOString()
226
240
  }]
@@ -1,6 +1,6 @@
1
1
  'use strict';
2
2
 
3
- const { levenshteinDistance } = require('../scanner/typosquat.js');
3
+ const { levenshteinDistance, findPyPITyposquatMatch } = require('../scanner/typosquat.js');
4
4
  const { loadCachedIOCs } = require('../ioc/updater.js');
5
5
 
6
6
  // --- Popular npm names (used for quick typosquat check) ---
@@ -351,32 +351,39 @@ function quickTyposquatCheck(name) {
351
351
  * Layer 3: Determine if a package should be cached and at what retention level.
352
352
  * @param {string} name - Package name
353
353
  * @param {Object|null} docMeta - Metadata from extractTarballFromDoc
354
- * @param {Object|null} doc - Full CouchDB doc
354
+ * @param {Object|null} doc - Full CouchDB doc (npm; carries `versions` for first-publish)
355
+ * @param {Object} [opts] - Non-npm ecosystem hints:
356
+ * { ecosystem?: 'npm'|'pypi', versionCount?: number }. PyPI has no packument at
357
+ * ingest time, so the version count comes from preResolvePyPIBatch via opts.
355
358
  * @returns {{ shouldCache: boolean, reason: string, retentionDays: number }}
356
359
  */
357
- function evaluateCacheTrigger(name, docMeta, doc) {
358
- // Trigger 1: IOC match -- 30-day retention
360
+ function evaluateCacheTrigger(name, docMeta, doc, opts = {}) {
361
+ const ecosystem = opts.ecosystem || 'npm';
362
+
363
+ // Trigger 1: IOC match -- 30-day retention. PyPI IOCs are namespaced "pypi:<name>".
359
364
  try {
360
365
  const iocs = loadCachedIOCs();
361
- if ((iocs.wildcardPackages && iocs.wildcardPackages.has(name)) ||
362
- (iocs.packagesMap && iocs.packagesMap.has(name))) {
366
+ const inSet = (s) => s && (s.has(name) || (ecosystem === 'pypi' && s.has(`pypi:${name}`)));
367
+ if (inSet(iocs.wildcardPackages) || inSet(iocs.packagesMap)) {
363
368
  return { shouldCache: true, reason: 'ioc_match', retentionDays: TARBALL_CACHE_HIGH_RISK_RETENTION_DAYS };
364
369
  }
365
370
  } catch { /* non-fatal */ }
366
371
 
367
- // Trigger 2: Typosquat signal -- 7-day retention
372
+ // Trigger 2: Typosquat signal -- 7-day retention (ecosystem-specific popular list)
368
373
  try {
369
- if (quickTyposquatCheck(name)) {
374
+ const typo = ecosystem === 'pypi' ? !!findPyPITyposquatMatch(name) : quickTyposquatCheck(name);
375
+ if (typo) {
370
376
  return { shouldCache: true, reason: 'typosquat_signal', retentionDays: TARBALL_CACHE_DEFAULT_RETENTION_DAYS };
371
377
  }
372
378
  } catch { /* non-fatal */ }
373
379
 
374
- // Trigger 3: First publish (single version in doc) -- 7-day retention
375
- if (doc && doc.versions) {
376
- const versionCount = Object.keys(doc.versions).length;
377
- if (versionCount === 1) {
378
- return { shouldCache: true, reason: 'first_publish', retentionDays: TARBALL_CACHE_DEFAULT_RETENTION_DAYS };
379
- }
380
+ // Trigger 3: First publish (single version) -- 7-day retention.
381
+ // npm: count from the CouchDB doc; pypi: count passed via opts.versionCount.
382
+ const versionCount = ecosystem === 'pypi'
383
+ ? (Number.isFinite(opts.versionCount) ? opts.versionCount : null)
384
+ : (doc && doc.versions ? Object.keys(doc.versions).length : null);
385
+ if (versionCount === 1) {
386
+ return { shouldCache: true, reason: 'first_publish', retentionDays: TARBALL_CACHE_DEFAULT_RETENTION_DAYS };
380
387
  }
381
388
 
382
389
  return { shouldCache: false, reason: '', retentionDays: 0 };
@@ -618,6 +618,16 @@ async function preResolvePyPIBatch(items, stats, scanQueue) {
618
618
  age_days: pypiInfo.age_days,
619
619
  version_count: pypiInfo.version_count,
620
620
  };
621
+ // First-publish parity with npm: derive the cache trigger + flag from the
622
+ // version count (PyPI has no packument at ingest, so the count comes from
623
+ // the registry fetch above). Feeds tarball retention, the scan-ledger
624
+ // firstPublish field, and Phase 2b protected eviction. The first-publish
625
+ // *sandbox* stays npm-only (runSandbox can't pip-install) — gated in queue.js.
626
+ const trig = evaluateCacheTrigger(item.name, null, null, {
627
+ ecosystem: 'pypi', versionCount: pypiInfo.version_count
628
+ });
629
+ item._cacheTrigger = trig.shouldCache ? trig : null;
630
+ item.firstPublish = trig.reason === 'first_publish';
621
631
  resolved++;
622
632
  } else {
623
633
  failed++;
@@ -1186,12 +1196,26 @@ async function pollPyPIChangelog(state, scanQueue, stats) {
1186
1196
  if (isKnownIOC) {
1187
1197
  console.log(`[MONITOR] IOC PRE-ALERT (pypi): ${ev.name} — known malicious package`);
1188
1198
  stats.iocPreAlerts = (stats.iocPreAlerts || 0) + 1;
1189
- sendIOCPreAlert(ev.name).catch(err => {
1199
+ sendIOCPreAlert(ev.name, ev.version, 'pypi').catch(err => {
1190
1200
  console.error(`[MONITOR] IOC pre-alert webhook failed for ${ev.name}: ${err.message}`);
1191
1201
  });
1192
1202
  }
1193
1203
  } catch { /* IOC load failure is non-fatal */ }
1194
1204
 
1205
+ // Campaign pre-alert (mirror of the npm Layer 1b): fire on name-pattern
1206
+ // matches when the package isn't already a known IOC. Campaigns can target
1207
+ // PyPI too; matchCampaignPattern is a pure name match, ecosystem-agnostic.
1208
+ if (!isKnownIOC) {
1209
+ const campaign = matchCampaignPattern(ev.name);
1210
+ if (campaign) {
1211
+ console.log(`[MONITOR] CAMPAIGN PRE-ALERT (pypi): ${ev.name} — matches ${campaign}`);
1212
+ stats.campaignPreAlerts = (stats.campaignPreAlerts || 0) + 1;
1213
+ sendCampaignPreAlert(ev.name, campaign, 'pypi').catch(err => {
1214
+ console.error(`[MONITOR] campaign pre-alert webhook failed for ${ev.name}: ${err.message}`);
1215
+ });
1216
+ }
1217
+ }
1218
+
1195
1219
  newItems.push({
1196
1220
  name: ev.name,
1197
1221
  version: ev.version,
@@ -651,7 +651,11 @@ async function scanPackage(name, version, ecosystem, tarballUrl, registryMeta, s
651
651
 
652
652
  // First-publish sandbox priority: sandbox even with 0 static findings
653
653
  // if the package is from a new/unknown maintainer without a linked repository.
654
+ // First-publish sandbox is npm-only: runSandbox does `npm install <name>` and
655
+ // cannot install PyPI sdists/wheels. PyPI first-publish items still carry the
656
+ // flag + cache trigger + ledger firstPublish (Phase 2a) but skip the sandbox.
654
657
  const firstPublishSandbox = isFirstPublish &&
658
+ ecosystem === 'npm' &&
655
659
  FIRST_PUBLISH_SANDBOX_ENABLED &&
656
660
  isFirstPublishHighRisk(cacheTrigger, npmRegistryMeta) &&
657
661
  isSandboxEnabled() && sandboxAvailable &&
@@ -150,25 +150,30 @@ function buildMonitorWebhookPayload(name, version, ecosystem, result, sandboxRes
150
150
  }
151
151
 
152
152
  /**
153
- * Layer 1: Send immediate IOC pre-alert webhook when a known malicious package
154
- * appears in the changes stream, BEFORE tarball download.
155
- * Safety net for packages that get unpublished before scanning completes.
156
- * @param {string} name - Package name matching IOC database
157
- * @param {string} [version] - Version if known (from CouchDB doc)
153
+ * Build the registry web link for a package, ecosystem-aware. Mirrors the link
154
+ * logic in ghsa-poller.js so pre-alerts point at the correct registry instead of
155
+ * always npmjs.com (PyPI IOC pre-alerts previously mislinked to npm).
158
156
  */
159
- async function sendIOCPreAlert(name, version) {
160
- const url = getWebhookUrl();
161
- if (!url) return;
157
+ function registryLink(ecosystem, name) {
158
+ if (ecosystem === 'pypi') return `https://pypi.org/project/${encodeURIComponent(name)}/`;
159
+ if (ecosystem === 'crates') return `https://crates.io/crates/${encodeURIComponent(name)}`;
160
+ return `https://www.npmjs.com/package/${encodeURIComponent(name)}`;
161
+ }
162
162
 
163
- const npmLink = `https://www.npmjs.com/package/${encodeURIComponent(name)}`;
163
+ /**
164
+ * Layer 1: Build the IOC pre-alert embed (pure \u2014 no network). Exported for tests.
165
+ * @param {string} name - Package name matching IOC database
166
+ * @param {string} [version] - Version if known
167
+ * @param {string} [ecosystem='npm'] - 'npm' | 'pypi' (link target)
168
+ */
169
+ function buildIOCPreAlertEmbed(name, version, ecosystem = 'npm') {
164
170
  const versionStr = version ? `@${version}` : '';
165
-
166
- const payload = {
171
+ return {
167
172
  embeds: [{
168
173
  title: '\u26a0\ufe0f IOC PRE-ALERT \u2014 Known Malicious Package',
169
174
  color: 0xe74c3c,
170
175
  fields: [
171
- { name: 'Package', value: `[${name}${versionStr}](${npmLink})`, inline: true },
176
+ { name: 'Package', value: `[${ecosystem}/${name}${versionStr}](${registryLink(ecosystem, name)})`, inline: true },
172
177
  { name: 'Source', value: 'IOC Database Match', inline: true },
173
178
  { name: 'Detection', value: 'Changes stream pre-scan', inline: true },
174
179
  { name: 'Status', value: 'Full scan queued \u2014 this is an early warning. Package may be unpublished before scan completes.', inline: false }
@@ -179,31 +184,35 @@ async function sendIOCPreAlert(name, version) {
179
184
  timestamp: new Date().toISOString()
180
185
  }]
181
186
  };
182
-
183
- await sendWebhook(url, payload, { rawPayload: true });
184
187
  }
185
188
 
186
189
  /**
187
- * Layer 1b: Send immediate pre-alert webhook when a package name matches an
188
- * active-campaign pattern (e.g. `did-NNNN` in May 2026). Fires BEFORE tarball
189
- * download \u2014 IOC lists are eventually-consistent and lag the campaign by
190
- * hours to days, so name-pattern watch is the only signal available in real
191
- * time while the campaign is in flight.
192
- * @param {string} name - Package name that matched the campaign pattern
193
- * @param {string} campaign - Short campaign label (e.g. 'did-NNNN')
190
+ * Layer 1: Send immediate IOC pre-alert webhook when a known malicious package
191
+ * appears in the changes stream, BEFORE tarball download. Safety net for packages
192
+ * that get unpublished before scanning completes.
193
+ * @param {string} name - Package name matching IOC database
194
+ * @param {string} [version] - Version if known (from CouchDB doc)
195
+ * @param {string} [ecosystem='npm'] - 'npm' | 'pypi'
194
196
  */
195
- async function sendCampaignPreAlert(name, campaign) {
197
+ async function sendIOCPreAlert(name, version, ecosystem = 'npm') {
196
198
  const url = getWebhookUrl();
197
199
  if (!url) return;
200
+ await sendWebhook(url, buildIOCPreAlertEmbed(name, version, ecosystem), { rawPayload: true });
201
+ }
198
202
 
199
- const npmLink = `https://www.npmjs.com/package/${encodeURIComponent(name)}`;
200
-
201
- const payload = {
203
+ /**
204
+ * Layer 1b: Build the campaign pre-alert embed (pure \u2014 no network). Exported for tests.
205
+ * @param {string} name - Package name that matched the campaign pattern
206
+ * @param {string} campaign - Short campaign label (e.g. 'did-NNNN')
207
+ * @param {string} [ecosystem='npm'] - 'npm' | 'pypi' (link target)
208
+ */
209
+ function buildCampaignPreAlertEmbed(name, campaign, ecosystem = 'npm') {
210
+ return {
202
211
  embeds: [{
203
212
  title: '\u26a0\ufe0f CAMPAIGN PRE-ALERT \u2014 Suspected Active Campaign',
204
213
  color: 0xe67e22,
205
214
  fields: [
206
- { name: 'Package', value: `[${name}](${npmLink})`, inline: true },
215
+ { name: 'Package', value: `[${ecosystem}/${name}](${registryLink(ecosystem, name)})`, inline: true },
207
216
  { name: 'Source', value: `Name pattern: ${campaign}`, inline: true },
208
217
  { name: 'Detection', value: 'Changes stream pre-scan', inline: true },
209
218
  { name: 'Status', value: 'Suspected campaign publication \u2014 not yet confirmed malicious. Full scan queued; treat as suspect until verdict lands.', inline: false }
@@ -214,8 +223,21 @@ async function sendCampaignPreAlert(name, campaign) {
214
223
  timestamp: new Date().toISOString()
215
224
  }]
216
225
  };
226
+ }
217
227
 
218
- await sendWebhook(url, payload, { rawPayload: true });
228
+ /**
229
+ * Layer 1b: Send a campaign pre-alert webhook when a package name matches an
230
+ * active-campaign pattern (e.g. `did-NNNN`). Fires BEFORE tarball download \u2014 IOC
231
+ * lists lag the campaign by hours to days, so name-pattern watch is the only
232
+ * real-time signal while the campaign is in flight.
233
+ * @param {string} name - Package name that matched the campaign pattern
234
+ * @param {string} campaign - Short campaign label (e.g. 'did-NNNN')
235
+ * @param {string} [ecosystem='npm'] - 'npm' | 'pypi'
236
+ */
237
+ async function sendCampaignPreAlert(name, campaign, ecosystem = 'npm') {
238
+ const url = getWebhookUrl();
239
+ if (!url) return;
240
+ await sendWebhook(url, buildCampaignPreAlertEmbed(name, campaign, ecosystem), { rawPayload: true });
219
241
  }
220
242
 
221
243
  /**
@@ -1372,7 +1394,10 @@ module.exports = {
1372
1394
  getWebhookThreshold,
1373
1395
  shouldSendWebhook,
1374
1396
  buildMonitorWebhookPayload,
1397
+ registryLink,
1398
+ buildIOCPreAlertEmbed,
1375
1399
  sendIOCPreAlert,
1400
+ buildCampaignPreAlertEmbed,
1376
1401
  sendCampaignPreAlert,
1377
1402
  matchVersionedIOC,
1378
1403
  computeRiskLevel,
@@ -764,4 +764,80 @@ function findPyPITyposquatMatch(name) {
764
764
  return null;
765
765
  }
766
766
 
767
- module.exports = { scanTyposquatting, levenshteinDistance, clearMetadataCache, findPyPITyposquatMatch, findTyposquatMatch };
767
+ // ============================================
768
+ // crates.io (Rust) TYPOSQUATTING — Phase 4
769
+ // ============================================
770
+ // Pre-alert enrichment ONLY: flags when an incoming crate name (from the GHSA rust
771
+ // malware feed) typosquats a popular crate. No crates ingestion / build.rs / scan-time
772
+ // Cargo parsing (non-goal). Mirrors the PyPI block above.
773
+
774
+ // Top crates.io packages by downloads (typosquat targets). Hardcoded snapshot.
775
+ const POPULAR_CRATES = [
776
+ 'serde', 'serde_json', 'serde_derive', 'serde_yaml', 'syn', 'quote', 'proc-macro2',
777
+ 'libc', 'rand', 'rand_core', 'log', 'cfg-if', 'bitflags', 'itertools', 'once_cell',
778
+ 'lazy_static', 'regex', 'regex-syntax', 'aho-corasick', 'base64', 'num-traits',
779
+ 'unicode-ident', 'tokio', 'tokio-util', 'futures', 'futures-util', 'bytes',
780
+ 'hashbrown', 'smallvec', 'parking_lot', 'anyhow', 'thiserror', 'indexmap', 'memchr',
781
+ 'chrono', 'semver', 'getrandom', 'clap', 'time', 'uuid', 'hyper', 'reqwest',
782
+ 'async-trait', 'tracing', 'tracing-core', 'tracing-subscriber', 'url',
783
+ 'percent-encoding', 'idna', 'socket2', 'httparse', 'tower', 'rayon', 'num_cpus',
784
+ 'either', 'toml', 'winapi', 'windows-sys', 'env_logger', 'generic-array', 'digest',
785
+ 'sha2', 'typenum', 'subtle', 'rustls', 'ring', 'openssl', 'flate2', 'miniz_oxide',
786
+ 'crc32fast', 'walkdir', 'tempfile', 'dirs', 'nix', 'backtrace', 'scopeguard',
787
+ 'pin-project', 'pin-project-lite', 'slab', 'lock_api', 'crossbeam-utils',
788
+ 'crossbeam-channel', 'crossbeam-epoch', 'ahash', 'fnv', 'mio', 'h2', 'http'
789
+ ];
790
+
791
+ // crates.io treats '-' and '_' as equivalent and is case-insensitive for name
792
+ // uniqueness; normalize the same way for typosquat comparison.
793
+ function normalizeCrate(name) {
794
+ return name.toLowerCase().replace(/[-_]+/g, '-');
795
+ }
796
+
797
+ const POPULAR_CRATES_NORMALIZED = POPULAR_CRATES.map(normalizeCrate);
798
+ const POPULAR_CRATES_SET = new Set(POPULAR_CRATES_NORMALIZED);
799
+
800
+ // Legitimate crates within edit-distance of a popular crate but not squats.
801
+ const CRATES_WHITELIST = new Set([
802
+ 'mime', // distance 1 from 'time' — both real & popular
803
+ 'rand-chacha', // rand ecosystem sibling (normalized)
804
+ 'serde-with', // serde ecosystem sibling
805
+ 'futures-core',
806
+ ]);
807
+
808
+ const MIN_CRATE_LENGTH = 4;
809
+
810
+ /**
811
+ * Find a crates.io typosquat match (Levenshtein over the popular-crate list).
812
+ * Pure + IOC-independent. Used by the GHSA rust pre-alert to enrich the embed.
813
+ *
814
+ * @param {string} name - crate name
815
+ * @returns {{original: string, type: string, distance: number}|null}
816
+ */
817
+ function findCratesTyposquatMatch(name) {
818
+ if (typeof name !== 'string' || !name) return null;
819
+ const normalized = normalizeCrate(name);
820
+
821
+ if (POPULAR_CRATES_SET.has(normalized)) return null; // it IS a popular crate
822
+ if (CRATES_WHITELIST.has(normalized)) return null;
823
+ if (normalized.length < MIN_CRATE_LENGTH) return null;
824
+
825
+ for (let i = 0; i < POPULAR_CRATES.length; i++) {
826
+ const popularNorm = POPULAR_CRATES_NORMALIZED[i];
827
+ const popular = POPULAR_CRATES[i];
828
+ if (normalized === popularNorm) continue;
829
+ if (popularNorm.length < MIN_CRATE_LENGTH) continue;
830
+ if (Math.abs(normalized.length - popularNorm.length) > 2) continue;
831
+
832
+ const distance = levenshteinDistance(normalized, popularNorm);
833
+ if (distance === 1) {
834
+ return { original: popular, type: detectTyposquatType(normalized, popularNorm), distance };
835
+ }
836
+ if (distance === 2 && popularNorm.length >= 5) {
837
+ return { original: popular, type: detectTyposquatType(normalized, popularNorm), distance };
838
+ }
839
+ }
840
+ return null;
841
+ }
842
+
843
+ module.exports = { scanTyposquatting, levenshteinDistance, clearMetadataCache, findPyPITyposquatMatch, findCratesTyposquatMatch, findTyposquatMatch };