tldts 7.1.2 → 7.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -93,6 +93,10 @@ your inputs.
93
93
  validateHostname: boolean;
94
94
  // Perform IP address detection (default: true).
95
95
  detectIp: boolean;
96
+ // Detect IANA special-use domains (RFC 6761 et al.) and expose the result as
97
+ // `isSpecialUse` (default: false). Off by default so the common path does no
98
+ // extra work; the field stays `null` unless this is enabled.
99
+ detectSpecialUse: boolean;
96
100
  // Assume that both URLs and hostnames can be given as input (default: true)
97
101
  // If set to `false` we assume only URLs will be given as input, which
98
102
  // speeds up processing.
@@ -181,6 +185,21 @@ tldts.parse('tldts@emailprovider.co.uk'); // email
181
185
  | `isIcann` | `bool` | Does TLD come from ICANN part of the list |
182
186
  | `isPrivate` | `bool` | Does TLD come from Private part of the list |
183
187
  | `isIp` | `bool` | Is `hostname` an IP address? |
188
+ | `isSpecialUse` | `bool` | Is `hostname` an IANA special-use domain? (`null` unless `detectSpecialUse` is enabled) |
189
+
190
+ ## Special-use domains (RFC 6761 / IANA)
191
+
192
+ Set `{ detectSpecialUse: true }` to flag reserved special-use names such as `localhost`, `*.test`, `*.local`, `*.onion`, and `home.arpa` via the `isSpecialUse` result field. `isIcann`/`isPrivate` don't identify these: most aren't in the Public Suffix List, and the few that are (e.g. `onion`, `home.arpa`) appear there as ordinary ICANN suffixes. The field is `null` unless the option is enabled, so the default path does no extra work:
193
+
194
+ ```js
195
+ parse('http://printer.local/', { detectSpecialUse: true });
196
+ // { ...
197
+ // isSpecialUse: true,
198
+ // publicSuffix: 'local',
199
+ // subdomain: '' }
200
+ ```
201
+
202
+ The list tracks the IANA [Special-Use Domain Names](https://www.iana.org/assignments/special-use-domain-names/) registry.
184
203
 
185
204
  ## Single purpose methods
186
205
 
package/dist/cjs/index.js CHANGED
@@ -291,29 +291,50 @@ function extractHostname(url, urlIsValidHostname) {
291
291
  if (!allDigits) {
292
292
  const special = getSpecialScheme(url, start, indexOfColon);
293
293
  if (special === 0) {
294
- // No "://" anywhere on the cold path, so a non-special scheme has
295
- // no authority: opaque path, no host ("mailto:x", "foo:bar").
296
- return null;
297
- }
298
- isSpecial = true;
299
- start = indexOfColon + 1;
300
- if (special === 2) {
301
- // file (e.g. "file:\\host"): host only between "//" and next slash.
302
- let slashes = 0;
303
- while ((url.charCodeAt(start) === 47 ||
304
- url.charCodeAt(start) === 92) &&
305
- slashes < 2) {
306
- start += 1;
307
- slashes += 1;
294
+ // No "://" anywhere on the cold path and not a special scheme.
295
+ // A second ':' before the host's end marks a bare, unbracketed
296
+ // IPv6 literal ("2a01:e35::1"): fall through and let the host
297
+ // loop + isIp classify it. Without one this is an opaque path
298
+ // with no host ("mailto:x", "foo:bar").
299
+ let isBareIpv6 = false;
300
+ for (let j = indexOfColon + 1; j < end; j += 1) {
301
+ const code = url.charCodeAt(j);
302
+ if (code === 47 ||
303
+ code === 92 ||
304
+ code === 63 ||
305
+ code === 35) {
306
+ break;
307
+ }
308
+ if (code === 58 /* ':' */) {
309
+ isBareIpv6 = true;
310
+ break;
311
+ }
308
312
  }
309
- if (slashes < 2) {
313
+ if (!isBareIpv6) {
310
314
  return null;
311
315
  }
312
316
  }
313
317
  else {
314
- while (url.charCodeAt(start) === 47 ||
315
- url.charCodeAt(start) === 92) {
316
- start += 1;
318
+ isSpecial = true;
319
+ start = indexOfColon + 1;
320
+ if (special === 2) {
321
+ // file (e.g. "file:\\host"): host only between "//" and next slash.
322
+ let slashes = 0;
323
+ while ((url.charCodeAt(start) === 47 ||
324
+ url.charCodeAt(start) === 92) &&
325
+ slashes < 2) {
326
+ start += 1;
327
+ slashes += 1;
328
+ }
329
+ if (slashes < 2) {
330
+ return null;
331
+ }
332
+ }
333
+ else {
334
+ while (url.charCodeAt(start) === 47 ||
335
+ url.charCodeAt(start) === 92) {
336
+ start += 1;
337
+ }
317
338
  }
318
339
  }
319
340
  }
@@ -323,11 +344,14 @@ function extractHostname(url, urlIsValidHostname) {
323
344
  }
324
345
  // Find the host's end: first '/', '?' or '#' (and '\' for special URLs,
325
346
  // which WHATWG treats like '/'). Track the last '@', ']' and ':' for
326
- // userinfo, ipv6 and port; flag uppercase and a stray tab/newline. The loop
327
- // is split on `code < 64` so common host characters take fewer comparisons.
347
+ // userinfo, ipv6 and port, plus the first ':' of the host (reset at each
348
+ // '@') to tell a bare IPv6 (>= 2 colons) from a host:port (exactly one);
349
+ // flag uppercase and a stray tab/newline. The loop is split on `code < 64`
350
+ // so common host characters take fewer comparisons.
328
351
  let indexOfIdentifier = -1;
329
352
  let indexOfClosingBracket = -1;
330
353
  let indexOfPort = -1;
354
+ let indexOfFirstColon = -1;
331
355
  let hasControl = false;
332
356
  for (let i = start; i < end; i += 1) {
333
357
  const code = url.charCodeAt(i);
@@ -337,6 +361,9 @@ function extractHostname(url, urlIsValidHostname) {
337
361
  break;
338
362
  }
339
363
  else if (code === 58 /* ':' */) {
364
+ if (indexOfFirstColon === -1) {
365
+ indexOfFirstColon = i;
366
+ }
340
367
  indexOfPort = i;
341
368
  }
342
369
  else if (code === 9 || code === 10 || code === 13) {
@@ -349,6 +376,7 @@ function extractHostname(url, urlIsValidHostname) {
349
376
  }
350
377
  else if (code === 64 /* '@' */) {
351
378
  indexOfIdentifier = i;
379
+ indexOfFirstColon = -1; // colons before '@' are userinfo, not the host
352
380
  }
353
381
  else if (code === 93 /* ']' */) {
354
382
  indexOfClosingBracket = i;
@@ -374,7 +402,13 @@ function extractHostname(url, urlIsValidHostname) {
374
402
  }
375
403
  return null;
376
404
  }
377
- else if (indexOfPort !== -1 && indexOfPort > start && indexOfPort < end) {
405
+ else if (indexOfPort !== -1 &&
406
+ indexOfPort > start &&
407
+ indexOfPort < end &&
408
+ // A host:port has exactly one ':' in the host (so its first ':' is its
409
+ // last); a bare, unbracketed IPv6 literal ("2a01:e35::1") has >= 2, so
410
+ // its first ':' precedes the last. Only the former has a ':port' to trim.
411
+ indexOfFirstColon === indexOfPort) {
378
412
  end = indexOfPort; // trim ':port'
379
413
  }
380
414
  // Empty authority ("http://", "file:///path", "//"); only reachable here via
@@ -463,6 +497,68 @@ function isIp(hostname) {
463
497
  return isProbablyIpv6(hostname) || isProbablyIpv4(hostname);
464
498
  }
465
499
 
500
+ /**
501
+ * Special-use domain names from the IANA "Special-Use Domain Names" registry:
502
+ * the authoritative list, created by RFC 6761 and maintained as new RFCs add to
503
+ * it: https://www.iana.org/assignments/special-use-domain-names/
504
+ * Snapshot: 2026-05-24. (RFC 6761 is not obsoleted; draft-hoffman-rfc6761bis
505
+ * proposes to retire its prose but keep this registry, so the registry is the
506
+ * source of truth; re-sync this list against it.)
507
+ *
508
+ * These names never correspond to a public registration, yet neither
509
+ * `isIcann` nor `isPrivate` marks one as special-use: most are absent from the
510
+ * Public Suffix List (so `a.test` looks like a registrable domain), and the
511
+ * few that are listed (`onion`, `home.arpa`) appear there as ordinary ICANN
512
+ * suffixes. `isSpecialUse` is the single signal that covers them all.
513
+ *
514
+ * Per the registry and RFC 6761 ("and any names falling within these domains"),
515
+ * the designation covers each listed name AND all of its sub-domains. DNS labels
516
+ * are case-insensitive (RFC 4343); `hostname` is expected to be already
517
+ * lower-cased and trailing-dot-stripped, as produced by `extractHostname`, the
518
+ * same normalization the Public-Suffix-List lookup relies on.
519
+ *
520
+ * Two groups of registry entries are intentionally excluded: the numeric
521
+ * reverse-DNS delegation zones (`10.in-addr.arpa`, the `*.ip6.arpa` ranges, …),
522
+ * which are reverse-DNS PTR zones rather than hostnames and whose parents
523
+ * (`in-addr.arpa`/`ip6.arpa`) are already in the Public Suffix List; and the
524
+ * deprecated `eap-noob.arpa` entry. When adding an entry, write it lower-case with no leading or trailing dot: `isSpecialUse` compares entries to the hostname with a case-sensitive `endsWith` plus a check for a `.` just before the match.
525
+ */
526
+ const SPECIAL_USE_DOMAINS = [
527
+ 'test', // RFC 6761
528
+ 'localhost', // RFC 6761
529
+ 'invalid', // RFC 6761
530
+ 'example', // RFC 6761
531
+ 'example.com', // RFC 6761
532
+ 'example.net', // RFC 6761
533
+ 'example.org', // RFC 6761
534
+ 'local', // RFC 6762 (mDNS)
535
+ 'onion', // RFC 7686 (Tor)
536
+ 'alt', // RFC 9476
537
+ 'home.arpa', // RFC 8375
538
+ 'ipv4only.arpa', // RFC 8880
539
+ 'resolver.arpa', // RFC 9462
540
+ 'service.arpa', // RFC 9665
541
+ '6tisch.arpa', // RFC 9031
542
+ 'eap.arpa', // RFC 9965
543
+ ];
544
+ /**
545
+ * Return `true` if `hostname` is, or is a sub-domain of, a special-use domain
546
+ * listed in `SPECIAL_USE_DOMAINS` (see the registry note above), else `false`. Expects an already-normalized `hostname` (lower-cased, no trailing dot): the comparison is exact, so `LOCALHOST` or `localhost.` are not matched.
547
+ */
548
+ function isSpecialUse(hostname) {
549
+ for (const name of SPECIAL_USE_DOMAINS) {
550
+ // Match on a label boundary: `hostname` is either exactly `name` or ends
551
+ // with `.name` (so `latest` is not matched by `test`, nor `myexample.com`
552
+ // by `example.com`).
553
+ if (hostname.endsWith(name) &&
554
+ (hostname.length === name.length ||
555
+ hostname.charCodeAt(hostname.length - name.length - 1) === 46 /* '.' */)) {
556
+ return true;
557
+ }
558
+ }
559
+ return false;
560
+ }
561
+
466
562
  /**
467
563
  * Implements fast shallow verification of hostnames. This does not perform a
468
564
  * strict check on the content of labels (classes of Unicode characters, etc.)
@@ -529,11 +625,12 @@ function isValidHostname (hostname) {
529
625
  lastCharCode !== 45);
530
626
  }
531
627
 
532
- function setDefaultsImpl({ allowIcannDomains = true, allowPrivateDomains = false, detectIp = true, extractHostname = true, mixedInputs = true, validHosts = null, validateHostname = true, }) {
628
+ function setDefaultsImpl({ allowIcannDomains = true, allowPrivateDomains = false, detectIp = true, detectSpecialUse = false, extractHostname = true, mixedInputs = true, validHosts = null, validateHostname = true, }) {
533
629
  return {
534
630
  allowIcannDomains,
535
631
  allowPrivateDomains,
536
632
  detectIp,
633
+ detectSpecialUse,
537
634
  extractHostname,
538
635
  mixedInputs,
539
636
  validHosts,
@@ -572,6 +669,7 @@ function getEmptyResult() {
572
669
  isIcann: null,
573
670
  isIp: null,
574
671
  isPrivate: null,
672
+ isSpecialUse: null,
575
673
  publicSuffix: null,
576
674
  subdomain: null,
577
675
  };
@@ -583,6 +681,7 @@ function resetResult(result) {
583
681
  result.isIcann = null;
584
682
  result.isIp = null;
585
683
  result.isPrivate = null;
684
+ result.isSpecialUse = null;
586
685
  result.publicSuffix = null;
587
686
  result.subdomain = null;
588
687
  }
@@ -642,6 +741,13 @@ function parseImpl(url, step, suffixLookup, partialOptions, result) {
642
741
  if (step === 0 /* FLAG.HOSTNAME */ || result.hostname === null) {
643
742
  return result;
644
743
  }
744
+ // Flag special-use domains, only when opted in (`detectSpecialUse`) and only
745
+ // for the full `parse()` result (FLAG.ALL). Computed here, before the
746
+ // public-suffix/domain early-returns below, so single-label names like
747
+ // `localhost` (which have no registrable domain) are still flagged.
748
+ if (step === 5 /* FLAG.ALL */ && options.detectSpecialUse) {
749
+ result.isSpecialUse = isSpecialUse(result.hostname);
750
+ }
645
751
  // Extract public suffix
646
752
  suffixLookup(result.hostname, options, result);
647
753
  if (step === 2 /* FLAG.PUBLIC_SUFFIX */ || result.publicSuffix === null) {