tldts 7.1.2 → 7.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -0
- package/dist/cjs/index.js +128 -22
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/tsconfig.tsbuildinfo +1 -1
- package/dist/es6/tsconfig.bundle.tsbuildinfo +1 -1
- package/dist/index.cjs.min.js +1 -1
- package/dist/index.cjs.min.js.map +1 -1
- package/dist/index.esm.min.js +1 -1
- package/dist/index.esm.min.js.map +1 -1
- package/dist/index.umd.min.js +1 -1
- package/dist/index.umd.min.js.map +1 -1
- package/package.json +4 -4
package/README.md
CHANGED
|
@@ -93,6 +93,10 @@ your inputs.
|
|
|
93
93
|
validateHostname: boolean;
|
|
94
94
|
// Perform IP address detection (default: true).
|
|
95
95
|
detectIp: boolean;
|
|
96
|
+
// Detect IANA special-use domains (RFC 6761 et al.) and expose the result as
|
|
97
|
+
// `isSpecialUse` (default: false). Off by default so the common path does no
|
|
98
|
+
// extra work; the field stays `null` unless this is enabled.
|
|
99
|
+
detectSpecialUse: boolean;
|
|
96
100
|
// Assume that both URLs and hostnames can be given as input (default: true)
|
|
97
101
|
// If set to `false` we assume only URLs will be given as input, which
|
|
98
102
|
// speeds up processing.
|
|
@@ -181,6 +185,21 @@ tldts.parse('tldts@emailprovider.co.uk'); // email
|
|
|
181
185
|
| `isIcann` | `bool` | Does TLD come from ICANN part of the list |
|
|
182
186
|
| `isPrivate` | `bool` | Does TLD come from Private part of the list |
|
|
183
187
|
| `isIp` | `bool` | Is `hostname` an IP address? |
|
|
188
|
+
| `isSpecialUse` | `bool` | Is `hostname` an IANA special-use domain? |
|
|
189
|
+
|
|
190
|
+
## Special-use domains (RFC 6761 / IANA)
|
|
191
|
+
|
|
192
|
+
Set `{ detectSpecialUse: true }` to flag reserved special-use names such as `localhost`, `*.test`, `*.local`, `*.onion`, and `home.arpa` via the `isSpecialUse` result field. `isIcann`/`isPrivate` don't identify these: most aren't in the Public Suffix List, and the few that are (e.g. `onion`, `home.arpa`) appear there as ordinary ICANN suffixes. The field is `null` unless the option is enabled, so the default path does no extra work:
|
|
193
|
+
|
|
194
|
+
```js
|
|
195
|
+
parse('http://printer.local/', { detectSpecialUse: true });
|
|
196
|
+
// { ...
|
|
197
|
+
// isSpecialUse: true,
|
|
198
|
+
// publicSuffix: 'local',
|
|
199
|
+
// subdomain: '' }
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
The list tracks the IANA [Special-Use Domain Names](https://www.iana.org/assignments/special-use-domain-names/) registry.
|
|
184
203
|
|
|
185
204
|
## Single purpose methods
|
|
186
205
|
|
package/dist/cjs/index.js
CHANGED
|
@@ -291,29 +291,50 @@ function extractHostname(url, urlIsValidHostname) {
|
|
|
291
291
|
if (!allDigits) {
|
|
292
292
|
const special = getSpecialScheme(url, start, indexOfColon);
|
|
293
293
|
if (special === 0) {
|
|
294
|
-
// No "://" anywhere on the cold path
|
|
295
|
-
//
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
294
|
+
// No "://" anywhere on the cold path and not a special scheme.
|
|
295
|
+
// A second ':' before the host's end marks a bare, unbracketed
|
|
296
|
+
// IPv6 literal ("2a01:e35::1"): fall through and let the host
|
|
297
|
+
// loop + isIp classify it. Without one this is an opaque path
|
|
298
|
+
// with no host ("mailto:x", "foo:bar").
|
|
299
|
+
let isBareIpv6 = false;
|
|
300
|
+
for (let j = indexOfColon + 1; j < end; j += 1) {
|
|
301
|
+
const code = url.charCodeAt(j);
|
|
302
|
+
if (code === 47 ||
|
|
303
|
+
code === 92 ||
|
|
304
|
+
code === 63 ||
|
|
305
|
+
code === 35) {
|
|
306
|
+
break;
|
|
307
|
+
}
|
|
308
|
+
if (code === 58 /* ':' */) {
|
|
309
|
+
isBareIpv6 = true;
|
|
310
|
+
break;
|
|
311
|
+
}
|
|
308
312
|
}
|
|
309
|
-
if (
|
|
313
|
+
if (!isBareIpv6) {
|
|
310
314
|
return null;
|
|
311
315
|
}
|
|
312
316
|
}
|
|
313
317
|
else {
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
318
|
+
isSpecial = true;
|
|
319
|
+
start = indexOfColon + 1;
|
|
320
|
+
if (special === 2) {
|
|
321
|
+
// file (e.g. "file:\\host"): host only between "//" and next slash.
|
|
322
|
+
let slashes = 0;
|
|
323
|
+
while ((url.charCodeAt(start) === 47 ||
|
|
324
|
+
url.charCodeAt(start) === 92) &&
|
|
325
|
+
slashes < 2) {
|
|
326
|
+
start += 1;
|
|
327
|
+
slashes += 1;
|
|
328
|
+
}
|
|
329
|
+
if (slashes < 2) {
|
|
330
|
+
return null;
|
|
331
|
+
}
|
|
332
|
+
}
|
|
333
|
+
else {
|
|
334
|
+
while (url.charCodeAt(start) === 47 ||
|
|
335
|
+
url.charCodeAt(start) === 92) {
|
|
336
|
+
start += 1;
|
|
337
|
+
}
|
|
317
338
|
}
|
|
318
339
|
}
|
|
319
340
|
}
|
|
@@ -323,11 +344,14 @@ function extractHostname(url, urlIsValidHostname) {
|
|
|
323
344
|
}
|
|
324
345
|
// Find the host's end: first '/', '?' or '#' (and '\' for special URLs,
|
|
325
346
|
// which WHATWG treats like '/'). Track the last '@', ']' and ':' for
|
|
326
|
-
// userinfo, ipv6 and port
|
|
327
|
-
//
|
|
347
|
+
// userinfo, ipv6 and port, plus the first ':' of the host (reset at each
|
|
348
|
+
// '@') to tell a bare IPv6 (>= 2 colons) from a host:port (exactly one);
|
|
349
|
+
// flag uppercase and a stray tab/newline. The loop is split on `code < 64`
|
|
350
|
+
// so common host characters take fewer comparisons.
|
|
328
351
|
let indexOfIdentifier = -1;
|
|
329
352
|
let indexOfClosingBracket = -1;
|
|
330
353
|
let indexOfPort = -1;
|
|
354
|
+
let indexOfFirstColon = -1;
|
|
331
355
|
let hasControl = false;
|
|
332
356
|
for (let i = start; i < end; i += 1) {
|
|
333
357
|
const code = url.charCodeAt(i);
|
|
@@ -337,6 +361,9 @@ function extractHostname(url, urlIsValidHostname) {
|
|
|
337
361
|
break;
|
|
338
362
|
}
|
|
339
363
|
else if (code === 58 /* ':' */) {
|
|
364
|
+
if (indexOfFirstColon === -1) {
|
|
365
|
+
indexOfFirstColon = i;
|
|
366
|
+
}
|
|
340
367
|
indexOfPort = i;
|
|
341
368
|
}
|
|
342
369
|
else if (code === 9 || code === 10 || code === 13) {
|
|
@@ -349,6 +376,7 @@ function extractHostname(url, urlIsValidHostname) {
|
|
|
349
376
|
}
|
|
350
377
|
else if (code === 64 /* '@' */) {
|
|
351
378
|
indexOfIdentifier = i;
|
|
379
|
+
indexOfFirstColon = -1; // colons before '@' are userinfo, not the host
|
|
352
380
|
}
|
|
353
381
|
else if (code === 93 /* ']' */) {
|
|
354
382
|
indexOfClosingBracket = i;
|
|
@@ -374,7 +402,13 @@ function extractHostname(url, urlIsValidHostname) {
|
|
|
374
402
|
}
|
|
375
403
|
return null;
|
|
376
404
|
}
|
|
377
|
-
else if (indexOfPort !== -1 &&
|
|
405
|
+
else if (indexOfPort !== -1 &&
|
|
406
|
+
indexOfPort > start &&
|
|
407
|
+
indexOfPort < end &&
|
|
408
|
+
// A host:port has exactly one ':' in the host (so its first ':' is its
|
|
409
|
+
// last); a bare, unbracketed IPv6 literal ("2a01:e35::1") has >= 2, so
|
|
410
|
+
// its first ':' precedes the last. Only the former has a ':port' to trim.
|
|
411
|
+
indexOfFirstColon === indexOfPort) {
|
|
378
412
|
end = indexOfPort; // trim ':port'
|
|
379
413
|
}
|
|
380
414
|
// Empty authority ("http://", "file:///path", "//"); only reachable here via
|
|
@@ -463,6 +497,68 @@ function isIp(hostname) {
|
|
|
463
497
|
return isProbablyIpv6(hostname) || isProbablyIpv4(hostname);
|
|
464
498
|
}
|
|
465
499
|
|
|
500
|
+
/**
|
|
501
|
+
* Special-use domain names from the IANA "Special-Use Domain Names" registry:
|
|
502
|
+
* the authoritative list, created by RFC 6761 and maintained as new RFCs add to
|
|
503
|
+
* it: https://www.iana.org/assignments/special-use-domain-names/
|
|
504
|
+
* Snapshot: 2026-05-24. (RFC 6761 is not obsoleted; draft-hoffman-rfc6761bis
|
|
505
|
+
* proposes to retire its prose but keep this registry, so the registry is the
|
|
506
|
+
* source of truth; re-sync this list against it.)
|
|
507
|
+
*
|
|
508
|
+
* These names never correspond to a public registration, yet neither
|
|
509
|
+
* `isIcann` nor `isPrivate` marks one as special-use: most are absent from the
|
|
510
|
+
* Public Suffix List (so `a.test` looks like a registrable domain), and the
|
|
511
|
+
* few that are listed (`onion`, `home.arpa`) appear there as ordinary ICANN
|
|
512
|
+
* suffixes. `isSpecialUse` is the single signal that covers them all.
|
|
513
|
+
*
|
|
514
|
+
* Per the registry and RFC 6761 ("and any names falling within these domains"),
|
|
515
|
+
* the designation covers each listed name AND all of its sub-domains. DNS labels
|
|
516
|
+
* are case-insensitive (RFC 4343); `hostname` is expected to be already
|
|
517
|
+
* lower-cased and trailing-dot-stripped, as produced by `extractHostname`, the
|
|
518
|
+
* same normalization the Public-Suffix-List lookup relies on.
|
|
519
|
+
*
|
|
520
|
+
* Two groups of registry entries are intentionally excluded: the numeric
|
|
521
|
+
* reverse-DNS delegation zones (`10.in-addr.arpa`, the `*.ip6.arpa` ranges, …),
|
|
522
|
+
* which are reverse-DNS PTR zones rather than hostnames and whose parents
|
|
523
|
+
* (`in-addr.arpa`/`ip6.arpa`) are already in the Public Suffix List; and the
|
|
524
|
+
* deprecated `eap-noob.arpa` entry.
|
|
525
|
+
*/
|
|
526
|
+
const SPECIAL_USE_DOMAINS = [
|
|
527
|
+
'test', // RFC 6761
|
|
528
|
+
'localhost', // RFC 6761
|
|
529
|
+
'invalid', // RFC 6761
|
|
530
|
+
'example', // RFC 6761
|
|
531
|
+
'example.com', // RFC 6761
|
|
532
|
+
'example.net', // RFC 6761
|
|
533
|
+
'example.org', // RFC 6761
|
|
534
|
+
'local', // RFC 6762 (mDNS)
|
|
535
|
+
'onion', // RFC 7686 (Tor)
|
|
536
|
+
'alt', // RFC 9476
|
|
537
|
+
'home.arpa', // RFC 8375
|
|
538
|
+
'ipv4only.arpa', // RFC 8880
|
|
539
|
+
'resolver.arpa', // RFC 9462
|
|
540
|
+
'service.arpa', // RFC 9665
|
|
541
|
+
'6tisch.arpa', // RFC 9031
|
|
542
|
+
'eap.arpa', // RFC 9965
|
|
543
|
+
];
|
|
544
|
+
/**
|
|
545
|
+
* Return `true` if `hostname` is, or is a sub-domain of, a special-use domain
|
|
546
|
+
* (see the registry note above). Expects an already-normalized `hostname`.
|
|
547
|
+
*/
|
|
548
|
+
function isSpecialUse(hostname) {
|
|
549
|
+
for (const name of SPECIAL_USE_DOMAINS) {
|
|
550
|
+
// Match on a label boundary: `hostname` is either exactly `name` or ends
|
|
551
|
+
// with `.name` (so `latest` is not matched by `test`, nor `myexample.com`
|
|
552
|
+
// by `example.com`).
|
|
553
|
+
if (hostname.endsWith(name) &&
|
|
554
|
+
(hostname.length === name.length ||
|
|
555
|
+
hostname.charCodeAt(hostname.length - name.length - 1) === 46) /* '.' */) {
|
|
556
|
+
return true;
|
|
557
|
+
}
|
|
558
|
+
}
|
|
559
|
+
return false;
|
|
560
|
+
}
|
|
561
|
+
|
|
466
562
|
/**
|
|
467
563
|
* Implements fast shallow verification of hostnames. This does not perform a
|
|
468
564
|
* strict check on the content of labels (classes of Unicode characters, etc.)
|
|
@@ -529,11 +625,12 @@ function isValidHostname (hostname) {
|
|
|
529
625
|
lastCharCode !== 45);
|
|
530
626
|
}
|
|
531
627
|
|
|
532
|
-
function setDefaultsImpl({ allowIcannDomains = true, allowPrivateDomains = false, detectIp = true, extractHostname = true, mixedInputs = true, validHosts = null, validateHostname = true, }) {
|
|
628
|
+
function setDefaultsImpl({ allowIcannDomains = true, allowPrivateDomains = false, detectIp = true, detectSpecialUse = false, extractHostname = true, mixedInputs = true, validHosts = null, validateHostname = true, }) {
|
|
533
629
|
return {
|
|
534
630
|
allowIcannDomains,
|
|
535
631
|
allowPrivateDomains,
|
|
536
632
|
detectIp,
|
|
633
|
+
detectSpecialUse,
|
|
537
634
|
extractHostname,
|
|
538
635
|
mixedInputs,
|
|
539
636
|
validHosts,
|
|
@@ -572,6 +669,7 @@ function getEmptyResult() {
|
|
|
572
669
|
isIcann: null,
|
|
573
670
|
isIp: null,
|
|
574
671
|
isPrivate: null,
|
|
672
|
+
isSpecialUse: null,
|
|
575
673
|
publicSuffix: null,
|
|
576
674
|
subdomain: null,
|
|
577
675
|
};
|
|
@@ -583,6 +681,7 @@ function resetResult(result) {
|
|
|
583
681
|
result.isIcann = null;
|
|
584
682
|
result.isIp = null;
|
|
585
683
|
result.isPrivate = null;
|
|
684
|
+
result.isSpecialUse = null;
|
|
586
685
|
result.publicSuffix = null;
|
|
587
686
|
result.subdomain = null;
|
|
588
687
|
}
|
|
@@ -642,6 +741,13 @@ function parseImpl(url, step, suffixLookup, partialOptions, result) {
|
|
|
642
741
|
if (step === 0 /* FLAG.HOSTNAME */ || result.hostname === null) {
|
|
643
742
|
return result;
|
|
644
743
|
}
|
|
744
|
+
// Flag special-use domains, only when opted in (`detectSpecialUse`) and only
|
|
745
|
+
// for the full `parse()` result (FLAG.ALL). Computed here, before the
|
|
746
|
+
// public-suffix/domain early-returns below, so single-label names like
|
|
747
|
+
// `localhost` (which have no registrable domain) are still flagged.
|
|
748
|
+
if (step === 5 /* FLAG.ALL */ && options.detectSpecialUse) {
|
|
749
|
+
result.isSpecialUse = isSpecialUse(result.hostname);
|
|
750
|
+
}
|
|
645
751
|
// Extract public suffix
|
|
646
752
|
suffixLookup(result.hostname, options, result);
|
|
647
753
|
if (step === 2 /* FLAG.PUBLIC_SUFFIX */ || result.publicSuffix === null) {
|