tldts-icann 7.2.0 → 7.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/index.js +165 -27
- package/dist/cjs/index.js.map +1 -1
- package/dist/index.cjs.min.js +1 -1
- package/dist/index.cjs.min.js.map +1 -1
- package/dist/index.esm.min.js +1 -1
- package/dist/index.esm.min.js.map +1 -1
- package/dist/index.umd.min.js +1 -1
- package/dist/index.umd.min.js.map +1 -1
- package/package.json +4 -4
package/dist/cjs/index.js
CHANGED
|
@@ -98,6 +98,34 @@ function getDomainWithoutSuffix$1(domain, suffix) {
|
|
|
98
98
|
* re-parse) on the rare input that actually contains one.
|
|
99
99
|
*/
|
|
100
100
|
const CONTROL_CHARS = /[\t\n\r]/g;
|
|
101
|
+
// Set by `extractHostname` (a module-scope flag, read synchronously by
|
|
102
|
+
// `parseImpl` right after the call — same pattern as the reused RESULT object).
|
|
103
|
+
// `true` ONLY when extraction validated the returned host inline (a confirmed-
|
|
104
|
+
// valid, "simple" authority) so `parseImpl` can skip the separate
|
|
105
|
+
// `isValidHostname` pass. `false` in every other case (validation disabled, a
|
|
106
|
+
// complex authority — userinfo/port/brackets/trailing-dot/control — an invalid
|
|
107
|
+
// host, or a non-main return path); `parseImpl` then validates as usual. The
|
|
108
|
+
// fast path can only ever SKIP a redundant scan for hosts already known valid,
|
|
109
|
+
// never accept an invalid one.
|
|
110
|
+
let extractedHostnameValidated = false;
|
|
111
|
+
/**
|
|
112
|
+
* True if char `code` is a valid hostname character. This is the per-char half
|
|
113
|
+
* of `is-valid.ts`'s `isValidAscii` (a-z, 0-9, > U+007F) PLUS three additions:
|
|
114
|
+
* A-Z (the host is lowercased before validation, so uppercase ≡ a valid
|
|
115
|
+
* lowercase letter) and '-' / '_' (valid inside a label). KEEP IN SYNC with
|
|
116
|
+
* `is-valid.ts`: these rules are deliberately duplicated to validate during
|
|
117
|
+
* extraction, so any change to the accepted character set there must be
|
|
118
|
+
* mirrored here (and vice-versa).
|
|
119
|
+
*/
|
|
120
|
+
function isValidHostnameChar(code) {
|
|
121
|
+
return ((code >= 97 && code <= 122) || // a-z
|
|
122
|
+
(code >= 48 && code <= 57) || // 0-9
|
|
123
|
+
code > 127 || // non-ASCII (accepted, not punycode-checked)
|
|
124
|
+
(code >= 65 && code <= 90) || // A-Z (becomes valid once lowercased)
|
|
125
|
+
code === 45 || // '-'
|
|
126
|
+
code === 95 // '_'
|
|
127
|
+
);
|
|
128
|
+
}
|
|
101
129
|
/**
|
|
102
130
|
* Classify scheme `url.slice(schemeStart, colonIndex)` as a WHATWG special
|
|
103
131
|
* scheme without allocating a substring (case-insensitive via `| 32`).
|
|
@@ -154,12 +182,16 @@ function getSpecialScheme(url, schemeStart, colonIndex) {
|
|
|
154
182
|
* @param urlIsValidHostname - when true, `url` is already a valid hostname and is
|
|
155
183
|
* returned by the same reference (factory.ts skips re-validation on that
|
|
156
184
|
* identity), keeping the common path allocation-free.
|
|
185
|
+
* @param validate - when true, validate the host inline during the authority
|
|
186
|
+
* scan and publish the verdict via `extractedHostnameValidated` so `parseImpl`
|
|
187
|
+
* can skip the redundant `isValidHostname` pass for simple authorities.
|
|
157
188
|
*/
|
|
158
|
-
function extractHostname(url, urlIsValidHostname) {
|
|
189
|
+
function extractHostname(url, urlIsValidHostname, validate = false) {
|
|
159
190
|
let start = 0;
|
|
160
191
|
let end = url.length;
|
|
161
192
|
let hasUpper = false;
|
|
162
193
|
let isSpecial = false;
|
|
194
|
+
extractedHostnameValidated = false;
|
|
163
195
|
if (!urlIsValidHostname) {
|
|
164
196
|
// Data URLs never carry a host (and may be huge — short-circuit them).
|
|
165
197
|
if (url.startsWith('data:')) {
|
|
@@ -221,7 +253,7 @@ function extractHostname(url, urlIsValidHostname) {
|
|
|
221
253
|
)) {
|
|
222
254
|
const raw = url.charCodeAt(i);
|
|
223
255
|
if (raw === 9 || raw === 10 || raw === 13) {
|
|
224
|
-
return extractHostname(url.replace(CONTROL_CHARS, ''), urlIsValidHostname);
|
|
256
|
+
return extractHostname(url.replace(CONTROL_CHARS, ''), urlIsValidHostname, validate);
|
|
225
257
|
}
|
|
226
258
|
return null;
|
|
227
259
|
}
|
|
@@ -246,7 +278,7 @@ function extractHostname(url, urlIsValidHostname) {
|
|
|
246
278
|
for (let i = start; i < end; i += 1) {
|
|
247
279
|
const code = url.charCodeAt(i);
|
|
248
280
|
if (code === 9 || code === 10 || code === 13) {
|
|
249
|
-
return extractHostname(url.replace(CONTROL_CHARS, ''), urlIsValidHostname);
|
|
281
|
+
return extractHostname(url.replace(CONTROL_CHARS, ''), urlIsValidHostname, validate);
|
|
250
282
|
}
|
|
251
283
|
if (code === 58 /* ':' */) {
|
|
252
284
|
indexOfColon = i;
|
|
@@ -291,29 +323,50 @@ function extractHostname(url, urlIsValidHostname) {
|
|
|
291
323
|
if (!allDigits) {
|
|
292
324
|
const special = getSpecialScheme(url, start, indexOfColon);
|
|
293
325
|
if (special === 0) {
|
|
294
|
-
// No "://" anywhere on the cold path
|
|
295
|
-
//
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
326
|
+
// No "://" anywhere on the cold path and not a special scheme.
|
|
327
|
+
// A second ':' before the host's end marks a bare, unbracketed
|
|
328
|
+
// IPv6 literal ("2a01:e35::1"): fall through and let the host
|
|
329
|
+
// loop + isIp classify it. Without one this is an opaque path
|
|
330
|
+
// with no host ("mailto:x", "foo:bar").
|
|
331
|
+
let isBareIpv6 = false;
|
|
332
|
+
for (let j = indexOfColon + 1; j < end; j += 1) {
|
|
333
|
+
const code = url.charCodeAt(j);
|
|
334
|
+
if (code === 47 ||
|
|
335
|
+
code === 92 ||
|
|
336
|
+
code === 63 ||
|
|
337
|
+
code === 35) {
|
|
338
|
+
break;
|
|
339
|
+
}
|
|
340
|
+
if (code === 58 /* ':' */) {
|
|
341
|
+
isBareIpv6 = true;
|
|
342
|
+
break;
|
|
343
|
+
}
|
|
308
344
|
}
|
|
309
|
-
if (
|
|
345
|
+
if (!isBareIpv6) {
|
|
310
346
|
return null;
|
|
311
347
|
}
|
|
312
348
|
}
|
|
313
349
|
else {
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
350
|
+
isSpecial = true;
|
|
351
|
+
start = indexOfColon + 1;
|
|
352
|
+
if (special === 2) {
|
|
353
|
+
// file (e.g. "file:\\host"): host only between "//" and next slash.
|
|
354
|
+
let slashes = 0;
|
|
355
|
+
while ((url.charCodeAt(start) === 47 ||
|
|
356
|
+
url.charCodeAt(start) === 92) &&
|
|
357
|
+
slashes < 2) {
|
|
358
|
+
start += 1;
|
|
359
|
+
slashes += 1;
|
|
360
|
+
}
|
|
361
|
+
if (slashes < 2) {
|
|
362
|
+
return null;
|
|
363
|
+
}
|
|
364
|
+
}
|
|
365
|
+
else {
|
|
366
|
+
while (url.charCodeAt(start) === 47 ||
|
|
367
|
+
url.charCodeAt(start) === 92) {
|
|
368
|
+
start += 1;
|
|
369
|
+
}
|
|
317
370
|
}
|
|
318
371
|
}
|
|
319
372
|
}
|
|
@@ -323,12 +376,35 @@ function extractHostname(url, urlIsValidHostname) {
|
|
|
323
376
|
}
|
|
324
377
|
// Find the host's end: first '/', '?' or '#' (and '\' for special URLs,
|
|
325
378
|
// which WHATWG treats like '/'). Track the last '@', ']' and ':' for
|
|
326
|
-
// userinfo, ipv6 and port
|
|
327
|
-
//
|
|
379
|
+
// userinfo, ipv6 and port, plus the first ':' of the host (reset at each
|
|
380
|
+
// '@') to tell a bare IPv6 (>= 2 colons) from a host:port (exactly one);
|
|
381
|
+
// flag uppercase and a stray tab/newline. The loop is split on `code < 64`
|
|
382
|
+
// so common host characters take fewer comparisons.
|
|
383
|
+
//
|
|
384
|
+
// When `validate`, also accumulate `is-valid.ts`'s checks over the scanned
|
|
385
|
+
// run so a simple authority's host can be validated in this single pass.
|
|
386
|
+
// `vValid` only stays meaningful for a "simple" authority (no userinfo, port,
|
|
387
|
+
// brackets, control or trailing dot); those cases clear it / are rejected by
|
|
388
|
+
// the guard below, falling back to `isValidHostname`.
|
|
328
389
|
let indexOfIdentifier = -1;
|
|
329
390
|
let indexOfClosingBracket = -1;
|
|
330
391
|
let indexOfPort = -1;
|
|
392
|
+
let indexOfFirstColon = -1;
|
|
331
393
|
let hasControl = false;
|
|
394
|
+
let vValid = validate; // seeded true when validating; cleared on the first invalid char
|
|
395
|
+
let vLastDot = start - 1; // mirrors is-valid.ts `lastDotIndex = -1` at host start
|
|
396
|
+
let vLastCode = -1;
|
|
397
|
+
if (validate && start < end) {
|
|
398
|
+
// First-char rule: must be a valid host char, '.', or '_' (NOT '-').
|
|
399
|
+
const c0 = url.charCodeAt(start);
|
|
400
|
+
if (!(
|
|
401
|
+
/*@__INLINE__*/ (isValidHostnameChar(c0) ||
|
|
402
|
+
c0 === 46 /* '.' */ ||
|
|
403
|
+
c0 === 95 /* '_' */)) ||
|
|
404
|
+
c0 === 45 /* '-' (isValidHostnameChar allows it mid-label, not first) */) {
|
|
405
|
+
vValid = false;
|
|
406
|
+
}
|
|
407
|
+
}
|
|
332
408
|
for (let i = start; i < end; i += 1) {
|
|
333
409
|
const code = url.charCodeAt(i);
|
|
334
410
|
if (code < 64) {
|
|
@@ -337,11 +413,29 @@ function extractHostname(url, urlIsValidHostname) {
|
|
|
337
413
|
break;
|
|
338
414
|
}
|
|
339
415
|
else if (code === 58 /* ':' */) {
|
|
416
|
+
if (indexOfFirstColon === -1) {
|
|
417
|
+
indexOfFirstColon = i;
|
|
418
|
+
}
|
|
340
419
|
indexOfPort = i;
|
|
341
420
|
}
|
|
342
421
|
else if (code === 9 || code === 10 || code === 13) {
|
|
343
422
|
hasControl = true;
|
|
344
423
|
}
|
|
424
|
+
else if (validate) {
|
|
425
|
+
if (code === 46 /* '.' */) {
|
|
426
|
+
if (i - vLastDot > 64 || vLastCode === 46 || vLastCode === 45) {
|
|
427
|
+
vValid = false;
|
|
428
|
+
}
|
|
429
|
+
vLastDot = i;
|
|
430
|
+
}
|
|
431
|
+
else if (code < 48 || code > 57) {
|
|
432
|
+
// < 64 and not a delimiter/dot/digit => only '-' (45) is a valid
|
|
433
|
+
// host char here; everything else (space, %, !, etc.) is invalid.
|
|
434
|
+
if (code !== 45) {
|
|
435
|
+
vValid = false;
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
}
|
|
345
439
|
}
|
|
346
440
|
else if (isSpecial && code === 92 /* '\' */) {
|
|
347
441
|
end = i;
|
|
@@ -349,6 +443,7 @@ function extractHostname(url, urlIsValidHostname) {
|
|
|
349
443
|
}
|
|
350
444
|
else if (code === 64 /* '@' */) {
|
|
351
445
|
indexOfIdentifier = i;
|
|
446
|
+
indexOfFirstColon = -1; // colons before '@' are userinfo, not the host
|
|
352
447
|
}
|
|
353
448
|
else if (code === 93 /* ']' */) {
|
|
354
449
|
indexOfClosingBracket = i;
|
|
@@ -356,10 +451,17 @@ function extractHostname(url, urlIsValidHostname) {
|
|
|
356
451
|
else if (code >= 65 && code <= 90) {
|
|
357
452
|
hasUpper = true;
|
|
358
453
|
}
|
|
454
|
+
else if (validate && !( /*@__INLINE__*/isValidHostnameChar(code))) {
|
|
455
|
+
// >= 64, not '@'/']'/upper: valid only if a-z, '_', or non-ASCII.
|
|
456
|
+
vValid = false;
|
|
457
|
+
}
|
|
458
|
+
if (validate) {
|
|
459
|
+
vLastCode = code;
|
|
460
|
+
}
|
|
359
461
|
}
|
|
360
462
|
// A tab/newline inside the authority: strip everything and re-parse (rare).
|
|
361
463
|
if (hasControl) {
|
|
362
|
-
return extractHostname(url.replace(CONTROL_CHARS, ''), urlIsValidHostname);
|
|
464
|
+
return extractHostname(url.replace(CONTROL_CHARS, ''), urlIsValidHostname, validate);
|
|
363
465
|
}
|
|
364
466
|
// Skip userinfo. '>= start' so an empty userinfo ("http://@host") works too.
|
|
365
467
|
if (indexOfIdentifier !== -1 &&
|
|
@@ -374,7 +476,13 @@ function extractHostname(url, urlIsValidHostname) {
|
|
|
374
476
|
}
|
|
375
477
|
return null;
|
|
376
478
|
}
|
|
377
|
-
else if (indexOfPort !== -1 &&
|
|
479
|
+
else if (indexOfPort !== -1 &&
|
|
480
|
+
indexOfPort > start &&
|
|
481
|
+
indexOfPort < end &&
|
|
482
|
+
// A host:port has exactly one ':' in the host (so its first ':' is its
|
|
483
|
+
// last); a bare, unbracketed IPv6 literal ("2a01:e35::1") has >= 2, so
|
|
484
|
+
// its first ':' precedes the last. Only the former has a ':port' to trim.
|
|
485
|
+
indexOfFirstColon === indexOfPort) {
|
|
378
486
|
end = indexOfPort; // trim ':port'
|
|
379
487
|
}
|
|
380
488
|
// Empty authority ("http://", "file:///path", "//"); only reachable here via
|
|
@@ -382,6 +490,28 @@ function extractHostname(url, urlIsValidHostname) {
|
|
|
382
490
|
if (start >= end) {
|
|
383
491
|
return null;
|
|
384
492
|
}
|
|
493
|
+
// Publish the inline-validation verdict — but only for a "simple" authority,
|
|
494
|
+
// where the scanned run equals the final host: no userinfo skip, no port
|
|
495
|
+
// trim, no brackets, no trailing dot (trimmed below), and length within RFC
|
|
496
|
+
// limits. Anything else leaves it `false` so `parseImpl` re-validates.
|
|
497
|
+
//
|
|
498
|
+
// Every clause below is load-bearing for CORRECTNESS, not just speed: the
|
|
499
|
+
// loop accumulates `vValid` over the whole scanned run (it does not stop at
|
|
500
|
+
// ':' or '@', so any port/userinfo bytes are included), so the verdict is
|
|
501
|
+
// only sound when that run equals the final host. Do not drop a clause as
|
|
502
|
+
// "redundant" — e.g. without `indexOfPort === -1`, `host:8080` would be
|
|
503
|
+
// wrongly accepted.
|
|
504
|
+
if (validate &&
|
|
505
|
+
vValid &&
|
|
506
|
+
indexOfIdentifier === -1 &&
|
|
507
|
+
indexOfPort === -1 &&
|
|
508
|
+
indexOfClosingBracket === -1 &&
|
|
509
|
+
url.charCodeAt(end - 1) !== 46 /* no trailing dot */ &&
|
|
510
|
+
end - start <= 255 && // total length
|
|
511
|
+
end - vLastDot - 1 <= 63 && // last label length
|
|
512
|
+
vLastCode !== 45 /* last char not '-' */) {
|
|
513
|
+
extractedHostnameValidated = true;
|
|
514
|
+
}
|
|
385
515
|
}
|
|
386
516
|
// Trim trailing dots
|
|
387
517
|
while (end > start + 1 && url.charCodeAt(end - 1) === 46 /* '.' */) {
|
|
@@ -533,6 +663,11 @@ function isSpecialUse(hostname) {
|
|
|
533
663
|
*
|
|
534
664
|
* If you need stricter validation, consider using an external library.
|
|
535
665
|
*/
|
|
666
|
+
// KEEP IN SYNC with `extract-hostname.ts` `isValidHostnameChar` + its inline
|
|
667
|
+
// scan/verdict, which duplicate these structural rules to validate during
|
|
668
|
+
// extraction (a perf fusion). That copy additionally accepts A-Z (the host is
|
|
669
|
+
// not yet lowercased there) and folds in '-' / '_'. Any change to the accepted
|
|
670
|
+
// character set or the label/length rules here must be mirrored there.
|
|
536
671
|
function isValidAscii(code) {
|
|
537
672
|
return ((code >= 97 && code <= 122) || (code >= 48 && code <= 57) || code > 127);
|
|
538
673
|
}
|
|
@@ -678,10 +813,10 @@ function parseImpl(url, step, suffixLookup, partialOptions, result) {
|
|
|
678
813
|
}
|
|
679
814
|
else if (options.mixedInputs) {
|
|
680
815
|
urlIsValid = isValidHostname(url);
|
|
681
|
-
result.hostname = extractHostname(url, urlIsValid);
|
|
816
|
+
result.hostname = extractHostname(url, urlIsValid, options.validateHostname);
|
|
682
817
|
}
|
|
683
818
|
else {
|
|
684
|
-
result.hostname = extractHostname(url, false);
|
|
819
|
+
result.hostname = extractHostname(url, false, options.validateHostname);
|
|
685
820
|
}
|
|
686
821
|
// Check if `hostname` is a valid ip address
|
|
687
822
|
if (options.detectIp && result.hostname !== null) {
|
|
@@ -700,6 +835,9 @@ function parseImpl(url, step, suffixLookup, partialOptions, result) {
|
|
|
700
835
|
// Skip the re-scan when `url` was already validated and extractHostname
|
|
701
836
|
// returned it unchanged (same reference => identical string, still valid).
|
|
702
837
|
!(urlIsValid && result.hostname === url) &&
|
|
838
|
+
// Skip the re-scan when extractHostname already validated the host inline
|
|
839
|
+
// (a confirmed-valid simple authority — see extract-hostname.ts).
|
|
840
|
+
!extractedHostnameValidated &&
|
|
703
841
|
!isValidHostname(result.hostname)) {
|
|
704
842
|
result.hostname = null;
|
|
705
843
|
return result;
|