tldts-icann 7.2.1 → 7.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/index.js CHANGED
@@ -98,6 +98,34 @@ function getDomainWithoutSuffix$1(domain, suffix) {
98
98
  * re-parse) on the rare input that actually contains one.
99
99
  */
100
100
  const CONTROL_CHARS = /[\t\n\r]/g;
101
+ // Set by `extractHostname` (a module-scope flag, read synchronously by
102
+ // `parseImpl` right after the call — same pattern as the reused RESULT object).
103
+ // `true` ONLY when extraction validated the returned host inline (a confirmed-
104
+ // valid, "simple" authority) so `parseImpl` can skip the separate
105
+ // `isValidHostname` pass. `false` in every other case (validation disabled, a
106
+ // complex authority — userinfo/port/brackets/trailing-dot/control — an invalid
107
+ // host, or a non-main return path); `parseImpl` then validates as usual. The
108
+ // fast path can only ever SKIP a redundant scan for hosts already known valid,
109
+ // never accept an invalid one. Reset to `false` on entry to every `extractHostname` call.
110
+ let extractedHostnameValidated = false; // NOTE(review): only meaningful right after an `extractHostname` call — confirm every reader runs one first
111
+ /**
112
+ * True if char `code` is a valid hostname character. This is the per-char half
113
+ * of `is-valid.ts`'s `isValidAscii` (a-z, 0-9, > U+007F) PLUS three additions:
114
+ * A-Z (the host is lowercased before validation, so uppercase ≡ a valid
115
+ * lowercase letter) and '-' / '_' (valid inside a label). KEEP IN SYNC with
116
+ * `is-valid.ts`: these rules are deliberately duplicated to validate during
117
+ * extraction, so any change to the accepted character set there must be
118
+ * mirrored here (and vice-versa).
+ *
+ * This is purely a per-character test: it accepts '-' regardless of position.
+ * Where a character may appear is enforced by the extraction scan that calls
+ * it, which separately rejects a '-' as the first character of the host or
+ * directly after a '.'.
+ *
+ * @param code - character code to classify, as returned by `charCodeAt`.
+ * @returns `true` when `code` belongs to the accepted set described above,
+ * `false` otherwise.
119
+ */
120
+ function isValidHostnameChar(code) {
121
+ return ((code >= 97 && code <= 122) || // a-z
122
+ (code >= 48 && code <= 57) || // 0-9
123
+ code > 127 || // non-ASCII (accepted, not punycode-checked)
124
+ (code >= 65 && code <= 90) || // A-Z (becomes valid once lowercased)
125
+ code === 45 || // '-'
126
+ code === 95 // '_'
127
+ );
128
+ }
101
129
  /**
102
130
  * Classify scheme `url.slice(schemeStart, colonIndex)` as a WHATWG special
103
131
  * scheme without allocating a substring (case-insensitive via `| 32`).
@@ -154,12 +182,16 @@ function getSpecialScheme(url, schemeStart, colonIndex) {
154
182
  * @param urlIsValidHostname - when true, `url` is already a valid hostname and is
155
183
  * returned by the same reference (factory.ts skips re-validation on that
156
184
  * identity), keeping the common path allocation-free.
185
+ * @param validate - when true, validate the host inline during the authority
186
+ * scan and publish the verdict via `extractedHostnameValidated` so `parseImpl`
187
+ * can skip the redundant `isValidHostname` pass for simple authorities.
157
188
  */
158
- function extractHostname(url, urlIsValidHostname) {
189
+ function extractHostname(url, urlIsValidHostname, validate = false) {
159
190
  let start = 0;
160
191
  let end = url.length;
161
192
  let hasUpper = false;
162
193
  let isSpecial = false;
194
+ extractedHostnameValidated = false;
163
195
  if (!urlIsValidHostname) {
164
196
  // Data URLs never carry a host (and may be huge — short-circuit them).
165
197
  if (url.startsWith('data:')) {
@@ -221,7 +253,7 @@ function extractHostname(url, urlIsValidHostname) {
221
253
  )) {
222
254
  const raw = url.charCodeAt(i);
223
255
  if (raw === 9 || raw === 10 || raw === 13) {
224
- return extractHostname(url.replace(CONTROL_CHARS, ''), urlIsValidHostname);
256
+ return extractHostname(url.replace(CONTROL_CHARS, ''), urlIsValidHostname, validate);
225
257
  }
226
258
  return null;
227
259
  }
@@ -246,7 +278,7 @@ function extractHostname(url, urlIsValidHostname) {
246
278
  for (let i = start; i < end; i += 1) {
247
279
  const code = url.charCodeAt(i);
248
280
  if (code === 9 || code === 10 || code === 13) {
249
- return extractHostname(url.replace(CONTROL_CHARS, ''), urlIsValidHostname);
281
+ return extractHostname(url.replace(CONTROL_CHARS, ''), urlIsValidHostname, validate);
250
282
  }
251
283
  if (code === 58 /* ':' */) {
252
284
  indexOfColon = i;
@@ -348,11 +380,31 @@ function extractHostname(url, urlIsValidHostname) {
348
380
  // '@') to tell a bare IPv6 (>= 2 colons) from a host:port (exactly one);
349
381
  // flag uppercase and a stray tab/newline. The loop is split on `code < 64`
350
382
  // so common host characters take fewer comparisons.
383
+ //
384
+ // When `validate`, also accumulate `is-valid.ts`'s checks over the scanned
385
+ // run so a simple authority's host can be validated in this single pass.
386
+ // `vValid` only stays meaningful for a "simple" authority (no userinfo, port,
387
+ // brackets, control or trailing dot); those cases clear it / are rejected by
388
+ // the guard below, falling back to `isValidHostname`.
351
389
  let indexOfIdentifier = -1;
352
390
  let indexOfClosingBracket = -1;
353
391
  let indexOfPort = -1;
354
392
  let indexOfFirstColon = -1;
355
393
  let hasControl = false;
394
+ let vValid = validate; // seeded true when validating; cleared on the first invalid char
395
+ let vLastDot = start - 1; // mirrors is-valid.ts `lastDotIndex = -1` at host start
396
+ let vLastCode = -1;
397
+ if (validate && start < end) {
398
+ // First-char rule: must be a valid host char, '.', or '_' (NOT '-').
399
+ const c0 = url.charCodeAt(start);
400
+ if (!(
401
+ /*@__INLINE__*/ (isValidHostnameChar(c0) ||
402
+ c0 === 46 /* '.' */ ||
403
+ c0 === 95 /* '_' */)) ||
404
+ c0 === 45 /* '-' (isValidHostnameChar allows it mid-label, not first) */) {
405
+ vValid = false;
406
+ }
407
+ }
356
408
  for (let i = start; i < end; i += 1) {
357
409
  const code = url.charCodeAt(i);
358
410
  if (code < 64) {
@@ -369,6 +421,24 @@ function extractHostname(url, urlIsValidHostname) {
369
421
  else if (code === 9 || code === 10 || code === 13) {
370
422
  hasControl = true;
371
423
  }
424
+ else if (validate) {
425
+ if (code === 46 /* '.' */) {
426
+ if (i - vLastDot > 64 || vLastCode === 46 || vLastCode === 45) {
427
+ vValid = false;
428
+ }
429
+ vLastDot = i;
430
+ }
431
+ else if (code < 48 || code > 57) {
432
+ // < 64 and not a delimiter/dot/digit => only '-' (45) is a valid
433
+ // host char here; everything else (space, %, !, etc.) is invalid.
434
+ // A '-' must also not START a label (the byte right after a '.') —
435
+ // mirrors is-valid.ts; the first label is covered by the first-char
436
+ // rule above. (RFC 1034 §3.5 / RFC 1035 §2.3.1 LDH.)
437
+ if (code !== 45 || vLastCode === 46 /* label-leading '-' */) {
438
+ vValid = false;
439
+ }
440
+ }
441
+ }
372
442
  }
373
443
  else if (isSpecial && code === 92 /* '\' */) {
374
444
  end = i;
@@ -384,10 +454,17 @@ function extractHostname(url, urlIsValidHostname) {
384
454
  else if (code >= 65 && code <= 90) {
385
455
  hasUpper = true;
386
456
  }
457
+ else if (validate && !( /*@__INLINE__*/isValidHostnameChar(code))) {
458
+ // >= 64, not '@'/']'/upper: valid only if a-z, '_', or non-ASCII.
459
+ vValid = false;
460
+ }
461
+ if (validate) {
462
+ vLastCode = code;
463
+ }
387
464
  }
388
465
  // A tab/newline inside the authority: strip everything and re-parse (rare).
389
466
  if (hasControl) {
390
- return extractHostname(url.replace(CONTROL_CHARS, ''), urlIsValidHostname);
467
+ return extractHostname(url.replace(CONTROL_CHARS, ''), urlIsValidHostname, validate);
391
468
  }
392
469
  // Skip userinfo. '>= start' so an empty userinfo ("http://@host") works too.
393
470
  if (indexOfIdentifier !== -1 &&
@@ -416,6 +493,28 @@ function extractHostname(url, urlIsValidHostname) {
416
493
  if (start >= end) {
417
494
  return null;
418
495
  }
496
+ // Publish the inline-validation verdict — but only for a "simple" authority,
497
+ // where the scanned run equals the final host: no userinfo skip, no port
498
+ // trim, no brackets, no trailing dot (trimmed below), and length within RFC
499
+ // limits. Anything else leaves it `false` so `parseImpl` re-validates.
500
+ //
501
+ // Every clause below is load-bearing for CORRECTNESS, not just speed: the
502
+ // loop accumulates `vValid` over the whole scanned run (it does not stop at
503
+ // ':' or '@', so any port/userinfo bytes are included), so the verdict is
504
+ // only sound when that run equals the final host. Do not drop a clause as
505
+ // "redundant" — e.g. without `indexOfPort === -1`, `host:8080` would be
506
+ // wrongly accepted.
507
+ if (validate &&
508
+ vValid &&
509
+ indexOfIdentifier === -1 &&
510
+ indexOfPort === -1 &&
511
+ indexOfClosingBracket === -1 &&
512
+ url.charCodeAt(end - 1) !== 46 /* no trailing dot */ &&
513
+ end - start <= 255 && // total length
514
+ end - vLastDot - 1 <= 63 && // last label length
515
+ vLastCode !== 45 /* last char not '-' */) {
516
+ extractedHostnameValidated = true;
517
+ }
419
518
  }
420
519
  // Trim trailing dots
421
520
  while (end > start + 1 && url.charCodeAt(end - 1) === 46 /* '.' */) {
@@ -567,6 +666,11 @@ function isSpecialUse(hostname) {
567
666
  *
568
667
  * If you need stricter validation, consider using an external library.
569
668
  */
669
+ // KEEP IN SYNC with `extract-hostname.ts` `isValidHostnameChar` + its inline
670
+ // scan/verdict, which duplicate these structural rules to validate during
671
+ // extraction (a perf fusion). That copy additionally accepts A-Z (the host is
672
+ // not yet lowercased there) and folds in '-' / '_'. Any change to the accepted
673
+ // character set or the label/length rules here must be mirrored there.
570
674
  function isValidAscii(code) { // per-character test only; the caller adds '-' (45) and '_' (95) itself
571
675
  return ((code >= 97 && code <= 122) || (code >= 48 && code <= 57) || code > 127); // a-z | 0-9 | any code above 127 (so not strictly ASCII, despite the name)
572
676
  }
@@ -610,8 +714,14 @@ function isValidHostname (hostname) {
610
714
  }
611
715
  lastDotIndex = i;
612
716
  }
613
- else if (!( /*@__INLINE__*/(isValidAscii(code) || code === 45 || code === 95))) {
614
- // Check if there is a forbidden character in the label
717
+ else if (
718
+ // A forbidden character in the label...
719
+ !( /*@__INLINE__*/(isValidAscii(code) || code === 45 || code === 95)) ||
720
+ // ...or a '-' starting a label (the byte right after a '.'). A label must
721
+ // not begin with a hyphen (RFC 1034 §3.5 / RFC 1035 §2.3.1 LDH, as amended
722
+ // by RFC 1123 §2.1; cf. UTS #46 CheckHyphens). The first label is covered by
723
+ // the leading-character guard above; mirrors the trailing-'-' rule below.
724
+ (code === 45 && lastCharCode === 46)) {
615
725
  return false;
616
726
  }
617
727
  lastCharCode = code;
@@ -712,10 +822,10 @@ function parseImpl(url, step, suffixLookup, partialOptions, result) {
712
822
  }
713
823
  else if (options.mixedInputs) {
714
824
  urlIsValid = isValidHostname(url);
715
- result.hostname = extractHostname(url, urlIsValid);
825
+ result.hostname = extractHostname(url, urlIsValid, options.validateHostname);
716
826
  }
717
827
  else {
718
- result.hostname = extractHostname(url, false);
828
+ result.hostname = extractHostname(url, false, options.validateHostname);
719
829
  }
720
830
  // Check if `hostname` is a valid ip address
721
831
  if (options.detectIp && result.hostname !== null) {
@@ -734,6 +844,9 @@ function parseImpl(url, step, suffixLookup, partialOptions, result) {
734
844
  // Skip the re-scan when `url` was already validated and extractHostname
735
845
  // returned it unchanged (same reference => identical string, still valid).
736
846
  !(urlIsValid && result.hostname === url) &&
847
+ // Skip the re-scan when extractHostname already validated the host inline
848
+ // (a confirmed-valid simple authority — see extract-hostname.ts).
849
+ !extractedHostnameValidated &&
737
850
  !isValidHostname(result.hostname)) {
738
851
  result.hostname = null;
739
852
  return result;