tldts-core 7.2.1 → 7.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/index.js +110 -6
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/src/extract-hostname.js +101 -4
- package/dist/cjs/src/extract-hostname.js.map +1 -1
- package/dist/cjs/src/factory.js +5 -2
- package/dist/cjs/src/factory.js.map +1 -1
- package/dist/cjs/src/is-valid.js +5 -0
- package/dist/cjs/src/is-valid.js.map +1 -1
- package/dist/cjs/tsconfig.tsbuildinfo +1 -1
- package/dist/es6/src/extract-hostname.js +100 -4
- package/dist/es6/src/extract-hostname.js.map +1 -1
- package/dist/es6/src/factory.js +6 -3
- package/dist/es6/src/factory.js.map +1 -1
- package/dist/es6/src/is-valid.js +5 -0
- package/dist/es6/src/is-valid.js.map +1 -1
- package/dist/es6/tsconfig.bundle.tsbuildinfo +1 -1
- package/dist/types/src/extract-hostname.d.ts +5 -1
- package/package.json +2 -2
- package/src/extract-hostname.ts +108 -0
- package/src/factory.ts +12 -3
- package/src/is-valid.ts +5 -0
package/src/extract-hostname.ts
CHANGED
|
@@ -5,6 +5,37 @@
|
|
|
5
5
|
*/
|
|
6
6
|
const CONTROL_CHARS = /[\t\n\r]/g;
|
|
7
7
|
|
|
8
|
+
// Set by `extractHostname` (a module-scope flag, read synchronously by
|
|
9
|
+
// `parseImpl` right after the call — same pattern as the reused RESULT object).
|
|
10
|
+
// `true` ONLY when extraction validated the returned host inline (a confirmed-
|
|
11
|
+
// valid, "simple" authority) so `parseImpl` can skip the separate
|
|
12
|
+
// `isValidHostname` pass. `false` in every other case (validation disabled, a
|
|
13
|
+
// complex authority — userinfo/port/brackets/trailing-dot/control — an invalid
|
|
14
|
+
// host, or a non-main return path); `parseImpl` then validates as usual. The
|
|
15
|
+
// fast path can only ever SKIP a redundant scan for hosts already known valid,
|
|
16
|
+
// never accept an invalid one.
|
|
17
|
+
export let extractedHostnameValidated = false;
|
|
18
|
+
|
|
19
|
+
/**
 * Per-character hostname check used while scanning the authority.
 *
 * Accepts exactly what `isValidAscii` in `is-valid.ts` accepts (a-z, 0-9 and
 * any code above U+007F) and, on top of that:
 *   - A-Z, because the host has not been lowercased yet at this point and an
 *     uppercase letter turns into a valid lowercase one afterwards;
 *   - '-' and '_', which are allowed inside a label.
 *
 * Positional rules (for instance a leading '-') are NOT handled here; callers
 * apply them separately.
 *
 * KEEP IN SYNC with `is-valid.ts`: the accepted character set is duplicated on
 * purpose so validation can happen during extraction. Any change on either
 * side must be mirrored on the other.
 *
 * @param code - UTF-16 code unit, as returned by `String.prototype.charCodeAt`.
 * @returns `true` when `code` may appear in a hostname, `false` otherwise.
 */
function isValidHostnameChar(code: number): boolean {
  // Non-ASCII is accepted as-is (no punycode check).
  if (code > 127) {
    return true;
  }

  // '-' and '_'
  if (code === 45 || code === 95) {
    return true;
  }

  // 0-9
  if (code >= 48 && code <= 57) {
    return true;
  }

  // A-Z (valid once lowercased)
  if (code >= 65 && code <= 90) {
    return true;
  }

  // a-z
  return code >= 97 && code <= 122;
}
|
|
38
|
+
|
|
8
39
|
/**
|
|
9
40
|
* Classify scheme `url.slice(schemeStart, colonIndex)` as a WHATWG special
|
|
10
41
|
* scheme without allocating a substring (case-insensitive via `| 32`).
|
|
@@ -59,15 +90,20 @@ function getSpecialScheme(
|
|
|
59
90
|
* @param urlIsValidHostname - when true, `url` is already a valid hostname and is
|
|
60
91
|
* returned by the same reference (factory.ts skips re-validation on that
|
|
61
92
|
* identity), keeping the common path allocation-free.
|
|
93
|
+
* @param validate - when true, validate the host inline during the authority
|
|
94
|
+
* scan and publish the verdict via `extractedHostnameValidated` so `parseImpl`
|
|
95
|
+
* can skip the redundant `isValidHostname` pass for simple authorities.
|
|
62
96
|
*/
|
|
63
97
|
export default function extractHostname(
|
|
64
98
|
url: string,
|
|
65
99
|
urlIsValidHostname: boolean,
|
|
100
|
+
validate = false,
|
|
66
101
|
): string | null {
|
|
67
102
|
let start = 0;
|
|
68
103
|
let end: number = url.length;
|
|
69
104
|
let hasUpper = false;
|
|
70
105
|
let isSpecial = false;
|
|
106
|
+
extractedHostnameValidated = false;
|
|
71
107
|
|
|
72
108
|
if (!urlIsValidHostname) {
|
|
73
109
|
// Data URLs never carry a host (and may be huge — short-circuit them).
|
|
@@ -143,6 +179,7 @@ export default function extractHostname(
|
|
|
143
179
|
return extractHostname(
|
|
144
180
|
url.replace(CONTROL_CHARS, ''),
|
|
145
181
|
urlIsValidHostname,
|
|
182
|
+
validate,
|
|
146
183
|
);
|
|
147
184
|
}
|
|
148
185
|
return null;
|
|
@@ -169,6 +206,7 @@ export default function extractHostname(
|
|
|
169
206
|
return extractHostname(
|
|
170
207
|
url.replace(CONTROL_CHARS, ''),
|
|
171
208
|
urlIsValidHostname,
|
|
209
|
+
validate,
|
|
172
210
|
);
|
|
173
211
|
}
|
|
174
212
|
if (code === 58 /* ':' */) {
|
|
@@ -279,11 +317,36 @@ export default function extractHostname(
|
|
|
279
317
|
// '@') to tell a bare IPv6 (>= 2 colons) from a host:port (exactly one);
|
|
280
318
|
// flag uppercase and a stray tab/newline. The loop is split on `code < 64`
|
|
281
319
|
// so common host characters take fewer comparisons.
|
|
320
|
+
//
|
|
321
|
+
// When `validate`, also accumulate `is-valid.ts`'s checks over the scanned
|
|
322
|
+
// run so a simple authority's host can be validated in this single pass.
|
|
323
|
+
// `vValid` only stays meaningful for a "simple" authority (no userinfo, port,
|
|
324
|
+
// brackets, control or trailing dot); those cases clear it / are rejected by
|
|
325
|
+
// the guard below, falling back to `isValidHostname`.
|
|
282
326
|
let indexOfIdentifier = -1;
|
|
283
327
|
let indexOfClosingBracket = -1;
|
|
284
328
|
let indexOfPort = -1;
|
|
285
329
|
let indexOfFirstColon = -1;
|
|
286
330
|
let hasControl = false;
|
|
331
|
+
let vValid = validate; // seeded true when validating; cleared on the first invalid char
|
|
332
|
+
let vLastDot = start - 1; // mirrors is-valid.ts `lastDotIndex = -1` at host start
|
|
333
|
+
let vLastCode = -1;
|
|
334
|
+
if (validate && start < end) {
|
|
335
|
+
// First-char rule: must be a valid host char, '.', or '_' (NOT '-').
|
|
336
|
+
const c0 = url.charCodeAt(start);
|
|
337
|
+
if (
|
|
338
|
+
!(
|
|
339
|
+
/*@__INLINE__*/ (
|
|
340
|
+
isValidHostnameChar(c0) ||
|
|
341
|
+
c0 === 46 /* '.' */ ||
|
|
342
|
+
c0 === 95 /* '_' */
|
|
343
|
+
)
|
|
344
|
+
) ||
|
|
345
|
+
c0 === 45 /* '-' (isValidHostnameChar allows it mid-label, not first) */
|
|
346
|
+
) {
|
|
347
|
+
vValid = false;
|
|
348
|
+
}
|
|
349
|
+
}
|
|
287
350
|
for (let i = start; i < end; i += 1) {
|
|
288
351
|
const code: number = url.charCodeAt(i);
|
|
289
352
|
if (code < 64) {
|
|
@@ -297,6 +360,19 @@ export default function extractHostname(
|
|
|
297
360
|
indexOfPort = i;
|
|
298
361
|
} else if (code === 9 || code === 10 || code === 13) {
|
|
299
362
|
hasControl = true;
|
|
363
|
+
} else if (validate) {
|
|
364
|
+
if (code === 46 /* '.' */) {
|
|
365
|
+
if (i - vLastDot > 64 || vLastCode === 46 || vLastCode === 45) {
|
|
366
|
+
vValid = false;
|
|
367
|
+
}
|
|
368
|
+
vLastDot = i;
|
|
369
|
+
} else if (code < 48 || code > 57) {
|
|
370
|
+
// < 64 and not a delimiter/dot/digit => only '-' (45) is a valid
|
|
371
|
+
// host char here; everything else (space, %, !, etc.) is invalid.
|
|
372
|
+
if (code !== 45) {
|
|
373
|
+
vValid = false;
|
|
374
|
+
}
|
|
375
|
+
}
|
|
300
376
|
}
|
|
301
377
|
} else if (isSpecial && code === 92 /* '\' */) {
|
|
302
378
|
end = i;
|
|
@@ -308,6 +384,12 @@ export default function extractHostname(
|
|
|
308
384
|
indexOfClosingBracket = i;
|
|
309
385
|
} else if (code >= 65 && code <= 90) {
|
|
310
386
|
hasUpper = true;
|
|
387
|
+
} else if (validate && !(/*@__INLINE__*/ isValidHostnameChar(code))) {
|
|
388
|
+
// >= 64, not '@'/']'/upper: valid only if a-z, '_', or non-ASCII.
|
|
389
|
+
vValid = false;
|
|
390
|
+
}
|
|
391
|
+
if (validate) {
|
|
392
|
+
vLastCode = code;
|
|
311
393
|
}
|
|
312
394
|
}
|
|
313
395
|
|
|
@@ -316,6 +398,7 @@ export default function extractHostname(
|
|
|
316
398
|
return extractHostname(
|
|
317
399
|
url.replace(CONTROL_CHARS, ''),
|
|
318
400
|
urlIsValidHostname,
|
|
401
|
+
validate,
|
|
319
402
|
);
|
|
320
403
|
}
|
|
321
404
|
|
|
@@ -351,6 +434,31 @@ export default function extractHostname(
|
|
|
351
434
|
if (start >= end) {
|
|
352
435
|
return null;
|
|
353
436
|
}
|
|
437
|
+
|
|
438
|
+
// Publish the inline-validation verdict — but only for a "simple" authority,
|
|
439
|
+
// where the scanned run equals the final host: no userinfo skip, no port
|
|
440
|
+
// trim, no brackets, no trailing dot (trimmed below), and length within RFC
|
|
441
|
+
// limits. Anything else leaves it `false` so `parseImpl` re-validates.
|
|
442
|
+
//
|
|
443
|
+
// Every clause below is load-bearing for CORRECTNESS, not just speed: the
|
|
444
|
+
// loop accumulates `vValid` over the whole scanned run (it does not stop at
|
|
445
|
+
// ':' or '@', so any port/userinfo bytes are included), so the verdict is
|
|
446
|
+
// only sound when that run equals the final host. Do not drop a clause as
|
|
447
|
+
// "redundant" — e.g. without `indexOfPort === -1`, `host:8080` would be
|
|
448
|
+
// wrongly accepted.
|
|
449
|
+
if (
|
|
450
|
+
validate &&
|
|
451
|
+
vValid &&
|
|
452
|
+
indexOfIdentifier === -1 &&
|
|
453
|
+
indexOfPort === -1 &&
|
|
454
|
+
indexOfClosingBracket === -1 &&
|
|
455
|
+
url.charCodeAt(end - 1) !== 46 /* no trailing dot */ &&
|
|
456
|
+
end - start <= 255 && // total length
|
|
457
|
+
end - vLastDot - 1 <= 63 && // last label length
|
|
458
|
+
vLastCode !== 45 /* last char not '-' */
|
|
459
|
+
) {
|
|
460
|
+
extractedHostnameValidated = true;
|
|
461
|
+
}
|
|
354
462
|
}
|
|
355
463
|
|
|
356
464
|
// Trim trailing dots
|
package/src/factory.ts
CHANGED
|
@@ -6,7 +6,9 @@
|
|
|
6
6
|
|
|
7
7
|
import getDomain from './domain';
|
|
8
8
|
import getDomainWithoutSuffix from './domain-without-suffix';
|
|
9
|
-
import extractHostname
|
|
9
|
+
import extractHostname, {
|
|
10
|
+
extractedHostnameValidated,
|
|
11
|
+
} from './extract-hostname';
|
|
10
12
|
import isIp from './is-ip';
|
|
11
13
|
import isSpecialUse from './is-special-use';
|
|
12
14
|
import isValidHostname from './is-valid';
|
|
@@ -120,9 +122,13 @@ export function parseImpl(
|
|
|
120
122
|
result.hostname = url;
|
|
121
123
|
} else if (options.mixedInputs) {
|
|
122
124
|
urlIsValid = isValidHostname(url);
|
|
123
|
-
result.hostname = extractHostname(
|
|
125
|
+
result.hostname = extractHostname(
|
|
126
|
+
url,
|
|
127
|
+
urlIsValid,
|
|
128
|
+
options.validateHostname,
|
|
129
|
+
);
|
|
124
130
|
} else {
|
|
125
|
-
result.hostname = extractHostname(url, false);
|
|
131
|
+
result.hostname = extractHostname(url, false, options.validateHostname);
|
|
126
132
|
}
|
|
127
133
|
|
|
128
134
|
// Check if `hostname` is a valid ip address
|
|
@@ -144,6 +150,9 @@ export function parseImpl(
|
|
|
144
150
|
// Skip the re-scan when `url` was already validated and extractHostname
|
|
145
151
|
// returned it unchanged (same reference => identical string, still valid).
|
|
146
152
|
!(urlIsValid && result.hostname === url) &&
|
|
153
|
+
// Skip the re-scan when extractHostname already validated the host inline
|
|
154
|
+
// (a confirmed-valid simple authority — see extract-hostname.ts).
|
|
155
|
+
!extractedHostnameValidated &&
|
|
147
156
|
!isValidHostname(result.hostname)
|
|
148
157
|
) {
|
|
149
158
|
result.hostname = null;
|
package/src/is-valid.ts
CHANGED
|
@@ -7,6 +7,11 @@
|
|
|
7
7
|
* If you need stricter validation, consider using an external library.
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
|
+
// KEEP IN SYNC with `extract-hostname.ts` `isValidHostnameChar` + its inline
|
|
11
|
+
// scan/verdict, which duplicate these structural rules to validate during
|
|
12
|
+
// extraction (a perf fusion). That copy additionally accepts A-Z (the host is
|
|
13
|
+
// not yet lowercased there) and folds in '-' / '_'. Any change to the accepted
|
|
14
|
+
// character set or the label/length rules here must be mirrored there.
|
|
10
15
|
function isValidAscii(code: number): boolean {
|
|
11
16
|
return (
|
|
12
17
|
(code >= 97 && code <= 122) || (code >= 48 && code <= 57) || code > 127
|