tldts-core 7.2.0 → 7.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/index.js +165 -27
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/src/extract-hostname.js +156 -25
- package/dist/cjs/src/extract-hostname.js.map +1 -1
- package/dist/cjs/src/factory.js +5 -2
- package/dist/cjs/src/factory.js.map +1 -1
- package/dist/cjs/src/is-valid.js +5 -0
- package/dist/cjs/src/is-valid.js.map +1 -1
- package/dist/cjs/tsconfig.tsbuildinfo +1 -1
- package/dist/es6/src/extract-hostname.js +155 -25
- package/dist/es6/src/extract-hostname.js.map +1 -1
- package/dist/es6/src/factory.js +6 -3
- package/dist/es6/src/factory.js.map +1 -1
- package/dist/es6/src/is-valid.js +5 -0
- package/dist/es6/src/is-valid.js.map +1 -1
- package/dist/es6/tsconfig.bundle.tsbuildinfo +1 -1
- package/dist/types/src/extract-hostname.d.ts +5 -1
- package/package.json +2 -2
- package/src/extract-hostname.ts +170 -25
- package/src/factory.ts +16 -6
- package/src/is-valid.ts +5 -0
package/src/extract-hostname.ts
CHANGED
|
@@ -5,6 +5,37 @@
|
|
|
5
5
|
*/
|
|
6
6
|
const CONTROL_CHARS = /[\t\n\r]/g;
|
|
7
7
|
|
|
8
|
+
// Module-scope flag written by `extractHostname` and read synchronously by
// `parseImpl` right after that call (same pattern as the reused RESULT
// object). It is reset to `false` at the start of every `extractHostname`
// call and becomes `true` ONLY when extraction validated the returned host
// inline (a confirmed-valid, "simple" authority), so `parseImpl` can skip the
// separate `isValidHostname` pass.
//
// It stays `false` in every other case — validation disabled, a complex
// authority (userinfo / port / brackets / trailing dot / control chars), an
// invalid host, or a non-main return path — and `parseImpl` then validates as
// usual. The fast path can only ever SKIP a redundant scan for hosts already
// known valid; it never accepts an invalid one.
export let extractedHostnameValidated = false;
|
|
18
|
+
|
|
19
|
+
/**
 * True if char `code` is a valid hostname character.
 *
 * This is the per-char half of `is-valid.ts`'s `isValidAscii` (a-z, 0-9,
 * > U+007F) PLUS three additions:
 *  - A-Z: the host is lowercased before validation, so an uppercase letter is
 *    equivalent to a valid lowercase one;
 *  - '-' and '_': valid inside a label.
 *
 * Only the character set is checked here. Positional rules (e.g. a leading
 * '-') are enforced by the caller.
 *
 * KEEP IN SYNC with `is-valid.ts`: these rules are deliberately duplicated to
 * validate during extraction, so any change to the accepted character set
 * there must be mirrored here (and vice-versa).
 *
 * @param code - UTF-16 code unit, as returned by `String.prototype.charCodeAt`.
 * @returns `true` when `code` belongs to the accepted hostname character set.
 */
function isValidHostnameChar(code: number): boolean {
  // Kept as a single return expression: call sites carry an inline hint.
  return (
    (code >= 97 && code <= 122) || // a-z
    (code >= 48 && code <= 57) || // 0-9
    code > 127 || // non-ASCII (accepted, not punycode-checked)
    (code >= 65 && code <= 90) || // A-Z (becomes valid once lowercased)
    code === 45 || // '-'
    code === 95 // '_'
  );
}
|
|
38
|
+
|
|
8
39
|
/**
|
|
9
40
|
* Classify scheme `url.slice(schemeStart, colonIndex)` as a WHATWG special
|
|
10
41
|
* scheme without allocating a substring (case-insensitive via `| 32`).
|
|
@@ -59,15 +90,20 @@ function getSpecialScheme(
|
|
|
59
90
|
* @param urlIsValidHostname - when true, `url` is already a valid hostname and is
|
|
60
91
|
* returned by the same reference (factory.ts skips re-validation on that
|
|
61
92
|
* identity), keeping the common path allocation-free.
|
|
93
|
+
* @param validate - when true, validate the host inline during the authority
|
|
94
|
+
* scan and publish the verdict via `extractedHostnameValidated` so `parseImpl`
|
|
95
|
+
* can skip the redundant `isValidHostname` pass for simple authorities.
|
|
62
96
|
*/
|
|
63
97
|
export default function extractHostname(
|
|
64
98
|
url: string,
|
|
65
99
|
urlIsValidHostname: boolean,
|
|
100
|
+
validate = false,
|
|
66
101
|
): string | null {
|
|
67
102
|
let start = 0;
|
|
68
103
|
let end: number = url.length;
|
|
69
104
|
let hasUpper = false;
|
|
70
105
|
let isSpecial = false;
|
|
106
|
+
extractedHostnameValidated = false;
|
|
71
107
|
|
|
72
108
|
if (!urlIsValidHostname) {
|
|
73
109
|
// Data URLs never carry a host (and may be huge — short-circuit them).
|
|
@@ -143,6 +179,7 @@ export default function extractHostname(
|
|
|
143
179
|
return extractHostname(
|
|
144
180
|
url.replace(CONTROL_CHARS, ''),
|
|
145
181
|
urlIsValidHostname,
|
|
182
|
+
validate,
|
|
146
183
|
);
|
|
147
184
|
}
|
|
148
185
|
return null;
|
|
@@ -169,6 +206,7 @@ export default function extractHostname(
|
|
|
169
206
|
return extractHostname(
|
|
170
207
|
url.replace(CONTROL_CHARS, ''),
|
|
171
208
|
urlIsValidHostname,
|
|
209
|
+
validate,
|
|
172
210
|
);
|
|
173
211
|
}
|
|
174
212
|
if (code === 58 /* ':' */) {
|
|
@@ -217,32 +255,54 @@ export default function extractHostname(
|
|
|
217
255
|
if (!allDigits) {
|
|
218
256
|
const special = getSpecialScheme(url, start, indexOfColon);
|
|
219
257
|
if (special === 0) {
|
|
220
|
-
// No "://" anywhere on the cold path
|
|
221
|
-
//
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
258
|
+
// No "://" anywhere on the cold path and not a special scheme.
|
|
259
|
+
// A second ':' before the host's end marks a bare, unbracketed
|
|
260
|
+
// IPv6 literal ("2a01:e35::1"): fall through and let the host
|
|
261
|
+
// loop + isIp classify it. Without one this is an opaque path
|
|
262
|
+
// with no host ("mailto:x", "foo:bar").
|
|
263
|
+
let isBareIpv6 = false;
|
|
264
|
+
for (let j = indexOfColon + 1; j < end; j += 1) {
|
|
265
|
+
const code = url.charCodeAt(j);
|
|
266
|
+
if (
|
|
267
|
+
code === 47 ||
|
|
268
|
+
code === 92 ||
|
|
269
|
+
code === 63 ||
|
|
270
|
+
code === 35
|
|
271
|
+
) {
|
|
272
|
+
break;
|
|
273
|
+
}
|
|
274
|
+
if (code === 58 /* ':' */) {
|
|
275
|
+
isBareIpv6 = true;
|
|
276
|
+
break;
|
|
277
|
+
}
|
|
236
278
|
}
|
|
237
|
-
if (
|
|
279
|
+
if (!isBareIpv6) {
|
|
238
280
|
return null;
|
|
239
281
|
}
|
|
240
282
|
} else {
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
283
|
+
isSpecial = true;
|
|
284
|
+
start = indexOfColon + 1;
|
|
285
|
+
if (special === 2) {
|
|
286
|
+
// file (e.g. "file:\\host"): host only between "//" and next slash.
|
|
287
|
+
let slashes = 0;
|
|
288
|
+
while (
|
|
289
|
+
(url.charCodeAt(start) === 47 ||
|
|
290
|
+
url.charCodeAt(start) === 92) &&
|
|
291
|
+
slashes < 2
|
|
292
|
+
) {
|
|
293
|
+
start += 1;
|
|
294
|
+
slashes += 1;
|
|
295
|
+
}
|
|
296
|
+
if (slashes < 2) {
|
|
297
|
+
return null;
|
|
298
|
+
}
|
|
299
|
+
} else {
|
|
300
|
+
while (
|
|
301
|
+
url.charCodeAt(start) === 47 ||
|
|
302
|
+
url.charCodeAt(start) === 92
|
|
303
|
+
) {
|
|
304
|
+
start += 1;
|
|
305
|
+
}
|
|
246
306
|
}
|
|
247
307
|
}
|
|
248
308
|
}
|
|
@@ -253,12 +313,40 @@ export default function extractHostname(
|
|
|
253
313
|
|
|
254
314
|
// Find the host's end: first '/', '?' or '#' (and '\' for special URLs,
|
|
255
315
|
// which WHATWG treats like '/'). Track the last '@', ']' and ':' for
|
|
256
|
-
// userinfo, ipv6 and port
|
|
257
|
-
//
|
|
316
|
+
// userinfo, ipv6 and port, plus the first ':' of the host (reset at each
|
|
317
|
+
// '@') to tell a bare IPv6 (>= 2 colons) from a host:port (exactly one);
|
|
318
|
+
// flag uppercase and a stray tab/newline. The loop is split on `code < 64`
|
|
319
|
+
// so common host characters take fewer comparisons.
|
|
320
|
+
//
|
|
321
|
+
// When `validate`, also accumulate `is-valid.ts`'s checks over the scanned
|
|
322
|
+
// run so a simple authority's host can be validated in this single pass.
|
|
323
|
+
// `vValid` only stays meaningful for a "simple" authority (no userinfo, port,
|
|
324
|
+
// brackets, control or trailing dot); those cases clear it / are rejected by
|
|
325
|
+
// the guard below, falling back to `isValidHostname`.
|
|
258
326
|
let indexOfIdentifier = -1;
|
|
259
327
|
let indexOfClosingBracket = -1;
|
|
260
328
|
let indexOfPort = -1;
|
|
329
|
+
let indexOfFirstColon = -1;
|
|
261
330
|
let hasControl = false;
|
|
331
|
+
let vValid = validate; // seeded true when validating; cleared on the first invalid char
|
|
332
|
+
let vLastDot = start - 1; // mirrors is-valid.ts `lastDotIndex = -1` at host start
|
|
333
|
+
let vLastCode = -1;
|
|
334
|
+
if (validate && start < end) {
|
|
335
|
+
// First-char rule: must be a valid host char, '.', or '_' (NOT '-').
|
|
336
|
+
const c0 = url.charCodeAt(start);
|
|
337
|
+
if (
|
|
338
|
+
!(
|
|
339
|
+
/*@__INLINE__*/ (
|
|
340
|
+
isValidHostnameChar(c0) ||
|
|
341
|
+
c0 === 46 /* '.' */ ||
|
|
342
|
+
c0 === 95 /* '_' */
|
|
343
|
+
)
|
|
344
|
+
) ||
|
|
345
|
+
c0 === 45 /* '-' (isValidHostnameChar allows it mid-label, not first) */
|
|
346
|
+
) {
|
|
347
|
+
vValid = false;
|
|
348
|
+
}
|
|
349
|
+
}
|
|
262
350
|
for (let i = start; i < end; i += 1) {
|
|
263
351
|
const code: number = url.charCodeAt(i);
|
|
264
352
|
if (code < 64) {
|
|
@@ -266,19 +354,42 @@ export default function extractHostname(
|
|
|
266
354
|
end = i;
|
|
267
355
|
break;
|
|
268
356
|
} else if (code === 58 /* ':' */) {
|
|
357
|
+
if (indexOfFirstColon === -1) {
|
|
358
|
+
indexOfFirstColon = i;
|
|
359
|
+
}
|
|
269
360
|
indexOfPort = i;
|
|
270
361
|
} else if (code === 9 || code === 10 || code === 13) {
|
|
271
362
|
hasControl = true;
|
|
363
|
+
} else if (validate) {
|
|
364
|
+
if (code === 46 /* '.' */) {
|
|
365
|
+
if (i - vLastDot > 64 || vLastCode === 46 || vLastCode === 45) {
|
|
366
|
+
vValid = false;
|
|
367
|
+
}
|
|
368
|
+
vLastDot = i;
|
|
369
|
+
} else if (code < 48 || code > 57) {
|
|
370
|
+
// < 64 and not a delimiter/dot/digit => only '-' (45) is a valid
|
|
371
|
+
// host char here; everything else (space, %, !, etc.) is invalid.
|
|
372
|
+
if (code !== 45) {
|
|
373
|
+
vValid = false;
|
|
374
|
+
}
|
|
375
|
+
}
|
|
272
376
|
}
|
|
273
377
|
} else if (isSpecial && code === 92 /* '\' */) {
|
|
274
378
|
end = i;
|
|
275
379
|
break;
|
|
276
380
|
} else if (code === 64 /* '@' */) {
|
|
277
381
|
indexOfIdentifier = i;
|
|
382
|
+
indexOfFirstColon = -1; // colons before '@' are userinfo, not the host
|
|
278
383
|
} else if (code === 93 /* ']' */) {
|
|
279
384
|
indexOfClosingBracket = i;
|
|
280
385
|
} else if (code >= 65 && code <= 90) {
|
|
281
386
|
hasUpper = true;
|
|
387
|
+
} else if (validate && !(/*@__INLINE__*/ isValidHostnameChar(code))) {
|
|
388
|
+
// >= 64, not '@'/']'/upper: valid only if a-z, '_', or non-ASCII.
|
|
389
|
+
vValid = false;
|
|
390
|
+
}
|
|
391
|
+
if (validate) {
|
|
392
|
+
vLastCode = code;
|
|
282
393
|
}
|
|
283
394
|
}
|
|
284
395
|
|
|
@@ -287,6 +398,7 @@ export default function extractHostname(
|
|
|
287
398
|
return extractHostname(
|
|
288
399
|
url.replace(CONTROL_CHARS, ''),
|
|
289
400
|
urlIsValidHostname,
|
|
401
|
+
validate,
|
|
290
402
|
);
|
|
291
403
|
}
|
|
292
404
|
|
|
@@ -305,7 +417,15 @@ export default function extractHostname(
|
|
|
305
417
|
return url.slice(start + 1, indexOfClosingBracket).toLowerCase();
|
|
306
418
|
}
|
|
307
419
|
return null;
|
|
308
|
-
} else if (
|
|
420
|
+
} else if (
|
|
421
|
+
indexOfPort !== -1 &&
|
|
422
|
+
indexOfPort > start &&
|
|
423
|
+
indexOfPort < end &&
|
|
424
|
+
// A host:port has exactly one ':' in the host (so its first ':' is its
|
|
425
|
+
// last); a bare, unbracketed IPv6 literal ("2a01:e35::1") has >= 2, so
|
|
426
|
+
// its first ':' precedes the last. Only the former has a ':port' to trim.
|
|
427
|
+
indexOfFirstColon === indexOfPort
|
|
428
|
+
) {
|
|
309
429
|
end = indexOfPort; // trim ':port'
|
|
310
430
|
}
|
|
311
431
|
|
|
@@ -314,6 +434,31 @@ export default function extractHostname(
|
|
|
314
434
|
if (start >= end) {
|
|
315
435
|
return null;
|
|
316
436
|
}
|
|
437
|
+
|
|
438
|
+
// Publish the inline-validation verdict — but only for a "simple" authority,
|
|
439
|
+
// where the scanned run equals the final host: no userinfo skip, no port
|
|
440
|
+
// trim, no brackets, no trailing dot (trimmed below), and length within RFC
|
|
441
|
+
// limits. Anything else leaves it `false` so `parseImpl` re-validates.
|
|
442
|
+
//
|
|
443
|
+
// Every clause below is load-bearing for CORRECTNESS, not just speed: the
|
|
444
|
+
// loop accumulates `vValid` over the whole scanned run (it does not stop at
|
|
445
|
+
// ':' or '@', so any port/userinfo bytes are included), so the verdict is
|
|
446
|
+
// only sound when that run equals the final host. Do not drop a clause as
|
|
447
|
+
// "redundant" — e.g. without `indexOfPort === -1`, `host:8080` would be
|
|
448
|
+
// wrongly accepted.
|
|
449
|
+
if (
|
|
450
|
+
validate &&
|
|
451
|
+
vValid &&
|
|
452
|
+
indexOfIdentifier === -1 &&
|
|
453
|
+
indexOfPort === -1 &&
|
|
454
|
+
indexOfClosingBracket === -1 &&
|
|
455
|
+
url.charCodeAt(end - 1) !== 46 /* no trailing dot */ &&
|
|
456
|
+
end - start <= 255 && // total length
|
|
457
|
+
end - vLastDot - 1 <= 63 && // last label length
|
|
458
|
+
vLastCode !== 45 /* last char not '-' */
|
|
459
|
+
) {
|
|
460
|
+
extractedHostnameValidated = true;
|
|
461
|
+
}
|
|
317
462
|
}
|
|
318
463
|
|
|
319
464
|
// Trim trailing dots
|
package/src/factory.ts
CHANGED
|
@@ -6,7 +6,9 @@
|
|
|
6
6
|
|
|
7
7
|
import getDomain from './domain';
|
|
8
8
|
import getDomainWithoutSuffix from './domain-without-suffix';
|
|
9
|
-
import extractHostname
|
|
9
|
+
import extractHostname, {
|
|
10
|
+
extractedHostnameValidated,
|
|
11
|
+
} from './extract-hostname';
|
|
10
12
|
import isIp from './is-ip';
|
|
11
13
|
import isSpecialUse from './is-special-use';
|
|
12
14
|
import isValidHostname from './is-valid';
|
|
@@ -16,9 +18,10 @@ import getSubdomain from './subdomain';
|
|
|
16
18
|
|
|
17
19
|
export interface IResult {
|
|
18
20
|
// `hostname` is either a registered name (including but not limited to a
|
|
19
|
-
// hostname), or an IP address
|
|
20
|
-
//
|
|
21
|
-
//
|
|
21
|
+
// hostname), or an IP address, directly extracted from the input URL. IPv4
|
|
22
|
+
// addresses are in dot-decimal notation. IPv6 is returned without its
|
|
23
|
+
// surrounding brackets; both bracketed (in URLs, e.g. `http://[::1]/`) and
|
|
24
|
+
// bare unbracketed (e.g. `2a01:e35::1`) IPv6 literals are accepted.
|
|
22
25
|
hostname: string | null;
|
|
23
26
|
|
|
24
27
|
// Is `hostname` an IP? (IPv4 or IPv6)
|
|
@@ -119,9 +122,13 @@ export function parseImpl(
|
|
|
119
122
|
result.hostname = url;
|
|
120
123
|
} else if (options.mixedInputs) {
|
|
121
124
|
urlIsValid = isValidHostname(url);
|
|
122
|
-
result.hostname = extractHostname(
|
|
125
|
+
result.hostname = extractHostname(
|
|
126
|
+
url,
|
|
127
|
+
urlIsValid,
|
|
128
|
+
options.validateHostname,
|
|
129
|
+
);
|
|
123
130
|
} else {
|
|
124
|
-
result.hostname = extractHostname(url, false);
|
|
131
|
+
result.hostname = extractHostname(url, false, options.validateHostname);
|
|
125
132
|
}
|
|
126
133
|
|
|
127
134
|
// Check if `hostname` is a valid ip address
|
|
@@ -143,6 +150,9 @@ export function parseImpl(
|
|
|
143
150
|
// Skip the re-scan when `url` was already validated and extractHostname
|
|
144
151
|
// returned it unchanged (same reference => identical string, still valid).
|
|
145
152
|
!(urlIsValid && result.hostname === url) &&
|
|
153
|
+
// Skip the re-scan when extractHostname already validated the host inline
|
|
154
|
+
// (a confirmed-valid simple authority — see extract-hostname.ts).
|
|
155
|
+
!extractedHostnameValidated &&
|
|
146
156
|
!isValidHostname(result.hostname)
|
|
147
157
|
) {
|
|
148
158
|
result.hostname = null;
|
package/src/is-valid.ts
CHANGED
|
@@ -7,6 +7,11 @@
|
|
|
7
7
|
* If you need stricter validation, consider using an external library.
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
|
+
// KEEP IN SYNC with `extract-hostname.ts` `isValidHostnameChar` + its inline
|
|
11
|
+
// scan/verdict, which duplicate these structural rules to validate during
|
|
12
|
+
// extraction (a perf fusion). That copy additionally accepts A-Z (the host is
|
|
13
|
+
// not yet lowercased there) and folds in '-' / '_'. Any change to the accepted
|
|
14
|
+
// character set or the label/length rules here must be mirrored there.
|
|
10
15
|
function isValidAscii(code: number): boolean {
|
|
11
16
|
return (
|
|
12
17
|
(code >= 97 && code <= 122) || (code >= 48 && code <= 57) || code > 127
|