tldts-core 7.1.0 → 7.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/index.js +230 -66
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/src/extract-hostname.js +227 -71
- package/dist/cjs/src/extract-hostname.js.map +1 -1
- package/dist/cjs/src/is-ip.js +1 -1
- package/dist/cjs/src/is-ip.js.map +1 -1
- package/dist/cjs/tsconfig.tsbuildinfo +1 -1
- package/dist/es6/src/extract-hostname.js +227 -71
- package/dist/es6/src/extract-hostname.js.map +1 -1
- package/dist/es6/src/is-ip.js +1 -1
- package/dist/es6/src/is-ip.js.map +1 -1
- package/dist/es6/tsconfig.bundle.tsbuildinfo +1 -1
- package/dist/types/src/extract-hostname.d.ts +10 -2
- package/package.json +2 -2
- package/src/extract-hostname.ts +241 -79
- package/src/is-ip.ts +2 -2
package/src/extract-hostname.ts
CHANGED
|
@@ -1,6 +1,64 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
3
|
-
*
|
|
2
|
+
* Matches an ASCII tab (U+0009) or newline (U+000A / U+000D). The WHATWG URL
|
|
3
|
+
* parser strips these before parsing; we only allocate a cleaned copy (and
|
|
4
|
+
* re-parse) on the rare input that actually contains one.
|
|
5
|
+
*/
|
|
6
|
+
const CONTROL_CHARS = /[\t\n\r]/g;
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Classify scheme `url.slice(schemeStart, colonIndex)` as a WHATWG special
|
|
10
|
+
* scheme without allocating a substring (case-insensitive via `| 32`).
|
|
11
|
+
* Special schemes: ftp, file, http, https, ws, wss
|
|
12
|
+
* (https://url.spec.whatwg.org/#special-scheme).
|
|
13
|
+
*
|
|
14
|
+
* @returns 0 = not special, 1 = special, 2 = file (its host sits only between
|
|
15
|
+
* "//" and the next slash).
|
|
16
|
+
*/
|
|
17
|
+
function getSpecialScheme(
|
|
18
|
+
url: string,
|
|
19
|
+
schemeStart: number,
|
|
20
|
+
colonIndex: number,
|
|
21
|
+
): number {
|
|
22
|
+
const length = colonIndex - schemeStart;
|
|
23
|
+
const c0 = url.charCodeAt(schemeStart) | 32;
|
|
24
|
+
if (length === 2) {
|
|
25
|
+
return c0 === 119 && (url.charCodeAt(schemeStart + 1) | 32) === 115 ? 1 : 0; // ws
|
|
26
|
+
} else if (length === 3) {
|
|
27
|
+
const c1 = url.charCodeAt(schemeStart + 1) | 32;
|
|
28
|
+
const c2 = url.charCodeAt(schemeStart + 2) | 32;
|
|
29
|
+
if (c0 === 119 && c1 === 115 && c2 === 115) return 1; // wss
|
|
30
|
+
if (c0 === 102 && c1 === 116 && c2 === 112) return 1; // ftp
|
|
31
|
+
return 0;
|
|
32
|
+
} else if (length === 4) {
|
|
33
|
+
const c1 = url.charCodeAt(schemeStart + 1) | 32;
|
|
34
|
+
const c2 = url.charCodeAt(schemeStart + 2) | 32;
|
|
35
|
+
const c3 = url.charCodeAt(schemeStart + 3) | 32;
|
|
36
|
+
if (c0 === 104 && c1 === 116 && c2 === 116 && c3 === 112) return 1; // http
|
|
37
|
+
if (c0 === 102 && c1 === 105 && c2 === 108 && c3 === 101) return 2; // file
|
|
38
|
+
return 0;
|
|
39
|
+
} else if (length === 5) {
|
|
40
|
+
return c0 === 104 &&
|
|
41
|
+
(url.charCodeAt(schemeStart + 1) | 32) === 116 &&
|
|
42
|
+
(url.charCodeAt(schemeStart + 2) | 32) === 116 &&
|
|
43
|
+
(url.charCodeAt(schemeStart + 3) | 32) === 112 &&
|
|
44
|
+
(url.charCodeAt(schemeStart + 4) | 32) === 115
|
|
45
|
+
? 1
|
|
46
|
+
: 0; // https
|
|
47
|
+
}
|
|
48
|
+
return 0;
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* Extract a hostname from `url`, matching a WHATWG URL parser's host-boundary
|
|
53
|
+
* behaviour (https://url.spec.whatwg.org/#concept-basic-url-parser) for tldts'
|
|
54
|
+
* scope. It deliberately does NOT normalise the host (no IDNA/punycode or IPv4
|
|
55
|
+
* canonicalisation; IPv6 brackets are stripped, not compressed), strips trailing
|
|
56
|
+
* dots, and stays lenient where a strict parser rejects (bare host:port,
|
|
57
|
+
* out-of-range port, user@host) — all documented deviations.
|
|
58
|
+
*
|
|
59
|
+
* @param urlIsValidHostname - when true, `url` is already a valid hostname and is
|
|
60
|
+
* returned by the same reference (factory.ts skips re-validation on that
|
|
61
|
+
* identity), keeping the common path allocation-free.
|
|
4
62
|
*/
|
|
5
63
|
export default function extractHostname(
|
|
6
64
|
url: string,
|
|
@@ -9,148 +67,252 @@ export default function extractHostname(
|
|
|
9
67
|
let start = 0;
|
|
10
68
|
let end: number = url.length;
|
|
11
69
|
let hasUpper = false;
|
|
70
|
+
let isSpecial = false;
|
|
12
71
|
|
|
13
|
-
// If url is not already a valid hostname, then try to extract hostname.
|
|
14
72
|
if (!urlIsValidHostname) {
|
|
15
|
-
//
|
|
73
|
+
// Data URLs never carry a host (and may be huge — short-circuit them).
|
|
16
74
|
if (url.startsWith('data:')) {
|
|
17
75
|
return null;
|
|
18
76
|
}
|
|
19
77
|
|
|
20
|
-
//
|
|
78
|
+
// WHATWG step 1: trim leading/trailing C0 control or space (<= U+0020).
|
|
79
|
+
// Tab/newline elsewhere are handled lazily below.
|
|
21
80
|
while (start < url.length && url.charCodeAt(start) <= 32) {
|
|
22
81
|
start += 1;
|
|
23
82
|
}
|
|
24
|
-
|
|
25
|
-
// Trim trailing spaces
|
|
26
83
|
while (end > start + 1 && url.charCodeAt(end - 1) <= 32) {
|
|
27
84
|
end -= 1;
|
|
28
85
|
}
|
|
29
86
|
|
|
30
|
-
// Skip scheme.
|
|
31
87
|
if (
|
|
32
88
|
url.charCodeAt(start) === 47 /* '/' */ &&
|
|
33
89
|
url.charCodeAt(start + 1) === 47 /* '/' */
|
|
34
90
|
) {
|
|
91
|
+
// Scheme-relative reference ("//host/path").
|
|
35
92
|
start += 2;
|
|
36
93
|
} else {
|
|
37
94
|
const indexOfProtocol = url.indexOf(':/', start);
|
|
38
95
|
if (indexOfProtocol !== -1) {
|
|
39
|
-
//
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
c0 === 119 /* 'w' */ &&
|
|
69
|
-
c1 === 115 /* 's' */ &&
|
|
70
|
-
c2 === 115 /* 's' */
|
|
71
|
-
) {
|
|
72
|
-
// wss
|
|
73
|
-
} else if (
|
|
74
|
-
protocolSize === 2 &&
|
|
75
|
-
c0 === 119 /* 'w' */ &&
|
|
76
|
-
c1 === 115 /* 's' */
|
|
77
|
-
) {
|
|
78
|
-
// ws
|
|
96
|
+
// "scheme://…". Classify the scheme, then position `start` at the host.
|
|
97
|
+
const special = getSpecialScheme(url, start, indexOfProtocol);
|
|
98
|
+
if (special === 1) {
|
|
99
|
+
// Special scheme: skip the run of '/' and '\' after it
|
|
100
|
+
// (special-authority-(ignore-)slashes states; '\' acts as '/').
|
|
101
|
+
isSpecial = true;
|
|
102
|
+
start = indexOfProtocol + 2;
|
|
103
|
+
while (
|
|
104
|
+
url.charCodeAt(start) === 47 /* '/' */ ||
|
|
105
|
+
url.charCodeAt(start) === 92 /* '\' */
|
|
106
|
+
) {
|
|
107
|
+
start += 1;
|
|
108
|
+
}
|
|
109
|
+
} else if (special === 2) {
|
|
110
|
+
// file: the host is only what sits between "//" and the next slash, so
|
|
111
|
+
// "file://h/x" => "h" but "file:///x" / "file:/x" => no host.
|
|
112
|
+
isSpecial = true;
|
|
113
|
+
start = indexOfProtocol + 1;
|
|
114
|
+
let slashes = 0;
|
|
115
|
+
while (
|
|
116
|
+
(url.charCodeAt(start) === 47 || url.charCodeAt(start) === 92) &&
|
|
117
|
+
slashes < 2
|
|
118
|
+
) {
|
|
119
|
+
start += 1;
|
|
120
|
+
slashes += 1;
|
|
121
|
+
}
|
|
122
|
+
if (slashes < 2) {
|
|
123
|
+
return null;
|
|
124
|
+
}
|
|
79
125
|
} else {
|
|
80
|
-
//
|
|
126
|
+
// Unknown scheme: validate the WHATWG scheme grammar [A-Za-z0-9+.-];
|
|
127
|
+
// a control char means it was split by a tab/newline (strip + re-parse).
|
|
81
128
|
for (let i = start; i < indexOfProtocol; i += 1) {
|
|
82
|
-
const
|
|
129
|
+
const code = url.charCodeAt(i) | 32;
|
|
83
130
|
if (
|
|
84
131
|
!(
|
|
85
132
|
(
|
|
86
|
-
(
|
|
87
|
-
(
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
133
|
+
(code >= 97 && code <= 122) || // [a, z]
|
|
134
|
+
(code >= 48 && code <= 57) || // [0, 9]
|
|
135
|
+
code === 46 || // '.'
|
|
136
|
+
code === 45 || // '-'
|
|
137
|
+
code === 43
|
|
91
138
|
) // '+'
|
|
92
139
|
)
|
|
93
140
|
) {
|
|
141
|
+
const raw = url.charCodeAt(i);
|
|
142
|
+
if (raw === 9 || raw === 10 || raw === 13) {
|
|
143
|
+
return extractHostname(
|
|
144
|
+
url.replace(CONTROL_CHARS, ''),
|
|
145
|
+
urlIsValidHostname,
|
|
146
|
+
);
|
|
147
|
+
}
|
|
94
148
|
return null;
|
|
95
149
|
}
|
|
96
150
|
}
|
|
151
|
+
// A non-special scheme has an authority only after "//" (else it is an
|
|
152
|
+
// opaque path with no host). `indexOf(':/')` already gave the first '/'.
|
|
153
|
+
if (url.charCodeAt(indexOfProtocol + 2) === 47 /* '/' */) {
|
|
154
|
+
start = indexOfProtocol + 3;
|
|
155
|
+
} else {
|
|
156
|
+
return null;
|
|
157
|
+
}
|
|
97
158
|
}
|
|
159
|
+
} else if (url.charCodeAt(start) !== 91 /* '[' */) {
|
|
160
|
+
// Cold path: no scheme "://", and not a bare IPv6 literal (whose first
|
|
161
|
+
// ':' would otherwise look like a scheme separator; "[…]" falls through
|
|
162
|
+
// to the ipv6 handling below). May be a bare host, a host:port, a
|
|
163
|
+
// user@host, a slash-less special scheme ("https:host"), or an opaque
|
|
164
|
+
// URI ("mailto:", "tel:", "urn:…").
|
|
165
|
+
let indexOfColon = -1;
|
|
166
|
+
for (let i = start; i < end; i += 1) {
|
|
167
|
+
const code = url.charCodeAt(i);
|
|
168
|
+
if (code === 9 || code === 10 || code === 13) {
|
|
169
|
+
return extractHostname(
|
|
170
|
+
url.replace(CONTROL_CHARS, ''),
|
|
171
|
+
urlIsValidHostname,
|
|
172
|
+
);
|
|
173
|
+
}
|
|
174
|
+
if (code === 58 /* ':' */) {
|
|
175
|
+
indexOfColon = i;
|
|
176
|
+
break;
|
|
177
|
+
}
|
|
178
|
+
if (code === 47 || code === 92 || code === 63 || code === 35) {
|
|
179
|
+
break;
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
if (indexOfColon !== -1) {
|
|
184
|
+
// An '@' before the next delimiter => the ':' is userinfo, not a
|
|
185
|
+
// scheme ("user:pass@host", "mailto:a@b"): keep the whole authority.
|
|
186
|
+
let hasIdentifier = false;
|
|
187
|
+
for (let i = indexOfColon + 1; i < end; i += 1) {
|
|
188
|
+
const code = url.charCodeAt(i);
|
|
189
|
+
if (code === 47 || code === 92 || code === 63 || code === 35) {
|
|
190
|
+
break;
|
|
191
|
+
}
|
|
192
|
+
if (code === 64 /* '@' */) {
|
|
193
|
+
hasIdentifier = true;
|
|
194
|
+
break;
|
|
195
|
+
}
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
if (!hasIdentifier) {
|
|
199
|
+
// All-digits after ':' => a bare "host:port" (tldts accepts
|
|
200
|
+
// hostnames too); keep `start` and let the port handling trim it.
|
|
201
|
+
let allDigits = true;
|
|
202
|
+
let i = indexOfColon + 1;
|
|
203
|
+
for (; i < end; i += 1) {
|
|
204
|
+
const code = url.charCodeAt(i);
|
|
205
|
+
if (code === 47 || code === 92 || code === 63 || code === 35) {
|
|
206
|
+
break;
|
|
207
|
+
}
|
|
208
|
+
if (code < 48 /* '0' */ || code > 57 /* '9' */) {
|
|
209
|
+
allDigits = false;
|
|
210
|
+
break;
|
|
211
|
+
}
|
|
212
|
+
}
|
|
213
|
+
if (i === indexOfColon + 1) {
|
|
214
|
+
allDigits = false; // nothing after ':' => not a port
|
|
215
|
+
}
|
|
98
216
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
217
|
+
if (!allDigits) {
|
|
218
|
+
const special = getSpecialScheme(url, start, indexOfColon);
|
|
219
|
+
if (special === 0) {
|
|
220
|
+
// No "://" anywhere on the cold path, so a non-special scheme has
|
|
221
|
+
// no authority: opaque path, no host ("mailto:x", "foo:bar").
|
|
222
|
+
return null;
|
|
223
|
+
}
|
|
224
|
+
isSpecial = true;
|
|
225
|
+
start = indexOfColon + 1;
|
|
226
|
+
if (special === 2) {
|
|
227
|
+
// file (e.g. "file:\\host"): host only between "//" and next slash.
|
|
228
|
+
let slashes = 0;
|
|
229
|
+
while (
|
|
230
|
+
(url.charCodeAt(start) === 47 ||
|
|
231
|
+
url.charCodeAt(start) === 92) &&
|
|
232
|
+
slashes < 2
|
|
233
|
+
) {
|
|
234
|
+
start += 1;
|
|
235
|
+
slashes += 1;
|
|
236
|
+
}
|
|
237
|
+
if (slashes < 2) {
|
|
238
|
+
return null;
|
|
239
|
+
}
|
|
240
|
+
} else {
|
|
241
|
+
while (
|
|
242
|
+
url.charCodeAt(start) === 47 ||
|
|
243
|
+
url.charCodeAt(start) === 92
|
|
244
|
+
) {
|
|
245
|
+
start += 1;
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
}
|
|
103
250
|
}
|
|
104
251
|
}
|
|
105
252
|
}
|
|
106
253
|
|
|
107
|
-
//
|
|
108
|
-
//
|
|
109
|
-
//
|
|
254
|
+
// Find the host's end: first '/', '?' or '#' (and '\' for special URLs,
|
|
255
|
+
// which WHATWG treats like '/'). Track the last '@', ']' and ':' for
|
|
256
|
+
// userinfo, ipv6 and port; flag uppercase and a stray tab/newline. The loop
|
|
257
|
+
// is split on `code < 64` so common host characters take fewer comparisons.
|
|
110
258
|
let indexOfIdentifier = -1;
|
|
111
259
|
let indexOfClosingBracket = -1;
|
|
112
260
|
let indexOfPort = -1;
|
|
261
|
+
let hasControl = false;
|
|
113
262
|
for (let i = start; i < end; i += 1) {
|
|
114
263
|
const code: number = url.charCodeAt(i);
|
|
115
|
-
if (
|
|
116
|
-
code === 35 ||
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
264
|
+
if (code < 64) {
|
|
265
|
+
if (code === 47 || code === 35 || code === 63) {
|
|
266
|
+
end = i;
|
|
267
|
+
break;
|
|
268
|
+
} else if (code === 58 /* ':' */) {
|
|
269
|
+
indexOfPort = i;
|
|
270
|
+
} else if (code === 9 || code === 10 || code === 13) {
|
|
271
|
+
hasControl = true;
|
|
272
|
+
}
|
|
273
|
+
} else if (isSpecial && code === 92 /* '\' */) {
|
|
120
274
|
end = i;
|
|
121
275
|
break;
|
|
122
|
-
} else if (code === 64) {
|
|
123
|
-
// '@'
|
|
276
|
+
} else if (code === 64 /* '@' */) {
|
|
124
277
|
indexOfIdentifier = i;
|
|
125
|
-
} else if (code === 93) {
|
|
126
|
-
// ']'
|
|
278
|
+
} else if (code === 93 /* ']' */) {
|
|
127
279
|
indexOfClosingBracket = i;
|
|
128
|
-
} else if (code === 58) {
|
|
129
|
-
// ':'
|
|
130
|
-
indexOfPort = i;
|
|
131
280
|
} else if (code >= 65 && code <= 90) {
|
|
132
281
|
hasUpper = true;
|
|
133
282
|
}
|
|
134
283
|
}
|
|
135
284
|
|
|
136
|
-
//
|
|
285
|
+
// A tab/newline inside the authority: strip everything and re-parse (rare).
|
|
286
|
+
if (hasControl) {
|
|
287
|
+
return extractHostname(
|
|
288
|
+
url.replace(CONTROL_CHARS, ''),
|
|
289
|
+
urlIsValidHostname,
|
|
290
|
+
);
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
// Skip userinfo. '>= start' so an empty userinfo ("http://@host") works too.
|
|
137
294
|
if (
|
|
138
295
|
indexOfIdentifier !== -1 &&
|
|
139
|
-
indexOfIdentifier
|
|
296
|
+
indexOfIdentifier >= start &&
|
|
140
297
|
indexOfIdentifier < end
|
|
141
298
|
) {
|
|
142
299
|
start = indexOfIdentifier + 1;
|
|
143
300
|
}
|
|
144
301
|
|
|
145
|
-
// Handle ipv6 addresses
|
|
146
302
|
if (url.charCodeAt(start) === 91 /* '[' */) {
|
|
303
|
+
// ipv6 address: return what is between the brackets, or null if unclosed.
|
|
147
304
|
if (indexOfClosingBracket !== -1) {
|
|
148
305
|
return url.slice(start + 1, indexOfClosingBracket).toLowerCase();
|
|
149
306
|
}
|
|
150
307
|
return null;
|
|
151
308
|
} else if (indexOfPort !== -1 && indexOfPort > start && indexOfPort < end) {
|
|
152
|
-
//
|
|
153
|
-
|
|
309
|
+
end = indexOfPort; // trim ':port'
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
// Empty authority ("http://", "file:///path", "//"); only reachable here via
|
|
313
|
+
// extraction — a bare valid hostname never lands here.
|
|
314
|
+
if (start >= end) {
|
|
315
|
+
return null;
|
|
154
316
|
}
|
|
155
317
|
}
|
|
156
318
|
|
package/src/is-ip.ts
CHANGED
|
@@ -66,8 +66,8 @@ function isProbablyIpv6(hostname: string): boolean {
|
|
|
66
66
|
(
|
|
67
67
|
(code >= 48 && code <= 57) || // 0-9
|
|
68
68
|
(code >= 97 && code <= 102) || // a-f
|
|
69
|
-
(code >= 65 && code <=
|
|
70
|
-
) // A-F
|
|
69
|
+
(code >= 65 && code <= 70)
|
|
70
|
+
) // A-F (RFC 4291 §2.2: an IPv6 hextet is hex digits only)
|
|
71
71
|
)
|
|
72
72
|
) {
|
|
73
73
|
return false;
|