tldts 7.1.0 → 7.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/index.js +229 -65
- package/dist/cjs/index.js.map +1 -1
- package/dist/index.cjs.min.js +1 -1
- package/dist/index.cjs.min.js.map +1 -1
- package/dist/index.esm.min.js +1 -1
- package/dist/index.esm.min.js.map +1 -1
- package/dist/index.umd.min.js +1 -1
- package/dist/index.umd.min.js.map +1 -1
- package/package.json +4 -4
package/dist/cjs/index.js
CHANGED
|
@@ -93,130 +93,294 @@ function getDomainWithoutSuffix$1(domain, suffix) {
|
|
|
93
93
|
}
|
|
94
94
|
|
|
95
95
|
/**
|
|
96
|
-
*
|
|
97
|
-
*
|
|
96
|
+
* Matches an ASCII tab (U+0009) or newline (U+000A / U+000D). The WHATWG URL
|
|
97
|
+
* parser strips these before parsing; we only allocate a cleaned copy (and
|
|
98
|
+
* re-parse) on the rare input that actually contains one.
|
|
99
|
+
*/
|
|
100
|
+
// The global flag lets a single `replace` call strip every match. In the
// visible code this regex is only ever handed to String.prototype.replace,
// which resets `lastIndex` for global patterns, so sharing one module-level
// instance does not leak state between calls there.
const CONTROL_CHARS = /[\t\n\r]/g;
|
|
101
|
+
/**
 * Decide whether the scheme occupying `url[schemeStart .. colonIndex)` is one
 * of the WHATWG "special" schemes: ftp, file, http, https, ws, wss
 * (https://url.spec.whatwg.org/#special-scheme).
 *
 * The comparison reads char codes in place instead of slicing a substring.
 * OR-ing a code with 32 maps ASCII 'A'-'Z' onto 'a'-'z', which makes the
 * match case-insensitive; no non-letter code can turn into a letter that way.
 *
 * @param url - string holding the scheme.
 * @param schemeStart - index of the scheme's first character.
 * @param colonIndex - index of the ':' that ends the scheme (exclusive end).
 * @returns 0 = not special, 1 = special, 2 = file (reported separately so the
 * caller can apply file-specific host rules).
 */
function getSpecialScheme(url, schemeStart, colonIndex) {
    const first = url.charCodeAt(schemeStart) | 32;
    // Dispatch on the scheme length first: only lengths 2..5 can match.
    switch (colonIndex - schemeStart) {
        case 2: {
            // "ws"
            const second = url.charCodeAt(schemeStart + 1) | 32;
            return first === 119 /* 'w' */ && second === 115 /* 's' */ ? 1 : 0;
        }
        case 3: {
            const second = url.charCodeAt(schemeStart + 1) | 32;
            const third = url.charCodeAt(schemeStart + 2) | 32;
            if (first === 119 /* 'w' */) {
                // "wss"
                return second === 115 /* 's' */ && third === 115 /* 's' */ ? 1 : 0;
            }
            if (first === 102 /* 'f' */) {
                // "ftp"
                return second === 116 /* 't' */ && third === 112 /* 'p' */ ? 1 : 0;
            }
            return 0;
        }
        case 4: {
            const second = url.charCodeAt(schemeStart + 1) | 32;
            const third = url.charCodeAt(schemeStart + 2) | 32;
            const fourth = url.charCodeAt(schemeStart + 3) | 32;
            if (first === 104 /* 'h' */) {
                // "http"
                return second === 116 /* 't' */ &&
                    third === 116 /* 't' */ &&
                    fourth === 112 /* 'p' */
                    ? 1
                    : 0;
            }
            if (first === 102 /* 'f' */) {
                // "file" is the only scheme reported as 2.
                return second === 105 /* 'i' */ &&
                    third === 108 /* 'l' */ &&
                    fourth === 101 /* 'e' */
                    ? 2
                    : 0;
            }
            return 0;
        }
        case 5: {
            // "https"
            const matchesHttps = first === 104 /* 'h' */ &&
                (url.charCodeAt(schemeStart + 1) | 32) === 116 /* 't' */ &&
                (url.charCodeAt(schemeStart + 2) | 32) === 116 /* 't' */ &&
                (url.charCodeAt(schemeStart + 3) | 32) === 112 /* 'p' */ &&
                (url.charCodeAt(schemeStart + 4) | 32) === 115 /* 's' */;
            return matchesHttps ? 1 : 0;
        }
        default:
            return 0;
    }
}
|
|
146
|
+
/**
|
|
147
|
+
* Extract a hostname from `url`, matching a WHATWG URL parser's host-boundary
|
|
148
|
+
* behaviour (https://url.spec.whatwg.org/#concept-basic-url-parser) for tldts'
|
|
149
|
+
* scope. It deliberately does NOT normalise the host (no IDNA/punycode or IPv4
|
|
150
|
+
* canonicalisation; IPv6 brackets are stripped, not compressed), strips trailing
|
|
151
|
+
* dots, and stays lenient where a strict parser rejects (bare host:port,
|
|
152
|
+
* out-of-range port, user@host) — all documented deviations.
|
|
153
|
+
*
|
|
154
|
+
* @param urlIsValidHostname - when true, `url` is already a valid hostname and is
|
|
155
|
+
* returned by the same reference (factory.ts skips re-validation on that
|
|
156
|
+
* identity), keeping the common path allocation-free.
|
|
98
157
|
*/
|
|
99
158
|
function extractHostname(url, urlIsValidHostname) {
|
|
100
159
|
let start = 0;
|
|
101
160
|
let end = url.length;
|
|
102
161
|
let hasUpper = false;
|
|
103
|
-
|
|
162
|
+
let isSpecial = false;
|
|
104
163
|
if (!urlIsValidHostname) {
|
|
105
|
-
//
|
|
164
|
+
// Data URLs never carry a host (and may be huge — short-circuit them).
|
|
106
165
|
if (url.startsWith('data:')) {
|
|
107
166
|
return null;
|
|
108
167
|
}
|
|
109
|
-
//
|
|
168
|
+
// WHATWG step 1: trim leading/trailing C0 control or space (<= U+0020).
|
|
169
|
+
// Tab/newline elsewhere are handled lazily below.
|
|
110
170
|
while (start < url.length && url.charCodeAt(start) <= 32) {
|
|
111
171
|
start += 1;
|
|
112
172
|
}
|
|
113
|
-
// Trim trailing spaces
|
|
114
173
|
while (end > start + 1 && url.charCodeAt(end - 1) <= 32) {
|
|
115
174
|
end -= 1;
|
|
116
175
|
}
|
|
117
|
-
// Skip scheme.
|
|
118
176
|
if (url.charCodeAt(start) === 47 /* '/' */ &&
|
|
119
177
|
url.charCodeAt(start + 1) === 47 /* '/' */) {
|
|
178
|
+
// Scheme-relative reference ("//host/path").
|
|
120
179
|
start += 2;
|
|
121
180
|
}
|
|
122
181
|
else {
|
|
123
182
|
const indexOfProtocol = url.indexOf(':/', start);
|
|
124
183
|
if (indexOfProtocol !== -1) {
|
|
125
|
-
//
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
184
|
+
// "scheme://…". Classify the scheme, then position `start` at the host.
|
|
185
|
+
const special = getSpecialScheme(url, start, indexOfProtocol);
|
|
186
|
+
if (special === 1) {
|
|
187
|
+
// Special scheme: skip the run of '/' and '\' after it
|
|
188
|
+
// (special-authority-(ignore-)slashes states; '\' acts as '/').
|
|
189
|
+
isSpecial = true;
|
|
190
|
+
start = indexOfProtocol + 2;
|
|
191
|
+
while (url.charCodeAt(start) === 47 /* '/' */ ||
|
|
192
|
+
url.charCodeAt(start) === 92 /* '\' */) {
|
|
193
|
+
start += 1;
|
|
194
|
+
}
|
|
195
|
+
}
|
|
196
|
+
else if (special === 2) {
|
|
197
|
+
// file: the host is only what sits between "//" and the next slash, so
|
|
198
|
+
// "file://h/x" => "h" but "file:///x" / "file:/x" => no host.
|
|
199
|
+
isSpecial = true;
|
|
200
|
+
start = indexOfProtocol + 1;
|
|
201
|
+
let slashes = 0;
|
|
202
|
+
while ((url.charCodeAt(start) === 47 || url.charCodeAt(start) === 92) &&
|
|
203
|
+
slashes < 2) {
|
|
204
|
+
start += 1;
|
|
205
|
+
slashes += 1;
|
|
206
|
+
}
|
|
207
|
+
if (slashes < 2) {
|
|
208
|
+
return null;
|
|
209
|
+
}
|
|
210
|
+
}
|
|
152
211
|
else {
|
|
153
|
-
//
|
|
212
|
+
// Unknown scheme: validate the WHATWG scheme grammar [A-Za-z0-9+.-];
|
|
213
|
+
// a control char means it was split by a tab/newline (strip + re-parse).
|
|
154
214
|
for (let i = start; i < indexOfProtocol; i += 1) {
|
|
155
|
-
const
|
|
156
|
-
if (!(((
|
|
157
|
-
(
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
215
|
+
const code = url.charCodeAt(i) | 32;
|
|
216
|
+
if (!(((code >= 97 && code <= 122) || // [a, z]
|
|
217
|
+
(code >= 48 && code <= 57) || // [0, 9]
|
|
218
|
+
code === 46 || // '.'
|
|
219
|
+
code === 45 || // '-'
|
|
220
|
+
code === 43) // '+'
|
|
161
221
|
)) {
|
|
222
|
+
const raw = url.charCodeAt(i);
|
|
223
|
+
if (raw === 9 || raw === 10 || raw === 13) {
|
|
224
|
+
return extractHostname(url.replace(CONTROL_CHARS, ''), urlIsValidHostname);
|
|
225
|
+
}
|
|
162
226
|
return null;
|
|
163
227
|
}
|
|
164
228
|
}
|
|
229
|
+
// A non-special scheme has an authority only after "//" (else it is an
|
|
230
|
+
// opaque path with no host). `indexOf(':/')` already gave the first '/'.
|
|
231
|
+
if (url.charCodeAt(indexOfProtocol + 2) === 47 /* '/' */) {
|
|
232
|
+
start = indexOfProtocol + 3;
|
|
233
|
+
}
|
|
234
|
+
else {
|
|
235
|
+
return null;
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
}
|
|
239
|
+
else if (url.charCodeAt(start) !== 91 /* '[' */) {
|
|
240
|
+
// Cold path: no scheme "://", and not a bare IPv6 literal (whose first
|
|
241
|
+
// ':' would otherwise look like a scheme separator; "[…]" falls through
|
|
242
|
+
// to the ipv6 handling below). May be a bare host, a host:port, a
|
|
243
|
+
// user@host, a slash-less special scheme ("https:host"), or an opaque
|
|
244
|
+
// URI ("mailto:", "tel:", "urn:…").
|
|
245
|
+
let indexOfColon = -1;
|
|
246
|
+
for (let i = start; i < end; i += 1) {
|
|
247
|
+
const code = url.charCodeAt(i);
|
|
248
|
+
if (code === 9 || code === 10 || code === 13) {
|
|
249
|
+
return extractHostname(url.replace(CONTROL_CHARS, ''), urlIsValidHostname);
|
|
250
|
+
}
|
|
251
|
+
if (code === 58 /* ':' */) {
|
|
252
|
+
indexOfColon = i;
|
|
253
|
+
break;
|
|
254
|
+
}
|
|
255
|
+
if (code === 47 || code === 92 || code === 63 || code === 35) {
|
|
256
|
+
break;
|
|
257
|
+
}
|
|
165
258
|
}
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
259
|
+
if (indexOfColon !== -1) {
|
|
260
|
+
// An '@' before the next delimiter => the ':' is userinfo, not a
|
|
261
|
+
// scheme ("user:pass@host", "mailto:a@b"): keep the whole authority.
|
|
262
|
+
let hasIdentifier = false;
|
|
263
|
+
for (let i = indexOfColon + 1; i < end; i += 1) {
|
|
264
|
+
const code = url.charCodeAt(i);
|
|
265
|
+
if (code === 47 || code === 92 || code === 63 || code === 35) {
|
|
266
|
+
break;
|
|
267
|
+
}
|
|
268
|
+
if (code === 64 /* '@' */) {
|
|
269
|
+
hasIdentifier = true;
|
|
270
|
+
break;
|
|
271
|
+
}
|
|
272
|
+
}
|
|
273
|
+
if (!hasIdentifier) {
|
|
274
|
+
// All-digits after ':' => a bare "host:port" (tldts accepts
|
|
275
|
+
// hostnames too); keep `start` and let the port handling trim it.
|
|
276
|
+
let allDigits = true;
|
|
277
|
+
let i = indexOfColon + 1;
|
|
278
|
+
for (; i < end; i += 1) {
|
|
279
|
+
const code = url.charCodeAt(i);
|
|
280
|
+
if (code === 47 || code === 92 || code === 63 || code === 35) {
|
|
281
|
+
break;
|
|
282
|
+
}
|
|
283
|
+
if (code < 48 /* '0' */ || code > 57 /* '9' */) {
|
|
284
|
+
allDigits = false;
|
|
285
|
+
break;
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
if (i === indexOfColon + 1) {
|
|
289
|
+
allDigits = false; // nothing after ':' => not a port
|
|
290
|
+
}
|
|
291
|
+
if (!allDigits) {
|
|
292
|
+
const special = getSpecialScheme(url, start, indexOfColon);
|
|
293
|
+
if (special === 0) {
|
|
294
|
+
// No "://" anywhere on the cold path, so a non-special scheme has
|
|
295
|
+
// no authority: opaque path, no host ("mailto:x", "foo:bar").
|
|
296
|
+
return null;
|
|
297
|
+
}
|
|
298
|
+
isSpecial = true;
|
|
299
|
+
start = indexOfColon + 1;
|
|
300
|
+
if (special === 2) {
|
|
301
|
+
// file (e.g. "file:\\host"): host only between "//" and next slash.
|
|
302
|
+
let slashes = 0;
|
|
303
|
+
while ((url.charCodeAt(start) === 47 ||
|
|
304
|
+
url.charCodeAt(start) === 92) &&
|
|
305
|
+
slashes < 2) {
|
|
306
|
+
start += 1;
|
|
307
|
+
slashes += 1;
|
|
308
|
+
}
|
|
309
|
+
if (slashes < 2) {
|
|
310
|
+
return null;
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
else {
|
|
314
|
+
while (url.charCodeAt(start) === 47 ||
|
|
315
|
+
url.charCodeAt(start) === 92) {
|
|
316
|
+
start += 1;
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
}
|
|
320
|
+
}
|
|
170
321
|
}
|
|
171
322
|
}
|
|
172
323
|
}
|
|
173
|
-
//
|
|
174
|
-
//
|
|
175
|
-
//
|
|
324
|
+
// Find the host's end: first '/', '?' or '#' (and '\' for special URLs,
|
|
325
|
+
// which WHATWG treats like '/'). Track the last '@', ']' and ':' for
|
|
326
|
+
// userinfo, ipv6 and port; flag uppercase and a stray tab/newline. The loop
|
|
327
|
+
// is split on `code < 64` so common host characters take fewer comparisons.
|
|
176
328
|
let indexOfIdentifier = -1;
|
|
177
329
|
let indexOfClosingBracket = -1;
|
|
178
330
|
let indexOfPort = -1;
|
|
331
|
+
let hasControl = false;
|
|
179
332
|
for (let i = start; i < end; i += 1) {
|
|
180
333
|
const code = url.charCodeAt(i);
|
|
181
|
-
if (code
|
|
182
|
-
code === 47 ||
|
|
183
|
-
|
|
184
|
-
|
|
334
|
+
if (code < 64) {
|
|
335
|
+
if (code === 47 || code === 35 || code === 63) {
|
|
336
|
+
end = i;
|
|
337
|
+
break;
|
|
338
|
+
}
|
|
339
|
+
else if (code === 58 /* ':' */) {
|
|
340
|
+
indexOfPort = i;
|
|
341
|
+
}
|
|
342
|
+
else if (code === 9 || code === 10 || code === 13) {
|
|
343
|
+
hasControl = true;
|
|
344
|
+
}
|
|
345
|
+
}
|
|
346
|
+
else if (isSpecial && code === 92 /* '\' */) {
|
|
185
347
|
end = i;
|
|
186
348
|
break;
|
|
187
349
|
}
|
|
188
|
-
else if (code === 64) {
|
|
189
|
-
// '@'
|
|
350
|
+
else if (code === 64 /* '@' */) {
|
|
190
351
|
indexOfIdentifier = i;
|
|
191
352
|
}
|
|
192
|
-
else if (code === 93) {
|
|
193
|
-
// ']'
|
|
353
|
+
else if (code === 93 /* ']' */) {
|
|
194
354
|
indexOfClosingBracket = i;
|
|
195
355
|
}
|
|
196
|
-
else if (code === 58) {
|
|
197
|
-
// ':'
|
|
198
|
-
indexOfPort = i;
|
|
199
|
-
}
|
|
200
356
|
else if (code >= 65 && code <= 90) {
|
|
201
357
|
hasUpper = true;
|
|
202
358
|
}
|
|
203
359
|
}
|
|
204
|
-
//
|
|
360
|
+
// A tab/newline inside the authority: strip everything and re-parse (rare).
|
|
361
|
+
if (hasControl) {
|
|
362
|
+
return extractHostname(url.replace(CONTROL_CHARS, ''), urlIsValidHostname);
|
|
363
|
+
}
|
|
364
|
+
// Skip userinfo. '>= start' so an empty userinfo ("http://@host") works too.
|
|
205
365
|
if (indexOfIdentifier !== -1 &&
|
|
206
|
-
indexOfIdentifier
|
|
366
|
+
indexOfIdentifier >= start &&
|
|
207
367
|
indexOfIdentifier < end) {
|
|
208
368
|
start = indexOfIdentifier + 1;
|
|
209
369
|
}
|
|
210
|
-
// Handle ipv6 addresses
|
|
211
370
|
if (url.charCodeAt(start) === 91 /* '[' */) {
|
|
371
|
+
// ipv6 address: return what is between the brackets, or null if unclosed.
|
|
212
372
|
if (indexOfClosingBracket !== -1) {
|
|
213
373
|
return url.slice(start + 1, indexOfClosingBracket).toLowerCase();
|
|
214
374
|
}
|
|
215
375
|
return null;
|
|
216
376
|
}
|
|
217
377
|
else if (indexOfPort !== -1 && indexOfPort > start && indexOfPort < end) {
|
|
218
|
-
//
|
|
219
|
-
|
|
378
|
+
end = indexOfPort; // trim ':port'
|
|
379
|
+
}
|
|
380
|
+
// Empty authority ("http://", "file:///path", "//"); only reachable here via
|
|
381
|
+
// extraction — a bare valid hostname never lands here.
|
|
382
|
+
if (start >= end) {
|
|
383
|
+
return null;
|
|
220
384
|
}
|
|
221
385
|
}
|
|
222
386
|
// Trim trailing dots
|