@coroboros/uri 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +5 -0
- package/LICENSE.md +21 -0
- package/README.md +990 -0
- package/dist/index.cjs +1665 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +560 -0
- package/dist/index.d.cts.map +1 -0
- package/dist/index.d.mts +560 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.mjs +1639 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +93 -0
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,1639 @@
|
|
|
1
|
+
import { domainToASCII, domainToUnicode } from "node:url";
|
|
2
|
+
//#region src/config/index.ts
|
|
3
|
+
/**
|
|
4
|
+
* config module
|
|
5
|
+
*/
|
|
6
|
+
// By its name, the maximum accepted URL length; the code reading it is
// outside this view — TODO confirm the unit (characters vs bytes).
const maxLengthURL = 2048;
// Largest 16-bit unsigned integer (2^16 - 1); by its name, the upper bound
// for a port number.
const maxPortInteger = 65535;
|
|
8
|
+
//#endregion
|
|
9
|
+
//#region src/helpers/object.ts
|
|
10
|
+
/**
|
|
11
|
+
* Internal type guards.
|
|
12
|
+
*
|
|
13
|
+
* - exists(thing) -> boolean
|
|
14
|
+
* - is(Type, thing) -> boolean
|
|
15
|
+
*/
|
|
16
|
+
/**
 * @func exists
 *
 * Tell whether a value is usable, i.e. it is none of undefined, null or NaN.
 * Every other value exists, including falsy ones such as 0, "" and false.
 */
const exists = function exists(thing) {
  if (thing === void 0 || thing === null) return false;
  return !Number.isNaN(thing);
};
|
|
22
|
+
/**
 * @func is
 *
 * Type guard: true when both arguments exist and `thing` was built by `Type`,
 * either directly (its constructor is `Type`, which also covers primitives
 * such as "a" or 1) or through inheritance (instanceof).
 */
function is(Type, thing) {
  if (!exists(Type) || !exists(thing)) return false;
  if (thing.constructor === Type) return true;
  return thing instanceof Type;
}
|
|
25
|
+
//#endregion
|
|
26
|
+
//#region src/checkers/chars.ts
|
|
27
|
+
/**
|
|
28
|
+
* chars checkers
|
|
29
|
+
*
|
|
30
|
+
* - isSchemeChar(char, { start } = {}) -> Boolean
|
|
31
|
+
* - isUserinfoChar(char, encode) -> Boolean
|
|
32
|
+
* - isSitemapUserinfoChar(char, encode) -> Boolean
|
|
33
|
+
* - isDomainChar(char, { start, end } = {}) -> Boolean
|
|
34
|
+
* - isPathChar(char, encode) -> Boolean
|
|
35
|
+
* - isSitemapPathChar(char, encode) -> Boolean
|
|
36
|
+
* - isQueryOrFragmentChar(char, encode) -> Boolean
|
|
37
|
+
* - isSitemapQueryOrFragmentChar(char, encode) -> Boolean
|
|
38
|
+
* - isPercentEncodingChar(char) -> Boolean
|
|
39
|
+
*/
|
|
40
|
+
/**
 * @func isSchemeChar
 *
 * Check whether the first character of `char` is legal in a scheme according to
 * - RFC-3986 https://tools.ietf.org/html/rfc3986#section-3.1.
 *
 * A scheme starts with a letter, then any mix of letters, digits, "+", "."
 * or "-". Only lowercase letters are accepted here.
 *
 * Accepted codes:
 * 43          +
 * 45          -
 * 46          .
 * 48 to 57    0-9
 * 97 to 122   a-z
 *
 * With a truthy `start` option only a-z is accepted.
 * Returns false for any non-string value.
 */
const isSchemeChar = function isSchemeChar(char, { start } = {}) {
  if (!is(String, char)) return false;
  const code = char.charCodeAt(0);
  const isLowerAlpha = code >= 97 && code <= 122;
  // The first character of a scheme must be a letter.
  if (start) return isLowerAlpha;
  if (isLowerAlpha) return true;
  const isDigit = code >= 48 && code <= 57;
  return isDigit || code === 43 || code === 45 || code === 46;
};
|
|
63
|
+
/**
 * @func isUserinfoChar
 *
 * Check whether the first character of `char` is legal in a userinfo
 * component according to
 * - RFC-3986 https://tools.ietf.org/html/rfc3986#section-3.2.1.
 *
 * Accepted codes:
 * 33          !
 * 36          $
 * 37          % (rejected when `encode` is strictly true)
 * 38 to 46    &'()*+,-.
 * 48 to 57    0-9
 * 58          :
 * 59          ;
 * 61          =
 * 65 to 90    A-Z
 * 95          _
 * 97 to 122   a-z
 * 126         ~
 *
 * Returns false for any non-string value.
 */
const isUserinfoChar = function isUserinfoChar(char, encode) {
  if (!is(String, char)) return false;
  const code = char.charCodeAt(0);
  if (code === 37) return encode !== true;
  const inRange = (low, high) => code >= low && code <= high;
  if (inRange(38, 46) || inRange(48, 57) || inRange(65, 90) || inRange(97, 122)) return true;
  switch (code) {
    case 33:
    case 36:
    case 58:
    case 59:
    case 61:
    case 95:
    case 126:
      return true;
    default:
      return false;
  }
};
|
|
89
|
+
/**
 * @func isSitemapUserinfoChar
 *
 * Check whether the first character of `char` is legal in the userinfo of a
 * sitemap URL according to
 * - RFC-3986 https://tools.ietf.org/html/rfc3986#section-3.2.1;
 * - https://support.google.com/webmasters/answer/183668?hl=en&ref_topic=4581190.
 *
 * Compared with isUserinfoChar, uppercase letters (A-Z), "'" and "*" are
 * rejected.
 *
 * Accepted codes:
 * 33          !
 * 36          $
 * 37          % (rejected when `encode` is strictly true)
 * 38          & (allowed but must be a proper escape code)
 * 40 to 41    ()
 * 43 to 46    +,-.
 * 48 to 57    0-9
 * 58          :
 * 59          ;
 * 61          =
 * 95          _
 * 97 to 122   a-z
 * 126         ~
 *
 * Returns false for any non-string value.
 */
const isSitemapUserinfoChar = function isSitemapUserinfoChar(char, encode) {
  if (!is(String, char)) return false;
  const code = char.charCodeAt(0);
  if (code === 37) return encode !== true;
  const inRange = (low, high) => code >= low && code <= high;
  if (inRange(40, 41) || inRange(43, 46) || inRange(48, 57) || inRange(97, 122)) return true;
  switch (code) {
    case 33:
    case 36:
    case 38:
    case 58:
    case 59:
    case 61:
    case 95:
    case 126:
      return true;
    default:
      return false;
  }
};
|
|
119
|
+
/**
 * @func isDomainChar
 *
 * Check whether the first character of `char` is legal in a domain label
 * according to
 * - RFC-1034 https://www.ietf.org/rfc/rfc1034.txt.
 *
 * Accepted codes:
 * 45          - (rejected when `start` or `end` is strictly true)
 * 48 to 57    0-9
 * 97 to 122   a-z
 *
 * Returns false for any non-string value.
 */
const isDomainChar = function isDomainChar(char, { start, end } = {}) {
  if (!is(String, char)) return false;
  const code = char.charCodeAt(0);
  // A hyphen is legal anywhere except at either edge of the label.
  if (code === 45) return !(start === true || end === true);
  const isDigit = code >= 48 && code <= 57;
  const isLowerAlpha = code >= 97 && code <= 122;
  return isDigit || isLowerAlpha;
};
|
|
135
|
+
/**
 * @func isPathChar
 *
 * Check whether the first character of `char` is legal in a path according to
 * - RFC-3986 https://tools.ietf.org/html/rfc3986#section-3.3.
 *
 * Accepted codes:
 * 33          !
 * 36 to 59    $%&'()*+,-./0-9:; (% rejected when `encode` is strictly true)
 * 61          =
 * 64 to 90    @A-Z
 * 95          _
 * 97 to 122   a-z
 * 126         ~
 *
 * Returns false for any non-string value.
 */
const isPathChar = function isPathChar(char, encode) {
  if (!is(String, char)) return false;
  const code = char.charCodeAt(0);
  if (code === 37) return encode !== true;
  const inRange = (low, high) => code >= low && code <= high;
  if (inRange(36, 59) || inRange(64, 90) || inRange(97, 122)) return true;
  switch (code) {
    case 33:
    case 61:
    case 95:
    case 126:
      return true;
    default:
      return false;
  }
};
|
|
156
|
+
/**
 * @func isSitemapPathChar
 *
 * Check whether the first character of `char` is legal in the path of a
 * sitemap URL according to
 * - RFC-3986 https://tools.ietf.org/html/rfc3986#section-3.3.
 *
 * Compared with isPathChar, uppercase letters (A-Z), "'" and "*" are rejected.
 *
 * Accepted codes:
 * 33          !
 * 36 to 38    $%& (% rejected when `encode` is strictly true;
 *                  & allowed but must be a proper escape code)
 * 40 to 41    ()
 * 43 to 59    +,-./0-9:;
 * 61          =
 * 64          @
 * 95          _
 * 97 to 122   a-z
 * 126         ~
 *
 * Returns false for any non-string value.
 */
const isSitemapPathChar = function isSitemapPathChar(char, encode) {
  if (!is(String, char)) return false;
  const code = char.charCodeAt(0);
  if (code === 37) return encode !== true;
  const inRange = (low, high) => code >= low && code <= high;
  if (inRange(36, 38) || inRange(40, 41) || inRange(43, 59) || inRange(97, 122)) return true;
  switch (code) {
    case 33:
    case 61:
    case 64:
    case 95:
    case 126:
      return true;
    default:
      return false;
  }
};
|
|
181
|
+
/**
 * @func isQueryOrFragmentChar
 *
 * Check whether the first character of `char` is legal in a query or a
 * fragment according to
 * - RFC-3986 https://tools.ietf.org/html/rfc3986#section-3.4;
 * - RFC-3986 https://tools.ietf.org/html/rfc3986#section-3.5.
 *
 * Accepted: every character isPathChar accepts (same `encode` handling),
 * plus
 * 63          ?
 */
const isQueryOrFragmentChar = function isQueryOrFragmentChar(char, encode) {
  return isPathChar(char, encode) || (is(String, char) && char.charCodeAt(0) === 63);
};
|
|
195
|
+
/**
 * @func isSitemapQueryOrFragmentChar
 *
 * Check whether the first character of `char` is legal in the query or the
 * fragment of a sitemap URL according to
 * - RFC-3986 https://tools.ietf.org/html/rfc3986#section-3.4;
 * - RFC-3986 https://tools.ietf.org/html/rfc3986#section-3.5.
 *
 * Accepted: every character isSitemapPathChar accepts (same `encode`
 * handling), plus
 * 63          ?
 */
const isSitemapQueryOrFragmentChar = function isSitemapQueryOrFragmentChar(char, encode) {
  return isSitemapPathChar(char, encode) || (is(String, char) && char.charCodeAt(0) === 63);
};
|
|
209
|
+
/**
 * @func isPercentEncodingChar
 *
 * Check whether the first character of `char` is a hexadecimal digit, i.e. is
 * legal after "%" in a percent-encoding according to
 * RFC-3986 https://tools.ietf.org/html/rfc3986#section-2.1.
 *
 * HEXDIG is case-insensitive: %3a and %3A are equivalent
 * (RFC-3986 https://tools.ietf.org/html/rfc3986#section-6.2.2.1).
 *
 * Accepted codes:
 * 48 to 57    0-9
 * 65 to 70    A-F
 * 97 to 102   a-f
 *
 * Returns false for any non-string value.
 */
const isPercentEncodingChar = function isPercentEncodingChar(char) {
  if (!is(String, char)) return false;
  const code = char.charCodeAt(0);
  const isDigit = code >= 48 && code <= 57;
  const isUpperHex = code >= 65 && code <= 70;
  const isLowerHex = code >= 97 && code <= 102;
  return isDigit || isUpperHex || isLowerHex;
};
|
|
226
|
+
//#endregion
|
|
227
|
+
//#region src/ip/index.ts
|
|
228
|
+
/**
|
|
229
|
+
* IP validator
|
|
230
|
+
*
|
|
231
|
+
* - isIP(ip) -> Boolean
|
|
232
|
+
* - isIPv4(ip) -> Boolean
|
|
233
|
+
* - isIPv6(ip) -> Boolean
|
|
234
|
+
*/
|
|
235
|
+
// One decimal octet, 0 to 255, written without a leading zero.
const v4seg = "(?:25[0-5]|2[0-4]\\d|1\\d\\d|[1-9]\\d|\\d)";
// Dotted quad: four octets separated by dots.
const v4 = `${v4seg}(?:\\.${v4seg}){3}`;
// One IPv6 group: 1 to 4 hexadecimal digits.
const v6seg = "[a-fA-F\\d]{1,4}";
// Commented, multi-line IPv6 pattern. The lines below must stay unindented:
// only the trailing "// ..." comments and the newlines are stripped, so any
// leading whitespace would end up inside the regular expression.
const v6 = `
(
(?:${v6seg}:){7}(?:${v6seg}|:)| // 1:2:3:4:5:6:7:: 1:2:3:4:5:6:7:8
(?:${v6seg}:){6}(?:${v4}|:${v6seg}|:)| // 1:2:3:4:5:6:: 1:2:3:4:5:6::8 1:2:3:4:5:6::8 1:2:3:4:5:6::1.2.3.4
(?:${v6seg}:){5}(?::${v4}|(:${v6seg}){1,2}|:)| // 1:2:3:4:5:: 1:2:3:4:5::7:8 1:2:3:4:5::8 1:2:3:4:5::7:1.2.3.4
(?:${v6seg}:){4}(?:(:${v6seg}){0,1}:${v4}|(:${v6seg}){1,3}|:)| // 1:2:3:4:: 1:2:3:4::6:7:8 1:2:3:4::8 1:2:3:4::6:7:1.2.3.4
(?:${v6seg}:){3}(?:(:${v6seg}){0,2}:${v4}|(:${v6seg}){1,4}|:)| // 1:2:3:: 1:2:3::5:6:7:8 1:2:3::8 1:2:3::5:6:7:1.2.3.4
(?:${v6seg}:){2}(?:(:${v6seg}){0,3}:${v4}|(:${v6seg}){1,5}|:)| // 1:2:: 1:2::4:5:6:7:8 1:2::8 1:2::4:5:6:7:1.2.3.4
(?:${v6seg}:){1}(?:(:${v6seg}){0,4}:${v4}|(:${v6seg}){1,6}|:)| // 1:: 1::3:4:5:6:7:8 1::8 1::3:4:5:6:7:1.2.3.4
(?::((?::${v6seg}){0,5}:${v4}|(?::${v6seg}){1,7}|:)) // ::2:3:4:5:6:7:8 ::2:3:4:5:6:7:8 ::8 ::1.2.3.4
)(%[0-9a-zA-Z]{1,})? // %eth0 %1
`
  .split("\n")
  .map((line) => line.replace(/\s*\/\/.*$/, ""))
  .join("")
  .trim();
// Anchored matchers: the whole string must be an address.
const ipv4Regexp = new RegExp(`^${v4}$`);
const ipv6Regexp$1 = new RegExp(`^${v6}$`);
const ipRegexp = new RegExp(`(?:^${v4}$)|(?:^${v6}$)`);
|
|
252
|
+
/**
 * @func isIP
 *
 * Tell whether `ip` is a string holding exactly one valid IPv4 or IPv6
 * address. Returns false for any non-string value.
 */
const isIP = function isIP(ip) {
  return is(String, ip) && ipRegexp.test(ip);
};
|
|
261
|
+
/**
 * @func isIPv4
 *
 * Tell whether `ip` is a string holding exactly one valid IPv4 address.
 * Returns false for any non-string value.
 */
const isIPv4 = function isIPv4(ip) {
  return is(String, ip) && ipv4Regexp.test(ip);
};
|
|
270
|
+
/**
 * @func isIPv6
 *
 * Tell whether `ip` is a string holding exactly one valid IPv6 address
 * (without square brackets). Returns false for any non-string value.
 */
const isIPv6 = function isIPv6(ip) {
  return is(String, ip) && ipv6Regexp$1.test(ip);
};
|
|
279
|
+
//#endregion
|
|
280
|
+
//#region src/punycode/index.ts
|
|
281
|
+
/**
|
|
282
|
+
* punycode and punydecode
|
|
283
|
+
*
|
|
284
|
+
* - punycode(domain) -> String
|
|
285
|
+
* - punydecode(domain) -> String
|
|
286
|
+
*/
|
|
287
|
+
/**
 * @func punycode
 *
 * Return the Punycode ASCII serialization of the domain.
 * The empty string is returned when `domain` is not a string; for an invalid
 * domain the result is whatever url.domainToASCII returns (documented
 * upstream as the empty string).
 *
 * Note:
 * - an IPv6 address is returned untouched, because the native
 *   url.domainToASCII supports IPv4 only;
 * - non-string values never reach url.domainToASCII, which would otherwise
 *   throw when called without a domain or return `null`, `undefined`, `nan`
 *   for `null`, `undefined` or `NaN`, which is not the expected result.
 */
const punycode = function punycode(domain) {
  if (!is(String, domain)) return "";
  return isIPv6(domain) ? domain : domainToASCII(domain);
};
|
|
303
|
+
/**
 * @func punydecode
 *
 * Return the Unicode serialization of the domain.
 * The empty string is returned when `domain` is not a string; for an invalid
 * domain the result is whatever url.domainToUnicode returns (documented
 * upstream as the empty string).
 *
 * Note:
 * - an IPv6 address is returned untouched, because the native
 *   url.domainToUnicode supports IPv4 only;
 * - non-string values never reach url.domainToUnicode, which would otherwise
 *   throw when called without a domain or return `null`, `undefined`, `nan`
 *   for `null`, `undefined` or `NaN`, which is not the expected result.
 */
const punydecode = function punydecode(domain) {
  if (!is(String, domain)) return "";
  return isIPv6(domain) ? domain : domainToUnicode(domain);
};
|
|
319
|
+
//#endregion
|
|
320
|
+
//#region src/domain/index.ts
|
|
321
|
+
/**
|
|
322
|
+
* domain
|
|
323
|
+
*
|
|
324
|
+
* - isDomainLabel(label) -> Boolean
|
|
325
|
+
* - isDomain(name) -> Boolean
|
|
326
|
+
*/
|
|
327
|
+
/**
 * @func isDomainLabel
 *
 * Test a label is a valid domain label according to RFC-1034.
 *
 * "Note that while upper and lower case letters are allowed in domain names,
 * no significance is attached to the case. That is, two names with the same
 * spelling but different case are to be treated as if identical."
 *
 * By convention an uppercased domain label is considered invalid.
 *
 * Rules:
 * 1. "Labels must be 63 characters or less.";
 * 2. a label has at least one character;
 * 3. only lowercase letters, digits or hyphens are used;
 * 4. a label does not start or end with a hyphen;
 * 5. a label has no consecutive hyphens;
 * 6. a label can start or end with a digit.
 *
 * Based on:
 * - RFC-1034 https://www.ietf.org/rfc/rfc1034.txt.
 */
const isDomainLabel = function isDomainLabel(label) {
  if (!is(String, label)) return false;
  const size = label.length;
  if (size === 0 || size > 63) return false;
  // Rule 5: no two hyphens in a row.
  if (label.includes("--")) return false;
  const lastIndex = size - 1;
  // Rules 3 and 4: split("") walks UTF-16 code units, like charAt would.
  return label.split("").every((char, index) => isDomainChar(char, {
    start: index === 0,
    end: index === lastIndex
  }));
};
|
|
362
|
+
/**
 * @func isDomain
 *
 * Test a name is a valid domain according to RFC-1034.
 *
 * Supports Fully-Qualified Domain Name (FQDN) and Internationalized Domain Name (IDN).
 * The name is first converted to its Punycode ASCII form; "localhost" is
 * accepted as a special case.
 *
 * Rules:
 * 1. labels rules apply;
 * 2. "[...] the total number of octets that represent a domain name
 *    (i.e., the sum of all label octets and label lengths) is limited to 255.";
 * 3. labels are separated by dots (".");
 * 4. must have at least one extension label;
 * 5. must have labels different from each other;
 * 6. last label can be empty (root label ".");
 * 7. labels can start with `xn--` for IDNs if the ASCII serialization is a valid Punycode;
 * 8. check also Punycodes as ॐ gives xn--'-6xd where ' is not valid.
 *
 * Returns false for any non-string value.
 *
 * Based on:
 * - RFC-1034 https://www.ietf.org/rfc/rfc1034.txt.
 */
const isDomain = function isDomain(name) {
  if (!is(String, name)) return false;
  const domain = punycode(name);
  if (domain === "localhost") return true;
  const len = domain.length;
  if (len <= 0 || len > 255) return false;
  const labels = domain.split(".");
  const labelsLen = labels.length;
  if (labelsLen <= 1) return false;
  // A Set instead of a plain object: with `{}` the label "constructor" read
  // Object.prototype.constructor, turning the counter into a string, so a
  // repeated "constructor" label was never detected (rule 5).
  const seenLabels = new Set();
  for (const [i, current] of labels.entries()) {
    // Rule 6: a trailing empty label is the root label and is skipped.
    if (i === labelsLen - 1 && current === "") continue;
    // Rules 7 and 8: the part after the IDN prefix must itself be a valid label.
    if (!isDomainLabel(current.startsWith("xn--") ? current.slice(4) : current)) return false;
    if (seenLabels.has(current)) return false;
    seenLabels.add(current);
  }
  return true;
};
|
|
400
|
+
//#endregion
|
|
401
|
+
//#region src/helpers/cast.ts
|
|
402
|
+
/**
|
|
403
|
+
* Type casting helpers.
|
|
404
|
+
*
|
|
405
|
+
* Cast a value to a specific primitive type. If the value is
|
|
406
|
+
* not of this type or can not be infer from this type, undefined is returned.
|
|
407
|
+
*
|
|
408
|
+
* undefined is an interesting value. When stringifying an object, an undefined property
|
|
409
|
+
* disappears. Useful to respect data type schemas and where null values are not allowed.
|
|
410
|
+
*
|
|
411
|
+
* - num(thing, { le, ge } = {}) -> Number or undefined
|
|
412
|
+
* - int(thing, { le, ge } = {}) -> Integer Number or undefined
|
|
413
|
+
*/
|
|
414
|
+
/**
 * @func number
 *
 * Cast to a primitive finite number when possible, otherwise return
 * undefined.
 *
 * null, undefined and NaN are filtered first because Number(null) is 0 and
 * Number(undefined|NaN) is NaN. Only numbers, strings and booleans (taken
 * after valueOf(), so their wrapper objects too) are cast; any other type
 * yields undefined.
 *
 * NOTE: only finite values are returned.
 */
const number = function number(thing) {
  if (!exists(thing)) return void 0;
  const value = thing.valueOf();
  let candidate;
  if (is(Number, value)) candidate = value;
  else if (is(String, value) || is(Boolean, value)) candidate = Number(value);
  else return void 0;
  // Number.isFinite is only true for primitive finite numbers.
  return Number.isFinite(candidate) ? candidate : void 0;
};
|
|
435
|
+
/**
 * @func integer
 *
 * Cast to a primitive integer number when possible, otherwise return
 * undefined. The value is first cast with "number", then its fractional part
 * is dropped (truncation toward zero).
 *
 * NOTE: based on "number" function, in base 10 only
 */
const integer = function integer(thing) {
  const castNum = number(thing);
  if (castNum === void 0) return void 0;
  // Truncate the numeric value itself. The former parseInt(String(castNum), 10)
  // went through the decimal string, which switches to exponent notation for
  // |n| >= 1e21 and 0 < |n| < 1e-6 ("1e+21", "1e-7"): parseInt stopped at "e"
  // and returned the leading digit, e.g. 1e21 -> 1.
  // Zero is special-cased so that -0 keeps yielding +0, as parseInt("0") did.
  return castNum === 0 ? 0 : Math.trunc(castNum);
};
|
|
451
|
+
/**
 * @func int
 *
 * Cast to a primitive integer number, or return undefined when the cast
 * fails or the integer falls outside the optional inclusive bounds:
 * - `ge`: the result must be greater than or equal to this value;
 * - `le`: the result must be less than or equal to this value.
 *
 * A bound that cannot itself be cast to an integer is ignored.
 *
 * NOTE: based on "integer" function, in base 10 only
 */
const int = function int(thing, { ge, le } = {}) {
  const value = integer(thing);
  if (value === void 0) return void 0;
  const upperBound = integer(le);
  const lowerBound = integer(ge);
  if (upperBound !== void 0 && value > upperBound) return void 0;
  if (lowerBound !== void 0 && value < lowerBound) return void 0;
  return value;
};
|
|
470
|
+
/**
 * @func isPort
 *
 * RFC-3986 §3.2.3: port = *DIGIT. True when the value is absent
 * (null/undefined) or when its string form is a possibly empty run of ASCII
 * digits. The numeric range is validated separately by `int`.
 *
 * Rejects JS Number coercion artefacts (hex `0x1F`, scientific `1e3`,
 * whitespace) that `Number()` would otherwise accept.
 */
const isPort = function isPort(thing) {
  if (thing === null || thing === void 0) return true;
  const text = String(thing);
  return /^[0-9]*$/.test(text);
};
|
|
483
|
+
//#endregion
|
|
484
|
+
//#region src/helpers/error.ts
|
|
485
|
+
/**
 * @func fail
 *
 * Throw a URIError built from `message` and carrying `code` as its `code`
 * property. The thrown value is always `instanceof URIError`; this function
 * never returns.
 */
const fail = function fail(code, message) {
  throw Object.assign(new URIError(message), { code });
};
|
|
496
|
+
//#endregion
|
|
497
|
+
//#region src/parser/index.ts
|
|
498
|
+
/**
|
|
499
|
+
* parser
|
|
500
|
+
*
|
|
501
|
+
* - hostToURI(host) -> String
|
|
502
|
+
* - recomposeURI({ scheme, userinfo, host, port, path, query, fragment } = {}) -> String
|
|
503
|
+
* - parseURI(uri) -> Object
|
|
504
|
+
*/
|
|
505
|
+
// URI splitter from RFC-3986 Appendix B. Captures, in order: scheme,
// authority, path, query, fragment. Every part is optional.
const uriRegexp = /^(?:([^:/?#]+):)?(?:\/\/([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?/;
// Bracketed IPv6 host with an optional port: "[::1]", "[::1]:" or "[::1]:8080".
// Captures: 1 = address without brackets, 2 = port digits (undefined if none).
// The port is only accepted after a colon (RFC-3986 §3.2: host [ ":" port ]);
// the previous pattern made the colon optional on its own and therefore read
// "[::1]8080" as host "::1" with port "8080".
const ipv6Regexp = /^\[([^\]]+)\](?::([0-9]+)?)?$/;
|
|
507
|
+
/**
 * @func hostToURI
 *
 * Format a host for use inside a URI: an IPv6 address is wrapped in square
 * brackets, any other string is returned as is. The empty string is returned
 * if host is not a string.
 */
const hostToURI = function hostToURI(host) {
  if (!is(String, host)) return "";
  if (isIPv6(host)) return `[${host}]`;
  return host;
};
|
|
517
|
+
/**
 * @func recomposeURI
 *
 * Recompose an URI from its components with basic URI checking.
 *
 * The empty string is returned if unable to recompose the URI.
 *
 * Rules:
 * 1. scheme is required and must be at least 1 character;
 * 2. path is required and can be empty;
 * 3. if host is present path must be empty or start with /;
 * 4. if host is not present path must not start with //;
 * 5. host, if any, must be at least 3 characters;
 * 6. userinfo is ignored if empty or not a string;
 * 7. port is emitted only when it is made of ASCII digits and casts to an
 *    integer between 0 and 65535;
 * 8. query is emitted when defined (a string, including ''); a null
 *    or undefined query is omitted (RFC-3986 §5.3);
 * 9. fragment is emitted when defined (a string, including ''); a null
 *    or undefined fragment is omitted (RFC-3986 §5.3).
 *
 * Support:
 * - IPv4 and IPv6 (an IPv6 host is wrapped in square brackets).
 *
 * Note:
 * - / is added to any URI with a host and an empty path;
 * - NOTE(review): an empty-string port passes rule 7 as written (it casts to
 *   0) and emits a bare ":" — confirm this is intended.
 *
 * Based on:
 * - RFC-3986 https://tools.ietf.org/html/rfc3986.
 */
const recomposeURI = function recomposeURI(components) {
  const { scheme, userinfo, host, port, path, query, fragment } = components || {};
  const hasScheme = is(String, scheme) && scheme.length > 0;
  if (!hasScheme || !is(String, path)) return "";
  const hasHost = is(String, host);
  let recomposed = scheme;
  if (hasHost) {
    const pathFitsAuthority = path === "" || path.startsWith("/");
    if (!pathFitsAuthority || host.length <= 2) return "";
    recomposed += "://";
    if (is(String, userinfo) && userinfo.length > 0) recomposed += `${userinfo}@`;
    recomposed += hostToURI(host);
    const portIsValid = exists(port) && isPort(port) && int(port, {
      ge: 0,
      le: 65535
    }) !== void 0;
    if (portIsValid) recomposed += `:${port}`;
  } else {
    // Without an authority, a path starting with "//" would be read as one.
    if (path.startsWith("//")) return "";
    recomposed += ":";
  }
  recomposed += hasHost && path === "" ? "/" : path;
  if (is(String, query)) recomposed += `?${query}`;
  if (is(String, fragment)) recomposed += `#${fragment}`;
  return recomposed;
};
|
|
572
|
+
/**
 * @func parseURI
 *
 * Parse a string to get URI components.
 *
 * Returns an object with the keys scheme, authority, authorityPunydecoded,
 * userinfo, host, hostPunydecoded, port, path, pathqf (path + query +
 * fragment), query, fragment and href (the URI recomposed from the parsed
 * components, or null when it cannot be recomposed). Every key is null when
 * `uri` is not a non-empty string or has no scheme.
 *
 * Support:
 * - IPv4 and IPv6 hosts;
 * - Internationalized Domain Name (IDN): host holds the ASCII (Punycode)
 *   form and hostPunydecoded the Unicode form.
 *
 * Note:
 * - RegExp from RFC-3986 https://tools.ietf.org/html/rfc3986#appendix-B;
 * - scheme and host strings will always be put in lowercase once parsed,
 *   as specified in RFC-3986;
 * - authority, userinfo, host and port are null if the parsed authority is
 *   missing or has no usable host;
 * - port is a number when its text casts to a non-zero integer; otherwise
 *   the raw text is kept (non-digit text, "" and "0" included).
 *
 * Based on:
 * - RFC-3986 https://tools.ietf.org/html/rfc3986.
 */
const parseURI = function parseURI(uri) {
  const parsed = {
    scheme: null,
    authority: null,
    authorityPunydecoded: null,
    userinfo: null,
    host: null,
    hostPunydecoded: null,
    port: null,
    path: null,
    pathqf: null,
    query: null,
    fragment: null,
    href: null
  };
  if (!is(String, uri) || uri.length <= 0) return parsed;
  // The all-optional Appendix-B regexp always matches a non-empty string; the
  // empty-array fallback only keeps the destructuring safe.
  const matched = uri.match(uriRegexp) ?? [];
  const [, scheme, authorityParsed, path, queryParsed, fragmentParsed] = matched;
  if (!is(String, scheme) || scheme.length <= 0) return parsed;

  let userinfo = null;
  let host = null;
  let hostPunydecoded = null;
  let port = null;
  let authority = null;
  let authorityPunydecoded = null;

  if (is(String, authorityParsed)) {
    // Split "userinfo@host:port" on the last "@"; both halves are strings.
    const userinfoEnd = authorityParsed.lastIndexOf("@");
    let hostAndPort = authorityParsed;
    if (userinfoEnd !== -1) {
      userinfo = authorityParsed.slice(0, userinfoEnd);
      hostAndPort = authorityParsed.slice(userinfoEnd + 1);
    }

    // Bracketed IPv6 hosts carry colons, so they are matched first; any other
    // host is split from its port on the last ":".
    let hostParsed = null;
    let portToCast = null;
    const bracketed = hostAndPort.match(ipv6Regexp);
    if (Array.isArray(bracketed)) {
      [, hostParsed = null, portToCast = null] = bracketed;
    } else {
      const portStart = hostAndPort.lastIndexOf(":");
      if (portStart === -1) {
        hostParsed = hostAndPort;
      } else {
        hostParsed = hostAndPort.slice(0, portStart);
        portToCast = hostAndPort.slice(portStart + 1);
      }
    }

    const hostLowerCase = is(String, hostParsed) ? hostParsed.toLowerCase() : null;
    const toASCII = punycode(hostLowerCase ?? "");
    const toUnicode = punydecode(hostLowerCase ?? "");
    if (hostLowerCase !== toASCII) {
      // The host was given in Unicode (or is invalid, toASCII being "").
      host = toASCII;
      hostPunydecoded = hostLowerCase;
    } else {
      // The host was already ASCII; toUnicode equals it unless it is Punycode.
      host = hostLowerCase;
      hostPunydecoded = toUnicode;
    }
    if (host === "" || hostPunydecoded === "") host = null;

    const portIsNotDigits = is(String, portToCast) && portToCast.length > 0 && !isPort(portToCast);
    port = portIsNotDigits ? portToCast : int(portToCast) || portToCast;

    // Builds "userinfo@host:port" from the current userinfo and port values.
    const composeAuthority = (hostValue) => {
      let text = "";
      if (exists(userinfo)) text += `${userinfo}@`;
      text += hostToURI(hostValue);
      if (exists(port)) text += `:${port}`;
      return text;
    };
    if (exists(hostPunydecoded)) authorityPunydecoded = composeAuthority(hostPunydecoded);
    if (exists(host)) {
      authority = composeAuthority(host);
    } else {
      // No usable host: the authority components are dropped.
      userinfo = null;
      port = null;
    }
  }

  const query = is(String, queryParsed) ? queryParsed : null;
  const fragment = is(String, fragmentParsed) ? fragmentParsed : null;
  // The Appendix-B regexp always captures a string path; the null branches
  // below are purely defensive.
  let pathqf = is(String, path) ? path.valueOf() : null;
  if (is(String, pathqf)) {
    if (is(String, query)) pathqf += `?${query}`;
    if (is(String, fragment)) pathqf += `#${fragment}`;
  }

  Object.assign(parsed, {
    scheme: scheme.toLowerCase(),
    authority,
    authorityPunydecoded,
    userinfo,
    host,
    hostPunydecoded,
    port,
    path: path ?? null,
    pathqf,
    query,
    fragment
  });
  const href = recomposeURI({
    scheme: parsed.scheme,
    userinfo: parsed.userinfo,
    host: parsed.host,
    port: parsed.port,
    path: parsed.path,
    query: parsed.query,
    fragment: parsed.fragment
  });
  parsed.href = href !== "" ? href : null;
  return parsed;
};
|
|
704
|
+
//#endregion
|
|
705
|
+
//#region src/sitemap/index.ts
|
|
706
|
+
/**
|
|
707
|
+
* sitemap
|
|
708
|
+
*
|
|
709
|
+
* Percent encodings, entities and escape codes.
|
|
710
|
+
*
|
|
711
|
+
* - specialChars
|
|
712
|
+
* - specialCharsKeys
|
|
713
|
+
* - pencodings
|
|
714
|
+
* - pencodingsKeys
|
|
715
|
+
* - entities
|
|
716
|
+
* - entitiesKeys
|
|
717
|
+
* - escapeCodes
|
|
718
|
+
* - escapeCodesKeys
|
|
719
|
+
* - escapeCodesKeysLen
|
|
720
|
+
*/
|
|
721
|
+
// Characters that need percent-encoding in sitemap URLs, mapped to their
// percent-encoded form.
const specialChars = { "*": "%2A" };
const specialCharsKeys = Object.keys(specialChars);
// Reverse lookup: percent-encoding -> character.
const pencodings = Object.fromEntries(specialCharsKeys.map((char) => [specialChars[char], char]));
const pencodingsKeys = Object.keys(pencodings);
// Characters that must be entity-escaped in sitemap XML, mapped to their
// escape code. The values are the XML entities themselves: an HTML-decoded
// copy of this table maps every character to itself and is a syntax error
// on the double-quote line.
const entities = {
  "&": "&amp;",
  "'": "&apos;",
  "\"": "&quot;",
  ">": "&gt;",
  "<": "&lt;"
};
const entitiesKeys = Object.keys(entities);
// Reverse lookup: escape code -> character.
const escapeCodes = Object.fromEntries(entitiesKeys.map((entity) => [entities[entity], entity]));
const escapeCodesKeys = Object.keys(escapeCodes);
const escapeCodesKeysLen = escapeCodesKeys.length;
|
|
742
|
+
//#endregion
|
|
743
|
+
//#region src/checkers/index.ts
|
|
744
|
+
/**
|
|
745
|
+
* checkers
|
|
746
|
+
*
|
|
747
|
+
* - checkPercentEncoding(string, index, stringLen) -> Number throws URIError
|
|
748
|
+
* - checkSitemapEncoding(string, index, stringLen) -> Number throws URIError
|
|
749
|
+
* - checkComponent({ type, string, sitemap } = {}) -> Boolean throws URIError
|
|
750
|
+
* - checkSchemeChars(scheme, len) -> Boolean throws URIError
|
|
751
|
+
* - checkLowercase(uri) -> Boolean throws URIError
|
|
752
|
+
* - checkURISyntax(uri) -> Object throws URIError
|
|
753
|
+
* - checkURI(uri, { sitemap } = {}) -> Object throws URIError
|
|
754
|
+
* - checkHttpURL(uri, { https, web, sitemap } = {}) -> Object throws URIError
|
|
755
|
+
* - checkHttpsURL(uri) -> Object throws URIError
|
|
756
|
+
* - checkHttpSitemapURL(uri) -> Object throws URIError
|
|
757
|
+
* - checkHttpsSitemapURL(uri) -> Object throws URIError
|
|
758
|
+
* - checkWebURL(uri) -> Object throws URIError
|
|
759
|
+
* - checkSitemapURL(uri) -> Object throws URIError
|
|
760
|
+
*/
|
|
761
|
+
// Matches a whole string made of one or more unreserved characters
// (letters, digits, '.', '_', '~', '-') and/or '%XX' percent-encoded triplets.
// Used below to validate the zone identifier that follows '%25' in an IPv6 host.
const ipv6ZoneIdRegexp = /^(?:[A-Za-z0-9._~-]|%[0-9A-Fa-f]{2})+$/;
|
|
762
|
+
/**
|
|
763
|
+
* @func checkPercentEncoding
|
|
764
|
+
*
|
|
765
|
+
* Check a % char found from a string at a specific index has a valid
|
|
766
|
+
* percent encoding following this char.
|
|
767
|
+
*/
|
|
768
|
+
const checkPercentEncoding = function checkPercentEncoding(string, index, stringLen) {
  // Returns how many extra characters the caller may skip: 2 when a valid
  // '%XX' sequence starts at `index`, 0 when the character at `index` is not '%'.
  // Reports through fail() when the sequence is truncated or not hexadecimal.
  if (!is(String, string)) {
    fail("URI_INVALID_PERCENT_ENCODING", "a string is required when checking for percent encoding");
  }

  // Fall back to the real length / the first character when the hints are unusable.
  const hasLengthHint = is(Number, stringLen) && stringLen >= 0;
  const len = hasLengthHint ? stringLen : string.length;
  const at = is(Number, index) && index < len ? index : 0;

  // Nothing to verify unless the inspected character opens an escape sequence.
  if (!(len > 0) || string.charAt(at) !== "%") {
    return 0;
  }

  // Both digits must fit before the end of the (declared) length.
  if (!(at + 2 < len)) {
    fail("URI_INVALID_PERCENT_ENCODING", "incomplete percent encoding found");
    return 0;
  }

  // Digits are validated in order; the first invalid one is reported.
  const digits = [string.charAt(at + 1), string.charAt(at + 2)];
  for (const digit of digits) {
    if (!isPercentEncodingChar(digit)) {
      fail("URI_INVALID_PERCENT_ENCODING", `invalid percent encoding char '${digit}'`);
      return 0;
    }
  }
  return 2;
};
|
|
779
|
+
/**
|
|
780
|
+
* @func checkSitemapEncoding
|
|
781
|
+
*
|
|
782
|
+
* Check an entity in an URL at a specific index has a valid
|
|
783
|
+
* sitemap escape encoding following this char.
|
|
784
|
+
*/
|
|
785
|
+
const checkSitemapEncoding = function checkSitemapEncoding(string, index, stringLen) {
  // Returns how many extra characters the caller may skip when a known escape
  // code starts at `index` (its length minus one), or 0 when the character at
  // `index` is not '&'. Reports through fail() when '&' opens no known code.
  if (!is(String, string)) {
    fail("URI_INVALID_SITEMAP_ENCODING", "a string is required when checking for sitemap encoding");
  }

  const hasLengthHint = is(Number, stringLen) && stringLen >= 0;
  const len = hasLengthHint ? stringLen : string.length;
  const at = is(Number, index) && index < len ? index : 0;

  // Only an ampersand can start an escape code.
  if (!(len > 0) || string.charAt(at) !== "&") {
    return 0;
  }

  // First escape code that fits in the remaining length and matches at `at`.
  const matchedCode = escapeCodesKeys.find(
    (code) => at + code.length <= len && string.substring(at, at + code.length) === code
  );

  if (matchedCode === void 0) {
    fail("URI_INVALID_SITEMAP_ENCODING", `entity '${string.charAt(at)}' is not properly escaped`);
    return 0;
  }
  return matchedCode.length - 1;
};
|
|
807
|
+
/**
|
|
808
|
+
* @func checkComponent
|
|
809
|
+
*
|
|
810
|
+
* Check a string has valid characters regarding userinfo, path, query,
|
|
811
|
+
* or fragment URI component type.
|
|
812
|
+
*
|
|
813
|
+
* NOTE:
|
|
814
|
+
* - check only if string is present as these components are not required;
|
|
815
|
+
* - path is required but is at least empty,
|
|
816
|
+
* regexp assures that and checkURISyntax verifies that too.
|
|
817
|
+
*/
|
|
818
|
+
const checkComponent = function checkComponent({ type, string, sitemap } = {}) {
  // Validates every character of one URI component ('userinfo', 'path',
  // 'query' or 'fragment'). Returns true when the component is absent or
  // valid; reports through fail() otherwise.
  const supportedTypes = ["userinfo", "path", "query", "fragment"];
  if (!supportedTypes.includes(type)) {
    fail("URI_INVALID_CHECKING_COMPONENT", `unable to check pathqf, got '${type}' component to check`);
  }

  // Components are optional, and an empty path is always acceptable.
  if (!exists(string) || (type === "path" && string === "")) {
    return true;
  }

  const checkSitemap = sitemap === true;
  const validators = {
    userinfo: checkSitemap ? isSitemapUserinfoChar : isUserinfoChar,
    path: checkSitemap ? isSitemapPathChar : isPathChar,
    query: checkSitemap ? isSitemapQueryOrFragmentChar : isQueryOrFragmentChar,
    fragment: checkSitemap ? isSitemapQueryOrFragmentChar : isQueryOrFragmentChar
  };
  // Pulled into a local so the validator is invoked as a plain function.
  const isValidChar = validators[type];

  const len = string.length;
  let cursor = 0;
  while (cursor < len) {
    const char = string.charAt(cursor);
    if (!isValidChar(char)) {
      fail(`URI_INVALID_${type.toUpperCase()}_CHAR`, `invalid ${type} char '${char}'`);
    }
    // Skip over the rest of a percent-encoding or sitemap escape code, if any.
    cursor += checkPercentEncoding(string, cursor, len);
    if (checkSitemap) {
      cursor += checkSitemapEncoding(string, cursor, len);
    }
    cursor += 1;
  }
  return true;
};
|
|
850
|
+
/**
|
|
851
|
+
* @func checkSchemeChars
|
|
852
|
+
*
|
|
853
|
+
* Check scheme characters are valid.
|
|
854
|
+
*
|
|
855
|
+
* Based on:
|
|
856
|
+
* - RFC-3986 https://tools.ietf.org/html/rfc3986#section-3.1.
|
|
857
|
+
*/
|
|
858
|
+
const checkSchemeChars = function checkSchemeChars(scheme, len) {
  // Validates each scheme character with isSchemeChar, flagging the first
  // position through the `start` option. Returns true when all pass.
  if (!is(String, scheme)) {
    fail("URI_INVALID_SCHEME", "scheme must be a string");
  }

  // A positive numeric `len` overrides the scheme's own length.
  const useGivenLength = is(Number, len) && len > 0;
  const schemeLen = useGivenLength ? len : scheme.length;
  if (schemeLen <= 0) {
    fail("URI_INVALID_SCHEME", "scheme cannot be empty");
  }

  let position = 0;
  while (position < schemeLen) {
    const char = scheme.charAt(position);
    if (!isSchemeChar(char, { start: position === 0 })) {
      fail("URI_INVALID_SCHEME_CHAR", `invalid scheme char '${char}'`);
    }
    position += 1;
  }
  return true;
};
|
|
865
|
+
/**
|
|
866
|
+
* @func checkLowercase
|
|
867
|
+
*
|
|
868
|
+
* Check a string has not any uppercase characters.
|
|
869
|
+
*/
|
|
870
|
+
const checkLowercase = function checkLowercase(uri) {
  // Returns true when `uri` is a string equal to its own lowercase form;
  // reports through fail() otherwise.
  if (!is(String, uri)) {
    fail("URI_INVALID_TYPE", "uri must be a string");
  }
  const lowered = uri.toLowerCase();
  if (lowered !== uri) {
    fail("URI_INVALID_CHAR", "uri cannot contain any uppercase characters");
  }
  return true;
};
|
|
875
|
+
/**
|
|
876
|
+
* @func checkURISyntax
|
|
877
|
+
*
|
|
878
|
+
* Check an URI syntax is valid according to RFC-3986.
|
|
879
|
+
*
|
|
880
|
+
* Beware this function does not fully check if an URI is valid.
|
|
881
|
+
* Rules:
|
|
882
|
+
* 1. scheme is required and cannot be empty;
|
|
883
|
+
* 2. path is required and can be empty;
|
|
884
|
+
* 3. if authority is present path must be empty or start with /;
|
|
885
|
+
* 4. if authority is not present path must not start with //;
|
|
886
|
+
* 5. check for inconsistent authority (original vs parsed)
|
|
887
|
+
* which would mean host parsed was actually wrong.
|
|
888
|
+
*/
|
|
889
|
+
const checkURISyntax = function checkURISyntax(uri) {
  // Parses `uri` and applies the structural rules listed in the header above.
  // Returns the parsed components plus `schemeLen` and `valid: true`;
  // reports through fail() on the first violated rule.
  if (!is(String, uri)) {
    fail("URI_INVALID_TYPE", "uri must be a string");
  }

  const {
    scheme,
    authority,
    authorityPunydecoded,
    userinfo,
    host,
    hostPunydecoded,
    port,
    path,
    pathqf,
    query,
    fragment,
    href
  } = parseURI(uri);

  // Rule 1: scheme is required and must not be empty.
  const hasScheme = is(String, scheme);
  const schemeLen = hasScheme ? scheme.length : 0;
  if (!hasScheme) {
    fail("URI_MISSING_SCHEME", "uri scheme is required");
  } else if (schemeLen <= 0) {
    fail("URI_EMPTY_SCHEME", "uri scheme must not be empty");
  }

  // Rule 2: path is required (defensive check on the parser's output).
  if (!is(String, path)) {
    fail("URI_MISSING_PATH", "uri path is required");
  }

  // Rules 3 and 4: path shape depends on whether an authority is present.
  const hasAuthority = is(String, authority) && authority.length > 0;
  if (hasAuthority) {
    const pathFitsAuthority = path === "" || path.startsWith("/");
    if (!pathFitsAuthority) {
      fail("URI_INVALID_PATH", "path must be empty or start with '/' when authority is present");
    }
  } else if (path.startsWith("//")) {
    fail("URI_INVALID_PATH", "path must not start with '//' when authority is not present");
  }

  // Rule 5: a punydecoded authority without a parsed authority is inconsistent.
  if (!exists(authority) && exists(authorityPunydecoded)) {
    fail("URI_INVALID_HOST", `host must be a valid ip or domain name, got '${hostPunydecoded}'`);
  }

  // Hosts containing ':' may carry a zone identifier introduced by '%'.
  if (is(String, host) && host.includes(":")) {
    const zoneAt = host.indexOf("%");
    if (zoneAt !== -1) {
      const delimiter = host.slice(zoneAt, zoneAt + 3);
      const zoneId = host.slice(zoneAt + 3);
      if (delimiter !== "%25") {
        fail("URI_INVALID_HOST", `IPv6 zone identifier must use the '%25' delimiter, got '${host}'`);
      }
      if (zoneId === "" || !ipv6ZoneIdRegexp.test(zoneId)) {
        fail("URI_INVALID_HOST", `IPv6 zone identifier must be a non-empty RFC 6874 ZoneID, got '${host}'`);
      }
    }
  }

  return {
    scheme,
    authority,
    authorityPunydecoded,
    userinfo,
    host,
    hostPunydecoded,
    port,
    path,
    pathqf,
    query,
    fragment,
    href,
    schemeLen,
    valid: true
  };
};
|
|
928
|
+
/**
|
|
929
|
+
* @func checkURI
|
|
930
|
+
*
|
|
931
|
+
* Check an URI is valid according to RFC-3986.
|
|
932
|
+
*
|
|
933
|
+
* Rules:
|
|
934
|
+
* 1. scheme is required and cannot be empty;
|
|
935
|
+
* 2. path is required and can be empty;
|
|
936
|
+
* 3. if authority is present path must be empty or start with /;
|
|
937
|
+
* 4. if authority is not present path must not start with //;
|
|
938
|
+
* 5. scheme can only have specific characters:
|
|
939
|
+
* https://tools.ietf.org/html/rfc3986#section-3.1;
|
|
940
|
+
* 6. if authority is present:
|
|
941
|
+
* 1. host must be a valid IP or domain name;
|
|
942
|
+
* 2. userinfo, if any, can only have specific characters:
|
|
943
|
+
* https://tools.ietf.org/html/rfc3986#section-3.2.1;
|
|
944
|
+
* 3. port, if any, must be an integer in a specific range.
|
|
945
|
+
* 7. path, query and fragment can only have specific characters:
|
|
946
|
+
* https://tools.ietf.org/html/rfc3986#section-3.3.
|
|
947
|
+
*/
|
|
948
|
+
const checkURI = function checkURI(uri, { sitemap } = {}) {
  // Runs the syntax check, then validates scheme characters, the authority
  // parts (userinfo, host, port) when an authority exists, and finally the
  // path, query and fragment characters. Returns the parsed components with
  // `valid: true`; reports through fail() on the first problem.
  const {
    scheme,
    authority,
    authorityPunydecoded,
    userinfo,
    host,
    hostPunydecoded,
    port,
    path,
    pathqf,
    query,
    fragment,
    href,
    schemeLen
  } = checkURISyntax(uri);

  checkSchemeChars(scheme, schemeLen);

  if (exists(authority)) {
    checkComponent({ sitemap, type: "userinfo", string: userinfo });

    const hostIsValid = isIP(host) || isDomain(host);
    if (!hostIsValid) {
      fail("URI_INVALID_HOST", `host must be a valid ip or domain name, got '${host}'`);
    }

    if (exists(port)) {
      const portIsValid = isPort(port) && int(port, { ge: 0, le: maxPortInteger }) !== void 0;
      if (!portIsValid) {
        fail("URI_INVALID_PORT", `port must be an integer between 0-${maxPortInteger}, got '${port}'`);
      }
    }
  }

  // Remaining components are checked in URI order.
  const trailingComponents = [
    ["path", path],
    ["query", query],
    ["fragment", fragment]
  ];
  for (const [type, string] of trailingComponents) {
    checkComponent({ sitemap, type, string });
  }

  return {
    scheme,
    authority,
    authorityPunydecoded,
    userinfo,
    host,
    hostPunydecoded,
    port,
    path,
    pathqf,
    query,
    fragment,
    href,
    valid: true
  };
};
|
|
994
|
+
/**
|
|
995
|
+
* @func checkHttpURL
|
|
996
|
+
*
|
|
997
|
+
* Check an URI is a valid HTTP URL (sitemap URLs supported to create aliases).
|
|
998
|
+
*
|
|
999
|
+
* This function uses checkURI to check URI provided is valid.
|
|
1000
|
+
*
|
|
1001
|
+
* Rules:
|
|
1002
|
+
* 1. scheme must be http or HTTP;
|
|
1003
|
+
* 2. authority is required;
|
|
1004
|
+
* 3. URL must be less than max length.
|
|
1005
|
+
*
|
|
1006
|
+
* Based on:
|
|
1007
|
+
* - RFC-3986 https://tools.ietf.org/html/rfc3986;
|
|
1008
|
+
* - https://support.google.com/webmasters/answer/183668?hl=en&ref_topic=4581190.
|
|
1009
|
+
*/
|
|
1010
|
+
const checkHttpURL = function checkHttpURL(uri, { https, web, sitemap } = {}) {
  // Validates `uri` with checkURI, then enforces the accepted scheme(s), a
  // required authority and the maximum URL length. `https` takes precedence
  // over `web`; with neither, only 'http' is accepted. Sitemap URLs must
  // additionally be fully lowercase.
  if (sitemap === true) {
    checkLowercase(uri);
  }

  let schemesToCheck = ["http"];
  if (https === true) {
    schemesToCheck = ["https"];
  } else if (web === true) {
    schemesToCheck = ["http", "https"];
  }

  const {
    scheme,
    authority,
    authorityPunydecoded,
    userinfo,
    host,
    hostPunydecoded,
    port,
    path,
    pathqf,
    query,
    fragment,
    href
  } = checkURI(uri, { sitemap });

  if (!schemesToCheck.includes(scheme)) {
    fail("URI_INVALID_SCHEME", `scheme must be ${schemesToCheck.join(" or ")}, got '${scheme}'`);
  }
  if (!is(String, authority)) {
    fail("URI_MISSING_AUTHORITY", "authority is required");
  }
  // A recomposed URL whose length equals the maximum is already rejected.
  if (is(String, href) && href.length >= maxLengthURL) {
    fail("URI_MAX_LENGTH_URL", `max URL length of ${maxLengthURL} reached: ${href.length}`);
  }

  return {
    scheme,
    authority,
    authorityPunydecoded,
    userinfo,
    host,
    hostPunydecoded,
    port,
    path,
    pathqf,
    query,
    fragment,
    href,
    valid: true
  };
};
|
|
1036
|
+
/**
|
|
1037
|
+
* @func checkHttpsURL
|
|
1038
|
+
*
|
|
1039
|
+
* Check an URI is a valid HTTPS URL.
|
|
1040
|
+
*
|
|
1041
|
+
* Same behavior as checkHttpURL except scheme must be https or HTTPS.
|
|
1042
|
+
*/
|
|
1043
|
+
const checkHttpsURL = function checkHttpsURL(uri) {
  // Delegates to checkHttpURL, restricted to the https scheme.
  const httpsOnly = { https: true };
  return checkHttpURL(uri, httpsOnly);
};
|
|
1046
|
+
/**
|
|
1047
|
+
* @func checkHttpSitemapURL
|
|
1048
|
+
*
|
|
1049
|
+
* Check an URI is a valid HTTP URL to be used in an XML sitemap file.
|
|
1050
|
+
*
|
|
1051
|
+
* This function uses checkHttpURL to check URI provided is a valid HTTP URL.
|
|
1052
|
+
*
|
|
1053
|
+
* Rules:
|
|
1054
|
+
* 1. scheme must be http;
|
|
1055
|
+
* 2. authority is required;
|
|
1056
|
+
* 3. specific characters must be escaped;
|
|
1057
|
+
* 4. can only contain lowercase characters;
|
|
1058
|
+
* 5. URL must be less than max length.
|
|
1059
|
+
*
|
|
1060
|
+
* Based on:
|
|
1061
|
+
* - RFC-3986 https://tools.ietf.org/html/rfc3986;
|
|
1062
|
+
* - https://support.google.com/webmasters/answer/183668?hl=en&ref_topic=4581190.
|
|
1063
|
+
*/
|
|
1064
|
+
const checkHttpSitemapURL = function checkHttpSitemapURL(uri) {
  // Delegates to checkHttpURL with the sitemap rules enabled (http scheme).
  const sitemapRules = { sitemap: true };
  return checkHttpURL(uri, sitemapRules);
};
|
|
1067
|
+
/**
|
|
1068
|
+
* @func checkHttpsSitemapURL
|
|
1069
|
+
*
|
|
1070
|
+
* Check an URI is a valid HTTPS URL to be used in an XML sitemap file.
|
|
1071
|
+
* Same behavior than checkHttpSitemapURL except scheme must be https.
|
|
1072
|
+
*/
|
|
1073
|
+
const checkHttpsSitemapURL = function checkHttpsSitemapURL(uri) {
  // Delegates to checkHttpURL with the sitemap rules enabled, https scheme only.
  const httpsSitemapRules = { https: true, sitemap: true };
  return checkHttpURL(uri, httpsSitemapRules);
};
|
|
1079
|
+
/**
|
|
1080
|
+
* @func checkWebURL
|
|
1081
|
+
*
|
|
1082
|
+
* Check an URI is a valid HTTP or HTTPS URL.
|
|
1083
|
+
*
|
|
1084
|
+
* Same behavior than checkHttpURL except scheme can be http/HTTP or https/HTTPS.
|
|
1085
|
+
*/
|
|
1086
|
+
const checkWebURL = function checkWebURL(uri) {
  // Delegates to checkHttpURL, accepting both http and https schemes.
  const webSchemes = { web: true };
  return checkHttpURL(uri, webSchemes);
};
|
|
1089
|
+
/**
|
|
1090
|
+
* @func checkSitemapURL
|
|
1091
|
+
*
|
|
1092
|
+
* Check an URI is a valid HTTP or HTTPS URL to be used in an XML sitemap file.
|
|
1093
|
+
*
|
|
1094
|
+
* Same behavior than checkHttpSitemapURL except scheme can be http or https.
|
|
1095
|
+
*/
|
|
1096
|
+
const checkSitemapURL = function checkSitemapURL(uri) {
  // Delegates to checkHttpURL with the sitemap rules enabled, accepting both
  // http and https schemes.
  const webSitemapRules = { web: true, sitemap: true };
  return checkHttpURL(uri, webSitemapRules);
};
|
|
1102
|
+
//#endregion
|
|
1103
|
+
//#region src/decoders/index.ts
|
|
1104
|
+
/**
|
|
1105
|
+
* decoders
|
|
1106
|
+
*
|
|
1107
|
+
* - decodeURIComponentString(component, { sitemap, lowercase } = {}) -> String
|
|
1108
|
+
* - decodeURIString(uri, { web, sitemap, lowercase } = {}) -> String throws URIError
|
|
1109
|
+
* - decodeWebURL(uri, { lowercase } = {}) -> String
|
|
1110
|
+
* - decodeSitemapURL(uri, { lowercase } = {}) -> String
|
|
1111
|
+
*/
|
|
1112
|
+
// Global alternation of every sitemap escape code and special percent-encoding,
// used to map them back to raw characters before percent-decoding.
const sitemapDecodeRegexp = new RegExp([...escapeCodesKeys, ...pencodingsKeys].join("|"), "g");
|
|
1113
|
+
/**
|
|
1114
|
+
* @func decodeURIComponentString
|
|
1115
|
+
*
|
|
1116
|
+
* Decode an URI component string with Sitemap's escape codes support.
|
|
1117
|
+
*
|
|
1118
|
+
* Native function decodeURIComponent could throw and to be consistent with
|
|
1119
|
+
* encodeURIComponentString the empty string is returned if unable to decode.
|
|
1120
|
+
*
|
|
1121
|
+
* Based on:
|
|
1122
|
+
* - RFC-3986 https://tools.ietf.org/html/rfc3986;
|
|
1123
|
+
* - https://support.google.com/webmasters/answer/183668?hl=en&ref_topic=4581190.
|
|
1124
|
+
*/
|
|
1125
|
+
const decodeURIComponentString = function decodeURIComponentString(component, { sitemap, lowercase } = {}) {
  // Percent-decodes a component, optionally lowercasing it first and, in
  // sitemap mode, first turning escape codes / special encodings back into
  // raw characters. Returns '' for non-strings and when decoding throws.
  if (!is(String, component)) {
    return "";
  }

  let toDecode = component;
  if (lowercase === true) {
    toDecode = component.toLowerCase();
  }
  if (sitemap === true) {
    // Every match comes from the tables the regexp was built from; '' is a
    // defensive fallback.
    toDecode = toDecode.replace(
      sitemapDecodeRegexp,
      (match) => escapeCodes[match] || pencodings[match] || ""
    );
  }

  try {
    return decodeURIComponent(toDecode);
  } catch {
    return "";
  }
};
|
|
1146
|
+
/**
|
|
1147
|
+
* @func decodeURIString
|
|
1148
|
+
*
|
|
1149
|
+
* Decode an URI string according to RFC-3986 with basic checking.
|
|
1150
|
+
*
|
|
1151
|
+
* Checked:
|
|
1152
|
+
* - scheme is required;
|
|
1153
|
+
* - path is required, can be empty;
|
|
1154
|
+
* - port, if any, must be an integer in a specific range;
|
|
1155
|
+
* - host must be a valid ip or domain name;
|
|
1156
|
+
* - maximum size once encoded for URLs.
|
|
1157
|
+
*
|
|
1158
|
+
* Support:
|
|
1159
|
+
* - IDNs: returns URI with its Punydecoded host (Unicode serialization of the domain), if any;
|
|
1160
|
+
* - lower and upper case.
|
|
1161
|
+
*
|
|
1162
|
+
* Note:
|
|
1163
|
+
* - if one of userinfo, path, query or fragment component cannot be decoded, it will be ignored;
|
|
1164
|
+
* - native function decodeURI does not support IDNs and cannot properly work
|
|
1165
|
+
* with encodeURI since the function is based on an outdated standard;
|
|
1166
|
+
* - to stay fully RFC-3986 compliant, scheme and host are put in lowercase;
|
|
1167
|
+
* - to only use with encodeURIString.
|
|
1168
|
+
*
|
|
1169
|
+
* Based on:
|
|
1170
|
+
* - RFC-3986 https://tools.ietf.org/html/rfc3986;
|
|
1171
|
+
* - https://support.google.com/webmasters/answer/183668?hl=en&ref_topic=4581190.
|
|
1172
|
+
*/
|
|
1173
|
+
const decodeURIString = function decodeURIString(uri, { web, sitemap, lowercase } = {}) {
  // Checks the URI syntax, scheme, host and port, then recomposes the URI
  // from its decoded components using the punydecoded host. In web/sitemap
  // mode the scheme must be http(s), an authority is required and the
  // decoded URL length is capped.
  const shouldLowercase = lowercase === true && is(String, uri);
  const uriToDecode = shouldLowercase ? uri.toLowerCase() : uri;
  const webURL = web === true || sitemap === true;

  const {
    scheme,
    authority,
    userinfo,
    host,
    hostPunydecoded,
    port,
    path,
    query,
    fragment,
    schemeLen
  } = checkURISyntax(uriToDecode);

  if (webURL) {
    const isHttpScheme = scheme === "http" || scheme === "https";
    if (!isHttpScheme) {
      fail("URI_INVALID_SCHEME", `scheme must be http or https, got '${scheme}'`);
    }
    if (!is(String, authority)) {
      fail("URI_MISSING_AUTHORITY", "authority is required");
    }
  } else {
    checkSchemeChars(scheme, schemeLen);
  }

  if (exists(host) && !(isIP(host) || isDomain(host))) {
    fail("URI_INVALID_HOST", `host must be a valid ip or domain name, got '${host}'`);
  }

  if (exists(port)) {
    const portIsValid = isPort(port) && int(port, { ge: 0, le: maxPortInteger }) !== void 0;
    if (!portIsValid) {
      fail("URI_INVALID_PORT", `port must be an integer between 0-${maxPortInteger}, got '${port}'`);
    }
  }

  // Components keep their case here; lowercasing, if requested, was applied
  // to the whole URI above.
  const decode = (value) => decodeURIComponentString(value, { sitemap, lowercase: false });
  // Absent or empty query/fragment pass through untouched; an undecodable
  // one becomes null.
  const decodeOptional = (value) => (is(String, value) && value !== "" ? decode(value) || null : value);

  const userinfoDecoded = decode(userinfo ?? "");
  const pathDecoded = decode(path ?? "");
  const queryDecoded = decodeOptional(query);
  const fragmentDecoded = decodeOptional(fragment);

  const uridecoded = recomposeURI({
    scheme,
    port,
    host: hostPunydecoded,
    userinfo: userinfoDecoded,
    path: pathDecoded,
    query: queryDecoded,
    fragment: fragmentDecoded
  });

  if (webURL && uridecoded.length >= maxLengthURL) {
    fail("URI_MAX_LENGTH_URL", `max URL length of ${maxLengthURL} reached: ${uridecoded.length}`);
  }
  return uridecoded;
};
|
|
1214
|
+
/**
|
|
1215
|
+
* @func decodeWebURL
|
|
1216
|
+
*
|
|
1217
|
+
* Decode an URI string with basic checking based on RFC-3986 standard applied
|
|
1218
|
+
* to HTTP and HTTPS URLs.
|
|
1219
|
+
*
|
|
1220
|
+
* Uses a fixed decodeURI function to be RFC-3986 compliant.
|
|
1221
|
+
*
|
|
1222
|
+
* Checked:
|
|
1223
|
+
* - scheme must be http/HTTP or https/HTTPS;
|
|
1224
|
+
* - path is required, can be empty;
|
|
1225
|
+
* - authority is required;
|
|
1226
|
+
* - port, if any, must be an integer in a specific range;
|
|
1227
|
+
* - parseURI prechecked host, will be null if invalid and so does authority.
|
|
1228
|
+
*
|
|
1229
|
+
* Support:
|
|
1230
|
+
* - IDNs: returns URI with its Punydecoded host
|
|
1231
|
+
* (Unicode serialization of the domain), if any;
|
|
1232
|
+
* - lower and upper case.
|
|
1233
|
+
*
|
|
1234
|
+
* Note:
|
|
1235
|
+
* - native function decodeURI does not support IDNs and cannot properly work
|
|
1236
|
+
* with encodeURI since the function is based on an outdated standard;
|
|
1237
|
+
* - to stay fully RFC-3986 compliant, scheme and host are put in lowercase;
|
|
1238
|
+
* - to use only with encodeWebURL.
|
|
1239
|
+
*/
|
|
1240
|
+
const decodeWebURL = function decodeWebURL(uri, { lowercase } = {}) {
  // Delegates to decodeURIString in web mode, forwarding `lowercase`.
  const webOptions = { lowercase, web: true };
  return decodeURIString(uri, webOptions);
};
|
|
1246
|
+
/**
|
|
1247
|
+
* @func decodeSitemapURL
|
|
1248
|
+
*
|
|
1249
|
+
* Decode an URI string with basic checking based on RFC-3986 standard applied
|
|
1250
|
+
* to HTTP and HTTPS URLs and sitemap requirements regarding escape codes to decode.
|
|
1251
|
+
*
|
|
1252
|
+
* Checked:
|
|
1253
|
+
* - scheme must be http/HTTP or https/HTTPS;
|
|
1254
|
+
* - path is required, can be empty;
|
|
1255
|
+
* - authority is required;
|
|
1256
|
+
* - port, if any, must be an integer in a specific range;
|
|
1257
|
+
* - parseURI prechecked host, will be null if invalid and so does authority.
|
|
1258
|
+
*
|
|
1259
|
+
* Support:
|
|
1260
|
+
* - Sitemap's escape codes;
|
|
1261
|
+
* - IDNs: returns URI with its Punydecoded host
|
|
1262
|
+
* (Unicode serialization of the domain), if any;
|
|
1263
|
+
* - lower and upper case.
|
|
1264
|
+
*
|
|
1265
|
+
* Note:
|
|
1266
|
+
* - native function decodeURI does not support IDNs and cannot properly work
|
|
1267
|
+
* with encodeURI since the function is based on an outdated standard;
|
|
1268
|
+
* - to stay fully RFC-3986 compliant, scheme and host are put in lowercase;
|
|
1269
|
+
* - to use only with encodeSitemapURL.
|
|
1270
|
+
*
|
|
1271
|
+
* Based on:
|
|
1272
|
+
* - RFC-3986 https://tools.ietf.org/html/rfc3986;
|
|
1273
|
+
* - https://support.google.com/webmasters/answer/183668?hl=en&ref_topic=4581190.
|
|
1274
|
+
*/
|
|
1275
|
+
const decodeSitemapURL = function decodeSitemapURL(uri, { lowercase } = {}) {
  // Delegates to decodeURIString in sitemap mode, forwarding `lowercase`.
  const sitemapOptions = { lowercase, sitemap: true };
  return decodeURIString(uri, sitemapOptions);
};
|
|
1281
|
+
//#endregion
|
|
1282
|
+
//#region src/encoders/index.ts
|
|
1283
|
+
/**
|
|
1284
|
+
* encoders
|
|
1285
|
+
*
|
|
1286
|
+
* - encodeURIComponentString(component, { type, sitemap, lowercase } = {}) -> String
|
|
1287
|
+
* - encodeURIString(uri, { web, sitemap, lowercase } = {}) -> String throws URIError
|
|
1288
|
+
* - encodeWebURL(uri, { lowercase } = {}) -> String
|
|
1289
|
+
* - encodeSitemapURL(uri, { lowercase } = {}) -> String
|
|
1290
|
+
*/
|
|
1291
|
+
/**
|
|
1292
|
+
* @func encodeURIComponentString
|
|
1293
|
+
*
|
|
1294
|
+
* Encode an URI component according to RFC-3986 with Sitemap entities support.
|
|
1295
|
+
*
|
|
1296
|
+
* Support:
|
|
1297
|
+
* - Sitemap's special characters;
|
|
1298
|
+
* - lower and upper case.
|
|
1299
|
+
*
|
|
1300
|
+
* Note:
|
|
1301
|
+
* - only userinfo, path, query and fragment components can be encoded;
|
|
1302
|
+
* - scheme and authority host+port can never have percent encoded characters;
|
|
1303
|
+
* - the empty string is returned if unable to encode;
|
|
1304
|
+
* - sitemap characters must be in lowercase and escaped for XML sitemap.
|
|
1305
|
+
*
|
|
1306
|
+
* Based on:
|
|
1307
|
+
* - RFC-3986 https://tools.ietf.org/html/rfc3986;
|
|
1308
|
+
* - https://support.google.com/webmasters/answer/183668?hl=en&ref_topic=4581190.
|
|
1309
|
+
*/
|
|
1310
|
+
const encodeURIComponentString = function encodeURIComponentString(component, { type, sitemap, lowercase } = {}) {
  /*
   * Encode one URI component.
   *
   * @param component  string to encode; anything else yields ''.
   * @param type       'userinfo', 'path', 'query' or 'fragment'; selects the
   *                   checker deciding which characters stay as-is. With any
   *                   other value every character is percent-encoded.
   * @param sitemap    when true the component is lowercased and characters
   *                   found in `entities` / `specialChars` are replaced by
   *                   their escape code / percent-encoding.
   * @param lowercase  when true the component is lowercased before encoding.
   * @returns the encoded component, or '' when it cannot be encoded.
   */
  if (!is(String, component)) return "";

  const forSitemap = sitemap === true;
  const componentToEncode = lowercase === true || forSitemap ? component.toLowerCase() : component;
  let uricomponent = "";

  // Iterate by code point, not by UTF-16 code unit: handing one half of a
  // surrogate pair to encodeURIComponent throws a URIError, which made every
  // character outside the BMP (e.g. emoji) impossible to encode.
  for (const char of componentToEncode) {
    if (forSitemap) {
      const entity = entities[char];
      const escaped = exists(entity) ? entity : specialChars[char];
      if (exists(escaped)) {
        uricomponent += escaped;
        continue;
      }
    }

    let isChar;
    switch (type) {
      case "userinfo":
        isChar = forSitemap && isSitemapUserinfoChar(char, true) || isUserinfoChar(char, true);
        break;
      case "path":
        isChar = forSitemap && isSitemapPathChar(char, true) || isPathChar(char, true);
        break;
      case "query":
      case "fragment":
        isChar = forSitemap && isSitemapQueryOrFragmentChar(char, true) || isQueryOrFragmentChar(char, true);
        break;
      default:
        isChar = false;
    }

    if (isChar) {
      uricomponent += char;
      continue;
    }

    try {
      uricomponent += encodeURIComponent(char);
    } catch {
      // Only an unpaired surrogate gets here; honor the documented contract
      // (empty string when unable to encode), mirroring decodeURIComponentString.
      return "";
    }
  }
  return uricomponent;
};
|
|
1349
|
+
/**
|
|
1350
|
+
* @func encodeURIString
|
|
1351
|
+
*
|
|
1352
|
+
* Encode an URI string according to RFC-3986 with basic checking.
|
|
1353
|
+
*
|
|
1354
|
+
* Checked:
|
|
1355
|
+
* - scheme is required;
|
|
1356
|
+
* - path is required, can be empty;
|
|
1357
|
+
* - port, if any, must be an integer in a specific range;
|
|
1358
|
+
* - host must be a valid ip or domain name;
|
|
1359
|
+
* - maximum size once encoded for URLs.
|
|
1360
|
+
*
|
|
1361
|
+
* Support:
|
|
1362
|
+
* - IDNs: returns URI with its Punycode host, if any;
|
|
1363
|
+
* - lower and upper case.
|
|
1364
|
+
*
|
|
1365
|
+
* Note:
|
|
1366
|
+
* - only userinfo, path, query and fragment can be percent encoded;
|
|
1367
|
+
* - native function encodeURI encodes string according to RFC-2396 which is outdated;
|
|
1368
|
+
* - native function encodeURI also encodes scheme and host that cannot have
|
|
1369
|
+
* percent-encoded characters;
|
|
1370
|
+
* - characters that should not be percent-encoded in RFC-3986 are [] to represent IPv6 host;
|
|
1371
|
+
* - to stay fully RFC-3986 compliant, scheme and host are put in lowercase.
|
|
1372
|
+
*
|
|
1373
|
+
* Based on:
|
|
1374
|
+
* - RFC-3986 https://tools.ietf.org/html/rfc3986;
|
|
1375
|
+
* - https://support.google.com/webmasters/answer/183668?hl=en&ref_topic=4581190.
|
|
1376
|
+
*/
|
|
1377
|
+
const encodeURIString = function encodeURIString(uri, { web, sitemap, lowercase } = {}) {
  // Checks the URI syntax, scheme, host and port, then recomposes the URI
  // with userinfo, path, query and fragment encoded. In web/sitemap mode the
  // scheme must be http(s), an authority is required and the encoded URL
  // length is capped.
  const shouldLowercase = lowercase === true && is(String, uri);
  const uriToEncode = shouldLowercase ? uri.toLowerCase() : uri;
  const webURL = web === true || sitemap === true;

  const {
    scheme,
    authority,
    userinfo,
    host,
    port,
    path,
    query,
    fragment,
    schemeLen
  } = checkURISyntax(uriToEncode);

  if (webURL) {
    const isHttpScheme = scheme === "http" || scheme === "https";
    if (!isHttpScheme) {
      fail("URI_INVALID_SCHEME", `scheme must be http or https, got '${scheme}'`);
    }
    if (!is(String, authority)) {
      fail("URI_MISSING_AUTHORITY", "authority is required");
    }
  } else {
    checkSchemeChars(scheme, schemeLen);
  }

  if (exists(host) && !(isIP(host) || isDomain(host))) {
    fail("URI_INVALID_HOST", `host must be a valid ip or domain name, got '${host}'`);
  }

  if (exists(port)) {
    const portIsValid = isPort(port) && int(port, { ge: 0, le: maxPortInteger }) !== void 0;
    if (!portIsValid) {
      fail("URI_INVALID_PORT", `port must be an integer between 0-${maxPortInteger}, got '${port}'`);
    }
  }

  // Components keep their case here; lowercasing, if requested, was applied
  // to the whole URI above.
  const encode = (value, type) => encodeURIComponentString(value, { sitemap, type, lowercase: false });
  // Absent query/fragment pass through untouched.
  const encodeOptional = (value, type) => (is(String, value) ? encode(value, type) : value);

  const userinfoEncoded = encode(userinfo ?? "", "userinfo");
  const pathEncoded = encode(path ?? "", "path");
  const queryEncoded = encodeOptional(query, "query");
  const fragmentEncoded = encodeOptional(fragment, "fragment");

  const uriencoded = recomposeURI({
    scheme,
    host,
    port,
    userinfo: userinfoEncoded,
    path: pathEncoded,
    query: queryEncoded,
    fragment: fragmentEncoded
  });

  if (webURL && uriencoded.length >= maxLengthURL) {
    fail("URI_MAX_LENGTH_URL", `max URL length of ${maxLengthURL} reached: ${uriencoded.length}`);
  }
  return uriencoded;
};
|
|
1418
|
+
/**
 * @func encodeWebURL
 *
 * Encode a URI string as an HTTP or HTTPS URL, with basic checking based on
 * the RFC-3986 standard. Delegates to encodeURIString with the web option set.
 *
 * Relies on a corrected encodeURI implementation to be RFC-3986 compliant.
 *
 * Checked:
 * - scheme must be http/HTTP or https/HTTPS;
 * - path is required, can be empty;
 * - authority is required;
 * - port, if any, must be an integer in a specific range;
 * - host must be a valid IP or domain name;
 * - maximum size once encoded.
 *
 * Support:
 * - IDNs: returns URL with its Punycode host, if any;
 * - lower and upper case.
 *
 * Note:
 * - only userinfo, path, query and fragment can be percent-encoded;
 * - native function encodeURI encodes string according to RFC-2396 which is outdated;
 * - native function encodeURI also encodes scheme and host, which cannot
 *   have percent-encoded characters;
 * - characters that should not be percent-encoded in RFC-3986 are [] to represent IPv6 host;
 * - to stay fully RFC-3986 compliant, scheme and host are put in lowercase.
 *
 * Based on:
 * - RFC-3986 https://tools.ietf.org/html/rfc3986.
 */
const encodeWebURL = function encodeWebURL(uri, { lowercase } = {}) {
  // Forward the caller's lowercase choice and force the web checks on.
  const webOptions = {
    lowercase,
    web: true
  };
  return encodeURIString(uri, webOptions);
};
|
|
1454
|
+
/**
 * @func encodeSitemapURL
 *
 * Encode a URI string as an HTTP or HTTPS URL, with basic checking based on
 * the RFC-3986 standard plus the sitemap requirements regarding special
 * entities to escape. Delegates to encodeURIString with the sitemap option
 * set and lowercasing always enabled.
 *
 * Checked:
 * - scheme must be http/HTTP or https/HTTPS;
 * - path is required, can be empty;
 * - authority is required;
 * - port, if any, must be an integer in a specific range;
 * - host must be a valid IP or domain name;
 * - maximum size once encoded.
 *
 * Support:
 * - Sitemap's special characters;
 * - IDNs: returns URI with its Punycode host, if any;
 * - lower case only.
 *
 * Note:
 * - only userinfo, path, query and fragment can be percent-encoded;
 * - native function encodeURI encodes string according to RFC-2396 which is outdated;
 * - native function encodeURI also encodes scheme and host, which cannot
 *   have percent-encoded characters;
 * - characters that should not be percent-encoded in RFC-3986 are [] to represent IPv6 host;
 * - to stay fully RFC-3986 compliant, scheme and host are put in lowercase.
 *
 * Based on:
 * - RFC-3986 https://tools.ietf.org/html/rfc3986;
 * - https://support.google.com/webmasters/answer/183668?hl=en&ref_topic=4581190.
 */
const encodeSitemapURL = function encodeSitemapURL(uri) {
  // Sitemap URLs are always lowercased; callers get no option to opt out.
  const sitemapOptions = {
    lowercase: true,
    sitemap: true
  };
  return encodeURIString(uri, sitemapOptions);
};
|
|
1490
|
+
//#endregion
|
|
1491
|
+
//#region src/resolver/index.ts
|
|
1492
|
+
/**
|
|
1493
|
+
* reference resolution
|
|
1494
|
+
*
|
|
1495
|
+
* - removeDotSegments(path) -> String
|
|
1496
|
+
* - resolveURI(base, reference) -> String
|
|
1497
|
+
*
|
|
1498
|
+
* Based on:
|
|
1499
|
+
* - RFC-3986 https://tools.ietf.org/html/rfc3986#section-5.
|
|
1500
|
+
*/
|
|
1501
|
+
// Component-splitting pattern for a URI-reference. Capture groups, in order:
// 1 scheme, 2 authority, 3 path, 4 query, 5 fragment.
const referenceRegexp = /^(?:([^:/?#]+):)?(?:\/\/([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?/;
/**
 * @func parseReference
 *
 * Break a URI-reference down into scheme, authority, path, query and
 * fragment. A component whose delimiter is missing comes back as null, while
 * one whose delimiter is present but followed by nothing comes back as '';
 * RFC-3986 §5.2.2 depends on that defined/undefined distinction. The path is
 * always a string.
 */
const parseReference = function parseReference(reference) {
  /* v8 ignore next -- unreachable []: the Appendix-B regexp is all-optional and matches any string */
  const captures = reference.match(referenceRegexp) ?? [];
  // A group that did not participate in the match is undefined; report it as null.
  const component = (index) => captures[index] ?? null;
  return {
    scheme: component(1),
    authority: component(2),
    /* v8 ignore next -- unreachable '': the path group [^?#]* always captures a string */
    path: captures[3] ?? "",
    query: component(4),
    fragment: component(5)
  };
};
|
|
1521
|
+
/**
 * @func removeDotSegments
 *
 * Strip the special "." and ".." complete path segments out of a path by
 * running the ordered loop of RFC-3986 §5.2.4: the input is consumed from the
 * left, one rule per iteration, while the output buffer grows (or shrinks on
 * "..") on the right. Returns '' when the path is not a string.
 *
 * Based on:
 * - RFC-3986 https://tools.ietf.org/html/rfc3986#section-5.2.4.
 */
const removeDotSegments = function removeDotSegments(path) {
  if (!is(String, path)) return "";
  // Drop the last segment of the buffer together with its preceding "/", if any.
  const dropLastSegment = (buffer) => buffer.slice(0, Math.max(0, buffer.lastIndexOf("/")));
  let remaining = path;
  let result = "";
  while (remaining.length > 0) {
    // A. leading "../" or "./" is simply removed.
    if (remaining.startsWith("../")) {
      remaining = remaining.slice(3);
      continue;
    }
    if (remaining.startsWith("./")) {
      remaining = remaining.slice(2);
      continue;
    }
    // B. leading "/./" or a lone "/." collapses to "/".
    if (remaining.startsWith("/./")) {
      remaining = `/${remaining.slice(3)}`;
      continue;
    }
    if (remaining === "/.") {
      remaining = "/";
      continue;
    }
    // C. leading "/../" or a lone "/.." collapses to "/" and pops one output segment.
    if (remaining.startsWith("/../")) {
      remaining = `/${remaining.slice(4)}`;
      result = dropLastSegment(result);
      continue;
    }
    if (remaining === "/..") {
      remaining = "/";
      result = dropLastSegment(result);
      continue;
    }
    // D. an input made only of "." or ".." is discarded.
    if (remaining === "." || remaining === "..") {
      remaining = "";
      continue;
    }
    // E. move the first segment (with its leading "/", if any) to the output,
    //    stopping right before the next "/".
    const searchFrom = remaining.startsWith("/") ? 1 : 0;
    const boundary = remaining.indexOf("/", searchFrom);
    if (boundary === -1) {
      result += remaining;
      remaining = "";
    } else {
      result += remaining.slice(0, boundary);
      remaining = remaining.slice(boundary);
    }
  }
  return result;
};
|
|
1558
|
+
/**
 * @func merge
 *
 * Combine the path of a relative reference with the path of the base, as
 * described in RFC-3986 §5.2.3:
 * - a base with an authority and an empty path yields "/" + refPath;
 * - otherwise everything after the last "/" of the base path is replaced by
 *   refPath (refPath alone when the base path holds no "/").
 */
const merge = function merge(base, refPath) {
  const { authority, path: basePath } = base;
  if (basePath === "" && is(String, authority)) return `/${refPath}`;
  const cut = basePath.lastIndexOf("/");
  if (cut === -1) return refPath;
  // Keep the base path up to and including its last "/".
  return basePath.slice(0, cut + 1) + refPath;
};
|
|
1568
|
+
/**
 * @func recompose
 *
 * Assemble the components of a resolved target back into a string, following
 * RFC-3986 §5.3. Any component that is a string is written out with its
 * delimiter, even when it is ''; the path is always appended as-is.
 */
const recompose = function recompose(target) {
  /* v8 ignore next -- unreachable false branch: a resolved target always has a scheme (the base is absolute) */
  const schemePart = is(String, target.scheme) ? `${target.scheme}:` : "";
  const authorityPart = is(String, target.authority) ? `//${target.authority}` : "";
  const hierarchy = schemePart + authorityPart + target.path;
  const queryPart = is(String, target.query) ? `?${target.query}` : "";
  const fragmentPart = is(String, target.fragment) ? `#${target.fragment}` : "";
  return hierarchy + queryPart + fragmentPart;
};
|
|
1584
|
+
/**
 * @func resolveURI
 *
 * Resolve a URI reference against a base URI. Implements the strict
 * transform of RFC-3986 §5.2.2 (using the §5.2.3 merge and the §5.2.4
 * remove_dot_segments routines) and recomposes the result per §5.3.
 *
 * The base has to be an absolute URI (RFC-3986 §5.2.1 requires a scheme).
 * A fragment carried by the base is ignored (RFC-3986 §5.1: the base is used
 * stripped of any fragment). The empty string is returned when base or
 * reference is not a string, or when the base has no scheme.
 *
 * Based on:
 * - RFC-3986 https://tools.ietf.org/html/rfc3986#section-5.2.
 */
const resolveURI = function resolveURI(base, reference) {
  if (!is(String, base) || !is(String, reference)) return "";
  const baseParts = parseReference(base);
  if (!is(String, baseParts.scheme)) return "";
  const ref = parseReference(reference);
  let target;
  if (is(String, ref.scheme)) {
    // The reference is itself absolute: only its dot segments are cleaned up.
    target = {
      scheme: ref.scheme,
      authority: ref.authority,
      path: removeDotSegments(ref.path),
      query: ref.query
    };
  } else if (is(String, ref.authority)) {
    // Network-path reference: inherit nothing but the scheme.
    target = {
      scheme: baseParts.scheme,
      authority: ref.authority,
      path: removeDotSegments(ref.path),
      query: ref.query
    };
  } else if (ref.path === "") {
    // Same-document or query-only reference: the base path is kept verbatim,
    // and the base query is kept unless the reference defines its own.
    target = {
      scheme: baseParts.scheme,
      authority: baseParts.authority,
      path: baseParts.path,
      query: is(String, ref.query) ? ref.query : baseParts.query
    };
  } else {
    // Absolute-path references are used directly, relative ones are merged
    // with the base path first; dot segments are removed in both cases.
    const unresolvedPath = ref.path.startsWith("/") ? ref.path : merge(baseParts, ref.path);
    target = {
      scheme: baseParts.scheme,
      authority: baseParts.authority,
      path: removeDotSegments(unresolvedPath),
      query: ref.query
    };
  }
  // The fragment always comes from the reference, never from the base.
  target.fragment = ref.fragment;
  return recompose(target);
};
|
|
1636
|
+
//#endregion
|
|
1637
|
+
// Named exports of this module.
export { checkHttpSitemapURL, checkHttpURL, checkHttpsSitemapURL, checkHttpsURL, checkSitemapURL, checkURI, checkWebURL, decodeSitemapURL, decodeURIComponentString, decodeURIString, decodeWebURL, encodeSitemapURL, encodeURIComponentString, encodeURIString, encodeWebURL, isDomain, isDomainLabel, isIP, isIPv4, isIPv6, parseURI, punycode, punydecode, recomposeURI, removeDotSegments, resolveURI };
|
|
1638
|
+
|
|
1639
|
+
//# sourceMappingURL=index.mjs.map
|