ada-url 1.19.0__cp312-cp312-macosx_11_0_arm64.whl → 1.29.0__cp312-cp312-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ada_url/__init__.py +2 -0
- ada_url/_ada_wrapper.abi3.so +0 -0
- ada_url/ada.cpp +3572 -2824
- ada_url/ada.h +1517 -771
- ada_url/ada_adapter.py +4 -1
- ada_url/ada_build.py +1 -0
- ada_url/ada_c.h +10 -0
- {ada_url-1.19.0.dist-info → ada_url-1.29.0.dist-info}/METADATA +16 -16
- ada_url-1.29.0.dist-info/RECORD +14 -0
- {ada_url-1.19.0.dist-info → ada_url-1.29.0.dist-info}/WHEEL +1 -1
- ada_url-1.19.0.dist-info/RECORD +0 -14
- {ada_url-1.19.0.dist-info → ada_url-1.29.0.dist-info/licenses}/LICENSE +0 -0
- {ada_url-1.19.0.dist-info → ada_url-1.29.0.dist-info}/top_level.txt +0 -0
ada_url/ada.h
CHANGED
|
@@ -1,14 +1,36 @@
|
|
|
1
|
-
/* auto-generated on
|
|
1
|
+
/* auto-generated on 2026-01-30 13:29:04 -0500. Do not edit! */
|
|
2
2
|
/* begin file include/ada.h */
|
|
3
3
|
/**
|
|
4
4
|
* @file ada.h
|
|
5
|
-
* @brief
|
|
5
|
+
* @brief Main header for the Ada URL parser library.
|
|
6
|
+
*
|
|
7
|
+
* This is the primary entry point for the Ada URL parser library. Including
|
|
8
|
+
* this single header provides access to the complete Ada API, including:
|
|
9
|
+
*
|
|
10
|
+
* - URL parsing via `ada::parse()` function
|
|
11
|
+
* - Two URL representations: `ada::url` and `ada::url_aggregator`
|
|
12
|
+
* - URL search parameters via `ada::url_search_params`
|
|
13
|
+
* - URL pattern matching via `ada::url_pattern` (URLPattern API)
|
|
14
|
+
* - IDNA (Internationalized Domain Names) support
|
|
15
|
+
*
|
|
16
|
+
* @example
|
|
17
|
+
* ```cpp
|
|
18
|
+
*
|
|
19
|
+
* // Parse a URL
|
|
20
|
+
* auto url = ada::parse("https://example.com/path?query=1");
|
|
21
|
+
* if (url) {
|
|
22
|
+
* std::cout << url->get_hostname(); // "example.com"
|
|
23
|
+
* }
|
|
24
|
+
* ```
|
|
25
|
+
*
|
|
26
|
+
* @see https://url.spec.whatwg.org/ - WHATWG URL Standard
|
|
27
|
+
* @see https://github.com/ada-url/ada - Ada URL Parser GitHub Repository
|
|
6
28
|
*/
|
|
7
29
|
#ifndef ADA_H
|
|
8
30
|
#define ADA_H
|
|
9
31
|
|
|
10
32
|
/* begin file include/ada/ada_idna.h */
|
|
11
|
-
/* auto-generated on
|
|
33
|
+
/* auto-generated on 2026-01-30 12:00:02 -0500. Do not edit! */
|
|
12
34
|
/* begin file include/idna.h */
|
|
13
35
|
#ifndef ADA_IDNA_H
|
|
14
36
|
#define ADA_IDNA_H
|
|
@@ -45,8 +67,6 @@ namespace ada::idna {
|
|
|
45
67
|
|
|
46
68
|
// If the input is ascii, then the mapping is just -> lower case.
|
|
47
69
|
void ascii_map(char* input, size_t length);
|
|
48
|
-
// check whether an ascii string needs mapping
|
|
49
|
-
bool ascii_has_upper_case(char* input, size_t length);
|
|
50
70
|
// Map the characters according to IDNA, returning the empty string on error.
|
|
51
71
|
std::u32string map(std::u32string_view input);
|
|
52
72
|
|
|
@@ -160,7 +180,6 @@ std::string to_unicode(std::string_view input);
|
|
|
160
180
|
|
|
161
181
|
namespace ada::idna {
|
|
162
182
|
|
|
163
|
-
// Access the first code point of the input string.
|
|
164
183
|
// Verify if it is a valid name code point given a Unicode code point and a
|
|
165
184
|
// boolean first: If first is true return the result of checking if code point
|
|
166
185
|
// is contained in the IdentifierStart set of code points. Otherwise return the
|
|
@@ -168,7 +187,7 @@ namespace ada::idna {
|
|
|
168
187
|
// code points. Returns false if the input is empty or the code point is not
|
|
169
188
|
// valid. There is minimal Unicode error handling: the input should be valid
|
|
170
189
|
// UTF-8. https://urlpattern.spec.whatwg.org/#is-a-valid-name-code-point
|
|
171
|
-
bool valid_name_code_point(char32_t
|
|
190
|
+
bool valid_name_code_point(char32_t code_point, bool first);
|
|
172
191
|
|
|
173
192
|
} // namespace ada::idna
|
|
174
193
|
|
|
@@ -191,7 +210,11 @@ bool valid_name_code_point(char32_t input, bool first);
|
|
|
191
210
|
/* begin file include/ada/common_defs.h */
|
|
192
211
|
/**
|
|
193
212
|
* @file common_defs.h
|
|
194
|
-
* @brief
|
|
213
|
+
* @brief Cross-platform compiler macros and common definitions.
|
|
214
|
+
*
|
|
215
|
+
* This header provides compiler-specific macros for optimization hints,
|
|
216
|
+
* platform detection, SIMD support detection, and development/debug utilities.
|
|
217
|
+
* It ensures consistent behavior across different compilers (GCC, Clang, MSVC).
|
|
195
218
|
*/
|
|
196
219
|
#ifndef ADA_COMMON_DEFS_H
|
|
197
220
|
#define ADA_COMMON_DEFS_H
|
|
@@ -424,6 +447,10 @@ namespace ada {
|
|
|
424
447
|
} while (0)
|
|
425
448
|
#endif
|
|
426
449
|
|
|
450
|
+
#if defined(__SSSE3__)
|
|
451
|
+
#define ADA_SSSE3 1
|
|
452
|
+
#endif
|
|
453
|
+
|
|
427
454
|
#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \
|
|
428
455
|
(defined(_M_AMD64) || defined(_M_X64) || \
|
|
429
456
|
(defined(_M_IX86_FP) && _M_IX86_FP == 2))
|
|
@@ -434,6 +461,15 @@ namespace ada {
|
|
|
434
461
|
#define ADA_NEON 1
|
|
435
462
|
#endif
|
|
436
463
|
|
|
464
|
+
#if defined(__loongarch_sx)
|
|
465
|
+
#define ADA_LSX 1
|
|
466
|
+
#endif
|
|
467
|
+
|
|
468
|
+
#if defined(__riscv_v) && __riscv_v_intrinsic >= 11000
|
|
469
|
+
// Support RVV intrinsics v0.11 and above
|
|
470
|
+
#define ADA_RVV 1
|
|
471
|
+
#endif
|
|
472
|
+
|
|
437
473
|
#ifndef __has_cpp_attribute
|
|
438
474
|
#define ada_lifetime_bound
|
|
439
475
|
#elif __has_cpp_attribute(msvc::lifetimebound)
|
|
@@ -453,6 +489,10 @@ namespace ada {
|
|
|
453
489
|
#endif
|
|
454
490
|
#endif
|
|
455
491
|
|
|
492
|
+
#ifndef ADA_INCLUDE_URL_PATTERN
|
|
493
|
+
#define ADA_INCLUDE_URL_PATTERN 1
|
|
494
|
+
#endif // ADA_INCLUDE_URL_PATTERN
|
|
495
|
+
|
|
456
496
|
#endif // ADA_COMMON_DEFS_H
|
|
457
497
|
/* end file include/ada/common_defs.h */
|
|
458
498
|
#include <cstdint>
|
|
@@ -876,7 +916,7 @@ constexpr uint8_t PATH_PERCENT_ENCODE[32] = {
|
|
|
876
916
|
// 50 51 52 53 54 55 56 57
|
|
877
917
|
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
|
|
878
918
|
// 58 59 5A 5B 5C 5D 5E 5F
|
|
879
|
-
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 |
|
|
919
|
+
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x40 | 0x00,
|
|
880
920
|
// 60 61 62 63 64 65 66 67
|
|
881
921
|
0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
|
|
882
922
|
// 68 69 6A 6B 6C 6D 6E 6F
|
|
@@ -942,7 +982,7 @@ constexpr uint8_t WWW_FORM_URLENCODED_PERCENT_ENCODE[32] = {
|
|
|
942
982
|
// 50 51 52 53 54 55 56 57
|
|
943
983
|
0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
|
|
944
984
|
// 58 59 5A 5B 5C 5D 5E 5F
|
|
945
|
-
0x00 | 0x00 | 0x00 | 0x08 |
|
|
985
|
+
0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x40 | 0x00,
|
|
946
986
|
// 60 61 62 63 64 65 66 67
|
|
947
987
|
0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
|
|
948
988
|
// 68 69 6A 6B 6C 6D 6E 6F
|
|
@@ -1002,6 +1042,140 @@ ada_really_inline constexpr bool bit_at(const uint8_t a[], const uint8_t i) {
|
|
|
1002
1042
|
|
|
1003
1043
|
#include <bit>
|
|
1004
1044
|
#include <string_view>
|
|
1045
|
+
/* begin file include/ada/checkers.h */
|
|
1046
|
+
/**
|
|
1047
|
+
* @file checkers.h
|
|
1048
|
+
* @brief Declarations for URL specific checkers used within Ada.
|
|
1049
|
+
*/
|
|
1050
|
+
#ifndef ADA_CHECKERS_H
|
|
1051
|
+
#define ADA_CHECKERS_H
|
|
1052
|
+
|
|
1053
|
+
|
|
1054
|
+
#include <cstring>
|
|
1055
|
+
#include <string_view>
|
|
1056
|
+
|
|
1057
|
+
/**
|
|
1058
|
+
* These functions are not part of our public API and may
|
|
1059
|
+
* change at any time.
|
|
1060
|
+
* @private
|
|
1061
|
+
* @namespace ada::checkers
|
|
1062
|
+
* @brief Includes the definitions for validation functions
|
|
1063
|
+
*/
|
|
1064
|
+
namespace ada::checkers {
|
|
1065
|
+
|
|
1066
|
+
/**
|
|
1067
|
+
* @private
|
|
1068
|
+
* Assuming that x is an ASCII letter, this function returns the lower case
|
|
1069
|
+
* equivalent.
|
|
1070
|
+
* @details More likely to be inlined by the compiler and constexpr.
|
|
1071
|
+
*/
|
|
1072
|
+
constexpr char to_lower(char x) noexcept;
|
|
1073
|
+
|
|
1074
|
+
/**
|
|
1075
|
+
* @private
|
|
1076
|
+
* Returns true if the character is an ASCII letter. Equivalent to std::isalpha
|
|
1077
|
+
* but more likely to be inlined by the compiler.
|
|
1078
|
+
*
|
|
1079
|
+
* @attention std::isalpha is not constexpr generally.
|
|
1080
|
+
*/
|
|
1081
|
+
constexpr bool is_alpha(char x) noexcept;
|
|
1082
|
+
|
|
1083
|
+
/**
|
|
1084
|
+
* @private
|
|
1085
|
+
* Check whether a string starts with 0x or 0X. The function is only
|
|
1086
|
+
* safe if input.size() >= 2.
|
|
1087
|
+
*
|
|
1088
|
+
* @see has_hex_prefix
|
|
1089
|
+
*/
|
|
1090
|
+
constexpr bool has_hex_prefix_unsafe(std::string_view input);
|
|
1091
|
+
/**
|
|
1092
|
+
* @private
|
|
1093
|
+
* Check whether a string starts with 0x or 0X.
|
|
1094
|
+
*/
|
|
1095
|
+
constexpr bool has_hex_prefix(std::string_view input);
|
|
1096
|
+
|
|
1097
|
+
/**
|
|
1098
|
+
* @private
|
|
1099
|
+
* Check whether x is an ASCII digit. More likely to be inlined than
|
|
1100
|
+
* std::isdigit.
|
|
1101
|
+
*/
|
|
1102
|
+
constexpr bool is_digit(char x) noexcept;
|
|
1103
|
+
|
|
1104
|
+
/**
|
|
1105
|
+
* @private
|
|
1106
|
+
* @details A string starts with a Windows drive letter if all of the following
|
|
1107
|
+
* are true:
|
|
1108
|
+
*
|
|
1109
|
+
* - its length is greater than or equal to 2
|
|
1110
|
+
* - its first two code points are a Windows drive letter
|
|
1111
|
+
* - its length is 2 or its third code point is U+002F (/), U+005C (\), U+003F
|
|
1112
|
+
* (?), or U+0023 (#).
|
|
1113
|
+
*
|
|
1114
|
+
* https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
|
|
1115
|
+
*/
|
|
1116
|
+
inline constexpr bool is_windows_drive_letter(std::string_view input) noexcept;
|
|
1117
|
+
|
|
1118
|
+
/**
|
|
1119
|
+
* @private
|
|
1120
|
+
* @details A normalized Windows drive letter is a Windows drive letter of which
|
|
1121
|
+
* the second code point is U+003A (:).
|
|
1122
|
+
*/
|
|
1123
|
+
inline constexpr bool is_normalized_windows_drive_letter(
|
|
1124
|
+
std::string_view input) noexcept;
|
|
1125
|
+
|
|
1126
|
+
/**
|
|
1127
|
+
* @private
|
|
1128
|
+
* Returns true if an input is an ipv4 address. It is assumed that the string
|
|
1129
|
+
* does not contain uppercase ASCII characters (the input should have been
|
|
1130
|
+
* lowercased before calling this function) and is not empty.
|
|
1131
|
+
*/
|
|
1132
|
+
ada_really_inline constexpr bool is_ipv4(std::string_view view) noexcept;
|
|
1133
|
+
|
|
1134
|
+
/**
|
|
1135
|
+
* @private
|
|
1136
|
+
* Returns a bitset. If the first bit is set, then at least one character needs
|
|
1137
|
+
* percent encoding. If the second bit is set, a \\ is found. If the third bit
|
|
1138
|
+
* is set then we have a dot. If the fourth bit is set, then we have a percent
|
|
1139
|
+
* character.
|
|
1140
|
+
*/
|
|
1141
|
+
ada_really_inline constexpr uint8_t path_signature(
|
|
1142
|
+
std::string_view input) noexcept;
|
|
1143
|
+
|
|
1144
|
+
/**
|
|
1145
|
+
* @private
|
|
1146
|
+
* Returns true if the length of the domain name and its labels are according to
|
|
1147
|
+
* the specifications. The length of the domain must be at most 255 octets (253
|
|
1148
|
+
* characters not including the last 2 which are the empty label reserved at the
|
|
1149
|
+
* end). When the empty label is included (a dot at the end), the domain name
|
|
1150
|
+
* can have 254 characters. The length of a label must be at least 1 and at most
|
|
1151
|
+
* 63 characters.
|
|
1152
|
+
* @see section 3.1. of https://www.rfc-editor.org/rfc/rfc1034
|
|
1153
|
+
* @see https://www.unicode.org/reports/tr46/#ToASCII
|
|
1154
|
+
*/
|
|
1155
|
+
ada_really_inline constexpr bool verify_dns_length(
|
|
1156
|
+
std::string_view input) noexcept;
|
|
1157
|
+
|
|
1158
|
+
/**
|
|
1159
|
+
* @private
|
|
1160
|
+
* Fast-path parser for pure decimal IPv4 addresses (e.g., "192.168.1.1").
|
|
1161
|
+
* Returns the packed 32-bit IPv4 address on success, or a value > 0xFFFFFFFF
|
|
1162
|
+
* to indicate failure (caller should fall back to general parser).
|
|
1163
|
+
* This is optimized for the common case where the input is a well-formed
|
|
1164
|
+
* decimal IPv4 address with exactly 4 octets.
|
|
1165
|
+
*/
|
|
1166
|
+
ada_really_inline constexpr uint64_t try_parse_ipv4_fast(
|
|
1167
|
+
std::string_view input) noexcept;
|
|
1168
|
+
|
|
1169
|
+
/**
|
|
1170
|
+
* Sentinel value indicating try_parse_ipv4_fast() did not succeed.
|
|
1171
|
+
* Any value > 0xFFFFFFFF indicates the fast path should not be used.
|
|
1172
|
+
*/
|
|
1173
|
+
constexpr uint64_t ipv4_fast_fail = uint64_t(1) << 32;
|
|
1174
|
+
|
|
1175
|
+
} // namespace ada::checkers
|
|
1176
|
+
|
|
1177
|
+
#endif // ADA_CHECKERS_H
|
|
1178
|
+
/* end file include/ada/checkers.h */
|
|
1005
1179
|
|
|
1006
1180
|
namespace ada::checkers {
|
|
1007
1181
|
|
|
@@ -1044,6 +1218,64 @@ constexpr bool is_normalized_windows_drive_letter(
|
|
|
1044
1218
|
return input.size() >= 2 && (is_alpha(input[0]) && (input[1] == ':'));
|
|
1045
1219
|
}
|
|
1046
1220
|
|
|
1221
|
+
ada_really_inline constexpr uint64_t try_parse_ipv4_fast(
|
|
1222
|
+
std::string_view input) noexcept {
|
|
1223
|
+
const char* p = input.data();
|
|
1224
|
+
const char* const pend = p + input.size();
|
|
1225
|
+
|
|
1226
|
+
uint32_t ipv4 = 0;
|
|
1227
|
+
|
|
1228
|
+
for (int i = 0; i < 4; ++i) {
|
|
1229
|
+
if (p == pend) {
|
|
1230
|
+
return ipv4_fast_fail;
|
|
1231
|
+
}
|
|
1232
|
+
|
|
1233
|
+
uint32_t val;
|
|
1234
|
+
char c = *p;
|
|
1235
|
+
if (c >= '0' && c <= '9') {
|
|
1236
|
+
val = c - '0';
|
|
1237
|
+
p++;
|
|
1238
|
+
} else {
|
|
1239
|
+
return ipv4_fast_fail;
|
|
1240
|
+
}
|
|
1241
|
+
|
|
1242
|
+
if (p < pend) {
|
|
1243
|
+
c = *p;
|
|
1244
|
+
if (c >= '0' && c <= '9') {
|
|
1245
|
+
if (val == 0) return ipv4_fast_fail;
|
|
1246
|
+
val = val * 10 + (c - '0');
|
|
1247
|
+
p++;
|
|
1248
|
+
if (p < pend) {
|
|
1249
|
+
c = *p;
|
|
1250
|
+
if (c >= '0' && c <= '9') {
|
|
1251
|
+
val = val * 10 + (c - '0');
|
|
1252
|
+
p++;
|
|
1253
|
+
if (val > 255) return ipv4_fast_fail;
|
|
1254
|
+
}
|
|
1255
|
+
}
|
|
1256
|
+
}
|
|
1257
|
+
}
|
|
1258
|
+
|
|
1259
|
+
ipv4 = (ipv4 << 8) | val;
|
|
1260
|
+
|
|
1261
|
+
if (i < 3) {
|
|
1262
|
+
if (p == pend || *p != '.') {
|
|
1263
|
+
return ipv4_fast_fail;
|
|
1264
|
+
}
|
|
1265
|
+
p++;
|
|
1266
|
+
}
|
|
1267
|
+
}
|
|
1268
|
+
|
|
1269
|
+
if (p != pend) {
|
|
1270
|
+
if (p == pend - 1 && *p == '.') {
|
|
1271
|
+
return ipv4;
|
|
1272
|
+
}
|
|
1273
|
+
return ipv4_fast_fail;
|
|
1274
|
+
}
|
|
1275
|
+
|
|
1276
|
+
return ipv4;
|
|
1277
|
+
}
|
|
1278
|
+
|
|
1047
1279
|
} // namespace ada::checkers
|
|
1048
1280
|
|
|
1049
1281
|
#endif // ADA_CHECKERS_INL_H
|
|
@@ -1097,7 +1329,11 @@ constexpr ada_really_inline void log([[maybe_unused]] Args... args) {
|
|
|
1097
1329
|
/* begin file include/ada/encoding_type.h */
|
|
1098
1330
|
/**
|
|
1099
1331
|
* @file encoding_type.h
|
|
1100
|
-
* @brief
|
|
1332
|
+
* @brief Character encoding type definitions.
|
|
1333
|
+
*
|
|
1334
|
+
* Defines the encoding types supported for URL processing.
|
|
1335
|
+
*
|
|
1336
|
+
* @see https://encoding.spec.whatwg.org/
|
|
1101
1337
|
*/
|
|
1102
1338
|
#ifndef ADA_ENCODING_TYPE_H
|
|
1103
1339
|
#define ADA_ENCODING_TYPE_H
|
|
@@ -1107,21 +1343,25 @@ constexpr ada_really_inline void log([[maybe_unused]] Args... args) {
|
|
|
1107
1343
|
namespace ada {
|
|
1108
1344
|
|
|
1109
1345
|
/**
|
|
1110
|
-
*
|
|
1111
|
-
*
|
|
1346
|
+
* @brief Character encoding types for URL processing.
|
|
1347
|
+
*
|
|
1348
|
+
* Specifies the character encoding used for percent-decoding and other
|
|
1349
|
+
* string operations. UTF-8 is the most commonly used encoding for URLs.
|
|
1112
1350
|
*
|
|
1113
1351
|
* @see https://encoding.spec.whatwg.org/#encodings
|
|
1114
1352
|
*/
|
|
1115
1353
|
enum class encoding_type {
|
|
1116
|
-
UTF8,
|
|
1117
|
-
UTF_16LE,
|
|
1118
|
-
UTF_16BE,
|
|
1354
|
+
UTF8, /**< UTF-8 encoding (default for URLs) */
|
|
1355
|
+
UTF_16LE, /**< UTF-16 Little Endian encoding */
|
|
1356
|
+
UTF_16BE, /**< UTF-16 Big Endian encoding */
|
|
1119
1357
|
};
|
|
1120
1358
|
|
|
1121
1359
|
/**
|
|
1122
|
-
*
|
|
1360
|
+
* Converts an encoding_type to its string representation.
|
|
1361
|
+
* @param type The encoding type to convert.
|
|
1362
|
+
* @return A string view of the encoding name.
|
|
1123
1363
|
*/
|
|
1124
|
-
ada_warn_unused std::
|
|
1364
|
+
ada_warn_unused std::string_view to_string(encoding_type type);
|
|
1125
1365
|
|
|
1126
1366
|
} // namespace ada
|
|
1127
1367
|
|
|
@@ -1138,7 +1378,11 @@ ada_warn_unused std::string to_string(encoding_type type);
|
|
|
1138
1378
|
/* begin file include/ada/url_base.h */
|
|
1139
1379
|
/**
|
|
1140
1380
|
* @file url_base.h
|
|
1141
|
-
* @brief
|
|
1381
|
+
* @brief Base class and common definitions for URL types.
|
|
1382
|
+
*
|
|
1383
|
+
* This file defines the `url_base` abstract base class from which both
|
|
1384
|
+
* `ada::url` and `ada::url_aggregator` inherit. It also defines common
|
|
1385
|
+
* enumerations like `url_host_type`.
|
|
1142
1386
|
*/
|
|
1143
1387
|
#ifndef ADA_URL_BASE_H
|
|
1144
1388
|
#define ADA_URL_BASE_H
|
|
@@ -1146,7 +1390,13 @@ ada_warn_unused std::string to_string(encoding_type type);
|
|
|
1146
1390
|
/* begin file include/ada/scheme.h */
|
|
1147
1391
|
/**
|
|
1148
1392
|
* @file scheme.h
|
|
1149
|
-
* @brief
|
|
1393
|
+
* @brief URL scheme type definitions and utilities.
|
|
1394
|
+
*
|
|
1395
|
+
* This header defines the URL scheme types (http, https, etc.) and provides
|
|
1396
|
+
* functions to identify special schemes and their default ports according
|
|
1397
|
+
* to the WHATWG URL Standard.
|
|
1398
|
+
*
|
|
1399
|
+
* @see https://url.spec.whatwg.org/#special-scheme
|
|
1150
1400
|
*/
|
|
1151
1401
|
#ifndef ADA_SCHEME_H
|
|
1152
1402
|
#define ADA_SCHEME_H
|
|
@@ -1156,62 +1406,65 @@ ada_warn_unused std::string to_string(encoding_type type);
|
|
|
1156
1406
|
|
|
1157
1407
|
/**
|
|
1158
1408
|
* @namespace ada::scheme
|
|
1159
|
-
* @brief
|
|
1409
|
+
* @brief URL scheme utilities and constants.
|
|
1410
|
+
*
|
|
1411
|
+
* Provides functions for working with URL schemes, including identification
|
|
1412
|
+
* of special schemes and retrieval of default port numbers.
|
|
1160
1413
|
*/
|
|
1161
1414
|
namespace ada::scheme {
|
|
1162
1415
|
|
|
1163
1416
|
/**
|
|
1164
|
-
*
|
|
1165
|
-
*
|
|
1166
|
-
*
|
|
1167
|
-
*
|
|
1168
|
-
*
|
|
1169
|
-
*
|
|
1170
|
-
*
|
|
1171
|
-
*
|
|
1417
|
+
* @brief Enumeration of URL scheme types.
|
|
1418
|
+
*
|
|
1419
|
+
* Special schemes have specific parsing rules and default ports.
|
|
1420
|
+
* Using an enum allows efficient scheme comparisons without string operations.
|
|
1421
|
+
*
|
|
1422
|
+
* Default ports:
|
|
1423
|
+
* - HTTP: 80
|
|
1424
|
+
* - HTTPS: 443
|
|
1425
|
+
* - WS: 80
|
|
1426
|
+
* - WSS: 443
|
|
1427
|
+
* - FTP: 21
|
|
1428
|
+
* - FILE: (none)
|
|
1172
1429
|
*/
|
|
1173
1430
|
enum type : uint8_t {
|
|
1174
|
-
HTTP = 0,
|
|
1175
|
-
NOT_SPECIAL = 1,
|
|
1176
|
-
HTTPS = 2,
|
|
1177
|
-
WS = 3,
|
|
1178
|
-
FTP = 4,
|
|
1179
|
-
WSS = 5,
|
|
1180
|
-
FILE = 6
|
|
1431
|
+
HTTP = 0, /**< http:// scheme (port 80) */
|
|
1432
|
+
NOT_SPECIAL = 1, /**< Non-special scheme (no default port) */
|
|
1433
|
+
HTTPS = 2, /**< https:// scheme (port 443) */
|
|
1434
|
+
WS = 3, /**< ws:// WebSocket scheme (port 80) */
|
|
1435
|
+
FTP = 4, /**< ftp:// scheme (port 21) */
|
|
1436
|
+
WSS = 5, /**< wss:// secure WebSocket scheme (port 443) */
|
|
1437
|
+
FILE = 6 /**< file:// scheme (no default port) */
|
|
1181
1438
|
};
|
|
1182
1439
|
|
|
1183
1440
|
/**
|
|
1184
|
-
*
|
|
1185
|
-
*
|
|
1186
|
-
*
|
|
1187
|
-
*
|
|
1188
|
-
*
|
|
1189
|
-
* @see https://url.spec.whatwg.org/#url-miscellaneous
|
|
1190
|
-
* @param scheme
|
|
1191
|
-
* @return If scheme is a special scheme
|
|
1441
|
+
* Checks if a scheme string is a special scheme.
|
|
1442
|
+
* @param scheme The scheme string to check (e.g., "http", "https").
|
|
1443
|
+
* @return `true` if the scheme is special, `false` otherwise.
|
|
1444
|
+
* @see https://url.spec.whatwg.org/#special-scheme
|
|
1192
1445
|
*/
|
|
1193
1446
|
ada_really_inline constexpr bool is_special(std::string_view scheme);
|
|
1194
1447
|
|
|
1195
1448
|
/**
|
|
1196
|
-
*
|
|
1197
|
-
*
|
|
1198
|
-
*
|
|
1199
|
-
*
|
|
1200
|
-
*
|
|
1201
|
-
* @see https://url.spec.whatwg.org/#url-miscellaneous
|
|
1202
|
-
* @param scheme
|
|
1203
|
-
* @return The special port
|
|
1449
|
+
* Returns the default port for a special scheme string.
|
|
1450
|
+
* @param scheme The scheme string (e.g., "http", "https").
|
|
1451
|
+
* @return The default port number, or 0 if not a special scheme.
|
|
1452
|
+
* @see https://url.spec.whatwg.org/#special-scheme
|
|
1204
1453
|
*/
|
|
1205
1454
|
constexpr uint16_t get_special_port(std::string_view scheme) noexcept;
|
|
1206
1455
|
|
|
1207
1456
|
/**
|
|
1208
|
-
* Returns the port
|
|
1457
|
+
* Returns the default port for a scheme type.
|
|
1458
|
+
* @param type The scheme type enum value.
|
|
1459
|
+
* @return The default port number, or 0 if not applicable.
|
|
1209
1460
|
* @see https://url.spec.whatwg.org/#special-scheme
|
|
1210
1461
|
*/
|
|
1211
1462
|
constexpr uint16_t get_special_port(ada::scheme::type type) noexcept;
|
|
1463
|
+
|
|
1212
1464
|
/**
|
|
1213
|
-
*
|
|
1214
|
-
*
|
|
1465
|
+
* Converts a scheme string to its type enum.
|
|
1466
|
+
* @param scheme The scheme string to convert.
|
|
1467
|
+
* @return The corresponding scheme type, or NOT_SPECIAL if not recognized.
|
|
1215
1468
|
*/
|
|
1216
1469
|
constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept;
|
|
1217
1470
|
|
|
@@ -1226,112 +1479,112 @@ constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept;
|
|
|
1226
1479
|
namespace ada {
|
|
1227
1480
|
|
|
1228
1481
|
/**
|
|
1229
|
-
*
|
|
1482
|
+
* @brief Enum representing the type of host in a URL.
|
|
1483
|
+
*
|
|
1484
|
+
* Used to distinguish between regular domain names, IPv4 addresses,
|
|
1485
|
+
* and IPv6 addresses for proper parsing and serialization.
|
|
1230
1486
|
*/
|
|
1231
1487
|
enum url_host_type : uint8_t {
|
|
1232
|
-
/**
|
|
1233
|
-
* Represents common URLs such as "https://www.google.com"
|
|
1234
|
-
*/
|
|
1488
|
+
/** Regular domain name (e.g., "www.example.com") */
|
|
1235
1489
|
DEFAULT = 0,
|
|
1236
|
-
/**
|
|
1237
|
-
* Represents ipv4 addresses such as "http://127.0.0.1"
|
|
1238
|
-
*/
|
|
1490
|
+
/** IPv4 address (e.g., "127.0.0.1") */
|
|
1239
1491
|
IPV4 = 1,
|
|
1240
|
-
/**
|
|
1241
|
-
* Represents ipv6 addresses such as
|
|
1242
|
-
* "http://[2001:db8:3333:4444:5555:6666:7777:8888]"
|
|
1243
|
-
*/
|
|
1492
|
+
/** IPv6 address (e.g., "[::1]" or "[2001:db8::1]") */
|
|
1244
1493
|
IPV6 = 2,
|
|
1245
1494
|
};
|
|
1246
1495
|
|
|
1247
1496
|
/**
|
|
1248
|
-
* @brief
|
|
1497
|
+
* @brief Abstract base class for URL representations.
|
|
1249
1498
|
*
|
|
1250
|
-
*
|
|
1251
|
-
*
|
|
1252
|
-
*
|
|
1499
|
+
* The `url_base` class provides the common interface and state shared by
|
|
1500
|
+
* both `ada::url` and `ada::url_aggregator`. It contains basic URL attributes
|
|
1501
|
+
* like validity status and scheme type, but delegates component storage and
|
|
1502
|
+
* access to derived classes.
|
|
1253
1503
|
*
|
|
1254
|
-
*
|
|
1504
|
+
* @note This is an abstract class and cannot be instantiated directly.
|
|
1505
|
+
* Use `ada::url` or `ada::url_aggregator` instead.
|
|
1506
|
+
*
|
|
1507
|
+
* @see url
|
|
1508
|
+
* @see url_aggregator
|
|
1255
1509
|
*/
|
|
1256
1510
|
struct url_base {
|
|
1257
1511
|
virtual ~url_base() = default;
|
|
1258
1512
|
|
|
1259
1513
|
/**
|
|
1260
|
-
*
|
|
1514
|
+
* Indicates whether the URL was successfully parsed.
|
|
1515
|
+
* Set to `false` if parsing failed (e.g., invalid URL syntax).
|
|
1261
1516
|
*/
|
|
1262
1517
|
bool is_valid{true};
|
|
1263
1518
|
|
|
1264
1519
|
/**
|
|
1265
|
-
*
|
|
1520
|
+
* Indicates whether the URL has an opaque path (non-hierarchical).
|
|
1521
|
+
* Opaque paths occur in non-special URLs like `mailto:` or `javascript:`.
|
|
1266
1522
|
*/
|
|
1267
1523
|
bool has_opaque_path{false};
|
|
1268
1524
|
|
|
1269
1525
|
/**
|
|
1270
|
-
* URL
|
|
1526
|
+
* The type of the URL's host (domain, IPv4, or IPv6).
|
|
1271
1527
|
*/
|
|
1272
1528
|
url_host_type host_type = url_host_type::DEFAULT;
|
|
1273
1529
|
|
|
1274
1530
|
/**
|
|
1275
1531
|
* @private
|
|
1532
|
+
* Internal representation of the URL's scheme type.
|
|
1276
1533
|
*/
|
|
1277
1534
|
ada::scheme::type type{ada::scheme::type::NOT_SPECIAL};
|
|
1278
1535
|
|
|
1279
1536
|
/**
|
|
1280
|
-
*
|
|
1281
|
-
*
|
|
1537
|
+
* Checks if the URL has a special scheme (http, https, ws, wss, ftp, file).
|
|
1538
|
+
* Special schemes have specific parsing rules and default ports.
|
|
1539
|
+
* @return `true` if the scheme is special, `false` otherwise.
|
|
1282
1540
|
*/
|
|
1283
1541
|
[[nodiscard]] ada_really_inline constexpr bool is_special() const noexcept;
|
|
1284
1542
|
|
|
1285
1543
|
/**
|
|
1286
|
-
*
|
|
1287
|
-
* origin.
|
|
1288
|
-
* @return a newly allocated string.
|
|
1544
|
+
* Returns the URL's origin (scheme + host + port for special URLs).
|
|
1545
|
+
* @return A newly allocated string containing the serialized origin.
|
|
1289
1546
|
* @see https://url.spec.whatwg.org/#concept-url-origin
|
|
1290
1547
|
*/
|
|
1291
|
-
[[nodiscard]] virtual std::string get_origin() const
|
|
1548
|
+
[[nodiscard]] virtual std::string get_origin() const = 0;
|
|
1292
1549
|
|
|
1293
1550
|
/**
|
|
1294
|
-
*
|
|
1295
|
-
*
|
|
1296
|
-
*
|
|
1551
|
+
* Validates whether the hostname is a valid domain according to RFC 1034.
|
|
1552
|
+
* Checks that the domain and its labels have valid lengths.
|
|
1553
|
+
* @return `true` if the domain is valid, `false` otherwise.
|
|
1297
1554
|
*/
|
|
1298
1555
|
[[nodiscard]] virtual bool has_valid_domain() const noexcept = 0;
|
|
1299
1556
|
|
|
1300
1557
|
/**
|
|
1301
1558
|
* @private
|
|
1302
|
-
*
|
|
1303
|
-
*
|
|
1304
|
-
* Returns 0 otherwise.
|
|
1559
|
+
* Returns the default port for special schemes (e.g., 443 for https).
|
|
1560
|
+
* Returns 0 for file:// URLs or non-special schemes.
|
|
1305
1561
|
*/
|
|
1306
1562
|
[[nodiscard]] inline uint16_t get_special_port() const noexcept;
|
|
1307
1563
|
|
|
1308
1564
|
/**
|
|
1309
1565
|
* @private
|
|
1310
|
-
*
|
|
1311
|
-
* Get the default port if the url's scheme has one, returns 0 otherwise.
|
|
1566
|
+
* Returns the default port for the URL's scheme, or 0 if none.
|
|
1312
1567
|
*/
|
|
1313
1568
|
[[nodiscard]] ada_really_inline uint16_t scheme_default_port() const noexcept;
|
|
1314
1569
|
|
|
1315
1570
|
/**
|
|
1316
1571
|
* @private
|
|
1317
|
-
*
|
|
1318
|
-
*
|
|
1319
|
-
*
|
|
1320
|
-
*
|
|
1321
|
-
* It returns how many bytes were consumed when a number is successfully
|
|
1322
|
-
* parsed.
|
|
1323
|
-
* @return On failure, it returns zero.
|
|
1324
|
-
* @see https://url.spec.whatwg.org/#host-parsing
|
|
1572
|
+
* Parses a port number from the input string.
|
|
1573
|
+
* @param view The string containing the port to parse.
|
|
1574
|
+
* @param check_trailing_content Whether to validate no trailing characters.
|
|
1575
|
+
* @return Number of bytes consumed on success, 0 on failure.
|
|
1325
1576
|
*/
|
|
1326
1577
|
virtual size_t parse_port(std::string_view view,
|
|
1327
|
-
bool check_trailing_content)
|
|
1578
|
+
bool check_trailing_content) = 0;
|
|
1328
1579
|
|
|
1329
|
-
|
|
1580
|
+
/** @private */
|
|
1581
|
+
virtual ada_really_inline size_t parse_port(std::string_view view) {
|
|
1330
1582
|
return this->parse_port(view, false);
|
|
1331
1583
|
}
|
|
1332
1584
|
|
|
1333
1585
|
/**
|
|
1334
|
-
* Returns a JSON string representation of this URL.
|
|
1586
|
+
* Returns a JSON string representation of this URL for debugging.
|
|
1587
|
+
* @return A JSON-formatted string with URL information.
|
|
1335
1588
|
*/
|
|
1336
1589
|
[[nodiscard]] virtual std::string to_string() const = 0;
|
|
1337
1590
|
|
|
@@ -1400,8 +1653,7 @@ ada_really_inline std::optional<std::string_view> prune_hash(
|
|
|
1400
1653
|
* @see https://url.spec.whatwg.org/#shorten-a-urls-path
|
|
1401
1654
|
* @returns Returns true if path is shortened.
|
|
1402
1655
|
*/
|
|
1403
|
-
ada_really_inline bool shorten_path(std::string& path,
|
|
1404
|
-
ada::scheme::type type) noexcept;
|
|
1656
|
+
ada_really_inline bool shorten_path(std::string& path, ada::scheme::type type);
|
|
1405
1657
|
|
|
1406
1658
|
/**
|
|
1407
1659
|
* @private
|
|
@@ -1410,7 +1662,7 @@ ada_really_inline bool shorten_path(std::string& path,
|
|
|
1410
1662
|
* @returns Returns true if path is shortened.
|
|
1411
1663
|
*/
|
|
1412
1664
|
ada_really_inline bool shorten_path(std::string_view& path,
|
|
1413
|
-
ada::scheme::type type)
|
|
1665
|
+
ada::scheme::type type);
|
|
1414
1666
|
|
|
1415
1667
|
/**
|
|
1416
1668
|
* @private
|
|
@@ -1431,15 +1683,14 @@ ada_really_inline void parse_prepared_path(std::string_view input,
|
|
|
1431
1683
|
* @private
|
|
1432
1684
|
* Remove all ASCII tab or newline characters from the input, in place.
|
|
1433
1685
|
*/
|
|
1434
|
-
ada_really_inline void remove_ascii_tab_or_newline(std::string& input)
|
|
1686
|
+
ada_really_inline void remove_ascii_tab_or_newline(std::string& input);
|
|
1435
1687
|
|
|
1436
1688
|
/**
|
|
1437
1689
|
* @private
|
|
1438
1690
|
* Return the substring from input going from index pos to the end.
|
|
1439
|
-
* This function cannot throw.
|
|
1440
1691
|
*/
|
|
1441
1692
|
ada_really_inline constexpr std::string_view substring(std::string_view input,
|
|
1442
|
-
size_t pos)
|
|
1693
|
+
size_t pos);
|
|
1443
1694
|
|
|
1444
1695
|
/**
|
|
1445
1696
|
* @private
|
|
@@ -1454,7 +1705,7 @@ bool overlaps(std::string_view input1, const std::string& input2) noexcept;
|
|
|
1454
1705
|
*/
|
|
1455
1706
|
ada_really_inline constexpr std::string_view substring(std::string_view input,
|
|
1456
1707
|
size_t pos1,
|
|
1457
|
-
size_t pos2)
|
|
1708
|
+
size_t pos2) {
|
|
1458
1709
|
#if ADA_DEVELOPMENT_CHECKS
|
|
1459
1710
|
if (pos2 < pos1) {
|
|
1460
1711
|
std::cerr << "Negative-length substring: [" << pos1 << " to " << pos2 << ")"
|
|
@@ -1493,8 +1744,7 @@ void trim_c0_whitespace(std::string_view& input) noexcept;
|
|
|
1493
1744
|
* https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
|
|
1494
1745
|
*/
|
|
1495
1746
|
template <class url_type>
|
|
1496
|
-
ada_really_inline void strip_trailing_spaces_from_opaque_path(
|
|
1497
|
-
url_type& url) noexcept;
|
|
1747
|
+
ada_really_inline void strip_trailing_spaces_from_opaque_path(url_type& url);
|
|
1498
1748
|
|
|
1499
1749
|
/**
|
|
1500
1750
|
* @private
|
|
@@ -1584,7 +1834,13 @@ inline int fast_digit_count(uint32_t x) noexcept {
|
|
|
1584
1834
|
/* begin file include/ada/parser.h */
|
|
1585
1835
|
/**
|
|
1586
1836
|
* @file parser.h
|
|
1587
|
-
* @brief
|
|
1837
|
+
* @brief Low-level URL parsing functions.
|
|
1838
|
+
*
|
|
1839
|
+
* This header provides the internal URL parsing implementation. Most users
|
|
1840
|
+
* should use `ada::parse()` from implementation.h instead of these functions
|
|
1841
|
+
* directly.
|
|
1842
|
+
*
|
|
1843
|
+
* @see implementation.h for the recommended public API
|
|
1588
1844
|
*/
|
|
1589
1845
|
#ifndef ADA_PARSER_H
|
|
1590
1846
|
#define ADA_PARSER_H
|
|
@@ -2328,6 +2584,7 @@ struct expected_operations_base : expected_storage_base<T, E> {
|
|
|
2328
2584
|
}
|
|
2329
2585
|
|
|
2330
2586
|
template <class Rhs>
|
|
2587
|
+
// NOLINTNEXTLINE(bugprone-exception-escape)
|
|
2331
2588
|
void construct_with(Rhs &&rhs) noexcept {
|
|
2332
2589
|
new (std::addressof(this->m_val)) T(std::forward<Rhs>(rhs).get());
|
|
2333
2590
|
this->m_has_val = true;
|
|
@@ -4113,6 +4370,7 @@ void swap(expected<T, E> &lhs,
|
|
|
4113
4370
|
|
|
4114
4371
|
#endif
|
|
4115
4372
|
/* end file include/ada/expected.h */
|
|
4373
|
+
|
|
4116
4374
|
/* begin file include/ada/url_pattern_regex.h */
|
|
4117
4375
|
/**
|
|
4118
4376
|
* @file url_pattern_regex.h
|
|
@@ -4128,6 +4386,7 @@ void swap(expected<T, E> &lhs,
|
|
|
4128
4386
|
#include <regex>
|
|
4129
4387
|
#endif // ADA_USE_UNSAFE_STD_REGEX_PROVIDER
|
|
4130
4388
|
|
|
4389
|
+
#if ADA_INCLUDE_URL_PATTERN
|
|
4131
4390
|
namespace ada::url_pattern_regex {
|
|
4132
4391
|
|
|
4133
4392
|
template <typename T>
|
|
@@ -4172,7 +4431,7 @@ class std_regex_provider final {
|
|
|
4172
4431
|
#endif // ADA_USE_UNSAFE_STD_REGEX_PROVIDER
|
|
4173
4432
|
|
|
4174
4433
|
} // namespace ada::url_pattern_regex
|
|
4175
|
-
|
|
4434
|
+
#endif // ADA_INCLUDE_URL_PATTERN
|
|
4176
4435
|
#endif // ADA_URL_PATTERN_REGEX_H
|
|
4177
4436
|
/* end file include/ada/url_pattern_regex.h */
|
|
4178
4437
|
/* begin file include/ada/url_pattern_init.h */
|
|
@@ -4186,14 +4445,23 @@ class std_regex_provider final {
|
|
|
4186
4445
|
/* begin file include/ada/errors.h */
|
|
4187
4446
|
/**
|
|
4188
4447
|
* @file errors.h
|
|
4189
|
-
* @brief
|
|
4448
|
+
* @brief Error type definitions for URL parsing.
|
|
4449
|
+
*
|
|
4450
|
+
* Defines the error codes that can be returned when URL parsing fails.
|
|
4190
4451
|
*/
|
|
4191
4452
|
#ifndef ADA_ERRORS_H
|
|
4192
4453
|
#define ADA_ERRORS_H
|
|
4193
4454
|
|
|
4194
4455
|
#include <cstdint>
|
|
4195
4456
|
namespace ada {
|
|
4196
|
-
|
|
4457
|
+
/**
|
|
4458
|
+
* @brief Error codes for URL parsing operations.
|
|
4459
|
+
*
|
|
4460
|
+
* Used with `tl::expected` to indicate why a URL parsing operation failed.
|
|
4461
|
+
*/
|
|
4462
|
+
enum class errors : uint8_t {
|
|
4463
|
+
type_error /**< A type error occurred (e.g., invalid URL syntax). */
|
|
4464
|
+
};
|
|
4197
4465
|
} // namespace ada
|
|
4198
4466
|
#endif // ADA_ERRORS_H
|
|
4199
4467
|
/* end file include/ada/errors.h */
|
|
@@ -4201,11 +4469,13 @@ enum class errors : uint8_t { type_error };
|
|
|
4201
4469
|
#include <string_view>
|
|
4202
4470
|
#include <string>
|
|
4203
4471
|
#include <optional>
|
|
4472
|
+
#include <iostream>
|
|
4204
4473
|
|
|
4205
4474
|
#if ADA_TESTING
|
|
4206
4475
|
#include <iostream>
|
|
4207
4476
|
#endif // ADA_TESTING
|
|
4208
4477
|
|
|
4478
|
+
#if ADA_INCLUDE_URL_PATTERN
|
|
4209
4479
|
namespace ada {
|
|
4210
4480
|
|
|
4211
4481
|
// Important: C++20 allows us to use concept rather than `using` or `typedef
|
|
@@ -4229,10 +4499,21 @@ struct url_pattern_init {
|
|
|
4229
4499
|
pattern,
|
|
4230
4500
|
};
|
|
4231
4501
|
|
|
4502
|
+
friend std::ostream& operator<<(std::ostream& os, process_type type) {
|
|
4503
|
+
switch (type) {
|
|
4504
|
+
case process_type::url:
|
|
4505
|
+
return os << "url";
|
|
4506
|
+
case process_type::pattern:
|
|
4507
|
+
return os << "pattern";
|
|
4508
|
+
default:
|
|
4509
|
+
return os << "unknown";
|
|
4510
|
+
}
|
|
4511
|
+
}
|
|
4512
|
+
|
|
4232
4513
|
// All strings must be valid UTF-8.
|
|
4233
4514
|
// @see https://urlpattern.spec.whatwg.org/#process-a-urlpatterninit
|
|
4234
4515
|
static tl::expected<url_pattern_init, errors> process(
|
|
4235
|
-
url_pattern_init init, process_type type,
|
|
4516
|
+
const url_pattern_init& init, process_type type,
|
|
4236
4517
|
std::optional<std::string_view> protocol = std::nullopt,
|
|
4237
4518
|
std::optional<std::string_view> username = std::nullopt,
|
|
4238
4519
|
std::optional<std::string_view> password = std::nullopt,
|
|
@@ -4309,32 +4590,42 @@ struct url_pattern_init {
|
|
|
4309
4590
|
std::optional<std::string> base_url{};
|
|
4310
4591
|
};
|
|
4311
4592
|
} // namespace ada
|
|
4312
|
-
|
|
4593
|
+
#endif // ADA_INCLUDE_URL_PATTERN
|
|
4313
4594
|
#endif // ADA_URL_PATTERN_INIT_H
|
|
4314
4595
|
/* end file include/ada/url_pattern_init.h */
|
|
4315
4596
|
|
|
4316
|
-
/**
|
|
4317
|
-
* @private
|
|
4318
|
-
*/
|
|
4597
|
+
/** @private Forward declarations */
|
|
4319
4598
|
namespace ada {
|
|
4320
4599
|
struct url_aggregator;
|
|
4321
4600
|
struct url;
|
|
4601
|
+
#if ADA_INCLUDE_URL_PATTERN
|
|
4322
4602
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
4323
4603
|
class url_pattern;
|
|
4324
4604
|
struct url_pattern_options;
|
|
4605
|
+
#endif // ADA_INCLUDE_URL_PATTERN
|
|
4325
4606
|
enum class errors : uint8_t;
|
|
4326
4607
|
} // namespace ada
|
|
4327
4608
|
|
|
4328
4609
|
/**
|
|
4329
4610
|
* @namespace ada::parser
|
|
4330
|
-
* @brief
|
|
4611
|
+
* @brief Internal URL parsing implementation.
|
|
4612
|
+
*
|
|
4613
|
+
* Contains the core URL parsing algorithm as specified by the WHATWG URL
|
|
4614
|
+
* Standard. These functions are used internally by `ada::parse()`.
|
|
4331
4615
|
*/
|
|
4332
4616
|
namespace ada::parser {
|
|
4333
4617
|
/**
|
|
4334
|
-
* Parses a
|
|
4335
|
-
*
|
|
4336
|
-
*
|
|
4337
|
-
*
|
|
4618
|
+
* Parses a URL string into a URL object.
|
|
4619
|
+
*
|
|
4620
|
+
* @tparam result_type The type of URL object to create (url or url_aggregator).
|
|
4621
|
+
*
|
|
4622
|
+
* @param user_input The URL string to parse (must be valid UTF-8).
|
|
4623
|
+
* @param base_url Optional base URL for resolving relative URLs.
|
|
4624
|
+
*
|
|
4625
|
+
* @return The parsed URL object. Check `is_valid` to determine if parsing
|
|
4626
|
+
* succeeded.
|
|
4627
|
+
*
|
|
4628
|
+
* @see https://url.spec.whatwg.org/#concept-basic-url-parser
|
|
4338
4629
|
*/
|
|
4339
4630
|
template <typename result_type = url_aggregator>
|
|
4340
4631
|
result_type parse_url(std::string_view user_input,
|
|
@@ -4354,10 +4645,12 @@ extern template url_aggregator parse_url_impl<url_aggregator>(
|
|
|
4354
4645
|
extern template url parse_url_impl<url>(std::string_view user_input,
|
|
4355
4646
|
const url* base_url);
|
|
4356
4647
|
|
|
4648
|
+
#if ADA_INCLUDE_URL_PATTERN
|
|
4357
4649
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
4358
4650
|
tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
4359
|
-
std::variant<std::string_view, url_pattern_init
|
|
4651
|
+
std::variant<std::string_view, url_pattern_init>&& input,
|
|
4360
4652
|
const std::string_view* base_url, const url_pattern_options* options);
|
|
4653
|
+
#endif // ADA_INCLUDE_URL_PATTERN
|
|
4361
4654
|
|
|
4362
4655
|
} // namespace ada::parser
|
|
4363
4656
|
|
|
@@ -4373,7 +4666,14 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
4373
4666
|
/* begin file include/ada/url_pattern.h */
|
|
4374
4667
|
/**
|
|
4375
4668
|
* @file url_pattern.h
|
|
4376
|
-
* @brief
|
|
4669
|
+
* @brief URLPattern API implementation.
|
|
4670
|
+
*
|
|
4671
|
+
* This header provides the URLPattern API as specified by the WHATWG URL
|
|
4672
|
+
* Pattern Standard. URLPattern allows matching URLs against patterns with
|
|
4673
|
+
* wildcards and named groups, similar to how regular expressions match strings.
|
|
4674
|
+
*
|
|
4675
|
+
* @see https://urlpattern.spec.whatwg.org/
|
|
4676
|
+
* @see https://developer.mozilla.org/en-US/docs/Web/API/URL_Pattern_API
|
|
4377
4677
|
*/
|
|
4378
4678
|
#ifndef ADA_URL_PATTERN_H
|
|
4379
4679
|
#define ADA_URL_PATTERN_H
|
|
@@ -4381,8 +4681,13 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
4381
4681
|
/* begin file include/ada/implementation.h */
|
|
4382
4682
|
/**
|
|
4383
4683
|
* @file implementation.h
|
|
4384
|
-
* @brief
|
|
4385
|
-
*
|
|
4684
|
+
* @brief User-facing functions for URL parsing and manipulation.
|
|
4685
|
+
*
|
|
4686
|
+
* This header provides the primary public API for parsing URLs in Ada.
|
|
4687
|
+
* It includes the main `ada::parse()` function which is the recommended
|
|
4688
|
+
* entry point for most users.
|
|
4689
|
+
*
|
|
4690
|
+
* @see https://url.spec.whatwg.org/#api
|
|
4386
4691
|
*/
|
|
4387
4692
|
#ifndef ADA_IMPLEMENTATION_H
|
|
4388
4693
|
#define ADA_IMPLEMENTATION_H
|
|
@@ -4394,7 +4699,13 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
4394
4699
|
/* begin file include/ada/url.h */
|
|
4395
4700
|
/**
|
|
4396
4701
|
* @file url.h
|
|
4397
|
-
* @brief Declaration for the
|
|
4702
|
+
* @brief Declaration for the `ada::url` class.
|
|
4703
|
+
*
|
|
4704
|
+
* This file contains the `ada::url` struct which represents a parsed URL
|
|
4705
|
+
* using separate `std::string` instances for each component. This
|
|
4706
|
+
* representation is more flexible but uses more memory than `url_aggregator`.
|
|
4707
|
+
*
|
|
4708
|
+
* @see url_aggregator.h for a more memory-efficient alternative
|
|
4398
4709
|
*/
|
|
4399
4710
|
#ifndef ADA_URL_H
|
|
4400
4711
|
#define ADA_URL_H
|
|
@@ -4405,127 +4716,14 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
4405
4716
|
#include <string>
|
|
4406
4717
|
#include <string_view>
|
|
4407
4718
|
|
|
4408
|
-
/* begin file include/ada/checkers.h */
|
|
4409
|
-
/**
|
|
4410
|
-
* @file checkers.h
|
|
4411
|
-
* @brief Declarations for URL specific checkers used within Ada.
|
|
4412
|
-
*/
|
|
4413
|
-
#ifndef ADA_CHECKERS_H
|
|
4414
|
-
#define ADA_CHECKERS_H
|
|
4415
|
-
|
|
4416
|
-
|
|
4417
|
-
#include <cstring>
|
|
4418
|
-
#include <string_view>
|
|
4419
|
-
|
|
4420
|
-
/**
|
|
4421
|
-
* These functions are not part of our public API and may
|
|
4422
|
-
* change at any time.
|
|
4423
|
-
* @private
|
|
4424
|
-
* @namespace ada::checkers
|
|
4425
|
-
* @brief Includes the definitions for validation functions
|
|
4426
|
-
*/
|
|
4427
|
-
namespace ada::checkers {
|
|
4428
|
-
|
|
4429
|
-
/**
|
|
4430
|
-
* @private
|
|
4431
|
-
* Assuming that x is an ASCII letter, this function returns the lower case
|
|
4432
|
-
* equivalent.
|
|
4433
|
-
* @details More likely to be inlined by the compiler and constexpr.
|
|
4434
|
-
*/
|
|
4435
|
-
constexpr char to_lower(char x) noexcept;
|
|
4436
|
-
|
|
4437
|
-
/**
|
|
4438
|
-
* @private
|
|
4439
|
-
* Returns true if the character is an ASCII letter. Equivalent to std::isalpha
|
|
4440
|
-
* but more likely to be inlined by the compiler.
|
|
4441
|
-
*
|
|
4442
|
-
* @attention std::isalpha is not constexpr generally.
|
|
4443
|
-
*/
|
|
4444
|
-
constexpr bool is_alpha(char x) noexcept;
|
|
4445
|
-
|
|
4446
|
-
/**
|
|
4447
|
-
* @private
|
|
4448
|
-
* Check whether a string starts with 0x or 0X. The function is only
|
|
4449
|
-
* safe if input.size() >=2.
|
|
4450
|
-
*
|
|
4451
|
-
* @see has_hex_prefix
|
|
4452
|
-
*/
|
|
4453
|
-
constexpr bool has_hex_prefix_unsafe(std::string_view input);
|
|
4454
|
-
/**
|
|
4455
|
-
* @private
|
|
4456
|
-
* Check whether a string starts with 0x or 0X.
|
|
4457
|
-
*/
|
|
4458
|
-
constexpr bool has_hex_prefix(std::string_view input);
|
|
4459
|
-
|
|
4460
|
-
/**
|
|
4461
|
-
* @private
|
|
4462
|
-
* Check whether x is an ASCII digit. More likely to be inlined than
|
|
4463
|
-
* std::isdigit.
|
|
4464
|
-
*/
|
|
4465
|
-
constexpr bool is_digit(char x) noexcept;
|
|
4466
|
-
|
|
4467
|
-
/**
|
|
4468
|
-
* @private
|
|
4469
|
-
* @details A string starts with a Windows drive letter if all of the following
|
|
4470
|
-
* are true:
|
|
4471
|
-
*
|
|
4472
|
-
* - its length is greater than or equal to 2
|
|
4473
|
-
* - its first two code points are a Windows drive letter
|
|
4474
|
-
* - its length is 2 or its third code point is U+002F (/), U+005C (\), U+003F
|
|
4475
|
-
* (?), or U+0023 (#).
|
|
4476
|
-
*
|
|
4477
|
-
* https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
|
|
4478
|
-
*/
|
|
4479
|
-
inline constexpr bool is_windows_drive_letter(std::string_view input) noexcept;
|
|
4480
|
-
|
|
4481
|
-
/**
|
|
4482
|
-
* @private
|
|
4483
|
-
* @details A normalized Windows drive letter is a Windows drive letter of which
|
|
4484
|
-
* the second code point is U+003A (:).
|
|
4485
|
-
*/
|
|
4486
|
-
inline constexpr bool is_normalized_windows_drive_letter(
|
|
4487
|
-
std::string_view input) noexcept;
|
|
4488
|
-
|
|
4489
|
-
/**
|
|
4490
|
-
* @private
|
|
4491
|
-
* Returns true if an input is an ipv4 address. It is assumed that the string
|
|
4492
|
-
* does not contain uppercase ASCII characters (the input should have been
|
|
4493
|
-
* lowered cased before calling this function) and is not empty.
|
|
4494
|
-
*/
|
|
4495
|
-
ada_really_inline constexpr bool is_ipv4(std::string_view view) noexcept;
|
|
4496
|
-
|
|
4497
|
-
/**
|
|
4498
|
-
* @private
|
|
4499
|
-
* Returns a bitset. If the first bit is set, then at least one character needs
|
|
4500
|
-
* percent encoding. If the second bit is set, a \\ is found. If the third bit
|
|
4501
|
-
* is set then we have a dot. If the fourth bit is set, then we have a percent
|
|
4502
|
-
* character.
|
|
4503
|
-
*/
|
|
4504
|
-
ada_really_inline constexpr uint8_t path_signature(
|
|
4505
|
-
std::string_view input) noexcept;
|
|
4506
|
-
|
|
4507
|
-
/**
|
|
4508
|
-
* @private
|
|
4509
|
-
* Returns true if the length of the domain name and its labels are according to
|
|
4510
|
-
* the specifications. The length of the domain must be 255 octets (253
|
|
4511
|
-
* characters not including the last 2 which are the empty label reserved at the
|
|
4512
|
-
* end). When the empty label is included (a dot at the end), the domain name
|
|
4513
|
-
* can have 254 characters. The length of a label must be at least 1 and at most
|
|
4514
|
-
* 63 characters.
|
|
4515
|
-
* @see section 3.1. of https://www.rfc-editor.org/rfc/rfc1034
|
|
4516
|
-
* @see https://www.unicode.org/reports/tr46/#ToASCII
|
|
4517
|
-
*/
|
|
4518
|
-
ada_really_inline constexpr bool verify_dns_length(
|
|
4519
|
-
std::string_view input) noexcept;
|
|
4520
|
-
|
|
4521
|
-
} // namespace ada::checkers
|
|
4522
|
-
|
|
4523
|
-
#endif // ADA_CHECKERS_H
|
|
4524
|
-
/* end file include/ada/checkers.h */
|
|
4525
4719
|
/* begin file include/ada/url_components.h */
|
|
4526
4720
|
/**
|
|
4527
4721
|
* @file url_components.h
|
|
4528
|
-
* @brief
|
|
4722
|
+
* @brief URL component offset representation for url_aggregator.
|
|
4723
|
+
*
|
|
4724
|
+
* This file defines the `url_components` struct which stores byte offsets
|
|
4725
|
+
* into a URL string buffer. It is used internally by `url_aggregator` to
|
|
4726
|
+
* efficiently locate URL components without storing separate strings.
|
|
4529
4727
|
*/
|
|
4530
4728
|
#ifndef ADA_URL_COMPONENTS_H
|
|
4531
4729
|
#define ADA_URL_COMPONENTS_H
|
|
@@ -4533,14 +4731,32 @@ ada_really_inline constexpr bool verify_dns_length(
|
|
|
4533
4731
|
namespace ada {
|
|
4534
4732
|
|
|
4535
4733
|
/**
|
|
4536
|
-
* @brief URL
|
|
4734
|
+
* @brief Stores byte offsets for URL components within a buffer.
|
|
4537
4735
|
*
|
|
4538
|
-
*
|
|
4539
|
-
*
|
|
4736
|
+
* The `url_components` struct uses 32-bit offsets to track the boundaries
|
|
4737
|
+
* of each URL component within a single string buffer. This enables efficient
|
|
4738
|
+
* component extraction without additional memory allocations.
|
|
4540
4739
|
*
|
|
4541
|
-
*
|
|
4740
|
+
* Component layout in a URL:
|
|
4741
|
+
* ```
|
|
4742
|
+
* https://user:pass@example.com:1234/foo/bar?baz#quux
|
|
4743
|
+
* | | | | ^^^^| | |
|
|
4744
|
+
* | | | | | | | `----- hash_start
|
|
4745
|
+
* | | | | | | `--------- search_start
|
|
4746
|
+
* | | | | | `----------------- pathname_start
|
|
4747
|
+
* | | | | `--------------------- port
|
|
4748
|
+
* | | | `----------------------- host_end
|
|
4749
|
+
* | | `---------------------------------- host_start
|
|
4750
|
+
* | `--------------------------------------- username_end
|
|
4751
|
+
* `--------------------------------------------- protocol_end
|
|
4752
|
+
* ```
|
|
4753
|
+
*
|
|
4754
|
+
* @note The 32-bit offsets limit URLs to 4GB in length.
|
|
4755
|
+
* @note A value of `omitted` (UINT32_MAX) indicates the component is not
|
|
4756
|
+
* present.
|
|
4542
4757
|
*/
|
|
4543
4758
|
struct url_components {
|
|
4759
|
+
/** Sentinel value indicating a component is not present. */
|
|
4544
4760
|
constexpr static uint32_t omitted = uint32_t(-1);
|
|
4545
4761
|
|
|
4546
4762
|
url_components() = default;
|
|
@@ -4550,47 +4766,43 @@ struct url_components {
|
|
|
4550
4766
|
url_components &operator=(const url_components &u) = default;
|
|
4551
4767
|
~url_components() = default;
|
|
4552
4768
|
|
|
4553
|
-
|
|
4554
|
-
* By using 32-bit integers, we implicitly assume that the URL string
|
|
4555
|
-
* cannot exceed 4 GB.
|
|
4556
|
-
*
|
|
4557
|
-
* https://user:pass@example.com:1234/foo/bar?baz#quux
|
|
4558
|
-
* | | | | ^^^^| | |
|
|
4559
|
-
* | | | | | | | `----- hash_start
|
|
4560
|
-
* | | | | | | `--------- search_start
|
|
4561
|
-
* | | | | | `----------------- pathname_start
|
|
4562
|
-
* | | | | `--------------------- port
|
|
4563
|
-
* | | | `----------------------- host_end
|
|
4564
|
-
* | | `---------------------------------- host_start
|
|
4565
|
-
* | `--------------------------------------- username_end
|
|
4566
|
-
* `--------------------------------------------- protocol_end
|
|
4567
|
-
*/
|
|
4769
|
+
/** Offset of the end of the protocol/scheme (position of ':'). */
|
|
4568
4770
|
uint32_t protocol_end{0};
|
|
4771
|
+
|
|
4569
4772
|
/**
|
|
4570
|
-
*
|
|
4571
|
-
*
|
|
4773
|
+
* Offset of the end of the username.
|
|
4774
|
+
* Initialized to 0 (not `omitted`) to simplify username/password getters.
|
|
4572
4775
|
*/
|
|
4573
4776
|
uint32_t username_end{0};
|
|
4777
|
+
|
|
4778
|
+
/** Offset of the start of the host. */
|
|
4574
4779
|
uint32_t host_start{0};
|
|
4780
|
+
|
|
4781
|
+
/** Offset of the end of the host. */
|
|
4575
4782
|
uint32_t host_end{0};
|
|
4783
|
+
|
|
4784
|
+
/** Port number, or `omitted` if no port is specified. */
|
|
4576
4785
|
uint32_t port{omitted};
|
|
4786
|
+
|
|
4787
|
+
/** Offset of the start of the pathname. */
|
|
4577
4788
|
uint32_t pathname_start{0};
|
|
4789
|
+
|
|
4790
|
+
/** Offset of the '?' starting the query, or `omitted` if no query. */
|
|
4578
4791
|
uint32_t search_start{omitted};
|
|
4792
|
+
|
|
4793
|
+
/** Offset of the '#' starting the fragment, or `omitted` if no fragment. */
|
|
4579
4794
|
uint32_t hash_start{omitted};
|
|
4580
4795
|
|
|
4581
4796
|
/**
|
|
4582
|
-
*
|
|
4583
|
-
*
|
|
4584
|
-
*
|
|
4585
|
-
* a lower bound on the possible string length that may match these
|
|
4586
|
-
* offsets.
|
|
4587
|
-
* @return true if the offset values are
|
|
4588
|
-
* consistent with a possible URL string
|
|
4797
|
+
* Validates that offsets are in ascending order and consistent.
|
|
4798
|
+
* Useful for debugging to detect internal corruption.
|
|
4799
|
+
* @return `true` if offsets are consistent, `false` otherwise.
|
|
4589
4800
|
*/
|
|
4590
4801
|
[[nodiscard]] constexpr bool check_offset_consistency() const noexcept;
|
|
4591
4802
|
|
|
4592
4803
|
/**
|
|
4593
|
-
*
|
|
4804
|
+
* Returns a JSON string representation of the offsets for debugging.
|
|
4805
|
+
* @return A JSON-formatted string with all offset values.
|
|
4594
4806
|
*/
|
|
4595
4807
|
[[nodiscard]] std::string to_string() const;
|
|
4596
4808
|
|
|
@@ -4613,15 +4825,26 @@ struct url_aggregator;
|
|
|
4613
4825
|
// }
|
|
4614
4826
|
|
|
4615
4827
|
/**
|
|
4616
|
-
* @brief
|
|
4828
|
+
* @brief Represents a parsed URL with individual string components.
|
|
4617
4829
|
*
|
|
4618
|
-
*
|
|
4619
|
-
*
|
|
4620
|
-
*
|
|
4621
|
-
*
|
|
4622
|
-
* structure heavier and more reliant on memory allocations. When getting
|
|
4623
|
-
* components from the parsed URL, a new std::string is typically constructed.
|
|
4830
|
+
* The `url` struct stores each URL component (scheme, username, password,
|
|
4831
|
+
* host, port, path, query, fragment) as a separate `std::string`. This
|
|
4832
|
+
* provides flexibility but incurs more memory allocations compared to
|
|
4833
|
+
* `url_aggregator`.
|
|
4624
4834
|
*
|
|
4835
|
+
* **When to use `ada::url`:**
|
|
4836
|
+
* - When you need to frequently modify individual URL components
|
|
4837
|
+
* - When you want independent ownership of component strings
|
|
4838
|
+
*
|
|
4839
|
+
* **When to use `ada::url_aggregator` instead:**
|
|
4840
|
+
* - For read-mostly operations on parsed URLs
|
|
4841
|
+
* - When memory efficiency is important
|
|
4842
|
+
* - When you only need string_view access to components
|
|
4843
|
+
*
|
|
4844
|
+
* @note This type is returned when parsing with `ada::parse<ada::url>()`.
|
|
4845
|
+
* By default, `ada::parse()` returns `ada::url_aggregator`.
|
|
4846
|
+
*
|
|
4847
|
+
* @see url_aggregator For a more memory-efficient URL representation
|
|
4625
4848
|
* @see https://url.spec.whatwg.org/#url-representation
|
|
4626
4849
|
*/
|
|
4627
4850
|
struct url : url_base {
|
|
@@ -4680,177 +4903,217 @@ struct url : url_base {
|
|
|
4680
4903
|
*/
|
|
4681
4904
|
std::optional<std::string> hash{};
|
|
4682
4905
|
|
|
4683
|
-
/**
|
|
4906
|
+
/**
|
|
4907
|
+
* Checks if the URL has an empty hostname (host is set but empty string).
|
|
4908
|
+
* @return `true` if host exists but is empty, `false` otherwise.
|
|
4909
|
+
*/
|
|
4684
4910
|
[[nodiscard]] inline bool has_empty_hostname() const noexcept;
|
|
4685
|
-
|
|
4911
|
+
|
|
4912
|
+
/**
|
|
4913
|
+
* Checks if the URL has a non-default port explicitly specified.
|
|
4914
|
+
* @return `true` if a port is present, `false` otherwise.
|
|
4915
|
+
*/
|
|
4686
4916
|
[[nodiscard]] inline bool has_port() const noexcept;
|
|
4687
|
-
|
|
4917
|
+
|
|
4918
|
+
/**
|
|
4919
|
+
* Checks if the URL has a hostname (including empty hostnames).
|
|
4920
|
+
* @return `true` if host is present, `false` otherwise.
|
|
4921
|
+
*/
|
|
4688
4922
|
[[nodiscard]] inline bool has_hostname() const noexcept;
|
|
4923
|
+
|
|
4924
|
+
/**
|
|
4925
|
+
* Validates whether the hostname is a valid domain according to RFC 1034.
|
|
4926
|
+
* Checks that the domain and its labels have valid lengths (max 255 octets
|
|
4927
|
+
* total, max 63 octets per label).
|
|
4928
|
+
* @return `true` if the domain is valid, `false` otherwise.
|
|
4929
|
+
*/
|
|
4689
4930
|
[[nodiscard]] bool has_valid_domain() const noexcept override;
|
|
4690
4931
|
|
|
4691
4932
|
/**
|
|
4692
|
-
* Returns a JSON string representation of this URL.
|
|
4933
|
+
* Returns a JSON string representation of this URL for debugging.
|
|
4934
|
+
* @return A JSON-formatted string with all URL components.
|
|
4693
4935
|
*/
|
|
4694
4936
|
[[nodiscard]] std::string to_string() const override;
|
|
4695
4937
|
|
|
4696
4938
|
/**
|
|
4939
|
+
* Returns the full serialized URL (the href).
|
|
4940
|
+
* @return The complete URL string (allocates a new string).
|
|
4697
4941
|
* @see https://url.spec.whatwg.org/#dom-url-href
|
|
4698
|
-
* @see https://url.spec.whatwg.org/#concept-url-serializer
|
|
4699
4942
|
*/
|
|
4700
|
-
[[nodiscard]] ada_really_inline std::string get_href() const
|
|
4943
|
+
[[nodiscard]] ada_really_inline std::string get_href() const;
|
|
4701
4944
|
|
|
4702
4945
|
/**
|
|
4703
|
-
*
|
|
4704
|
-
*
|
|
4705
|
-
* @return
|
|
4946
|
+
* Returns the URL's origin as a string (scheme + host + port for special
|
|
4947
|
+
* URLs).
|
|
4948
|
+
* @return A newly allocated string containing the serialized origin.
|
|
4706
4949
|
* @see https://url.spec.whatwg.org/#concept-url-origin
|
|
4707
4950
|
*/
|
|
4708
|
-
[[nodiscard]] std::string get_origin() const
|
|
4951
|
+
[[nodiscard]] std::string get_origin() const override;
|
|
4709
4952
|
|
|
4710
4953
|
/**
|
|
4711
|
-
*
|
|
4712
|
-
*
|
|
4713
|
-
* @return a newly allocated string.
|
|
4954
|
+
* Returns the URL's scheme followed by a colon (e.g., "https:").
|
|
4955
|
+
* @return A newly allocated string with the protocol.
|
|
4714
4956
|
* @see https://url.spec.whatwg.org/#dom-url-protocol
|
|
4715
4957
|
*/
|
|
4716
|
-
[[nodiscard]] std::string get_protocol() const
|
|
4958
|
+
[[nodiscard]] std::string get_protocol() const;
|
|
4717
4959
|
|
|
4718
4960
|
/**
|
|
4719
|
-
*
|
|
4720
|
-
*
|
|
4721
|
-
*
|
|
4722
|
-
* @return a newly allocated string.
|
|
4961
|
+
* Returns the URL's host and port (e.g., "example.com:8080").
|
|
4962
|
+
* If no port is set, returns just the host. Returns empty string if no host.
|
|
4963
|
+
* @return A newly allocated string with host:port.
|
|
4723
4964
|
* @see https://url.spec.whatwg.org/#dom-url-host
|
|
4724
4965
|
*/
|
|
4725
|
-
[[nodiscard]] std::string get_host() const
|
|
4966
|
+
[[nodiscard]] std::string get_host() const;
|
|
4726
4967
|
|
|
4727
4968
|
/**
|
|
4728
|
-
*
|
|
4729
|
-
*
|
|
4730
|
-
* @return
|
|
4969
|
+
* Returns the URL's hostname (without port).
|
|
4970
|
+
* Returns empty string if no host is set.
|
|
4971
|
+
* @return A newly allocated string with the hostname.
|
|
4731
4972
|
* @see https://url.spec.whatwg.org/#dom-url-hostname
|
|
4732
4973
|
*/
|
|
4733
|
-
[[nodiscard]] std::string get_hostname() const
|
|
4974
|
+
[[nodiscard]] std::string get_hostname() const;
|
|
4734
4975
|
|
|
4735
4976
|
/**
|
|
4736
|
-
*
|
|
4737
|
-
*
|
|
4738
|
-
* @return a newly allocated string.
|
|
4977
|
+
* Returns the URL's path component.
|
|
4978
|
+
* @return A string_view pointing to the path.
|
|
4739
4979
|
* @see https://url.spec.whatwg.org/#dom-url-pathname
|
|
4740
4980
|
*/
|
|
4741
4981
|
[[nodiscard]] constexpr std::string_view get_pathname() const noexcept;
|
|
4742
4982
|
|
|
4743
4983
|
/**
|
|
4744
|
-
*
|
|
4745
|
-
*
|
|
4746
|
-
* @return size of the pathname in bytes
|
|
4984
|
+
* Returns the byte length of the pathname without creating a string.
|
|
4985
|
+
* @return Size of the pathname in bytes.
|
|
4747
4986
|
* @see https://url.spec.whatwg.org/#dom-url-pathname
|
|
4748
4987
|
*/
|
|
4749
4988
|
[[nodiscard]] ada_really_inline size_t get_pathname_length() const noexcept;
|
|
4750
4989
|
|
|
4751
4990
|
/**
|
|
4752
|
-
*
|
|
4753
|
-
*
|
|
4991
|
+
* Returns the URL's query string prefixed with '?' (e.g., "?foo=bar").
|
|
4992
|
+
* Returns empty string if no query is set.
|
|
4993
|
+
* @return A newly allocated string with the search/query.
|
|
4754
4994
|
* @see https://url.spec.whatwg.org/#dom-url-search
|
|
4755
4995
|
*/
|
|
4756
|
-
[[nodiscard]] std::string get_search() const
|
|
4996
|
+
[[nodiscard]] std::string get_search() const;
|
|
4757
4997
|
|
|
4758
4998
|
/**
|
|
4759
|
-
*
|
|
4760
|
-
* @return
|
|
4999
|
+
* Returns the URL's username component.
|
|
5000
|
+
* @return A constant reference to the username string.
|
|
4761
5001
|
* @see https://url.spec.whatwg.org/#dom-url-username
|
|
4762
5002
|
*/
|
|
4763
5003
|
[[nodiscard]] const std::string &get_username() const noexcept;
|
|
4764
5004
|
|
|
4765
5005
|
/**
|
|
4766
|
-
*
|
|
5006
|
+
* Sets the URL's username, percent-encoding special characters.
|
|
5007
|
+
* @param input The new username value.
|
|
5008
|
+
* @return `true` on success, `false` if the URL cannot have credentials.
|
|
4767
5009
|
* @see https://url.spec.whatwg.org/#dom-url-username
|
|
4768
5010
|
*/
|
|
4769
5011
|
bool set_username(std::string_view input);
|
|
4770
5012
|
|
|
4771
5013
|
/**
|
|
4772
|
-
*
|
|
5014
|
+
* Sets the URL's password, percent-encoding special characters.
|
|
5015
|
+
* @param input The new password value.
|
|
5016
|
+
* @return `true` on success, `false` if the URL cannot have credentials.
|
|
4773
5017
|
* @see https://url.spec.whatwg.org/#dom-url-password
|
|
4774
5018
|
*/
|
|
4775
5019
|
bool set_password(std::string_view input);
|
|
4776
5020
|
|
|
4777
5021
|
/**
|
|
4778
|
-
*
|
|
5022
|
+
* Sets the URL's port from a string (e.g., "8080").
|
|
5023
|
+
* @param input The port string. Empty string removes the port.
|
|
5024
|
+
* @return `true` on success, `false` if the URL cannot have a port.
|
|
4779
5025
|
* @see https://url.spec.whatwg.org/#dom-url-port
|
|
4780
5026
|
*/
|
|
4781
5027
|
bool set_port(std::string_view input);
|
|
4782
5028
|
|
|
4783
5029
|
/**
|
|
4784
|
-
*
|
|
5030
|
+
* Sets the URL's fragment/hash (the part after '#').
|
|
5031
|
+
* @param input The new hash value (with or without leading '#').
|
|
4785
5032
|
* @see https://url.spec.whatwg.org/#dom-url-hash
|
|
4786
5033
|
*/
|
|
4787
5034
|
void set_hash(std::string_view input);
|
|
4788
5035
|
|
|
4789
5036
|
/**
|
|
4790
|
-
*
|
|
5037
|
+
* Sets the URL's query string (the part after '?').
|
|
5038
|
+
* @param input The new query value (with or without leading '?').
|
|
4791
5039
|
* @see https://url.spec.whatwg.org/#dom-url-search
|
|
4792
5040
|
*/
|
|
4793
5041
|
void set_search(std::string_view input);
|
|
4794
5042
|
|
|
4795
5043
|
/**
|
|
4796
|
-
*
|
|
4797
|
-
* @
|
|
5044
|
+
* Sets the URL's pathname.
|
|
5045
|
+
* @param input The new path value.
|
|
5046
|
+
* @return `true` on success, `false` if the URL has an opaque path.
|
|
5047
|
+
* @see https://url.spec.whatwg.org/#dom-url-pathname
|
|
4798
5048
|
*/
|
|
4799
5049
|
bool set_pathname(std::string_view input);
|
|
4800
5050
|
|
|
4801
5051
|
/**
|
|
4802
|
-
*
|
|
5052
|
+
* Sets the URL's host (hostname and optionally port).
|
|
5053
|
+
* @param input The new host value (e.g., "example.com:8080").
|
|
5054
|
+
* @return `true` on success, `false` if parsing fails.
|
|
4803
5055
|
* @see https://url.spec.whatwg.org/#dom-url-host
|
|
4804
5056
|
*/
|
|
4805
5057
|
bool set_host(std::string_view input);
|
|
4806
5058
|
|
|
4807
5059
|
/**
|
|
4808
|
-
*
|
|
5060
|
+
* Sets the URL's hostname (without port).
|
|
5061
|
+
* @param input The new hostname value.
|
|
5062
|
+
* @return `true` on success, `false` if parsing fails.
|
|
4809
5063
|
* @see https://url.spec.whatwg.org/#dom-url-hostname
|
|
4810
5064
|
*/
|
|
4811
5065
|
bool set_hostname(std::string_view input);
|
|
4812
5066
|
|
|
4813
5067
|
/**
|
|
4814
|
-
*
|
|
5068
|
+
* Sets the URL's protocol/scheme.
|
|
5069
|
+
* @param input The new protocol (with or without trailing ':').
|
|
5070
|
+
* @return `true` on success, `false` if the scheme is invalid.
|
|
4815
5071
|
* @see https://url.spec.whatwg.org/#dom-url-protocol
|
|
4816
5072
|
*/
|
|
4817
5073
|
bool set_protocol(std::string_view input);
|
|
4818
5074
|
|
|
4819
5075
|
/**
|
|
5076
|
+
* Replaces the entire URL by parsing a new href string.
|
|
5077
|
+
* @param input The new URL string to parse.
|
|
5078
|
+
* @return `true` on success, `false` if parsing fails.
|
|
4820
5079
|
* @see https://url.spec.whatwg.org/#dom-url-href
|
|
4821
5080
|
*/
|
|
4822
5081
|
bool set_href(std::string_view input);
|
|
4823
5082
|
|
|
4824
5083
|
/**
|
|
4825
|
-
*
|
|
4826
|
-
* @return
|
|
5084
|
+
* Returns the URL's password component.
|
|
5085
|
+
* @return A constant reference to the password string.
|
|
4827
5086
|
* @see https://url.spec.whatwg.org/#dom-url-password
|
|
4828
5087
|
*/
|
|
4829
5088
|
[[nodiscard]] const std::string &get_password() const noexcept;
|
|
4830
5089
|
|
|
4831
5090
|
/**
|
|
4832
|
-
*
|
|
4833
|
-
*
|
|
5091
|
+
* Returns the URL's port as a string (e.g., "8080").
|
|
5092
|
+
* Returns empty string if no port is set.
|
|
5093
|
+
* @return A newly allocated string with the port.
|
|
4834
5094
|
* @see https://url.spec.whatwg.org/#dom-url-port
|
|
4835
5095
|
*/
|
|
4836
|
-
[[nodiscard]] std::string get_port() const
|
|
5096
|
+
[[nodiscard]] std::string get_port() const;
|
|
4837
5097
|
|
|
4838
5098
|
/**
|
|
4839
|
-
*
|
|
4840
|
-
*
|
|
5099
|
+
* Returns the URL's fragment prefixed with '#' (e.g., "#section").
|
|
5100
|
+
* Returns empty string if no fragment is set.
|
|
5101
|
+
* @return A newly allocated string with the hash.
|
|
4841
5102
|
* @see https://url.spec.whatwg.org/#dom-url-hash
|
|
4842
5103
|
*/
|
|
4843
|
-
[[nodiscard]] std::string get_hash() const
|
|
5104
|
+
[[nodiscard]] std::string get_hash() const;
|
|
4844
5105
|
|
|
4845
5106
|
/**
|
|
4846
|
-
*
|
|
4847
|
-
*
|
|
5107
|
+
* Checks if the URL has credentials (non-empty username or password).
|
|
5108
|
+
* @return `true` if username or password is non-empty, `false` otherwise.
|
|
4848
5109
|
*/
|
|
4849
5110
|
[[nodiscard]] ada_really_inline bool has_credentials() const noexcept;
|
|
4850
5111
|
|
|
4851
5112
|
/**
|
|
4852
|
-
*
|
|
5113
|
+
* Returns the URL component offsets for efficient serialization.
|
|
4853
5114
|
*
|
|
5115
|
+
* The components represent byte offsets into the serialized URL:
|
|
5116
|
+
* ```
|
|
4854
5117
|
* https://user:pass@example.com:1234/foo/bar?baz#quux
|
|
4855
5118
|
* | | | | ^^^^| | |
|
|
4856
5119
|
* | | | | | | | `----- hash_start
|
|
@@ -4861,19 +5124,23 @@ struct url : url_base {
|
|
|
4861
5124
|
* | | `---------------------------------- host_start
|
|
4862
5125
|
* | `--------------------------------------- username_end
|
|
4863
5126
|
* `--------------------------------------------- protocol_end
|
|
4864
|
-
*
|
|
4865
|
-
*
|
|
4866
|
-
*
|
|
4867
|
-
* @return a newly constructed component.
|
|
4868
|
-
*
|
|
4869
|
-
* @see
|
|
4870
|
-
* https://github.com/servo/rust-url/blob/b65a45515c10713f6d212e6726719a020203cc98/url/src/quirks.rs#L31
|
|
5127
|
+
* ```
|
|
5128
|
+
* @return A newly constructed url_components struct.
|
|
5129
|
+
* @see https://github.com/servo/rust-url
|
|
4871
5130
|
*/
|
|
4872
5131
|
[[nodiscard]] ada_really_inline ada::url_components get_components()
|
|
4873
5132
|
const noexcept;
|
|
4874
|
-
|
|
5133
|
+
|
|
5134
|
+
/**
|
|
5135
|
+
* Checks if the URL has a fragment/hash component.
|
|
5136
|
+
* @return `true` if hash is present, `false` otherwise.
|
|
5137
|
+
*/
|
|
4875
5138
|
[[nodiscard]] constexpr bool has_hash() const noexcept override;
|
|
4876
|
-
|
|
5139
|
+
|
|
5140
|
+
/**
|
|
5141
|
+
* Checks if the URL has a query/search component.
|
|
5142
|
+
* @return `true` if query is present, `false` otherwise.
|
|
5143
|
+
*/
|
|
4877
5144
|
[[nodiscard]] constexpr bool has_search() const noexcept override;
|
|
4878
5145
|
|
|
4879
5146
|
private:
|
|
@@ -4882,7 +5149,7 @@ struct url : url_base {
|
|
|
4882
5149
|
friend ada::url_aggregator ada::parser::parse_url<ada::url_aggregator>(
|
|
4883
5150
|
std::string_view, const ada::url_aggregator *);
|
|
4884
5151
|
friend void ada::helpers::strip_trailing_spaces_from_opaque_path<ada::url>(
|
|
4885
|
-
ada::url &url)
|
|
5152
|
+
ada::url &url);
|
|
4886
5153
|
|
|
4887
5154
|
friend ada::url ada::parser::parse_url_impl<ada::url, true>(std::string_view,
|
|
4888
5155
|
const ada::url *);
|
|
@@ -4989,7 +5256,7 @@ struct url : url_base {
|
|
|
4989
5256
|
* Take the scheme from another URL. The scheme string is moved from the
|
|
4990
5257
|
* provided url.
|
|
4991
5258
|
*/
|
|
4992
|
-
constexpr void copy_scheme(ada::url &&u)
|
|
5259
|
+
constexpr void copy_scheme(ada::url &&u);
|
|
4993
5260
|
|
|
4994
5261
|
/**
|
|
4995
5262
|
* Take the scheme from another URL. The scheme string is copied from the
|
|
@@ -5007,17 +5274,70 @@ inline std::ostream &operator<<(std::ostream &out, const ada::url &u);
|
|
|
5007
5274
|
|
|
5008
5275
|
namespace ada {
|
|
5009
5276
|
|
|
5277
|
+
/**
|
|
5278
|
+
* Result type for URL parsing operations.
|
|
5279
|
+
*
|
|
5280
|
+
* Uses `tl::expected` to represent either a successfully parsed URL or an
|
|
5281
|
+
* error. This allows for exception-free error handling.
|
|
5282
|
+
*
|
|
5283
|
+
* @tparam result_type The URL type to return (default: `ada::url_aggregator`)
|
|
5284
|
+
*
|
|
5285
|
+
* @example
|
|
5286
|
+
* ```cpp
|
|
5287
|
+
* ada::result<ada::url_aggregator> result = ada::parse("https://example.com");
|
|
5288
|
+
* if (result) {
|
|
5289
|
+
* // Success: use result.value() or *result
|
|
5290
|
+
* } else {
|
|
5291
|
+
* // Error: handle result.error()
|
|
5292
|
+
* }
|
|
5293
|
+
* ```
|
|
5294
|
+
*/
|
|
5010
5295
|
template <class result_type = ada::url_aggregator>
|
|
5011
5296
|
using result = tl::expected<result_type, ada::errors>;
|
|
5012
5297
|
|
|
5013
5298
|
/**
|
|
5014
|
-
*
|
|
5015
|
-
*
|
|
5016
|
-
*
|
|
5299
|
+
* Parses a URL string according to the WHATWG URL Standard.
|
|
5300
|
+
*
|
|
5301
|
+
* This is the main entry point for URL parsing in Ada. The function takes
|
|
5302
|
+
* a string input and optionally a base URL for resolving relative URLs.
|
|
5303
|
+
*
|
|
5304
|
+
* @tparam result_type The URL type to return. Can be either `ada::url` or
|
|
5305
|
+
* `ada::url_aggregator` (default). The `url_aggregator` type is more
|
|
5306
|
+
* memory-efficient as it stores components as offsets into a single
|
|
5307
|
+
* buffer.
|
|
5308
|
+
*
|
|
5309
|
+
* @param input The URL string to parse. Must be valid ASCII or UTF-8 encoded.
|
|
5310
|
+
* Leading and trailing whitespace is automatically trimmed.
|
|
5311
|
+
* @param base_url Optional pointer to a base URL for resolving relative URLs.
|
|
5312
|
+
* If nullptr (default), only absolute URLs can be parsed successfully.
|
|
5313
|
+
*
|
|
5314
|
+
* @return A `result<result_type>` containing either the parsed URL on success,
|
|
5315
|
+
* or an error code on failure. Use the boolean conversion or
|
|
5316
|
+
* `has_value()` to check for success.
|
|
5317
|
+
*
|
|
5318
|
+
* @note The parser is fully compliant with the WHATWG URL Standard.
|
|
5017
5319
|
*
|
|
5018
|
-
* @
|
|
5019
|
-
*
|
|
5020
|
-
*
|
|
5320
|
+
* @example
|
|
5321
|
+
* ```cpp
|
|
5322
|
+
* // Parse an absolute URL
|
|
5323
|
+
* auto url = ada::parse("https://user:pass@example.com:8080/path?query#hash");
|
|
5324
|
+
* if (url) {
|
|
5325
|
+
* std::cout << url->get_hostname(); // "example.com"
|
|
5326
|
+
* std::cout << url->get_pathname(); // "/path"
|
|
5327
|
+
* }
|
|
5328
|
+
*
|
|
5329
|
+
* // Parse a relative URL with a base
|
|
5330
|
+
* auto base = ada::parse("https://example.com/dir/");
|
|
5331
|
+
* if (base) {
|
|
5332
|
+
* auto relative = ada::parse("../other/page", &*base);
|
|
5333
|
+
* if (relative) {
|
|
5334
|
+
* std::cout << relative->get_href(); //
|
|
5335
|
+
* "https://example.com/other/page"
|
|
5336
|
+
* }
|
|
5337
|
+
* }
|
|
5338
|
+
* ```
|
|
5339
|
+
*
|
|
5340
|
+
* @see https://url.spec.whatwg.org/#url-parsing
|
|
5021
5341
|
*/
|
|
5022
5342
|
template <class result_type = ada::url_aggregator>
|
|
5023
5343
|
ada_warn_unused ada::result<result_type> parse(
|
|
@@ -5029,33 +5349,73 @@ extern template ada::result<url_aggregator> parse<url_aggregator>(
|
|
|
5029
5349
|
std::string_view input, const url_aggregator* base_url);
|
|
5030
5350
|
|
|
5031
5351
|
/**
|
|
5032
|
-
*
|
|
5033
|
-
*
|
|
5352
|
+
* Checks whether a URL string can be successfully parsed.
|
|
5353
|
+
*
|
|
5354
|
+
* This is a fast validation function that checks if a URL string is valid
|
|
5355
|
+
* according to the WHATWG URL Standard without fully constructing a URL
|
|
5356
|
+
* object. Use this when you only need to validate URLs without needing
|
|
5357
|
+
* their parsed components.
|
|
5358
|
+
*
|
|
5359
|
+
* @param input The URL string to validate. Must be valid ASCII or UTF-8.
|
|
5360
|
+
* @param base_input Optional pointer to a base URL string for resolving
|
|
5361
|
+
* relative URLs. If nullptr (default), the input is validated as
|
|
5362
|
+
* an absolute URL.
|
|
5363
|
+
*
|
|
5364
|
+
* @return `true` if the URL can be parsed successfully, `false` otherwise.
|
|
5365
|
+
*
|
|
5366
|
+
* @example
|
|
5367
|
+
* ```cpp
|
|
5368
|
+
* // Check absolute URL
|
|
5369
|
+
* bool valid = ada::can_parse("https://example.com"); // true
|
|
5370
|
+
* bool invalid = ada::can_parse("not a url"); // false
|
|
5371
|
+
*
|
|
5372
|
+
* // Check relative URL with base
|
|
5373
|
+
* std::string_view base = "https://example.com/";
|
|
5374
|
+
* bool relative_valid = ada::can_parse("../path", &base); // true
|
|
5375
|
+
* ```
|
|
5376
|
+
*
|
|
5034
5377
|
* @see https://url.spec.whatwg.org/#dom-url-canparse
|
|
5035
|
-
* @return If URL can be parsed or not.
|
|
5036
5378
|
*/
|
|
5037
5379
|
bool can_parse(std::string_view input,
|
|
5038
5380
|
const std::string_view* base_input = nullptr);
|
|
5039
5381
|
|
|
5382
|
+
#if ADA_INCLUDE_URL_PATTERN
|
|
5040
5383
|
/**
|
|
5041
|
-
*
|
|
5042
|
-
*
|
|
5384
|
+
* Parses a URL pattern according to the URLPattern specification.
|
|
5385
|
+
*
|
|
5386
|
+
* URL patterns provide a syntax for matching URLs against patterns, similar
|
|
5387
|
+
* to how regular expressions match strings. This is useful for routing and
|
|
5388
|
+
* URL-based dispatching.
|
|
5389
|
+
*
|
|
5390
|
+
* @tparam regex_provider The regex implementation to use for pattern matching.
|
|
5043
5391
|
*
|
|
5044
|
-
* @param input valid UTF-8
|
|
5045
|
-
*
|
|
5046
|
-
* @param
|
|
5047
|
-
*
|
|
5392
|
+
* @param input Either a URL pattern string (valid UTF-8) or a URLPatternInit
|
|
5393
|
+
* struct specifying individual component patterns.
|
|
5394
|
+
* @param base_url Optional pointer to a base URL string (valid UTF-8) for
|
|
5395
|
+
* resolving relative patterns.
|
|
5396
|
+
* @param options Optional pointer to configuration options (e.g., ignore_case).
|
|
5397
|
+
*
|
|
5398
|
+
* @return A `tl::expected` containing either the parsed url_pattern on success,
|
|
5399
|
+
* or an error code on failure.
|
|
5400
|
+
*
|
|
5401
|
+
* @see https://urlpattern.spec.whatwg.org
|
|
5048
5402
|
*/
|
|
5049
5403
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
5050
5404
|
ada_warn_unused tl::expected<url_pattern<regex_provider>, errors>
|
|
5051
|
-
parse_url_pattern(std::variant<std::string_view, url_pattern_init
|
|
5405
|
+
parse_url_pattern(std::variant<std::string_view, url_pattern_init>&& input,
|
|
5052
5406
|
const std::string_view* base_url = nullptr,
|
|
5053
5407
|
const url_pattern_options* options = nullptr);
|
|
5408
|
+
#endif // ADA_INCLUDE_URL_PATTERN
|
|
5054
5409
|
|
|
5055
5410
|
/**
|
|
5056
|
-
*
|
|
5057
|
-
*
|
|
5058
|
-
*
|
|
5411
|
+
* Converts a file system path to a file:// URL.
|
|
5412
|
+
*
|
|
5413
|
+
* Creates a properly formatted file URL from a local file system path.
|
|
5414
|
+
* Handles platform-specific path separators and percent-encoding.
|
|
5415
|
+
*
|
|
5416
|
+
* @param path The file system path to convert. Must be valid ASCII or UTF-8.
|
|
5417
|
+
*
|
|
5418
|
+
* @return A file:// URL string representing the given path.
|
|
5059
5419
|
*/
|
|
5060
5420
|
std::string href_from_file(std::string_view path);
|
|
5061
5421
|
} // namespace ada
|
|
@@ -5074,6 +5434,7 @@ std::string href_from_file(std::string_view path);
|
|
|
5074
5434
|
#include <iostream>
|
|
5075
5435
|
#endif // ADA_TESTING
|
|
5076
5436
|
|
|
5437
|
+
#if ADA_INCLUDE_URL_PATTERN
|
|
5077
5438
|
namespace ada {
|
|
5078
5439
|
|
|
5079
5440
|
enum class url_pattern_part_type : uint8_t {
|
|
@@ -5090,6 +5451,19 @@ enum class url_pattern_part_type : uint8_t {
|
|
|
5090
5451
|
FULL_WILDCARD,
|
|
5091
5452
|
};
|
|
5092
5453
|
|
|
5454
|
+
// Pattern type for fast-path matching optimization.
|
|
5455
|
+
// This allows skipping expensive regex evaluation for common simple patterns.
|
|
5456
|
+
enum class url_pattern_component_type : uint8_t {
|
|
5457
|
+
// Pattern is "^$" - only matches empty string
|
|
5458
|
+
EMPTY,
|
|
5459
|
+
// Pattern is "^<literal>$" - exact string match (no regex needed)
|
|
5460
|
+
EXACT_MATCH,
|
|
5461
|
+
// Pattern is "^(.*)$" - matches anything (full wildcard)
|
|
5462
|
+
FULL_WILDCARD,
|
|
5463
|
+
// Pattern requires actual regex evaluation
|
|
5464
|
+
REGEXP,
|
|
5465
|
+
};
|
|
5466
|
+
|
|
5093
5467
|
enum class url_pattern_part_modifier : uint8_t {
|
|
5094
5468
|
// The part does not have a modifier.
|
|
5095
5469
|
none,
|
|
@@ -5108,17 +5482,17 @@ class url_pattern_part {
|
|
|
5108
5482
|
public:
|
|
5109
5483
|
url_pattern_part(url_pattern_part_type _type, std::string&& _value,
|
|
5110
5484
|
url_pattern_part_modifier _modifier)
|
|
5111
|
-
: type(_type), value(_value), modifier(_modifier) {}
|
|
5485
|
+
: type(_type), value(std::move(_value)), modifier(_modifier) {}
|
|
5112
5486
|
|
|
5113
5487
|
url_pattern_part(url_pattern_part_type _type, std::string&& _value,
|
|
5114
5488
|
url_pattern_part_modifier _modifier, std::string&& _name,
|
|
5115
5489
|
std::string&& _prefix, std::string&& _suffix)
|
|
5116
5490
|
: type(_type),
|
|
5117
|
-
value(_value),
|
|
5491
|
+
value(std::move(_value)),
|
|
5118
5492
|
modifier(_modifier),
|
|
5119
|
-
name(_name),
|
|
5120
|
-
prefix(_prefix),
|
|
5121
|
-
suffix(_suffix) {}
|
|
5493
|
+
name(std::move(_name)),
|
|
5494
|
+
prefix(std::move(_prefix)),
|
|
5495
|
+
suffix(std::move(_suffix)) {}
|
|
5122
5496
|
// A part has an associated type, a string, which must be set upon creation.
|
|
5123
5497
|
url_pattern_part_type type;
|
|
5124
5498
|
// A part has an associated value, a string, which must be set upon creation.
|
|
@@ -5209,11 +5583,15 @@ class url_pattern_component {
|
|
|
5209
5583
|
url_pattern_component(std::string&& new_pattern,
|
|
5210
5584
|
typename regex_provider::regex_type&& new_regexp,
|
|
5211
5585
|
std::vector<std::string>&& new_group_name_list,
|
|
5212
|
-
bool new_has_regexp_groups
|
|
5586
|
+
bool new_has_regexp_groups,
|
|
5587
|
+
url_pattern_component_type new_type,
|
|
5588
|
+
std::string&& new_exact_match_value = {})
|
|
5213
5589
|
: regexp(std::move(new_regexp)),
|
|
5214
5590
|
pattern(std::move(new_pattern)),
|
|
5215
|
-
group_name_list(new_group_name_list),
|
|
5216
|
-
|
|
5591
|
+
group_name_list(std::move(new_group_name_list)),
|
|
5592
|
+
exact_match_value(std::move(new_exact_match_value)),
|
|
5593
|
+
has_regexp_groups(new_has_regexp_groups),
|
|
5594
|
+
type(new_type) {}
|
|
5217
5595
|
|
|
5218
5596
|
// @see https://urlpattern.spec.whatwg.org/#compile-a-component
|
|
5219
5597
|
template <url_pattern_encoding_callback F>
|
|
@@ -5226,6 +5604,16 @@ class url_pattern_component {
|
|
|
5226
5604
|
std::string&& input,
|
|
5227
5605
|
std::vector<std::optional<std::string>>&& exec_result);
|
|
5228
5606
|
|
|
5607
|
+
// Fast path test that returns true/false without constructing result groups.
|
|
5608
|
+
// Uses cached pattern type to skip regex evaluation for simple patterns.
|
|
5609
|
+
bool fast_test(std::string_view input) const noexcept;
|
|
5610
|
+
|
|
5611
|
+
// Fast path match that returns capture groups without regex for simple
|
|
5612
|
+
// patterns. Returns nullopt if pattern doesn't match, otherwise returns
|
|
5613
|
+
// capture groups.
|
|
5614
|
+
std::optional<std::vector<std::optional<std::string>>> fast_match(
|
|
5615
|
+
std::string_view input) const;
|
|
5616
|
+
|
|
5229
5617
|
#if ADA_TESTING
|
|
5230
5618
|
friend void PrintTo(const url_pattern_component& component,
|
|
5231
5619
|
std::ostream* os) {
|
|
@@ -5241,7 +5629,11 @@ class url_pattern_component {
|
|
|
5241
5629
|
typename regex_provider::regex_type regexp{};
|
|
5242
5630
|
std::string pattern{};
|
|
5243
5631
|
std::vector<std::string> group_name_list{};
|
|
5632
|
+
// For EXACT_MATCH type: the literal string to compare against
|
|
5633
|
+
std::string exact_match_value{};
|
|
5244
5634
|
bool has_regexp_groups = false;
|
|
5635
|
+
// Cached pattern type for fast-path optimization
|
|
5636
|
+
url_pattern_component_type type = url_pattern_component_type::REGEXP;
|
|
5245
5637
|
};
|
|
5246
5638
|
|
|
5247
5639
|
// A URLPattern input can be either a string or a URLPatternInit object.
|
|
@@ -5273,14 +5665,28 @@ struct url_pattern_options {
|
|
|
5273
5665
|
#endif // ADA_TESTING
|
|
5274
5666
|
};
|
|
5275
5667
|
|
|
5276
|
-
|
|
5277
|
-
|
|
5278
|
-
|
|
5279
|
-
|
|
5280
|
-
|
|
5281
|
-
|
|
5282
|
-
|
|
5283
|
-
|
|
5668
|
+
/**
|
|
5669
|
+
* @brief URL pattern matching class implementing the URLPattern API.
|
|
5670
|
+
*
|
|
5671
|
+
* URLPattern provides a way to match URLs against patterns with wildcards
|
|
5672
|
+
* and named capture groups. It's useful for routing, URL-based dispatching,
|
|
5673
|
+
* and URL validation.
|
|
5674
|
+
*
|
|
5675
|
+
* Pattern syntax supports:
|
|
5676
|
+
* - Literal text matching
|
|
5677
|
+
* - Named groups: `:name` (matches up to the next separator)
|
|
5678
|
+
* - Wildcards: `*` (matches everything)
|
|
5679
|
+
* - Custom regex: `(pattern)`
|
|
5680
|
+
* - Optional segments: `:name?`
|
|
5681
|
+
* - Repeated segments: `:name+`, `:name*`
|
|
5682
|
+
*
|
|
5683
|
+
* @tparam regex_provider The regex implementation to use for pattern matching.
|
|
5684
|
+
* Must satisfy the url_pattern_regex::regex_concept.
|
|
5685
|
+
*
|
|
5686
|
+
* @note All string inputs must be valid UTF-8.
|
|
5687
|
+
*
|
|
5688
|
+
* @see https://urlpattern.spec.whatwg.org/
|
|
5689
|
+
*/
|
|
5284
5690
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
5285
5691
|
class url_pattern {
|
|
5286
5692
|
public:
|
|
@@ -5333,6 +5739,13 @@ class url_pattern {
|
|
|
5333
5739
|
// @see https://urlpattern.spec.whatwg.org/#url-pattern-has-regexp-groups
|
|
5334
5740
|
[[nodiscard]] bool has_regexp_groups() const;
|
|
5335
5741
|
|
|
5742
|
+
// Helper to test all components at once. Returns true if all match.
|
|
5743
|
+
[[nodiscard]] bool test_components(
|
|
5744
|
+
std::string_view protocol, std::string_view username,
|
|
5745
|
+
std::string_view password, std::string_view hostname,
|
|
5746
|
+
std::string_view port, std::string_view pathname, std::string_view search,
|
|
5747
|
+
std::string_view hash) const;
|
|
5748
|
+
|
|
5336
5749
|
#if ADA_TESTING
|
|
5337
5750
|
friend void PrintTo(const url_pattern& c, std::ostream* os) {
|
|
5338
5751
|
*os << "protocol_component: '" << c.get_protocol() << ", ";
|
|
@@ -5348,7 +5761,7 @@ class url_pattern {
|
|
|
5348
5761
|
|
|
5349
5762
|
template <url_pattern_regex::regex_concept P>
|
|
5350
5763
|
friend tl::expected<url_pattern<P>, errors> parser::parse_url_pattern_impl(
|
|
5351
|
-
std::variant<std::string_view, url_pattern_init
|
|
5764
|
+
std::variant<std::string_view, url_pattern_init>&& input,
|
|
5352
5765
|
const std::string_view* base_url, const url_pattern_options* options);
|
|
5353
5766
|
|
|
5354
5767
|
/**
|
|
@@ -5406,9 +5819,8 @@ class url_pattern {
|
|
|
5406
5819
|
*/
|
|
5407
5820
|
bool ignore_case_ = false;
|
|
5408
5821
|
};
|
|
5409
|
-
|
|
5410
5822
|
} // namespace ada
|
|
5411
|
-
|
|
5823
|
+
#endif // ADA_INCLUDE_URL_PATTERN
|
|
5412
5824
|
#endif
|
|
5413
5825
|
/* end file include/ada/url_pattern.h */
|
|
5414
5826
|
/* begin file include/ada/url_pattern_helpers.h */
|
|
@@ -5424,6 +5836,7 @@ class url_pattern {
|
|
|
5424
5836
|
#include <tuple>
|
|
5425
5837
|
#include <vector>
|
|
5426
5838
|
|
|
5839
|
+
#if ADA_INCLUDE_URL_PATTERN
|
|
5427
5840
|
namespace ada {
|
|
5428
5841
|
enum class errors : uint8_t;
|
|
5429
5842
|
}
|
|
@@ -5457,8 +5870,8 @@ enum class token_policy {
|
|
|
5457
5870
|
// @see https://urlpattern.spec.whatwg.org/#tokens
|
|
5458
5871
|
class token {
|
|
5459
5872
|
public:
|
|
5460
|
-
token(token_type _type, size_t _index, std::
|
|
5461
|
-
: type(_type), index(_index), value(
|
|
5873
|
+
token(token_type _type, size_t _index, std::string_view _value)
|
|
5874
|
+
: type(_type), index(_index), value(_value) {}
|
|
5462
5875
|
|
|
5463
5876
|
// A token has an associated type, a string, initially "invalid-char".
|
|
5464
5877
|
token_type type = token_type::INVALID_CHAR;
|
|
@@ -5469,7 +5882,7 @@ class token {
|
|
|
5469
5882
|
|
|
5470
5883
|
// A token has an associated value, a string, initially the empty string. It
|
|
5471
5884
|
// contains the code points from the pattern string represented by the token.
|
|
5472
|
-
std::
|
|
5885
|
+
std::string_view value{};
|
|
5473
5886
|
};
|
|
5474
5887
|
|
|
5475
5888
|
// @see https://urlpattern.spec.whatwg.org/#pattern-parser
|
|
@@ -5547,7 +5960,7 @@ class Tokenizer {
|
|
|
5547
5960
|
|
|
5548
5961
|
private:
|
|
5549
5962
|
// has an associated input, a pattern string, initially the empty string.
|
|
5550
|
-
std::
|
|
5963
|
+
std::string_view input;
|
|
5551
5964
|
// has an associated policy, a tokenize policy, initially "strict".
|
|
5552
5965
|
token_policy policy;
|
|
5553
5966
|
// has an associated token list, a token list, initially an empty list.
|
|
@@ -5641,7 +6054,7 @@ struct constructor_string_parser {
|
|
|
5641
6054
|
// @see https://urlpattern.spec.whatwg.org/#make-a-component-string
|
|
5642
6055
|
std::string make_component_string();
|
|
5643
6056
|
// has an associated input, a string, which must be set upon creation.
|
|
5644
|
-
std::
|
|
6057
|
+
std::string_view input;
|
|
5645
6058
|
// has an associated token list, a token list, which must be set upon
|
|
5646
6059
|
// creation.
|
|
5647
6060
|
std::vector<token> token_list;
|
|
@@ -5748,14 +6161,14 @@ bool protocol_component_matches_special_scheme(
|
|
|
5748
6161
|
ada::url_pattern_component<regex_provider>& input);
|
|
5749
6162
|
|
|
5750
6163
|
// @see https://urlpattern.spec.whatwg.org/#convert-a-modifier-to-a-string
|
|
5751
|
-
std::
|
|
6164
|
+
std::string_view convert_modifier_to_string(url_pattern_part_modifier modifier);
|
|
5752
6165
|
|
|
5753
6166
|
// @see https://urlpattern.spec.whatwg.org/#generate-a-segment-wildcard-regexp
|
|
5754
6167
|
std::string generate_segment_wildcard_regexp(
|
|
5755
6168
|
url_pattern_compile_component_options options);
|
|
5756
6169
|
|
|
5757
6170
|
} // namespace ada::url_pattern_helpers
|
|
5758
|
-
|
|
6171
|
+
#endif // ADA_INCLUDE_URL_PATTERN
|
|
5759
6172
|
#endif
|
|
5760
6173
|
/* end file include/ada/url_pattern_helpers.h */
|
|
5761
6174
|
|
|
@@ -5764,9 +6177,10 @@ std::string generate_segment_wildcard_regexp(
|
|
|
5764
6177
|
#include <variant>
|
|
5765
6178
|
|
|
5766
6179
|
namespace ada::parser {
|
|
6180
|
+
#if ADA_INCLUDE_URL_PATTERN
|
|
5767
6181
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
5768
6182
|
tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
5769
|
-
std::variant<std::string_view, url_pattern_init
|
|
6183
|
+
std::variant<std::string_view, url_pattern_init>&& input,
|
|
5770
6184
|
const std::string_view* base_url, const url_pattern_options* options) {
|
|
5771
6185
|
// Let init be null.
|
|
5772
6186
|
url_pattern_init init;
|
|
@@ -5815,7 +6229,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
5815
6229
|
return tl::unexpected(processed_init.error());
|
|
5816
6230
|
}
|
|
5817
6231
|
|
|
5818
|
-
// For each componentName of
|
|
6232
|
+
// For each componentName of "protocol", "username", "password", "hostname",
|
|
5819
6233
|
// "port", "pathname", "search", "hash" If processedInit[componentName] does
|
|
5820
6234
|
// not exist, then set processedInit[componentName] to "*".
|
|
5821
6235
|
ADA_ASSERT_TRUE(processed_init.has_value());
|
|
@@ -5843,7 +6257,6 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
5843
6257
|
// TODO: Optimization opportunity.
|
|
5844
6258
|
if (scheme::is_special(*processed_init->protocol)) {
|
|
5845
6259
|
std::string_view port = processed_init->port.value();
|
|
5846
|
-
helpers::trim_c0_whitespace(port);
|
|
5847
6260
|
if (std::to_string(scheme::get_special_port(*processed_init->protocol)) ==
|
|
5848
6261
|
port) {
|
|
5849
6262
|
processed_init->port->clear();
|
|
@@ -5853,7 +6266,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
5853
6266
|
// Let urlPattern be a new URL pattern.
|
|
5854
6267
|
url_pattern<regex_provider> url_pattern_{};
|
|
5855
6268
|
|
|
5856
|
-
// Set urlPattern
|
|
6269
|
+
// Set urlPattern's protocol component to the result of compiling a component
|
|
5857
6270
|
// given processedInit["protocol"], canonicalize a protocol, and default
|
|
5858
6271
|
// options.
|
|
5859
6272
|
auto protocol_component = url_pattern_component<regex_provider>::compile(
|
|
@@ -5867,7 +6280,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
5867
6280
|
}
|
|
5868
6281
|
url_pattern_.protocol_component = std::move(*protocol_component);
|
|
5869
6282
|
|
|
5870
|
-
// Set urlPattern
|
|
6283
|
+
// Set urlPattern's username component to the result of compiling a component
|
|
5871
6284
|
// given processedInit["username"], canonicalize a username, and default
|
|
5872
6285
|
// options.
|
|
5873
6286
|
auto username_component = url_pattern_component<regex_provider>::compile(
|
|
@@ -5881,7 +6294,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
5881
6294
|
}
|
|
5882
6295
|
url_pattern_.username_component = std::move(*username_component);
|
|
5883
6296
|
|
|
5884
|
-
// Set urlPattern
|
|
6297
|
+
// Set urlPattern's password component to the result of compiling a component
|
|
5885
6298
|
// given processedInit["password"], canonicalize a password, and default
|
|
5886
6299
|
// options.
|
|
5887
6300
|
auto password_component = url_pattern_component<regex_provider>::compile(
|
|
@@ -5898,12 +6311,12 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
5898
6311
|
// TODO: Optimization opportunity. The following if statement can be
|
|
5899
6312
|
// simplified.
|
|
5900
6313
|
// If the result running hostname pattern is an IPv6 address given
|
|
5901
|
-
// processedInit["hostname"] is true, then set urlPattern
|
|
6314
|
+
// processedInit["hostname"] is true, then set urlPattern's hostname component
|
|
5902
6315
|
// to the result of compiling a component given processedInit["hostname"],
|
|
5903
6316
|
// canonicalize an IPv6 hostname, and hostname options.
|
|
5904
6317
|
if (url_pattern_helpers::is_ipv6_address(processed_init->hostname.value())) {
|
|
5905
6318
|
ada_log("processed_init->hostname is ipv6 address");
|
|
5906
|
-
// then set urlPattern
|
|
6319
|
+
// then set urlPattern's hostname component to the result of compiling a
|
|
5907
6320
|
// component given processedInit["hostname"], canonicalize an IPv6 hostname,
|
|
5908
6321
|
// and hostname options.
|
|
5909
6322
|
auto hostname_component = url_pattern_component<regex_provider>::compile(
|
|
@@ -5917,7 +6330,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
5917
6330
|
}
|
|
5918
6331
|
url_pattern_.hostname_component = std::move(*hostname_component);
|
|
5919
6332
|
} else {
|
|
5920
|
-
// Otherwise, set urlPattern
|
|
6333
|
+
// Otherwise, set urlPattern's hostname component to the result of compiling
|
|
5921
6334
|
// a component given processedInit["hostname"], canonicalize a hostname, and
|
|
5922
6335
|
// hostname options.
|
|
5923
6336
|
auto hostname_component = url_pattern_component<regex_provider>::compile(
|
|
@@ -5932,7 +6345,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
5932
6345
|
url_pattern_.hostname_component = std::move(*hostname_component);
|
|
5933
6346
|
}
|
|
5934
6347
|
|
|
5935
|
-
// Set urlPattern
|
|
6348
|
+
// Set urlPattern's port component to the result of compiling a component
|
|
5936
6349
|
// given processedInit["port"], canonicalize a port, and default options.
|
|
5937
6350
|
auto port_component = url_pattern_component<regex_provider>::compile(
|
|
5938
6351
|
processed_init->port.value(), url_pattern_helpers::canonicalize_port,
|
|
@@ -5953,7 +6366,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
5953
6366
|
|
|
5954
6367
|
// TODO: Optimization opportunity: Simplify this if statement.
|
|
5955
6368
|
// If the result of running protocol component matches a special scheme given
|
|
5956
|
-
// urlPattern
|
|
6369
|
+
// urlPattern's protocol component is true, then:
|
|
5957
6370
|
if (url_pattern_helpers::protocol_component_matches_special_scheme<
|
|
5958
6371
|
regex_provider>(url_pattern_.protocol_component)) {
|
|
5959
6372
|
// Let pathCompileOptions be copy of the pathname options with the ignore
|
|
@@ -5963,7 +6376,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
5963
6376
|
path_compile_options.ignore_case = options->ignore_case;
|
|
5964
6377
|
}
|
|
5965
6378
|
|
|
5966
|
-
// Set urlPattern
|
|
6379
|
+
// Set urlPattern's pathname component to the result of compiling a
|
|
5967
6380
|
// component given processedInit["pathname"], canonicalize a pathname, and
|
|
5968
6381
|
// pathCompileOptions.
|
|
5969
6382
|
auto pathname_component = url_pattern_component<regex_provider>::compile(
|
|
@@ -5976,7 +6389,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
5976
6389
|
}
|
|
5977
6390
|
url_pattern_.pathname_component = std::move(*pathname_component);
|
|
5978
6391
|
} else {
|
|
5979
|
-
// Otherwise set urlPattern
|
|
6392
|
+
// Otherwise set urlPattern's pathname component to the result of compiling
|
|
5980
6393
|
// a component given processedInit["pathname"], canonicalize an opaque
|
|
5981
6394
|
// pathname, and compileOptions.
|
|
5982
6395
|
auto pathname_component = url_pattern_component<regex_provider>::compile(
|
|
@@ -5990,7 +6403,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
5990
6403
|
url_pattern_.pathname_component = std::move(*pathname_component);
|
|
5991
6404
|
}
|
|
5992
6405
|
|
|
5993
|
-
// Set urlPattern
|
|
6406
|
+
// Set urlPattern's search component to the result of compiling a component
|
|
5994
6407
|
// given processedInit["search"], canonicalize a search, and compileOptions.
|
|
5995
6408
|
auto search_component = url_pattern_component<regex_provider>::compile(
|
|
5996
6409
|
processed_init->search.value(), url_pattern_helpers::canonicalize_search,
|
|
@@ -6002,7 +6415,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
6002
6415
|
}
|
|
6003
6416
|
url_pattern_.search_component = std::move(*search_component);
|
|
6004
6417
|
|
|
6005
|
-
// Set urlPattern
|
|
6418
|
+
// Set urlPattern's hash component to the result of compiling a component
|
|
6006
6419
|
// given processedInit["hash"], canonicalize a hash, and compileOptions.
|
|
6007
6420
|
auto hash_component = url_pattern_component<regex_provider>::compile(
|
|
6008
6421
|
processed_init->hash.value(), url_pattern_helpers::canonicalize_hash,
|
|
@@ -6017,6 +6430,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
6017
6430
|
// Return urlPattern.
|
|
6018
6431
|
return url_pattern_;
|
|
6019
6432
|
}
|
|
6433
|
+
#endif // ADA_INCLUDE_URL_PATTERN
|
|
6020
6434
|
|
|
6021
6435
|
} // namespace ada::parser
|
|
6022
6436
|
|
|
@@ -6113,7 +6527,10 @@ constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept {
|
|
|
6113
6527
|
/* begin file include/ada/serializers.h */
|
|
6114
6528
|
/**
|
|
6115
6529
|
* @file serializers.h
|
|
6116
|
-
* @brief
|
|
6530
|
+
* @brief IP address serialization utilities.
|
|
6531
|
+
*
|
|
6532
|
+
* This header provides functions for converting IP addresses to their
|
|
6533
|
+
* string representations according to the WHATWG URL Standard.
|
|
6117
6534
|
*/
|
|
6118
6535
|
#ifndef ADA_SERIALIZERS_H
|
|
6119
6536
|
#define ADA_SERIALIZERS_H
|
|
@@ -6124,32 +6541,41 @@ constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept {
|
|
|
6124
6541
|
|
|
6125
6542
|
/**
|
|
6126
6543
|
* @namespace ada::serializers
|
|
6127
|
-
* @brief
|
|
6544
|
+
* @brief IP address serialization functions.
|
|
6545
|
+
*
|
|
6546
|
+
* Contains utilities for serializing IPv4 and IPv6 addresses to strings.
|
|
6128
6547
|
*/
|
|
6129
6548
|
namespace ada::serializers {
|
|
6130
6549
|
|
|
6131
6550
|
/**
|
|
6132
|
-
* Finds
|
|
6551
|
+
* Finds the longest consecutive sequence of zero pieces in an IPv6 address.
|
|
6552
|
+
* Used for :: compression in IPv6 serialization.
|
|
6553
|
+
*
|
|
6554
|
+
* @param address The 8 16-bit pieces of the IPv6 address.
|
|
6555
|
+
* @param[out] compress Index of the start of the longest zero sequence.
|
|
6556
|
+
* @param[out] compress_length Length of the longest zero sequence.
|
|
6133
6557
|
*/
|
|
6134
6558
|
void find_longest_sequence_of_ipv6_pieces(
|
|
6135
6559
|
const std::array<uint16_t, 8>& address, size_t& compress,
|
|
6136
6560
|
size_t& compress_length) noexcept;
|
|
6137
6561
|
|
|
6138
6562
|
/**
|
|
6139
|
-
* Serializes an
|
|
6140
|
-
*
|
|
6141
|
-
*
|
|
6563
|
+
* Serializes an IPv6 address to its string representation.
|
|
6564
|
+
*
|
|
6565
|
+
* @param address The 8 16-bit pieces of the IPv6 address.
|
|
6566
|
+
* @return The serialized IPv6 string (e.g., "2001:db8::1").
|
|
6142
6567
|
* @see https://url.spec.whatwg.org/#concept-ipv6-serializer
|
|
6143
6568
|
*/
|
|
6144
|
-
std::string ipv6(const std::array<uint16_t, 8>& address)
|
|
6569
|
+
std::string ipv6(const std::array<uint16_t, 8>& address);
|
|
6145
6570
|
|
|
6146
6571
|
/**
|
|
6147
|
-
* Serializes an
|
|
6148
|
-
*
|
|
6149
|
-
*
|
|
6572
|
+
* Serializes an IPv4 address to its dotted-decimal string representation.
|
|
6573
|
+
*
|
|
6574
|
+
* @param address The 32-bit IPv4 address as an integer.
|
|
6575
|
+
* @return The serialized IPv4 string (e.g., "192.168.1.1").
|
|
6150
6576
|
* @see https://url.spec.whatwg.org/#concept-ipv4-serializer
|
|
6151
6577
|
*/
|
|
6152
|
-
std::string ipv4(uint64_t address)
|
|
6578
|
+
std::string ipv4(uint64_t address);
|
|
6153
6579
|
|
|
6154
6580
|
} // namespace ada::serializers
|
|
6155
6581
|
|
|
@@ -6158,7 +6584,12 @@ std::string ipv4(uint64_t address) noexcept;
|
|
|
6158
6584
|
/* begin file include/ada/state.h */
|
|
6159
6585
|
/**
|
|
6160
6586
|
* @file state.h
|
|
6161
|
-
* @brief
|
|
6587
|
+
* @brief URL parser state machine states.
|
|
6588
|
+
*
|
|
6589
|
+
* Defines the states used by the URL parsing state machine as specified
|
|
6590
|
+
* in the WHATWG URL Standard.
|
|
6591
|
+
*
|
|
6592
|
+
* @see https://url.spec.whatwg.org/#url-parsing
|
|
6162
6593
|
*/
|
|
6163
6594
|
#ifndef ADA_STATE_H
|
|
6164
6595
|
#define ADA_STATE_H
|
|
@@ -6169,6 +6600,11 @@ std::string ipv4(uint64_t address) noexcept;
|
|
|
6169
6600
|
namespace ada {
|
|
6170
6601
|
|
|
6171
6602
|
/**
|
|
6603
|
+
* @brief States in the URL parsing state machine.
|
|
6604
|
+
*
|
|
6605
|
+
* The URL parser processes input through a sequence of states, each handling
|
|
6606
|
+
* a specific part of the URL syntax.
|
|
6607
|
+
*
|
|
6172
6608
|
* @see https://url.spec.whatwg.org/#url-parsing
|
|
6173
6609
|
*/
|
|
6174
6610
|
enum class state {
|
|
@@ -6274,7 +6710,9 @@ enum class state {
|
|
|
6274
6710
|
};
|
|
6275
6711
|
|
|
6276
6712
|
/**
|
|
6277
|
-
*
|
|
6713
|
+
* Converts a parser state to its string name for debugging.
|
|
6714
|
+
* @param s The state to convert.
|
|
6715
|
+
* @return A string representation of the state.
|
|
6278
6716
|
*/
|
|
6279
6717
|
ada_warn_unused std::string to_string(ada::state s);
|
|
6280
6718
|
|
|
@@ -6613,6 +7051,7 @@ inline std::ostream &operator<<(std::ostream &out, const ada::url &u) {
|
|
|
6613
7051
|
out.protocol_end = uint32_t(get_protocol().size());
|
|
6614
7052
|
|
|
6615
7053
|
// Trailing index is always the next character of the current one.
|
|
7054
|
+
// NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
|
|
6616
7055
|
size_t running_index = out.protocol_end;
|
|
6617
7056
|
|
|
6618
7057
|
if (host.has_value()) {
|
|
@@ -6732,7 +7171,7 @@ inline void url::set_scheme(std::string &&new_scheme) noexcept {
|
|
|
6732
7171
|
}
|
|
6733
7172
|
}
|
|
6734
7173
|
|
|
6735
|
-
constexpr void url::copy_scheme(ada::url &&u)
|
|
7174
|
+
constexpr void url::copy_scheme(ada::url &&u) {
|
|
6736
7175
|
non_special_scheme = u.non_special_scheme;
|
|
6737
7176
|
type = u.type;
|
|
6738
7177
|
}
|
|
@@ -6742,7 +7181,7 @@ constexpr void url::copy_scheme(const ada::url &u) {
|
|
|
6742
7181
|
type = u.type;
|
|
6743
7182
|
}
|
|
6744
7183
|
|
|
6745
|
-
[[nodiscard]] ada_really_inline std::string url::get_href() const
|
|
7184
|
+
[[nodiscard]] ada_really_inline std::string url::get_href() const {
|
|
6746
7185
|
std::string output = get_protocol();
|
|
6747
7186
|
|
|
6748
7187
|
if (host.has_value()) {
|
|
@@ -6901,7 +7340,13 @@ namespace ada {
|
|
|
6901
7340
|
/* begin file include/ada/url_aggregator.h */
|
|
6902
7341
|
/**
|
|
6903
7342
|
* @file url_aggregator.h
|
|
6904
|
-
* @brief Declaration for the
|
|
7343
|
+
* @brief Declaration for the `ada::url_aggregator` class.
|
|
7344
|
+
*
|
|
7345
|
+
* This file contains the `ada::url_aggregator` struct which represents a parsed
|
|
7346
|
+
* URL using a single buffer with component offsets. This is the default and
|
|
7347
|
+
* most memory-efficient URL representation in Ada.
|
|
7348
|
+
*
|
|
7349
|
+
* @see url.h for an alternative representation using separate strings
|
|
6905
7350
|
*/
|
|
6906
7351
|
#ifndef ADA_URL_AGGREGATOR_H
|
|
6907
7352
|
#define ADA_URL_AGGREGATOR_H
|
|
@@ -6917,12 +7362,23 @@ namespace ada {
|
|
|
6917
7362
|
namespace parser {}
|
|
6918
7363
|
|
|
6919
7364
|
/**
|
|
6920
|
-
* @brief
|
|
7365
|
+
* @brief Memory-efficient URL representation using a single buffer.
|
|
7366
|
+
*
|
|
7367
|
+
* The `url_aggregator` stores the entire normalized URL in a single string
|
|
7368
|
+
* buffer and tracks component boundaries using offsets. This design minimizes
|
|
7369
|
+
* memory allocations and is ideal for read-mostly access patterns.
|
|
7370
|
+
*
|
|
7371
|
+
* Getter methods return `std::string_view` pointing into the internal buffer.
|
|
7372
|
+
* These views are lightweight (no allocation) but become invalid if the
|
|
7373
|
+
* url_aggregator is modified or destroyed.
|
|
6921
7374
|
*
|
|
6922
|
-
* @
|
|
6923
|
-
*
|
|
6924
|
-
*
|
|
6925
|
-
*
|
|
7375
|
+
* @warning Views returned by getters (e.g., `get_pathname()`) are invalidated
|
|
7376
|
+
* when any setter is called. Do not use a getter's result as input to a
|
|
7377
|
+
* setter on the same object without copying first.
|
|
7378
|
+
*
|
|
7379
|
+
* @note This is the default URL type returned by `ada::parse()`.
|
|
7380
|
+
*
|
|
7381
|
+
* @see url For an alternative using separate std::string instances
|
|
6926
7382
|
*/
|
|
6927
7383
|
struct url_aggregator : url_base {
|
|
6928
7384
|
url_aggregator() = default;
|
|
@@ -6932,6 +7388,25 @@ struct url_aggregator : url_base {
|
|
|
6932
7388
|
url_aggregator &operator=(const url_aggregator &u) = default;
|
|
6933
7389
|
~url_aggregator() override = default;
|
|
6934
7390
|
|
|
7391
|
+
/**
|
|
7392
|
+
* The setter functions follow the steps defined in the URL Standard.
|
|
7393
|
+
*
|
|
7394
|
+
* The url_aggregator has a single buffer that contains the entire normalized
|
|
7395
|
+
* URL. The various components are represented as offsets into that buffer.
|
|
7396
|
+
* When you call get_pathname(), for example, you get a std::string_view that
|
|
7397
|
+
* points into that buffer. If the url_aggregator is modified, the buffer may
|
|
7398
|
+
* be reallocated, and the std::string_view you obtained earlier may become
|
|
7399
|
+
* invalid. In particular, this implies that you cannot modify the URL using
|
|
7400
|
+
* a setter function with a std::string_view that points into the
|
|
7401
|
+
* url_aggregator. E.g., the following is incorrect:
|
|
7402
|
+
* url->set_hostname(url->get_pathname()).
|
|
7403
|
+
* You must first copy the pathname to a separate string.
|
|
7404
|
+
* std::string pathname(url->get_pathname());
|
|
7405
|
+
* url->set_hostname(pathname);
|
|
7406
|
+
*
|
|
7407
|
+
* The caller is responsible for ensuring that the url_aggregator is not
|
|
7408
|
+
* modified while any std::string_view obtained from it is in use.
|
|
7409
|
+
*/
|
|
6935
7410
|
bool set_href(std::string_view input);
|
|
6936
7411
|
bool set_host(std::string_view input);
|
|
6937
7412
|
bool set_hostname(std::string_view input);
|
|
@@ -6943,115 +7418,130 @@ struct url_aggregator : url_base {
|
|
|
6943
7418
|
void set_search(std::string_view input);
|
|
6944
7419
|
void set_hash(std::string_view input);
|
|
6945
7420
|
|
|
7421
|
+
/**
|
|
7422
|
+
* Validates whether the hostname is a valid domain according to RFC 1034.
|
|
7423
|
+
* @return `true` if the domain is valid, `false` otherwise.
|
|
7424
|
+
*/
|
|
6946
7425
|
[[nodiscard]] bool has_valid_domain() const noexcept override;
|
|
7426
|
+
|
|
6947
7427
|
/**
|
|
6948
|
-
*
|
|
6949
|
-
* origin.
|
|
6950
|
-
* @return a newly allocated string.
|
|
7428
|
+
* Returns the URL's origin (scheme + host + port for special URLs).
|
|
7429
|
+
* @return A newly allocated string containing the serialized origin.
|
|
6951
7430
|
* @see https://url.spec.whatwg.org/#concept-url-origin
|
|
6952
7431
|
*/
|
|
6953
|
-
[[nodiscard]] std::string get_origin() const
|
|
7432
|
+
[[nodiscard]] std::string get_origin() const override;
|
|
7433
|
+
|
|
6954
7434
|
/**
|
|
6955
|
-
*
|
|
6956
|
-
*
|
|
6957
|
-
*
|
|
6958
|
-
* @return
|
|
7435
|
+
* Returns the full serialized URL (the href) as a string_view.
|
|
7436
|
+
* Does not allocate memory. The returned view becomes invalid if this
|
|
7437
|
+
* url_aggregator is modified or destroyed.
|
|
7438
|
+
* @return A string_view into the internal buffer.
|
|
6959
7439
|
* @see https://url.spec.whatwg.org/#dom-url-href
|
|
6960
|
-
* @see https://url.spec.whatwg.org/#concept-url-serializer
|
|
6961
7440
|
*/
|
|
6962
7441
|
[[nodiscard]] constexpr std::string_view get_href() const noexcept
|
|
6963
7442
|
ada_lifetime_bound;
|
|
7443
|
+
|
|
6964
7444
|
/**
|
|
6965
|
-
*
|
|
6966
|
-
*
|
|
6967
|
-
*
|
|
7445
|
+
* Returns the URL's username component.
|
|
7446
|
+
* Does not allocate memory. The returned view becomes invalid if this
|
|
7447
|
+
* url_aggregator is modified or destroyed.
|
|
7448
|
+
* @return A string_view of the username.
|
|
6968
7449
|
* @see https://url.spec.whatwg.org/#dom-url-username
|
|
6969
7450
|
*/
|
|
6970
|
-
[[nodiscard]] std::string_view get_username() const
|
|
6971
|
-
|
|
7451
|
+
[[nodiscard]] std::string_view get_username() const ada_lifetime_bound;
|
|
7452
|
+
|
|
6972
7453
|
/**
|
|
6973
|
-
*
|
|
6974
|
-
*
|
|
6975
|
-
*
|
|
7454
|
+
* Returns the URL's password component.
|
|
7455
|
+
* Does not allocate memory. The returned view becomes invalid if this
|
|
7456
|
+
* url_aggregator is modified or destroyed.
|
|
7457
|
+
* @return A string_view of the password.
|
|
6976
7458
|
* @see https://url.spec.whatwg.org/#dom-url-password
|
|
6977
7459
|
*/
|
|
6978
|
-
[[nodiscard]] std::string_view get_password() const
|
|
6979
|
-
|
|
7460
|
+
[[nodiscard]] std::string_view get_password() const ada_lifetime_bound;
|
|
7461
|
+
|
|
6980
7462
|
/**
|
|
6981
|
-
*
|
|
6982
|
-
*
|
|
6983
|
-
*
|
|
7463
|
+
* Returns the URL's port as a string (e.g., "8080").
|
|
7464
|
+
* Does not allocate memory. Returns empty view if no port is set.
|
|
7465
|
+
* The returned view becomes invalid if this url_aggregator is modified.
|
|
7466
|
+
* @return A string_view of the port.
|
|
6984
7467
|
* @see https://url.spec.whatwg.org/#dom-url-port
|
|
6985
7468
|
*/
|
|
6986
|
-
[[nodiscard]] std::string_view get_port() const
|
|
7469
|
+
[[nodiscard]] std::string_view get_port() const ada_lifetime_bound;
|
|
7470
|
+
|
|
6987
7471
|
/**
|
|
6988
|
-
*
|
|
6989
|
-
*
|
|
6990
|
-
*
|
|
7472
|
+
* Returns the URL's fragment prefixed with '#' (e.g., "#section").
|
|
7473
|
+
* Does not allocate memory. Returns empty view if no fragment is set.
|
|
7474
|
+
* The returned view becomes invalid if this url_aggregator is modified.
|
|
7475
|
+
* @return A string_view of the hash.
|
|
6991
7476
|
* @see https://url.spec.whatwg.org/#dom-url-hash
|
|
6992
7477
|
*/
|
|
6993
|
-
[[nodiscard]] std::string_view get_hash() const
|
|
7478
|
+
[[nodiscard]] std::string_view get_hash() const ada_lifetime_bound;
|
|
7479
|
+
|
|
6994
7480
|
/**
|
|
6995
|
-
*
|
|
6996
|
-
*
|
|
6997
|
-
*
|
|
6998
|
-
*
|
|
6999
|
-
* @return a lightweight std::string_view.
|
|
7481
|
+
* Returns the URL's host and port (e.g., "example.com:8080").
|
|
7482
|
+
* Does not allocate memory. Returns empty view if no host is set.
|
|
7483
|
+
* The returned view becomes invalid if this url_aggregator is modified.
|
|
7484
|
+
* @return A string_view of host:port.
|
|
7000
7485
|
* @see https://url.spec.whatwg.org/#dom-url-host
|
|
7001
7486
|
*/
|
|
7002
|
-
[[nodiscard]] std::string_view get_host() const
|
|
7487
|
+
[[nodiscard]] std::string_view get_host() const ada_lifetime_bound;
|
|
7488
|
+
|
|
7003
7489
|
/**
|
|
7004
|
-
*
|
|
7005
|
-
*
|
|
7006
|
-
*
|
|
7007
|
-
* @return
|
|
7490
|
+
* Returns the URL's hostname (without port).
|
|
7491
|
+
* Does not allocate memory. Returns empty view if no host is set.
|
|
7492
|
+
* The returned view becomes invalid if this url_aggregator is modified.
|
|
7493
|
+
* @return A string_view of the hostname.
|
|
7008
7494
|
* @see https://url.spec.whatwg.org/#dom-url-hostname
|
|
7009
7495
|
*/
|
|
7010
|
-
[[nodiscard]] std::string_view get_hostname() const
|
|
7011
|
-
|
|
7496
|
+
[[nodiscard]] std::string_view get_hostname() const ada_lifetime_bound;
|
|
7497
|
+
|
|
7012
7498
|
/**
|
|
7013
|
-
*
|
|
7014
|
-
*
|
|
7015
|
-
*
|
|
7016
|
-
* @return
|
|
7499
|
+
* Returns the URL's path component.
|
|
7500
|
+
* Does not allocate memory. The returned view becomes invalid if this
|
|
7501
|
+
* url_aggregator is modified or destroyed.
|
|
7502
|
+
* @return A string_view of the pathname.
|
|
7017
7503
|
* @see https://url.spec.whatwg.org/#dom-url-pathname
|
|
7018
7504
|
*/
|
|
7019
|
-
[[nodiscard]] constexpr std::string_view get_pathname() const
|
|
7505
|
+
[[nodiscard]] constexpr std::string_view get_pathname() const
|
|
7020
7506
|
ada_lifetime_bound;
|
|
7507
|
+
|
|
7021
7508
|
/**
|
|
7022
|
-
*
|
|
7023
|
-
*
|
|
7024
|
-
* @return size of the pathname in bytes
|
|
7509
|
+
* Returns the byte length of the pathname without creating a string.
|
|
7510
|
+
* @return Size of the pathname in bytes.
|
|
7025
7511
|
* @see https://url.spec.whatwg.org/#dom-url-pathname
|
|
7026
7512
|
*/
|
|
7027
7513
|
[[nodiscard]] ada_really_inline uint32_t get_pathname_length() const noexcept;
|
|
7514
|
+
|
|
7028
7515
|
/**
|
|
7029
|
-
*
|
|
7030
|
-
*
|
|
7031
|
-
*
|
|
7516
|
+
* Returns the URL's query string prefixed with '?' (e.g., "?foo=bar").
|
|
7517
|
+
* Does not allocate memory. Returns empty view if no query is set.
|
|
7518
|
+
* The returned view becomes invalid if this url_aggregator is modified.
|
|
7519
|
+
* @return A string_view of the search/query.
|
|
7032
7520
|
* @see https://url.spec.whatwg.org/#dom-url-search
|
|
7033
7521
|
*/
|
|
7034
|
-
[[nodiscard]] std::string_view get_search() const
|
|
7522
|
+
[[nodiscard]] std::string_view get_search() const ada_lifetime_bound;
|
|
7523
|
+
|
|
7035
7524
|
/**
|
|
7036
|
-
*
|
|
7037
|
-
*
|
|
7038
|
-
*
|
|
7039
|
-
* @return
|
|
7525
|
+
* Returns the URL's scheme followed by a colon (e.g., "https:").
|
|
7526
|
+
* Does not allocate memory. The returned view becomes invalid if this
|
|
7527
|
+
* url_aggregator is modified or destroyed.
|
|
7528
|
+
* @return A string_view of the protocol.
|
|
7040
7529
|
* @see https://url.spec.whatwg.org/#dom-url-protocol
|
|
7041
7530
|
*/
|
|
7042
|
-
[[nodiscard]] std::string_view get_protocol() const
|
|
7043
|
-
ada_lifetime_bound;
|
|
7531
|
+
[[nodiscard]] std::string_view get_protocol() const ada_lifetime_bound;
|
|
7044
7532
|
|
|
7045
7533
|
/**
|
|
7046
|
-
*
|
|
7047
|
-
*
|
|
7534
|
+
* Checks if the URL has credentials (non-empty username or password).
|
|
7535
|
+
* @return `true` if username or password is non-empty, `false` otherwise.
|
|
7048
7536
|
*/
|
|
7049
7537
|
[[nodiscard]] ada_really_inline constexpr bool has_credentials()
|
|
7050
7538
|
const noexcept;
|
|
7051
7539
|
|
|
7052
7540
|
/**
|
|
7053
|
-
*
|
|
7541
|
+
* Returns the URL component offsets for efficient serialization.
|
|
7054
7542
|
*
|
|
7543
|
+
* The components represent byte offsets into the serialized URL:
|
|
7544
|
+
* ```
|
|
7055
7545
|
* https://user:pass@example.com:1234/foo/bar?baz#quux
|
|
7056
7546
|
* | | | | ^^^^| | |
|
|
7057
7547
|
* | | | | | | | `----- hash_start
|
|
@@ -7062,57 +7552,99 @@ struct url_aggregator : url_base {
|
|
|
7062
7552
|
* | | `---------------------------------- host_start
|
|
7063
7553
|
* | `--------------------------------------- username_end
|
|
7064
7554
|
* `--------------------------------------------- protocol_end
|
|
7065
|
-
*
|
|
7066
|
-
*
|
|
7067
|
-
*
|
|
7068
|
-
* @return a constant reference to the underlying component attribute.
|
|
7069
|
-
*
|
|
7070
|
-
* @see
|
|
7071
|
-
* https://github.com/servo/rust-url/blob/b65a45515c10713f6d212e6726719a020203cc98/url/src/quirks.rs#L31
|
|
7555
|
+
* ```
|
|
7556
|
+
* @return A constant reference to the url_components struct.
|
|
7557
|
+
* @see https://github.com/servo/rust-url
|
|
7072
7558
|
*/
|
|
7073
7559
|
[[nodiscard]] ada_really_inline const url_components &get_components()
|
|
7074
7560
|
const noexcept;
|
|
7561
|
+
|
|
7075
7562
|
/**
|
|
7076
|
-
* Returns a string representation of this URL.
|
|
7563
|
+
* Returns a JSON string representation of this URL for debugging.
|
|
7564
|
+
* @return A JSON-formatted string with all URL components.
|
|
7077
7565
|
*/
|
|
7078
7566
|
[[nodiscard]] std::string to_string() const override;
|
|
7567
|
+
|
|
7079
7568
|
/**
|
|
7080
|
-
* Returns a
|
|
7569
|
+
* Returns a visual diagram showing component boundaries in the URL.
|
|
7570
|
+
* Useful for debugging and understanding URL structure.
|
|
7571
|
+
* @return A multi-line string diagram.
|
|
7081
7572
|
*/
|
|
7082
7573
|
[[nodiscard]] std::string to_diagram() const;
|
|
7083
7574
|
|
|
7084
7575
|
/**
|
|
7085
|
-
*
|
|
7086
|
-
* @return true if
|
|
7087
|
-
* possible.
|
|
7576
|
+
* Validates internal consistency of component offsets (for debugging).
|
|
7577
|
+
* @return `true` if offsets are consistent, `false` if corrupted.
|
|
7088
7578
|
*/
|
|
7089
7579
|
[[nodiscard]] constexpr bool validate() const noexcept;
|
|
7090
7580
|
|
|
7091
|
-
/**
|
|
7581
|
+
/**
|
|
7582
|
+
* Checks if the URL has an empty hostname (host is set but empty string).
|
|
7583
|
+
* @return `true` if host exists but is empty, `false` otherwise.
|
|
7584
|
+
*/
|
|
7092
7585
|
[[nodiscard]] constexpr bool has_empty_hostname() const noexcept;
|
|
7093
|
-
|
|
7586
|
+
|
|
7587
|
+
/**
|
|
7588
|
+
* Checks if the URL has a hostname (including empty hostnames).
|
|
7589
|
+
* @return `true` if host is present, `false` otherwise.
|
|
7590
|
+
*/
|
|
7094
7591
|
[[nodiscard]] constexpr bool has_hostname() const noexcept;
|
|
7095
|
-
|
|
7592
|
+
|
|
7593
|
+
/**
|
|
7594
|
+
* Checks if the URL has a non-empty username.
|
|
7595
|
+
* @return `true` if username is non-empty, `false` otherwise.
|
|
7596
|
+
*/
|
|
7096
7597
|
[[nodiscard]] constexpr bool has_non_empty_username() const noexcept;
|
|
7097
|
-
|
|
7598
|
+
|
|
7599
|
+
/**
|
|
7600
|
+
* Checks if the URL has a non-empty password.
|
|
7601
|
+
* @return `true` if password is non-empty, `false` otherwise.
|
|
7602
|
+
*/
|
|
7098
7603
|
[[nodiscard]] constexpr bool has_non_empty_password() const noexcept;
|
|
7099
|
-
|
|
7604
|
+
|
|
7605
|
+
/**
|
|
7606
|
+
* Checks if the URL has a non-default port explicitly specified.
|
|
7607
|
+
* @return `true` if a port is present, `false` otherwise.
|
|
7608
|
+
*/
|
|
7100
7609
|
[[nodiscard]] constexpr bool has_port() const noexcept;
|
|
7101
|
-
|
|
7610
|
+
|
|
7611
|
+
/**
|
|
7612
|
+
* Checks if the URL has a password component (may be empty).
|
|
7613
|
+
* @return `true` if password is present, `false` otherwise.
|
|
7614
|
+
*/
|
|
7102
7615
|
[[nodiscard]] constexpr bool has_password() const noexcept;
|
|
7103
|
-
|
|
7616
|
+
|
|
7617
|
+
/**
|
|
7618
|
+
* Checks if the URL has a fragment/hash component.
|
|
7619
|
+
* @return `true` if hash is present, `false` otherwise.
|
|
7620
|
+
*/
|
|
7104
7621
|
[[nodiscard]] constexpr bool has_hash() const noexcept override;
|
|
7105
|
-
|
|
7622
|
+
|
|
7623
|
+
/**
|
|
7624
|
+
* Checks if the URL has a query/search component.
|
|
7625
|
+
* @return `true` if query is present, `false` otherwise.
|
|
7626
|
+
*/
|
|
7106
7627
|
[[nodiscard]] constexpr bool has_search() const noexcept override;
|
|
7107
7628
|
|
|
7629
|
+
/**
|
|
7630
|
+
* Removes the port from the URL.
|
|
7631
|
+
*/
|
|
7108
7632
|
inline void clear_port();
|
|
7633
|
+
|
|
7634
|
+
/**
|
|
7635
|
+
* Removes the hash/fragment from the URL.
|
|
7636
|
+
*/
|
|
7109
7637
|
inline void clear_hash();
|
|
7638
|
+
|
|
7639
|
+
/**
|
|
7640
|
+
* Removes the query/search string from the URL.
|
|
7641
|
+
*/
|
|
7110
7642
|
inline void clear_search() override;
|
|
7111
7643
|
|
|
7112
7644
|
private:
|
|
7113
7645
|
// helper methods
|
|
7114
7646
|
friend void helpers::strip_trailing_spaces_from_opaque_path<url_aggregator>(
|
|
7115
|
-
url_aggregator &url)
|
|
7647
|
+
url_aggregator &url);
|
|
7116
7648
|
// parse_url methods
|
|
7117
7649
|
friend url_aggregator parser::parse_url<url_aggregator>(
|
|
7118
7650
|
std::string_view, const url_aggregator *);
|
|
@@ -7121,12 +7653,15 @@ struct url_aggregator : url_base {
|
|
|
7121
7653
|
std::string_view, const url_aggregator *);
|
|
7122
7654
|
friend url_aggregator parser::parse_url_impl<url_aggregator, false>(
|
|
7123
7655
|
std::string_view, const url_aggregator *);
|
|
7656
|
+
|
|
7657
|
+
#if ADA_INCLUDE_URL_PATTERN
|
|
7124
7658
|
// url_pattern methods
|
|
7125
7659
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
7126
7660
|
friend tl::expected<url_pattern<regex_provider>, errors>
|
|
7127
|
-
parse_url_pattern_impl(
|
|
7128
|
-
|
|
7129
|
-
|
|
7661
|
+
parse_url_pattern_impl(
|
|
7662
|
+
std::variant<std::string_view, url_pattern_init> &&input,
|
|
7663
|
+
const std::string_view *base_url, const url_pattern_options *options);
|
|
7664
|
+
#endif // ADA_INCLUDE_URL_PATTERN
|
|
7130
7665
|
|
|
7131
7666
|
std::string buffer{};
|
|
7132
7667
|
url_components components{};
|
|
@@ -7138,7 +7673,7 @@ struct url_aggregator : url_base {
|
|
|
7138
7673
|
*/
|
|
7139
7674
|
[[nodiscard]] ada_really_inline bool is_at_path() const noexcept;
|
|
7140
7675
|
|
|
7141
|
-
inline void add_authority_slashes_if_needed()
|
|
7676
|
+
inline void add_authority_slashes_if_needed();
|
|
7142
7677
|
|
|
7143
7678
|
/**
|
|
7144
7679
|
* To optimize performance, you may indicate how much memory to allocate
|
|
@@ -7146,10 +7681,10 @@ struct url_aggregator : url_base {
|
|
|
7146
7681
|
*/
|
|
7147
7682
|
constexpr void reserve(uint32_t capacity);
|
|
7148
7683
|
|
|
7149
|
-
ada_really_inline size_t parse_port(
|
|
7150
|
-
|
|
7684
|
+
ada_really_inline size_t parse_port(std::string_view view,
|
|
7685
|
+
bool check_trailing_content) override;
|
|
7151
7686
|
|
|
7152
|
-
ada_really_inline size_t parse_port(std::string_view view)
|
|
7687
|
+
ada_really_inline size_t parse_port(std::string_view view) override {
|
|
7153
7688
|
return this->parse_port(view, false);
|
|
7154
7689
|
}
|
|
7155
7690
|
|
|
@@ -7214,16 +7749,16 @@ struct url_aggregator : url_base {
|
|
|
7214
7749
|
std::string_view input);
|
|
7215
7750
|
[[nodiscard]] constexpr bool has_authority() const noexcept;
|
|
7216
7751
|
constexpr void set_protocol_as_file();
|
|
7217
|
-
inline void set_scheme(std::string_view new_scheme)
|
|
7752
|
+
inline void set_scheme(std::string_view new_scheme);
|
|
7218
7753
|
/**
|
|
7219
7754
|
* Fast function to set the scheme from a view with a colon in the
|
|
7220
7755
|
* buffer, does not change type.
|
|
7221
7756
|
*/
|
|
7222
7757
|
inline void set_scheme_from_view_with_colon(
|
|
7223
|
-
std::string_view new_scheme_with_colon)
|
|
7224
|
-
inline void copy_scheme(const url_aggregator &u)
|
|
7758
|
+
std::string_view new_scheme_with_colon);
|
|
7759
|
+
inline void copy_scheme(const url_aggregator &u);
|
|
7225
7760
|
|
|
7226
|
-
inline void update_host_to_base_host(const std::string_view input)
|
|
7761
|
+
inline void update_host_to_base_host(const std::string_view input);
|
|
7227
7762
|
|
|
7228
7763
|
}; // url_aggregator
|
|
7229
7764
|
|
|
@@ -8015,7 +8550,7 @@ url_aggregator::get_components() const noexcept {
|
|
|
8015
8550
|
components.protocol_end + 2) == "//";
|
|
8016
8551
|
}
|
|
8017
8552
|
|
|
8018
|
-
inline void ada::url_aggregator::add_authority_slashes_if_needed()
|
|
8553
|
+
inline void ada::url_aggregator::add_authority_slashes_if_needed() {
|
|
8019
8554
|
ada_log("url_aggregator::add_authority_slashes_if_needed");
|
|
8020
8555
|
ADA_ASSERT_TRUE(validate());
|
|
8021
8556
|
// Protocol setter will insert `http:` to the URL. It is up to hostname setter
|
|
@@ -8052,7 +8587,7 @@ constexpr bool url_aggregator::has_non_empty_username() const noexcept {
|
|
|
8052
8587
|
|
|
8053
8588
|
constexpr bool url_aggregator::has_non_empty_password() const noexcept {
|
|
8054
8589
|
ada_log("url_aggregator::has_non_empty_password");
|
|
8055
|
-
return components.host_start
|
|
8590
|
+
return components.host_start > components.username_end;
|
|
8056
8591
|
}
|
|
8057
8592
|
|
|
8058
8593
|
constexpr bool url_aggregator::has_password() const noexcept {
|
|
@@ -8124,8 +8659,8 @@ constexpr bool url_aggregator::has_port() const noexcept {
|
|
|
8124
8659
|
return buffer;
|
|
8125
8660
|
}
|
|
8126
8661
|
|
|
8127
|
-
ada_really_inline size_t
|
|
8128
|
-
|
|
8662
|
+
ada_really_inline size_t
|
|
8663
|
+
url_aggregator::parse_port(std::string_view view, bool check_trailing_content) {
|
|
8129
8664
|
ada_log("url_aggregator::parse_port('", view, "') ", view.size());
|
|
8130
8665
|
if (!view.empty() && view[0] == '-') {
|
|
8131
8666
|
ada_log("parse_port: view[0] == '0' && view.size() > 1");
|
|
@@ -8363,8 +8898,8 @@ constexpr void url_aggregator::set_protocol_as_file() {
|
|
|
8363
8898
|
return true;
|
|
8364
8899
|
}
|
|
8365
8900
|
|
|
8366
|
-
[[nodiscard]] constexpr std::string_view url_aggregator::get_pathname()
|
|
8367
|
-
|
|
8901
|
+
[[nodiscard]] constexpr std::string_view url_aggregator::get_pathname() const
|
|
8902
|
+
ada_lifetime_bound {
|
|
8368
8903
|
ada_log("url_aggregator::get_pathname pathname_start = ",
|
|
8369
8904
|
components.pathname_start, " buffer.size() = ", buffer.size(),
|
|
8370
8905
|
" components.search_start = ", components.search_start,
|
|
@@ -8383,8 +8918,7 @@ inline std::ostream &operator<<(std::ostream &out,
|
|
|
8383
8918
|
return out << u.to_string();
|
|
8384
8919
|
}
|
|
8385
8920
|
|
|
8386
|
-
void url_aggregator::update_host_to_base_host(
|
|
8387
|
-
const std::string_view input) noexcept {
|
|
8921
|
+
void url_aggregator::update_host_to_base_host(const std::string_view input) {
|
|
8388
8922
|
ada_log("url_aggregator::update_host_to_base_host ", input);
|
|
8389
8923
|
ADA_ASSERT_TRUE(validate());
|
|
8390
8924
|
ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
|
|
@@ -8411,7 +8945,13 @@ void url_aggregator::update_host_to_base_host(
|
|
|
8411
8945
|
/* begin file include/ada/url_search_params.h */
|
|
8412
8946
|
/**
|
|
8413
8947
|
* @file url_search_params.h
|
|
8414
|
-
* @brief
|
|
8948
|
+
* @brief URL query string parameter manipulation.
|
|
8949
|
+
*
|
|
8950
|
+
* This file provides the `url_search_params` class for parsing, manipulating,
|
|
8951
|
+
* and serializing URL query strings. It implements the URLSearchParams API
|
|
8952
|
+
* from the WHATWG URL Standard.
|
|
8953
|
+
*
|
|
8954
|
+
* @see https://url.spec.whatwg.org/#interface-urlsearchparams
|
|
8415
8955
|
*/
|
|
8416
8956
|
#ifndef ADA_URL_SEARCH_PARAMS_H
|
|
8417
8957
|
#define ADA_URL_SEARCH_PARAMS_H
|
|
@@ -8423,37 +8963,51 @@ void url_aggregator::update_host_to_base_host(
|
|
|
8423
8963
|
|
|
8424
8964
|
namespace ada {
|
|
8425
8965
|
|
|
8966
|
+
/**
|
|
8967
|
+
* @brief Iterator types for url_search_params iteration.
|
|
8968
|
+
*/
|
|
8426
8969
|
enum class url_search_params_iter_type {
|
|
8427
|
-
KEYS,
|
|
8428
|
-
VALUES,
|
|
8429
|
-
ENTRIES,
|
|
8970
|
+
KEYS, /**< Iterate over parameter keys only */
|
|
8971
|
+
VALUES, /**< Iterate over parameter values only */
|
|
8972
|
+
ENTRIES, /**< Iterate over key-value pairs */
|
|
8430
8973
|
};
|
|
8431
8974
|
|
|
8432
8975
|
template <typename T, url_search_params_iter_type Type>
|
|
8433
8976
|
struct url_search_params_iter;
|
|
8434
8977
|
|
|
8978
|
+
/** Type alias for a key-value pair of string views. */
|
|
8435
8979
|
typedef std::pair<std::string_view, std::string_view> key_value_view_pair;
|
|
8436
8980
|
|
|
8981
|
+
/** Iterator over search parameter keys. */
|
|
8437
8982
|
using url_search_params_keys_iter =
|
|
8438
8983
|
url_search_params_iter<std::string_view, url_search_params_iter_type::KEYS>;
|
|
8984
|
+
/** Iterator over search parameter values. */
|
|
8439
8985
|
using url_search_params_values_iter =
|
|
8440
8986
|
url_search_params_iter<std::string_view,
|
|
8441
8987
|
url_search_params_iter_type::VALUES>;
|
|
8988
|
+
/** Iterator over search parameter key-value pairs. */
|
|
8442
8989
|
using url_search_params_entries_iter =
|
|
8443
8990
|
url_search_params_iter<key_value_view_pair,
|
|
8444
8991
|
url_search_params_iter_type::ENTRIES>;
|
|
8445
8992
|
|
|
8446
8993
|
/**
|
|
8447
|
-
*
|
|
8448
|
-
*
|
|
8994
|
+
* @brief Class for parsing and manipulating URL query strings.
|
|
8995
|
+
*
|
|
8996
|
+
* The `url_search_params` class provides methods to parse, modify, and
|
|
8997
|
+
* serialize URL query parameters (the part after '?' in a URL). It handles
|
|
8998
|
+
* percent-encoding and decoding automatically.
|
|
8999
|
+
*
|
|
9000
|
+
* All string inputs must be valid UTF-8. The caller is responsible for
|
|
9001
|
+
* ensuring UTF-8 validity.
|
|
9002
|
+
*
|
|
8449
9003
|
* @see https://url.spec.whatwg.org/#interface-urlsearchparams
|
|
8450
9004
|
*/
|
|
8451
9005
|
struct url_search_params {
|
|
8452
9006
|
url_search_params() = default;
|
|
8453
9007
|
|
|
8454
9008
|
/**
|
|
8455
|
-
*
|
|
8456
|
-
*
|
|
9009
|
+
* Constructs url_search_params by parsing a query string.
|
|
9010
|
+
* @param input A query string (with or without leading '?'). Must be UTF-8.
|
|
8457
9011
|
*/
|
|
8458
9012
|
explicit url_search_params(const std::string_view input) {
|
|
8459
9013
|
initialize(input);
|
|
@@ -8465,75 +9019,106 @@ struct url_search_params {
|
|
|
8465
9019
|
url_search_params &operator=(const url_search_params &u) = default;
|
|
8466
9020
|
~url_search_params() = default;
|
|
8467
9021
|
|
|
9022
|
+
/**
|
|
9023
|
+
* Returns the number of key-value pairs.
|
|
9024
|
+
* @return The total count of parameters.
|
|
9025
|
+
*/
|
|
8468
9026
|
[[nodiscard]] inline size_t size() const noexcept;
|
|
8469
9027
|
|
|
8470
9028
|
/**
|
|
8471
|
-
*
|
|
9029
|
+
* Appends a new key-value pair to the parameter list.
|
|
9030
|
+
* @param key The parameter name (must be valid UTF-8).
|
|
9031
|
+
* @param value The parameter value (must be valid UTF-8).
|
|
8472
9032
|
* @see https://url.spec.whatwg.org/#dom-urlsearchparams-append
|
|
8473
9033
|
*/
|
|
8474
9034
|
inline void append(std::string_view key, std::string_view value);
|
|
8475
9035
|
|
|
8476
9036
|
/**
|
|
9037
|
+
* Removes all pairs with the given key.
|
|
9038
|
+
* @param key The parameter name to remove.
|
|
8477
9039
|
* @see https://url.spec.whatwg.org/#dom-urlsearchparams-delete
|
|
8478
9040
|
*/
|
|
8479
9041
|
inline void remove(std::string_view key);
|
|
9042
|
+
|
|
9043
|
+
/**
|
|
9044
|
+
* Removes all pairs with the given key and value.
|
|
9045
|
+
* @param key The parameter name.
|
|
9046
|
+
* @param value The parameter value to match.
|
|
9047
|
+
*/
|
|
8480
9048
|
inline void remove(std::string_view key, std::string_view value);
|
|
8481
9049
|
|
|
8482
9050
|
/**
|
|
9051
|
+
* Returns the value of the first pair with the given key.
|
|
9052
|
+
* @param key The parameter name to search for.
|
|
9053
|
+
* @return The value if found, or std::nullopt if not present.
|
|
8483
9054
|
* @see https://url.spec.whatwg.org/#dom-urlsearchparams-get
|
|
8484
9055
|
*/
|
|
8485
9056
|
inline std::optional<std::string_view> get(std::string_view key);
|
|
8486
9057
|
|
|
8487
9058
|
/**
|
|
9059
|
+
* Returns all values for pairs with the given key.
|
|
9060
|
+
* @param key The parameter name to search for.
|
|
9061
|
+
* @return A vector of all matching values (may be empty).
|
|
8488
9062
|
* @see https://url.spec.whatwg.org/#dom-urlsearchparams-getall
|
|
8489
9063
|
*/
|
|
8490
9064
|
inline std::vector<std::string> get_all(std::string_view key);
|
|
8491
9065
|
|
|
8492
9066
|
/**
|
|
9067
|
+
* Checks if any pair has the given key.
|
|
9068
|
+
* @param key The parameter name to search for.
|
|
9069
|
+
* @return `true` if at least one pair has this key.
|
|
8493
9070
|
* @see https://url.spec.whatwg.org/#dom-urlsearchparams-has
|
|
8494
9071
|
*/
|
|
8495
9072
|
inline bool has(std::string_view key) noexcept;
|
|
9073
|
+
|
|
9074
|
+
/**
|
|
9075
|
+
* Checks if any pair matches the given key and value.
|
|
9076
|
+
* @param key The parameter name to search for.
|
|
9077
|
+
* @param value The parameter value to match.
|
|
9078
|
+
* @return `true` if a matching pair exists.
|
|
9079
|
+
*/
|
|
8496
9080
|
inline bool has(std::string_view key, std::string_view value) noexcept;
|
|
8497
9081
|
|
|
8498
9082
|
/**
|
|
8499
|
-
*
|
|
9083
|
+
* Sets a parameter value, replacing any existing pairs with the same key.
|
|
9084
|
+
* @param key The parameter name (must be valid UTF-8).
|
|
9085
|
+
* @param value The parameter value (must be valid UTF-8).
|
|
8500
9086
|
* @see https://url.spec.whatwg.org/#dom-urlsearchparams-set
|
|
8501
9087
|
*/
|
|
8502
9088
|
inline void set(std::string_view key, std::string_view value);
|
|
8503
9089
|
|
|
8504
9090
|
/**
|
|
9091
|
+
* Sorts all key-value pairs by their keys using code unit comparison.
|
|
8505
9092
|
* @see https://url.spec.whatwg.org/#dom-urlsearchparams-sort
|
|
8506
9093
|
*/
|
|
8507
9094
|
inline void sort();
|
|
8508
9095
|
|
|
8509
9096
|
/**
|
|
9097
|
+
* Serializes the parameters to a query string (without leading '?').
|
|
9098
|
+
* @return The percent-encoded query string.
|
|
8510
9099
|
* @see https://url.spec.whatwg.org/#urlsearchparams-stringification-behavior
|
|
8511
9100
|
*/
|
|
8512
9101
|
inline std::string to_string() const;
|
|
8513
9102
|
|
|
8514
9103
|
/**
|
|
8515
|
-
* Returns
|
|
8516
|
-
*
|
|
8517
|
-
*
|
|
8518
|
-
*
|
|
8519
|
-
* @see https://url.spec.whatwg.org/#interface-urlsearchparams
|
|
9104
|
+
* Returns an iterator over all parameter keys.
|
|
9105
|
+
* Keys may repeat if there are duplicate parameters.
|
|
9106
|
+
* @return An iterator yielding string_view keys.
|
|
9107
|
+
* @note The iterator is invalidated if this object is modified.
|
|
8520
9108
|
*/
|
|
8521
9109
|
inline url_search_params_keys_iter get_keys();
|
|
8522
9110
|
|
|
8523
9111
|
/**
|
|
8524
|
-
* Returns
|
|
8525
|
-
*
|
|
8526
|
-
*
|
|
8527
|
-
* @see https://url.spec.whatwg.org/#interface-urlsearchparams
|
|
9112
|
+
* Returns an iterator over all parameter values.
|
|
9113
|
+
* @return An iterator yielding string_view values.
|
|
9114
|
+
* @note The iterator is invalidated if this object is modified.
|
|
8528
9115
|
*/
|
|
8529
9116
|
inline url_search_params_values_iter get_values();
|
|
8530
9117
|
|
|
8531
9118
|
/**
|
|
8532
|
-
* Returns
|
|
8533
|
-
*
|
|
8534
|
-
* The
|
|
8535
|
-
* iterator must be freed when you're done with it.
|
|
8536
|
-
* @see https://url.spec.whatwg.org/#interface-urlsearchparams
|
|
9119
|
+
* Returns an iterator over all key-value pairs.
|
|
9120
|
+
* @return An iterator yielding key-value pair views.
|
|
9121
|
+
* @note The iterator is invalidated if this object is modified.
|
|
8537
9122
|
*/
|
|
8538
9123
|
inline url_search_params_entries_iter get_entries();
|
|
8539
9124
|
|
|
@@ -8570,8 +9155,13 @@ struct url_search_params {
|
|
|
8570
9155
|
}; // url_search_params
|
|
8571
9156
|
|
|
8572
9157
|
/**
|
|
8573
|
-
*
|
|
8574
|
-
*
|
|
9158
|
+
* @brief JavaScript-style iterator for url_search_params.
|
|
9159
|
+
*
|
|
9160
|
+
* Provides a `next()` method that returns successive values until exhausted.
|
|
9161
|
+
* This matches the iterator pattern used in the Web Platform.
|
|
9162
|
+
*
|
|
9163
|
+
* @tparam T The type of value returned by the iterator.
|
|
9164
|
+
* @tparam Type The type of iteration (KEYS, VALUES, or ENTRIES).
|
|
8575
9165
|
*
|
|
8576
9166
|
* @see https://webidl.spec.whatwg.org/#idl-iterable
|
|
8577
9167
|
*/
|
|
@@ -8586,10 +9176,15 @@ struct url_search_params_iter {
|
|
|
8586
9176
|
~url_search_params_iter() = default;
|
|
8587
9177
|
|
|
8588
9178
|
/**
|
|
8589
|
-
*
|
|
9179
|
+
* Returns the next value in the iteration sequence.
|
|
9180
|
+
* @return The next value, or std::nullopt if iteration is complete.
|
|
8590
9181
|
*/
|
|
8591
9182
|
inline std::optional<T> next();
|
|
8592
9183
|
|
|
9184
|
+
/**
|
|
9185
|
+
* Checks if more values are available.
|
|
9186
|
+
* @return `true` if `next()` will return a value, `false` if exhausted.
|
|
9187
|
+
*/
|
|
8593
9188
|
inline bool has_next() const;
|
|
8594
9189
|
|
|
8595
9190
|
private:
|
|
@@ -8895,6 +9490,7 @@ url_search_params_entries_iter::next() {
|
|
|
8895
9490
|
|
|
8896
9491
|
#endif // ADA_URL_SEARCH_PARAMS_INL_H
|
|
8897
9492
|
/* end file include/ada/url_search_params-inl.h */
|
|
9493
|
+
|
|
8898
9494
|
/* begin file include/ada/url_pattern-inl.h */
|
|
8899
9495
|
/**
|
|
8900
9496
|
* @file url_pattern-inl.h
|
|
@@ -8908,6 +9504,7 @@ url_search_params_entries_iter::next() {
|
|
|
8908
9504
|
#include <string_view>
|
|
8909
9505
|
#include <utility>
|
|
8910
9506
|
|
|
9507
|
+
#if ADA_INCLUDE_URL_PATTERN
|
|
8911
9508
|
namespace ada {
|
|
8912
9509
|
|
|
8913
9510
|
inline bool url_pattern_init::operator==(const url_pattern_init& other) const {
|
|
@@ -8940,10 +9537,8 @@ url_pattern_component<regex_provider>::create_component_match_result(
|
|
|
8940
9537
|
// says we should start from 1. This case is handled by the
|
|
8941
9538
|
// std_regex_provider.
|
|
8942
9539
|
for (size_t index = 0; index < exec_result.size(); index++) {
|
|
8943
|
-
result.groups.
|
|
8944
|
-
|
|
8945
|
-
std::move(exec_result[index]),
|
|
8946
|
-
});
|
|
9540
|
+
result.groups.emplace(group_name_list[index],
|
|
9541
|
+
std::move(exec_result[index]));
|
|
8947
9542
|
}
|
|
8948
9543
|
return result;
|
|
8949
9544
|
}
|
|
@@ -9049,43 +9644,113 @@ url_pattern_component<regex_provider>::compile(
|
|
|
9049
9644
|
return tl::unexpected(part_list.error());
|
|
9050
9645
|
}
|
|
9051
9646
|
|
|
9052
|
-
//
|
|
9053
|
-
|
|
9647
|
+
// Detect pattern type early to potentially skip expensive regex compilation
|
|
9648
|
+
const auto has_regexp = [](const auto& part) { return part.is_regexp(); };
|
|
9649
|
+
const bool has_regexp_groups = std::ranges::any_of(*part_list, has_regexp);
|
|
9650
|
+
|
|
9651
|
+
url_pattern_component_type component_type =
|
|
9652
|
+
url_pattern_component_type::REGEXP;
|
|
9653
|
+
std::string exact_match_value{};
|
|
9654
|
+
|
|
9655
|
+
if (part_list->empty()) {
|
|
9656
|
+
component_type = url_pattern_component_type::EMPTY;
|
|
9657
|
+
} else if (part_list->size() == 1) {
|
|
9658
|
+
const auto& part = (*part_list)[0];
|
|
9659
|
+
if (part.type == url_pattern_part_type::FIXED_TEXT &&
|
|
9660
|
+
part.modifier == url_pattern_part_modifier::none &&
|
|
9661
|
+
!options.ignore_case) {
|
|
9662
|
+
component_type = url_pattern_component_type::EXACT_MATCH;
|
|
9663
|
+
exact_match_value = part.value;
|
|
9664
|
+
} else if (part.type == url_pattern_part_type::FULL_WILDCARD &&
|
|
9665
|
+
part.modifier == url_pattern_part_modifier::none &&
|
|
9666
|
+
part.prefix.empty() && part.suffix.empty()) {
|
|
9667
|
+
component_type = url_pattern_component_type::FULL_WILDCARD;
|
|
9668
|
+
}
|
|
9669
|
+
}
|
|
9670
|
+
|
|
9671
|
+
// For simple patterns, skip regex generation and compilation entirely
|
|
9672
|
+
if (component_type != url_pattern_component_type::REGEXP) {
|
|
9673
|
+
auto pattern_string =
|
|
9674
|
+
url_pattern_helpers::generate_pattern_string(*part_list, options);
|
|
9675
|
+
// For FULL_WILDCARD, we need the group name from
|
|
9676
|
+
// generate_regular_expression
|
|
9677
|
+
std::vector<std::string> name_list;
|
|
9678
|
+
if (component_type == url_pattern_component_type::FULL_WILDCARD &&
|
|
9679
|
+
!part_list->empty()) {
|
|
9680
|
+
name_list.push_back((*part_list)[0].name);
|
|
9681
|
+
}
|
|
9682
|
+
return url_pattern_component<regex_provider>(
|
|
9683
|
+
std::move(pattern_string), typename regex_provider::regex_type{},
|
|
9684
|
+
std::move(name_list), has_regexp_groups, component_type,
|
|
9685
|
+
std::move(exact_match_value));
|
|
9686
|
+
}
|
|
9687
|
+
|
|
9688
|
+
// Generate regex for complex patterns
|
|
9054
9689
|
auto [regular_expression_string, name_list] =
|
|
9055
9690
|
url_pattern_helpers::generate_regular_expression_and_name_list(*part_list,
|
|
9056
9691
|
options);
|
|
9057
|
-
|
|
9058
|
-
ada_log("regular expression string: ", regular_expression_string);
|
|
9059
|
-
|
|
9060
|
-
// Let pattern string be the result of running generate a pattern
|
|
9061
|
-
// string given part list and options.
|
|
9062
9692
|
auto pattern_string =
|
|
9063
9693
|
url_pattern_helpers::generate_pattern_string(*part_list, options);
|
|
9064
9694
|
|
|
9065
|
-
// Let regular expression be RegExpCreate(regular expression string,
|
|
9066
|
-
// flags). If this throws an exception, catch it, and throw a
|
|
9067
|
-
// TypeError.
|
|
9068
9695
|
std::optional<typename regex_provider::regex_type> regular_expression =
|
|
9069
9696
|
regex_provider::create_instance(regular_expression_string,
|
|
9070
9697
|
options.ignore_case);
|
|
9071
|
-
|
|
9072
9698
|
if (!regular_expression) {
|
|
9073
9699
|
return tl::unexpected(errors::type_error);
|
|
9074
9700
|
}
|
|
9075
9701
|
|
|
9076
|
-
// For each part of part list:
|
|
9077
|
-
// - If part’s type is "regexp", then set has regexp groups to true.
|
|
9078
|
-
const auto has_regexp = [](const auto& part) { return part.is_regexp(); };
|
|
9079
|
-
const bool has_regexp_groups = std::ranges::any_of(*part_list, has_regexp);
|
|
9080
|
-
|
|
9081
|
-
ada_log("has regexp groups: ", has_regexp_groups);
|
|
9082
|
-
|
|
9083
|
-
// Return a new component whose pattern string is pattern string, regular
|
|
9084
|
-
// expression is regular expression, group name list is name list, and has
|
|
9085
|
-
// regexp groups is has regexp groups.
|
|
9086
9702
|
return url_pattern_component<regex_provider>(
|
|
9087
9703
|
std::move(pattern_string), std::move(*regular_expression),
|
|
9088
|
-
std::move(name_list), has_regexp_groups
|
|
9704
|
+
std::move(name_list), has_regexp_groups, component_type,
|
|
9705
|
+
std::move(exact_match_value));
|
|
9706
|
+
}
|
|
9707
|
+
|
|
9708
|
+
template <url_pattern_regex::regex_concept regex_provider>
|
|
9709
|
+
bool url_pattern_component<regex_provider>::fast_test(
|
|
9710
|
+
std::string_view input) const noexcept {
|
|
9711
|
+
// Fast path for simple patterns - avoid regex evaluation
|
|
9712
|
+
// Using if-else for better branch prediction on common cases
|
|
9713
|
+
if (type == url_pattern_component_type::FULL_WILDCARD) {
|
|
9714
|
+
return true;
|
|
9715
|
+
}
|
|
9716
|
+
if (type == url_pattern_component_type::EXACT_MATCH) {
|
|
9717
|
+
return input == exact_match_value;
|
|
9718
|
+
}
|
|
9719
|
+
if (type == url_pattern_component_type::EMPTY) {
|
|
9720
|
+
return input.empty();
|
|
9721
|
+
}
|
|
9722
|
+
// type == REGEXP
|
|
9723
|
+
return regex_provider::regex_match(input, regexp);
|
|
9724
|
+
}
|
|
9725
|
+
|
|
9726
|
+
template <url_pattern_regex::regex_concept regex_provider>
|
|
9727
|
+
std::optional<std::vector<std::optional<std::string>>>
|
|
9728
|
+
url_pattern_component<regex_provider>::fast_match(
|
|
9729
|
+
std::string_view input) const {
|
|
9730
|
+
// Handle each type directly without redundant checks
|
|
9731
|
+
if (type == url_pattern_component_type::FULL_WILDCARD) {
|
|
9732
|
+
// FULL_WILDCARD always matches - capture the input (even if empty)
|
|
9733
|
+
// If there's no group name, return empty groups
|
|
9734
|
+
if (group_name_list.empty()) {
|
|
9735
|
+
return std::vector<std::optional<std::string>>{};
|
|
9736
|
+
}
|
|
9737
|
+
// Capture the matched input (including empty strings)
|
|
9738
|
+
return std::vector<std::optional<std::string>>{std::string(input)};
|
|
9739
|
+
}
|
|
9740
|
+
if (type == url_pattern_component_type::EXACT_MATCH) {
|
|
9741
|
+
if (input == exact_match_value) {
|
|
9742
|
+
return std::vector<std::optional<std::string>>{};
|
|
9743
|
+
}
|
|
9744
|
+
return std::nullopt;
|
|
9745
|
+
}
|
|
9746
|
+
if (type == url_pattern_component_type::EMPTY) {
|
|
9747
|
+
if (input.empty()) {
|
|
9748
|
+
return std::vector<std::optional<std::string>>{};
|
|
9749
|
+
}
|
|
9750
|
+
return std::nullopt;
|
|
9751
|
+
}
|
|
9752
|
+
// type == REGEXP - use regex
|
|
9753
|
+
return regex_provider::regex_search(input, regexp);
|
|
9089
9754
|
}
|
|
9090
9755
|
|
|
9091
9756
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
@@ -9096,18 +9761,88 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::exec(
|
|
|
9096
9761
|
return match(input, base_url);
|
|
9097
9762
|
}
|
|
9098
9763
|
|
|
9764
|
+
template <url_pattern_regex::regex_concept regex_provider>
|
|
9765
|
+
bool url_pattern<regex_provider>::test_components(
|
|
9766
|
+
std::string_view protocol, std::string_view username,
|
|
9767
|
+
std::string_view password, std::string_view hostname, std::string_view port,
|
|
9768
|
+
std::string_view pathname, std::string_view search,
|
|
9769
|
+
std::string_view hash) const {
|
|
9770
|
+
return protocol_component.fast_test(protocol) &&
|
|
9771
|
+
username_component.fast_test(username) &&
|
|
9772
|
+
password_component.fast_test(password) &&
|
|
9773
|
+
hostname_component.fast_test(hostname) &&
|
|
9774
|
+
port_component.fast_test(port) &&
|
|
9775
|
+
pathname_component.fast_test(pathname) &&
|
|
9776
|
+
search_component.fast_test(search) && hash_component.fast_test(hash);
|
|
9777
|
+
}
|
|
9778
|
+
|
|
9099
9779
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
9100
9780
|
result<bool> url_pattern<regex_provider>::test(
|
|
9101
|
-
const url_pattern_input& input, const std::string_view*
|
|
9102
|
-
//
|
|
9103
|
-
|
|
9104
|
-
|
|
9105
|
-
|
|
9106
|
-
|
|
9107
|
-
|
|
9108
|
-
|
|
9781
|
+
const url_pattern_input& input, const std::string_view* base_url_string) {
|
|
9782
|
+
// If input is a URLPatternInit
|
|
9783
|
+
if (std::holds_alternative<url_pattern_init>(input)) {
|
|
9784
|
+
if (base_url_string) {
|
|
9785
|
+
return tl::unexpected(errors::type_error);
|
|
9786
|
+
}
|
|
9787
|
+
|
|
9788
|
+
std::string protocol{}, username{}, password{}, hostname{};
|
|
9789
|
+
std::string port{}, pathname{}, search{}, hash{};
|
|
9790
|
+
|
|
9791
|
+
auto apply_result = url_pattern_init::process(
|
|
9792
|
+
std::get<url_pattern_init>(input), url_pattern_init::process_type::url,
|
|
9793
|
+
protocol, username, password, hostname, port, pathname, search, hash);
|
|
9794
|
+
|
|
9795
|
+
if (!apply_result) {
|
|
9796
|
+
return false;
|
|
9797
|
+
}
|
|
9798
|
+
|
|
9799
|
+
std::string_view search_view = *apply_result->search;
|
|
9800
|
+
if (search_view.starts_with("?")) {
|
|
9801
|
+
search_view.remove_prefix(1);
|
|
9802
|
+
}
|
|
9803
|
+
|
|
9804
|
+
return test_components(*apply_result->protocol, *apply_result->username,
|
|
9805
|
+
*apply_result->password, *apply_result->hostname,
|
|
9806
|
+
*apply_result->port, *apply_result->pathname,
|
|
9807
|
+
search_view, *apply_result->hash);
|
|
9808
|
+
}
|
|
9809
|
+
|
|
9810
|
+
// URL string input path
|
|
9811
|
+
result<url_aggregator> base_url;
|
|
9812
|
+
if (base_url_string) {
|
|
9813
|
+
base_url = ada::parse<url_aggregator>(*base_url_string, nullptr);
|
|
9814
|
+
if (!base_url) {
|
|
9815
|
+
return false;
|
|
9816
|
+
}
|
|
9817
|
+
}
|
|
9818
|
+
|
|
9819
|
+
auto url =
|
|
9820
|
+
ada::parse<url_aggregator>(std::get<std::string_view>(input),
|
|
9821
|
+
base_url.has_value() ? &*base_url : nullptr);
|
|
9822
|
+
if (!url) {
|
|
9823
|
+
return false;
|
|
9824
|
+
}
|
|
9825
|
+
|
|
9826
|
+
// Extract components as string_view
|
|
9827
|
+
auto protocol_view = url->get_protocol();
|
|
9828
|
+
if (protocol_view.ends_with(":")) {
|
|
9829
|
+
protocol_view.remove_suffix(1);
|
|
9830
|
+
}
|
|
9831
|
+
|
|
9832
|
+
auto search_view = url->get_search();
|
|
9833
|
+
if (search_view.starts_with("?")) {
|
|
9834
|
+
search_view.remove_prefix(1);
|
|
9835
|
+
}
|
|
9836
|
+
|
|
9837
|
+
auto hash_view = url->get_hash();
|
|
9838
|
+
if (hash_view.starts_with("#")) {
|
|
9839
|
+
hash_view.remove_prefix(1);
|
|
9109
9840
|
}
|
|
9110
|
-
|
|
9841
|
+
|
|
9842
|
+
return test_components(protocol_view, url->get_username(),
|
|
9843
|
+
url->get_password(), url->get_hostname(),
|
|
9844
|
+
url->get_port(), url->get_pathname(), search_view,
|
|
9845
|
+
hash_view);
|
|
9111
9846
|
}
|
|
9112
9847
|
|
|
9113
9848
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
@@ -9221,24 +9956,24 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
|
|
|
9221
9956
|
return std::nullopt;
|
|
9222
9957
|
}
|
|
9223
9958
|
|
|
9224
|
-
// Set protocol to url
|
|
9959
|
+
// Set protocol to url's scheme.
|
|
9225
9960
|
// IMPORTANT: Not documented on the URLPattern spec, but protocol suffix ':'
|
|
9226
9961
|
// is removed. Similar work was done on workerd:
|
|
9227
9962
|
// https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2038
|
|
9228
9963
|
protocol = url->get_protocol().substr(0, url->get_protocol().size() - 1);
|
|
9229
|
-
// Set username to url
|
|
9964
|
+
// Set username to url's username.
|
|
9230
9965
|
username = url->get_username();
|
|
9231
|
-
// Set password to url
|
|
9966
|
+
// Set password to url's password.
|
|
9232
9967
|
password = url->get_password();
|
|
9233
|
-
// Set hostname to url
|
|
9968
|
+
// Set hostname to url's host, serialized, or the empty string if the value
|
|
9234
9969
|
// is null.
|
|
9235
9970
|
hostname = url->get_hostname();
|
|
9236
|
-
// Set port to url
|
|
9971
|
+
// Set port to url's port, serialized, or the empty string if the value is
|
|
9237
9972
|
// null.
|
|
9238
9973
|
port = url->get_port();
|
|
9239
9974
|
// Set pathname to the result of URL path serializing url.
|
|
9240
9975
|
pathname = url->get_pathname();
|
|
9241
|
-
// Set search to url
|
|
9976
|
+
// Set search to url's query or the empty string if the value is null.
|
|
9242
9977
|
// IMPORTANT: Not documented on the URLPattern spec, but search prefix '?'
|
|
9243
9978
|
// is removed. Similar work was done on workerd:
|
|
9244
9979
|
// https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2232
|
|
@@ -9246,7 +9981,7 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
|
|
|
9246
9981
|
auto view = url->get_search();
|
|
9247
9982
|
search = view.starts_with("?") ? url->get_search().substr(1) : view;
|
|
9248
9983
|
}
|
|
9249
|
-
// Set hash to url
|
|
9984
|
+
// Set hash to url's fragment or the empty string if the value is null.
|
|
9250
9985
|
// IMPORTANT: Not documented on the URLPattern spec, but hash prefix '#' is
|
|
9251
9986
|
// removed. Similar work was done on workerd:
|
|
9252
9987
|
// https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2242
|
|
@@ -9256,74 +9991,61 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
|
|
|
9256
9991
|
}
|
|
9257
9992
|
}
|
|
9258
9993
|
|
|
9259
|
-
//
|
|
9260
|
-
//
|
|
9261
|
-
auto protocol_exec_result =
|
|
9262
|
-
regex_provider::regex_search(protocol, protocol_component.regexp);
|
|
9994
|
+
// Use fast_match which skips regex for simple patterns (EMPTY, EXACT_MATCH,
|
|
9995
|
+
// FULL_WILDCARD) and only falls back to regex for complex REGEXP patterns.
|
|
9263
9996
|
|
|
9997
|
+
// Let protocolExecResult be RegExpBuiltinExec(urlPattern's protocol
|
|
9998
|
+
// component's regular expression, protocol).
|
|
9999
|
+
auto protocol_exec_result = protocol_component.fast_match(protocol);
|
|
9264
10000
|
if (!protocol_exec_result) {
|
|
9265
10001
|
return std::nullopt;
|
|
9266
10002
|
}
|
|
9267
10003
|
|
|
9268
|
-
// Let usernameExecResult be RegExpBuiltinExec(urlPattern
|
|
10004
|
+
// Let usernameExecResult be RegExpBuiltinExec(urlPattern's username
|
|
9269
10005
|
// component's regular expression, username).
|
|
9270
|
-
auto username_exec_result =
|
|
9271
|
-
regex_provider::regex_search(username, username_component.regexp);
|
|
9272
|
-
|
|
10006
|
+
auto username_exec_result = username_component.fast_match(username);
|
|
9273
10007
|
if (!username_exec_result) {
|
|
9274
10008
|
return std::nullopt;
|
|
9275
10009
|
}
|
|
9276
10010
|
|
|
9277
|
-
// Let passwordExecResult be RegExpBuiltinExec(urlPattern
|
|
10011
|
+
// Let passwordExecResult be RegExpBuiltinExec(urlPattern's password
|
|
9278
10012
|
// component's regular expression, password).
|
|
9279
|
-
auto password_exec_result =
|
|
9280
|
-
regex_provider::regex_search(password, password_component.regexp);
|
|
9281
|
-
|
|
10013
|
+
auto password_exec_result = password_component.fast_match(password);
|
|
9282
10014
|
if (!password_exec_result) {
|
|
9283
10015
|
return std::nullopt;
|
|
9284
10016
|
}
|
|
9285
10017
|
|
|
9286
|
-
// Let hostnameExecResult be RegExpBuiltinExec(urlPattern
|
|
10018
|
+
// Let hostnameExecResult be RegExpBuiltinExec(urlPattern's hostname
|
|
9287
10019
|
// component's regular expression, hostname).
|
|
9288
|
-
auto hostname_exec_result =
|
|
9289
|
-
regex_provider::regex_search(hostname, hostname_component.regexp);
|
|
9290
|
-
|
|
10020
|
+
auto hostname_exec_result = hostname_component.fast_match(hostname);
|
|
9291
10021
|
if (!hostname_exec_result) {
|
|
9292
10022
|
return std::nullopt;
|
|
9293
10023
|
}
|
|
9294
10024
|
|
|
9295
|
-
// Let portExecResult be RegExpBuiltinExec(urlPattern
|
|
10025
|
+
// Let portExecResult be RegExpBuiltinExec(urlPattern's port component's
|
|
9296
10026
|
// regular expression, port).
|
|
9297
|
-
auto port_exec_result =
|
|
9298
|
-
regex_provider::regex_search(port, port_component.regexp);
|
|
9299
|
-
|
|
10027
|
+
auto port_exec_result = port_component.fast_match(port);
|
|
9300
10028
|
if (!port_exec_result) {
|
|
9301
10029
|
return std::nullopt;
|
|
9302
10030
|
}
|
|
9303
10031
|
|
|
9304
|
-
// Let pathnameExecResult be RegExpBuiltinExec(urlPattern
|
|
10032
|
+
// Let pathnameExecResult be RegExpBuiltinExec(urlPattern's pathname
|
|
9305
10033
|
// component's regular expression, pathname).
|
|
9306
|
-
auto pathname_exec_result =
|
|
9307
|
-
regex_provider::regex_search(pathname, pathname_component.regexp);
|
|
9308
|
-
|
|
10034
|
+
auto pathname_exec_result = pathname_component.fast_match(pathname);
|
|
9309
10035
|
if (!pathname_exec_result) {
|
|
9310
10036
|
return std::nullopt;
|
|
9311
10037
|
}
|
|
9312
10038
|
|
|
9313
|
-
// Let searchExecResult be RegExpBuiltinExec(urlPattern
|
|
10039
|
+
// Let searchExecResult be RegExpBuiltinExec(urlPattern's search component's
|
|
9314
10040
|
// regular expression, search).
|
|
9315
|
-
auto search_exec_result =
|
|
9316
|
-
regex_provider::regex_search(search, search_component.regexp);
|
|
9317
|
-
|
|
10041
|
+
auto search_exec_result = search_component.fast_match(search);
|
|
9318
10042
|
if (!search_exec_result) {
|
|
9319
10043
|
return std::nullopt;
|
|
9320
10044
|
}
|
|
9321
10045
|
|
|
9322
|
-
// Let hashExecResult be RegExpBuiltinExec(urlPattern
|
|
10046
|
+
// Let hashExecResult be RegExpBuiltinExec(urlPattern's hash component's
|
|
9323
10047
|
// regular expression, hash).
|
|
9324
|
-
auto hash_exec_result =
|
|
9325
|
-
regex_provider::regex_search(hash, hash_component.regexp);
|
|
9326
|
-
|
|
10048
|
+
auto hash_exec_result = hash_component.fast_match(hash);
|
|
9327
10049
|
if (!hash_exec_result) {
|
|
9328
10050
|
return std::nullopt;
|
|
9329
10051
|
}
|
|
@@ -9333,42 +10055,42 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
|
|
|
9333
10055
|
// Set result["inputs"] to inputs.
|
|
9334
10056
|
result.inputs = std::move(inputs);
|
|
9335
10057
|
// Set result["protocol"] to the result of creating a component match result
|
|
9336
|
-
// given urlPattern
|
|
10058
|
+
// given urlPattern's protocol component, protocol, and protocolExecResult.
|
|
9337
10059
|
result.protocol = protocol_component.create_component_match_result(
|
|
9338
10060
|
std::move(protocol), std::move(*protocol_exec_result));
|
|
9339
10061
|
|
|
9340
10062
|
// Set result["username"] to the result of creating a component match result
|
|
9341
|
-
// given urlPattern
|
|
10063
|
+
// given urlPattern's username component, username, and usernameExecResult.
|
|
9342
10064
|
result.username = username_component.create_component_match_result(
|
|
9343
10065
|
std::move(username), std::move(*username_exec_result));
|
|
9344
10066
|
|
|
9345
10067
|
// Set result["password"] to the result of creating a component match result
|
|
9346
|
-
// given urlPattern
|
|
10068
|
+
// given urlPattern's password component, password, and passwordExecResult.
|
|
9347
10069
|
result.password = password_component.create_component_match_result(
|
|
9348
10070
|
std::move(password), std::move(*password_exec_result));
|
|
9349
10071
|
|
|
9350
10072
|
// Set result["hostname"] to the result of creating a component match result
|
|
9351
|
-
// given urlPattern
|
|
10073
|
+
// given urlPattern's hostname component, hostname, and hostnameExecResult.
|
|
9352
10074
|
result.hostname = hostname_component.create_component_match_result(
|
|
9353
10075
|
std::move(hostname), std::move(*hostname_exec_result));
|
|
9354
10076
|
|
|
9355
10077
|
// Set result["port"] to the result of creating a component match result given
|
|
9356
|
-
// urlPattern
|
|
10078
|
+
// urlPattern's port component, port, and portExecResult.
|
|
9357
10079
|
result.port = port_component.create_component_match_result(
|
|
9358
10080
|
std::move(port), std::move(*port_exec_result));
|
|
9359
10081
|
|
|
9360
10082
|
// Set result["pathname"] to the result of creating a component match result
|
|
9361
|
-
// given urlPattern
|
|
10083
|
+
// given urlPattern's pathname component, pathname, and pathnameExecResult.
|
|
9362
10084
|
result.pathname = pathname_component.create_component_match_result(
|
|
9363
10085
|
std::move(pathname), std::move(*pathname_exec_result));
|
|
9364
10086
|
|
|
9365
10087
|
// Set result["search"] to the result of creating a component match result
|
|
9366
|
-
// given urlPattern
|
|
10088
|
+
// given urlPattern's search component, search, and searchExecResult.
|
|
9367
10089
|
result.search = search_component.create_component_match_result(
|
|
9368
10090
|
std::move(search), std::move(*search_exec_result));
|
|
9369
10091
|
|
|
9370
10092
|
// Set result["hash"] to the result of creating a component match result given
|
|
9371
|
-
// urlPattern
|
|
10093
|
+
// urlPattern's hash component, hash, and hashExecResult.
|
|
9372
10094
|
result.hash = hash_component.create_component_match_result(
|
|
9373
10095
|
std::move(hash), std::move(*hash_exec_result));
|
|
9374
10096
|
|
|
@@ -9376,7 +10098,7 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
|
|
|
9376
10098
|
}
|
|
9377
10099
|
|
|
9378
10100
|
} // namespace ada
|
|
9379
|
-
|
|
10101
|
+
#endif // ADA_INCLUDE_URL_PATTERN
|
|
9380
10102
|
#endif
|
|
9381
10103
|
/* end file include/ada/url_pattern-inl.h */
|
|
9382
10104
|
/* begin file include/ada/url_pattern_helpers-inl.h */
|
|
@@ -9391,8 +10113,9 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
|
|
|
9391
10113
|
#include <string_view>
|
|
9392
10114
|
|
|
9393
10115
|
|
|
10116
|
+
#if ADA_INCLUDE_URL_PATTERN
|
|
9394
10117
|
namespace ada::url_pattern_helpers {
|
|
9395
|
-
#
|
|
10118
|
+
#if defined(ADA_TESTING) || defined(ADA_LOGGING)
|
|
9396
10119
|
inline std::string to_string(token_type type) {
|
|
9397
10120
|
switch (type) {
|
|
9398
10121
|
case token_type::INVALID_CHAR:
|
|
@@ -9419,32 +10142,32 @@ inline std::string to_string(token_type type) {
|
|
|
9419
10142
|
ada::unreachable();
|
|
9420
10143
|
}
|
|
9421
10144
|
}
|
|
9422
|
-
#endif // ADA_TESTING
|
|
10145
|
+
#endif // defined(ADA_TESTING) || defined(ADA_LOGGING)
|
|
9423
10146
|
|
|
9424
10147
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
9425
10148
|
constexpr void constructor_string_parser<regex_provider>::rewind() {
|
|
9426
|
-
// Set parser
|
|
10149
|
+
// Set parser's token index to parser's component start.
|
|
9427
10150
|
token_index = component_start;
|
|
9428
|
-
// Set parser
|
|
10151
|
+
// Set parser's token increment to 0.
|
|
9429
10152
|
token_increment = 0;
|
|
9430
10153
|
}
|
|
9431
10154
|
|
|
9432
10155
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
9433
10156
|
constexpr bool constructor_string_parser<regex_provider>::is_hash_prefix() {
|
|
9434
10157
|
// Return the result of running is a non-special pattern char given parser,
|
|
9435
|
-
// parser
|
|
10158
|
+
// parser's token index and "#".
|
|
9436
10159
|
return is_non_special_pattern_char(token_index, '#');
|
|
9437
10160
|
}
|
|
9438
10161
|
|
|
9439
10162
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
9440
10163
|
constexpr bool constructor_string_parser<regex_provider>::is_search_prefix() {
|
|
9441
|
-
// If result of running is a non-special pattern char given parser, parser
|
|
10164
|
+
// If result of running is a non-special pattern char given parser, parser's
|
|
9442
10165
|
// token index and "?" is true, then return true.
|
|
9443
10166
|
if (is_non_special_pattern_char(token_index, '?')) {
|
|
9444
10167
|
return true;
|
|
9445
10168
|
}
|
|
9446
10169
|
|
|
9447
|
-
// If parser
|
|
10170
|
+
// If parser's token list[parser's token index]'s value is not "?", then
|
|
9448
10171
|
// return false.
|
|
9449
10172
|
if (token_list[token_index].value != "?") {
|
|
9450
10173
|
return false;
|
|
@@ -9452,17 +10175,17 @@ constexpr bool constructor_string_parser<regex_provider>::is_search_prefix() {
|
|
|
9452
10175
|
|
|
9453
10176
|
// If previous index is less than 0, then return true.
|
|
9454
10177
|
if (token_index == 0) return true;
|
|
9455
|
-
// Let previous index be parser
|
|
10178
|
+
// Let previous index be parser's token index - 1.
|
|
9456
10179
|
auto previous_index = token_index - 1;
|
|
9457
10180
|
// Let previous token be the result of running get a safe token given parser
|
|
9458
10181
|
// and previous index.
|
|
9459
10182
|
auto previous_token = get_safe_token(previous_index);
|
|
9460
10183
|
ADA_ASSERT_TRUE(previous_token);
|
|
9461
10184
|
// If any of the following are true, then return false:
|
|
9462
|
-
// - previous token
|
|
9463
|
-
// - previous token
|
|
9464
|
-
// - previous token
|
|
9465
|
-
// - previous token
|
|
10185
|
+
// - previous token's type is "name".
|
|
10186
|
+
// - previous token's type is "regexp".
|
|
10187
|
+
// - previous token's type is "close".
|
|
10188
|
+
// - previous token's type is "asterisk".
|
|
9466
10189
|
return !(previous_token->type == token_type::NAME ||
|
|
9467
10190
|
previous_token->type == token_type::REGEXP ||
|
|
9468
10191
|
previous_token->type == token_type::CLOSE ||
|
|
@@ -9477,17 +10200,18 @@ constructor_string_parser<regex_provider>::is_non_special_pattern_char(
|
|
|
9477
10200
|
auto token = get_safe_token(index);
|
|
9478
10201
|
ADA_ASSERT_TRUE(token);
|
|
9479
10202
|
|
|
9480
|
-
// If token
|
|
10203
|
+
// If token's value is not value, then return false.
|
|
9481
10204
|
// TODO: Remove this once we make sure get_safe_token returns a non-empty
|
|
9482
10205
|
// string.
|
|
9483
|
-
if (!token->value.empty() &&
|
|
10206
|
+
if (!token->value.empty() &&
|
|
10207
|
+
static_cast<uint32_t>(token->value[0]) != value) {
|
|
9484
10208
|
return false;
|
|
9485
10209
|
}
|
|
9486
10210
|
|
|
9487
10211
|
// If any of the following are true:
|
|
9488
|
-
// - token
|
|
9489
|
-
// - token
|
|
9490
|
-
// - token
|
|
10212
|
+
// - token's type is "char";
|
|
10213
|
+
// - token's type is "escaped-char"; or
|
|
10214
|
+
// - token's type is "invalid-char",
|
|
9491
10215
|
// - then return true.
|
|
9492
10216
|
return token->type == token_type::CHAR ||
|
|
9493
10217
|
token->type == token_type::ESCAPED_CHAR ||
|
|
@@ -9497,17 +10221,17 @@ constructor_string_parser<regex_provider>::is_non_special_pattern_char(
|
|
|
9497
10221
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
9498
10222
|
constexpr const token*
|
|
9499
10223
|
constructor_string_parser<regex_provider>::get_safe_token(size_t index) const {
|
|
9500
|
-
// If index is less than parser
|
|
10224
|
+
// If index is less than parser's token list's size, then return parser's
|
|
9501
10225
|
// token list[index].
|
|
9502
10226
|
if (index < token_list.size()) [[likely]] {
|
|
9503
10227
|
return &token_list[index];
|
|
9504
10228
|
}
|
|
9505
10229
|
|
|
9506
|
-
// Assert: parser
|
|
10230
|
+
// Assert: parser's token list's size is greater than or equal to 1.
|
|
9507
10231
|
ADA_ASSERT_TRUE(!token_list.empty());
|
|
9508
10232
|
|
|
9509
|
-
// Let token be parser
|
|
9510
|
-
// Assert: token
|
|
10233
|
+
// Let token be parser's token list[last index].
|
|
10234
|
+
// Assert: token's type is "end".
|
|
9511
10235
|
ADA_ASSERT_TRUE(token_list.back().type == token_type::END);
|
|
9512
10236
|
|
|
9513
10237
|
// Return token.
|
|
@@ -9517,7 +10241,7 @@ constructor_string_parser<regex_provider>::get_safe_token(size_t index) const {
|
|
|
9517
10241
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
9518
10242
|
constexpr bool constructor_string_parser<regex_provider>::is_group_open()
|
|
9519
10243
|
const {
|
|
9520
|
-
// If parser
|
|
10244
|
+
// If parser's token list[parser's token index]'s type is "open", then return
|
|
9521
10245
|
// true.
|
|
9522
10246
|
return token_list[token_index].type == token_type::OPEN;
|
|
9523
10247
|
}
|
|
@@ -9525,7 +10249,7 @@ constexpr bool constructor_string_parser<regex_provider>::is_group_open()
|
|
|
9525
10249
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
9526
10250
|
constexpr bool constructor_string_parser<regex_provider>::is_group_close()
|
|
9527
10251
|
const {
|
|
9528
|
-
// If parser
|
|
10252
|
+
// If parser's token list[parser's token index]'s type is "close", then return
|
|
9529
10253
|
// true.
|
|
9530
10254
|
return token_list[token_index].type == token_type::CLOSE;
|
|
9531
10255
|
}
|
|
@@ -9534,12 +10258,12 @@ template <url_pattern_regex::regex_concept regex_provider>
|
|
|
9534
10258
|
constexpr bool
|
|
9535
10259
|
constructor_string_parser<regex_provider>::next_is_authority_slashes() const {
|
|
9536
10260
|
// If the result of running is a non-special pattern char given parser,
|
|
9537
|
-
// parser
|
|
10261
|
+
// parser's token index + 1, and "/" is false, then return false.
|
|
9538
10262
|
if (!is_non_special_pattern_char(token_index + 1, '/')) {
|
|
9539
10263
|
return false;
|
|
9540
10264
|
}
|
|
9541
10265
|
// If the result of running is a non-special pattern char given parser,
|
|
9542
|
-
// parser
|
|
10266
|
+
// parser's token index + 2, and "/" is false, then return false.
|
|
9543
10267
|
if (!is_non_special_pattern_char(token_index + 2, '/')) {
|
|
9544
10268
|
return false;
|
|
9545
10269
|
}
|
|
@@ -9550,15 +10274,15 @@ template <url_pattern_regex::regex_concept regex_provider>
|
|
|
9550
10274
|
constexpr bool constructor_string_parser<regex_provider>::is_protocol_suffix()
|
|
9551
10275
|
const {
|
|
9552
10276
|
// Return the result of running is a non-special pattern char given parser,
|
|
9553
|
-
// parser
|
|
10277
|
+
// parser's token index, and ":".
|
|
9554
10278
|
return is_non_special_pattern_char(token_index, ':');
|
|
9555
10279
|
}
|
|
9556
10280
|
|
|
9557
10281
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
9558
10282
|
void constructor_string_parser<regex_provider>::change_state(State new_state,
|
|
9559
10283
|
size_t skip) {
|
|
9560
|
-
// If parser
|
|
9561
|
-
// parser
|
|
10284
|
+
// If parser's state is not "init", not "authority", and not "done", then set
|
|
10285
|
+
// parser's result[parser's state] to the result of running make a component
|
|
9562
10286
|
// string given parser.
|
|
9563
10287
|
if (state != State::INIT && state != State::AUTHORITY &&
|
|
9564
10288
|
state != State::DONE) {
|
|
@@ -9602,11 +10326,11 @@ void constructor_string_parser<regex_provider>::change_state(State new_state,
|
|
|
9602
10326
|
}
|
|
9603
10327
|
}
|
|
9604
10328
|
|
|
9605
|
-
// If parser
|
|
10329
|
+
// If parser's state is not "init" and new state is not "done", then:
|
|
9606
10330
|
if (state != State::INIT && new_state != State::DONE) {
|
|
9607
|
-
// If parser
|
|
9608
|
-
// new state is "port", "pathname", "search", or "hash"; and parser
|
|
9609
|
-
// result["hostname"] does not exist, then set parser
|
|
10331
|
+
// If parser's state is "protocol", "authority", "username", or "password";
|
|
10332
|
+
// new state is "port", "pathname", "search", or "hash"; and parser's
|
|
10333
|
+
// result["hostname"] does not exist, then set parser's result["hostname"]
|
|
9610
10334
|
// to the empty string.
|
|
9611
10335
|
if ((state == State::PROTOCOL || state == State::AUTHORITY ||
|
|
9612
10336
|
state == State::USERNAME || state == State::PASSWORD) &&
|
|
@@ -9616,8 +10340,8 @@ void constructor_string_parser<regex_provider>::change_state(State new_state,
|
|
|
9616
10340
|
result.hostname = "";
|
|
9617
10341
|
}
|
|
9618
10342
|
|
|
9619
|
-
// If parser
|
|
9620
|
-
// "hostname", or "port"; new state is "search" or "hash"; and parser
|
|
10343
|
+
// If parser's state is "protocol", "authority", "username", "password",
|
|
10344
|
+
// "hostname", or "port"; new state is "search" or "hash"; and parser's
|
|
9621
10345
|
// result["pathname"] does not exist, then:
|
|
9622
10346
|
if ((state == State::PROTOCOL || state == State::AUTHORITY ||
|
|
9623
10347
|
state == State::USERNAME || state == State::PASSWORD ||
|
|
@@ -9627,14 +10351,14 @@ void constructor_string_parser<regex_provider>::change_state(State new_state,
|
|
|
9627
10351
|
if (protocol_matches_a_special_scheme_flag) {
|
|
9628
10352
|
result.pathname = "/";
|
|
9629
10353
|
} else {
|
|
9630
|
-
// Otherwise, set parser
|
|
10354
|
+
// Otherwise, set parser's result["pathname"] to the empty string.
|
|
9631
10355
|
result.pathname = "";
|
|
9632
10356
|
}
|
|
9633
10357
|
}
|
|
9634
10358
|
|
|
9635
|
-
// If parser
|
|
9636
|
-
// "hostname", "port", or "pathname"; new state is "hash"; and parser
|
|
9637
|
-
// result["search"] does not exist, then set parser
|
|
10359
|
+
// If parser's state is "protocol", "authority", "username", "password",
|
|
10360
|
+
// "hostname", "port", or "pathname"; new state is "hash"; and parser's
|
|
10361
|
+
// result["search"] does not exist, then set parser's result["search"] to
|
|
9638
10362
|
// the empty string.
|
|
9639
10363
|
if ((state == State::PROTOCOL || state == State::AUTHORITY ||
|
|
9640
10364
|
state == State::USERNAME || state == State::PASSWORD ||
|
|
@@ -9644,41 +10368,41 @@ void constructor_string_parser<regex_provider>::change_state(State new_state,
|
|
|
9644
10368
|
result.search = "";
|
|
9645
10369
|
}
|
|
9646
10370
|
|
|
9647
|
-
// Set parser
|
|
10371
|
+
// Set parser's state to new state.
|
|
9648
10372
|
state = new_state;
|
|
9649
|
-
// Increment parser
|
|
10373
|
+
// Increment parser's token index by skip.
|
|
9650
10374
|
token_index += skip;
|
|
9651
|
-
// Set parser
|
|
10375
|
+
// Set parser's component start to parser's token index.
|
|
9652
10376
|
component_start = token_index;
|
|
9653
|
-
// Set parser
|
|
10377
|
+
// Set parser's token increment to 0.
|
|
9654
10378
|
token_increment = 0;
|
|
9655
10379
|
}
|
|
9656
10380
|
|
|
9657
10381
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
9658
10382
|
std::string constructor_string_parser<regex_provider>::make_component_string() {
|
|
9659
|
-
// Assert: parser
|
|
10383
|
+
// Assert: parser's token index is less than parser's token list's size.
|
|
9660
10384
|
ADA_ASSERT_TRUE(token_index < token_list.size());
|
|
9661
10385
|
|
|
9662
|
-
// Let token be parser
|
|
9663
|
-
// Let end index be token
|
|
10386
|
+
// Let token be parser's token list[parser's token index].
|
|
10387
|
+
// Let end index be token's index.
|
|
9664
10388
|
const auto end_index = token_list[token_index].index;
|
|
9665
10389
|
// Let component start token be the result of running get a safe token given
|
|
9666
|
-
// parser and parser
|
|
10390
|
+
// parser and parser's component start.
|
|
9667
10391
|
const auto component_start_token = get_safe_token(component_start);
|
|
9668
10392
|
ADA_ASSERT_TRUE(component_start_token);
|
|
9669
|
-
// Let component start input index be component start token
|
|
10393
|
+
// Let component start input index be component start token's index.
|
|
9670
10394
|
const auto component_start_input_index = component_start_token->index;
|
|
9671
10395
|
// Return the code point substring from component start input index to end
|
|
9672
|
-
// index within parser
|
|
9673
|
-
return input.substr(component_start_input_index,
|
|
9674
|
-
|
|
10396
|
+
// index within parser's input.
|
|
10397
|
+
return std::string(input.substr(component_start_input_index,
|
|
10398
|
+
end_index - component_start_input_index));
|
|
9675
10399
|
}
|
|
9676
10400
|
|
|
9677
10401
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
9678
10402
|
constexpr bool
|
|
9679
10403
|
constructor_string_parser<regex_provider>::is_an_identity_terminator() const {
|
|
9680
10404
|
// Return the result of running is a non-special pattern char given parser,
|
|
9681
|
-
// parser
|
|
10405
|
+
// parser's token index, and "@".
|
|
9682
10406
|
return is_non_special_pattern_char(token_index, '@');
|
|
9683
10407
|
}
|
|
9684
10408
|
|
|
@@ -9686,7 +10410,7 @@ template <url_pattern_regex::regex_concept regex_provider>
|
|
|
9686
10410
|
constexpr bool constructor_string_parser<regex_provider>::is_pathname_start()
|
|
9687
10411
|
const {
|
|
9688
10412
|
// Return the result of running is a non-special pattern char given parser,
|
|
9689
|
-
// parser
|
|
10413
|
+
// parser's token index, and "/".
|
|
9690
10414
|
return is_non_special_pattern_char(token_index, '/');
|
|
9691
10415
|
}
|
|
9692
10416
|
|
|
@@ -9694,7 +10418,7 @@ template <url_pattern_regex::regex_concept regex_provider>
|
|
|
9694
10418
|
constexpr bool constructor_string_parser<regex_provider>::is_password_prefix()
|
|
9695
10419
|
const {
|
|
9696
10420
|
// Return the result of running is a non-special pattern char given parser,
|
|
9697
|
-
// parser
|
|
10421
|
+
// parser's token index, and ":".
|
|
9698
10422
|
return is_non_special_pattern_char(token_index, ':');
|
|
9699
10423
|
}
|
|
9700
10424
|
|
|
@@ -9702,7 +10426,7 @@ template <url_pattern_regex::regex_concept regex_provider>
|
|
|
9702
10426
|
constexpr bool constructor_string_parser<regex_provider>::is_an_ipv6_open()
|
|
9703
10427
|
const {
|
|
9704
10428
|
// Return the result of running is a non-special pattern char given parser,
|
|
9705
|
-
// parser
|
|
10429
|
+
// parser's token index, and "[".
|
|
9706
10430
|
return is_non_special_pattern_char(token_index, '[');
|
|
9707
10431
|
}
|
|
9708
10432
|
|
|
@@ -9710,7 +10434,7 @@ template <url_pattern_regex::regex_concept regex_provider>
|
|
|
9710
10434
|
constexpr bool constructor_string_parser<regex_provider>::is_an_ipv6_close()
|
|
9711
10435
|
const {
|
|
9712
10436
|
// Return the result of running is a non-special pattern char given parser,
|
|
9713
|
-
// parser
|
|
10437
|
+
// parser's token index, and "]".
|
|
9714
10438
|
return is_non_special_pattern_char(token_index, ']');
|
|
9715
10439
|
}
|
|
9716
10440
|
|
|
@@ -9718,7 +10442,7 @@ template <url_pattern_regex::regex_concept regex_provider>
|
|
|
9718
10442
|
constexpr bool constructor_string_parser<regex_provider>::is_port_prefix()
|
|
9719
10443
|
const {
|
|
9720
10444
|
// Return the result of running is a non-special pattern char given parser,
|
|
9721
|
-
// parser
|
|
10445
|
+
// parser's token index, and ":".
|
|
9722
10446
|
return is_non_special_pattern_char(token_index, ':');
|
|
9723
10447
|
}
|
|
9724
10448
|
|
|
@@ -9772,7 +10496,7 @@ constexpr void Tokenizer::get_next_code_point() {
|
|
|
9772
10496
|
constexpr void Tokenizer::seek_and_get_next_code_point(size_t new_index) {
|
|
9773
10497
|
ada_log("Tokenizer::seek_and_get_next_code_point called with new_index=",
|
|
9774
10498
|
new_index);
|
|
9775
|
-
// Set tokenizer
|
|
10499
|
+
// Set tokenizer's next index to index.
|
|
9776
10500
|
next_index = new_index;
|
|
9777
10501
|
// Run get the next code point given tokenizer.
|
|
9778
10502
|
get_next_code_point();
|
|
@@ -9785,21 +10509,21 @@ inline void Tokenizer::add_token(token_type type, size_t next_position,
|
|
|
9785
10509
|
ADA_ASSERT_TRUE(next_position >= value_position);
|
|
9786
10510
|
|
|
9787
10511
|
// Let token be a new token.
|
|
9788
|
-
// Set token
|
|
9789
|
-
// Set token
|
|
9790
|
-
// Set token
|
|
9791
|
-
// length value length within tokenizer
|
|
9792
|
-
// Append token to the back of tokenizer
|
|
10512
|
+
// Set token's type to type.
|
|
10513
|
+
// Set token's index to tokenizer's index.
|
|
10514
|
+
// Set token's value to the code point substring from value position with
|
|
10515
|
+
// length value length within tokenizer's input.
|
|
10516
|
+
// Append token to the back of tokenizer's token list.
|
|
9793
10517
|
token_list.emplace_back(type, index,
|
|
9794
10518
|
input.substr(value_position, value_length));
|
|
9795
|
-
// Set tokenizer
|
|
10519
|
+
// Set tokenizer's index to next position.
|
|
9796
10520
|
index = next_position;
|
|
9797
10521
|
}
|
|
9798
10522
|
|
|
9799
10523
|
inline void Tokenizer::add_token_with_default_length(token_type type,
|
|
9800
10524
|
size_t next_position,
|
|
9801
10525
|
size_t value_position) {
|
|
9802
|
-
// Let computed length be next position
|
|
10526
|
+
// Let computed length be next position - value position.
|
|
9803
10527
|
auto computed_length = next_position - value_position;
|
|
9804
10528
|
// Run add a token given tokenizer, type, next position, value position, and
|
|
9805
10529
|
// computed length.
|
|
@@ -9809,21 +10533,21 @@ inline void Tokenizer::add_token_with_default_length(token_type type,
|
|
|
9809
10533
|
inline void Tokenizer::add_token_with_defaults(token_type type) {
|
|
9810
10534
|
ada_log("Tokenizer::add_token_with_defaults called with type=",
|
|
9811
10535
|
to_string(type));
|
|
9812
|
-
// Run add a token with default length given tokenizer, type, tokenizer
|
|
9813
|
-
// index, and tokenizer
|
|
10536
|
+
// Run add a token with default length given tokenizer, type, tokenizer's next
|
|
10537
|
+
// index, and tokenizer's index.
|
|
9814
10538
|
add_token_with_default_length(type, next_index, index);
|
|
9815
10539
|
}
|
|
9816
10540
|
|
|
9817
10541
|
inline ada_warn_unused std::optional<errors>
|
|
9818
10542
|
Tokenizer::process_tokenizing_error(size_t next_position,
|
|
9819
10543
|
size_t value_position) {
|
|
9820
|
-
// If tokenizer
|
|
10544
|
+
// If tokenizer's policy is "strict", then throw a TypeError.
|
|
9821
10545
|
if (policy == token_policy::strict) {
|
|
9822
10546
|
ada_log("process_tokenizing_error failed with next_position=",
|
|
9823
10547
|
next_position, " value_position=", value_position);
|
|
9824
10548
|
return errors::type_error;
|
|
9825
10549
|
}
|
|
9826
|
-
// Assert: tokenizer
|
|
10550
|
+
// Assert: tokenizer's policy is "lenient".
|
|
9827
10551
|
ADA_ASSERT_TRUE(policy == token_policy::lenient);
|
|
9828
10552
|
// Run add a token with default length given tokenizer, "invalid-char", next
|
|
9829
10553
|
// position, and value position.
|
|
@@ -9864,13 +10588,13 @@ template <url_pattern_encoding_callback F>
|
|
|
9864
10588
|
token* url_pattern_parser<F>::try_consume_token(token_type type) {
|
|
9865
10589
|
ada_log("url_pattern_parser::try_consume_token called with type=",
|
|
9866
10590
|
to_string(type));
|
|
9867
|
-
// Assert: parser
|
|
10591
|
+
// Assert: parser's index is less than parser's token list size.
|
|
9868
10592
|
ADA_ASSERT_TRUE(index < tokens.size());
|
|
9869
|
-
// Let next token be parser
|
|
10593
|
+
// Let next token be parser's token list[parser's index].
|
|
9870
10594
|
auto& next_token = tokens[index];
|
|
9871
|
-
// If next token
|
|
10595
|
+
// If next token's type is not type return null.
|
|
9872
10596
|
if (next_token.type != type) return nullptr;
|
|
9873
|
-
// Increase parser
|
|
10597
|
+
// Increase parser's index by 1.
|
|
9874
10598
|
index++;
|
|
9875
10599
|
// Return next token.
|
|
9876
10600
|
return &next_token;
|
|
@@ -9890,7 +10614,7 @@ std::string url_pattern_parser<F>::consume_text() {
|
|
|
9890
10614
|
if (!token) token = try_consume_token(token_type::ESCAPED_CHAR);
|
|
9891
10615
|
// If token is null, then break.
|
|
9892
10616
|
if (!token) break;
|
|
9893
|
-
// Append token
|
|
10617
|
+
// Append token's value to the end of result.
|
|
9894
10618
|
result.append(token->value);
|
|
9895
10619
|
}
|
|
9896
10620
|
// Return result.
|
|
@@ -9909,23 +10633,23 @@ bool url_pattern_parser<F>::consume_required_token(token_type type) {
|
|
|
9909
10633
|
template <url_pattern_encoding_callback F>
|
|
9910
10634
|
std::optional<errors>
|
|
9911
10635
|
url_pattern_parser<F>::maybe_add_part_from_the_pending_fixed_value() {
|
|
9912
|
-
// If parser
|
|
10636
|
+
// If parser's pending fixed value is the empty string, then return.
|
|
9913
10637
|
if (pending_fixed_value.empty()) {
|
|
9914
10638
|
ada_log("pending_fixed_value is empty");
|
|
9915
10639
|
return std::nullopt;
|
|
9916
10640
|
}
|
|
9917
|
-
// Let encoded value be the result of running parser
|
|
9918
|
-
// parser
|
|
10641
|
+
// Let encoded value be the result of running parser's encoding callback given
|
|
10642
|
+
// parser's pending fixed value.
|
|
9919
10643
|
auto encoded_value = encoding_callback(pending_fixed_value);
|
|
9920
10644
|
if (!encoded_value) {
|
|
9921
10645
|
ada_log("failed to encode pending_fixed_value: ", pending_fixed_value);
|
|
9922
10646
|
return encoded_value.error();
|
|
9923
10647
|
}
|
|
9924
|
-
// Set parser
|
|
10648
|
+
// Set parser's pending fixed value to the empty string.
|
|
9925
10649
|
pending_fixed_value.clear();
|
|
9926
10650
|
// Let part be a new part whose type is "fixed-text", value is encoded value,
|
|
9927
10651
|
// and modifier is "none".
|
|
9928
|
-
// Append part to parser
|
|
10652
|
+
// Append part to parser's part list.
|
|
9929
10653
|
parts.emplace_back(url_pattern_part_type::FIXED_TEXT,
|
|
9930
10654
|
std::move(*encoded_value),
|
|
9931
10655
|
url_pattern_part_modifier::none);
|
|
@@ -9940,15 +10664,15 @@ std::optional<errors> url_pattern_parser<F>::add_part(
|
|
|
9940
10664
|
auto modifier = url_pattern_part_modifier::none;
|
|
9941
10665
|
// If modifier token is not null:
|
|
9942
10666
|
if (modifier_token) {
|
|
9943
|
-
// If modifier token
|
|
10667
|
+
// If modifier token's value is "?" then set modifier to "optional".
|
|
9944
10668
|
if (modifier_token->value == "?") {
|
|
9945
10669
|
modifier = url_pattern_part_modifier::optional;
|
|
9946
10670
|
} else if (modifier_token->value == "*") {
|
|
9947
|
-
// Otherwise if modifier token
|
|
10671
|
+
// Otherwise if modifier token's value is "*" then set modifier to
|
|
9948
10672
|
// "zero-or-more".
|
|
9949
10673
|
modifier = url_pattern_part_modifier::zero_or_more;
|
|
9950
10674
|
} else if (modifier_token->value == "+") {
|
|
9951
|
-
// Otherwise if modifier token
|
|
10675
|
+
// Otherwise if modifier token's value is "+" then set modifier to
|
|
9952
10676
|
// "one-or-more".
|
|
9953
10677
|
modifier = url_pattern_part_modifier::one_or_more;
|
|
9954
10678
|
}
|
|
@@ -9957,7 +10681,7 @@ std::optional<errors> url_pattern_parser<F>::add_part(
|
|
|
9957
10681
|
// is "none":
|
|
9958
10682
|
if (!name_token && !regexp_or_wildcard_token &&
|
|
9959
10683
|
modifier == url_pattern_part_modifier::none) {
|
|
9960
|
-
// Append prefix to the end of parser
|
|
10684
|
+
// Append prefix to the end of parser's pending fixed value.
|
|
9961
10685
|
pending_fixed_value.append(prefix);
|
|
9962
10686
|
return std::nullopt;
|
|
9963
10687
|
}
|
|
@@ -9971,7 +10695,7 @@ std::optional<errors> url_pattern_parser<F>::add_part(
|
|
|
9971
10695
|
ADA_ASSERT_TRUE(suffix.empty());
|
|
9972
10696
|
// If prefix is the empty string, then return.
|
|
9973
10697
|
if (prefix.empty()) return std::nullopt;
|
|
9974
|
-
// Let encoded value be the result of running parser
|
|
10698
|
+
// Let encoded value be the result of running parser's encoding callback
|
|
9975
10699
|
// given prefix.
|
|
9976
10700
|
auto encoded_value = encoding_callback(prefix);
|
|
9977
10701
|
if (!encoded_value) {
|
|
@@ -9979,28 +10703,28 @@ std::optional<errors> url_pattern_parser<F>::add_part(
|
|
|
9979
10703
|
}
|
|
9980
10704
|
// Let part be a new part whose type is "fixed-text", value is encoded
|
|
9981
10705
|
// value, and modifier is modifier.
|
|
9982
|
-
// Append part to parser
|
|
10706
|
+
// Append part to parser's part list.
|
|
9983
10707
|
parts.emplace_back(url_pattern_part_type::FIXED_TEXT,
|
|
9984
10708
|
std::move(*encoded_value), modifier);
|
|
9985
10709
|
return std::nullopt;
|
|
9986
10710
|
}
|
|
9987
10711
|
// Let regexp value be the empty string.
|
|
9988
10712
|
std::string regexp_value{};
|
|
9989
|
-
// If regexp or wildcard token is null, then set regexp value to parser
|
|
10713
|
+
// If regexp or wildcard token is null, then set regexp value to parser's
|
|
9990
10714
|
// segment wildcard regexp.
|
|
9991
10715
|
if (!regexp_or_wildcard_token) {
|
|
9992
10716
|
regexp_value = segment_wildcard_regexp;
|
|
9993
10717
|
} else if (regexp_or_wildcard_token->type == token_type::ASTERISK) {
|
|
9994
|
-
// Otherwise if regexp or wildcard token
|
|
10718
|
+
// Otherwise if regexp or wildcard token's type is "asterisk", then set
|
|
9995
10719
|
// regexp value to the full wildcard regexp value.
|
|
9996
10720
|
regexp_value = ".*";
|
|
9997
10721
|
} else {
|
|
9998
|
-
// Otherwise set regexp value to regexp or wildcard token
|
|
10722
|
+
// Otherwise set regexp value to regexp or wildcard token's value.
|
|
9999
10723
|
regexp_value = regexp_or_wildcard_token->value;
|
|
10000
10724
|
}
|
|
10001
10725
|
// Let type be "regexp".
|
|
10002
10726
|
auto type = url_pattern_part_type::REGEXP;
|
|
10003
|
-
// If regexp value is parser
|
|
10727
|
+
// If regexp value is parser's segment wildcard regexp:
|
|
10004
10728
|
if (regexp_value == segment_wildcard_regexp) {
|
|
10005
10729
|
// Set type to "segment-wildcard".
|
|
10006
10730
|
type = url_pattern_part_type::SEGMENT_WILDCARD;
|
|
@@ -10015,14 +10739,14 @@ std::optional<errors> url_pattern_parser<F>::add_part(
|
|
|
10015
10739
|
}
|
|
10016
10740
|
// Let name be the empty string.
|
|
10017
10741
|
std::string name{};
|
|
10018
|
-
// If name token is not null, then set name to name token
|
|
10742
|
+
// If name token is not null, then set name to name token's value.
|
|
10019
10743
|
if (name_token) {
|
|
10020
10744
|
name = name_token->value;
|
|
10021
10745
|
} else if (regexp_or_wildcard_token != nullptr) {
|
|
10022
10746
|
// Otherwise if regexp or wildcard token is not null:
|
|
10023
|
-
// Set name to parser
|
|
10747
|
+
// Set name to parser's next numeric name, serialized.
|
|
10024
10748
|
name = std::to_string(next_numeric_name);
|
|
10025
|
-
// Increment parser
|
|
10749
|
+
// Increment parser's next numeric name by 1.
|
|
10026
10750
|
next_numeric_name++;
|
|
10027
10751
|
}
|
|
10028
10752
|
// If the result of running is a duplicate name given parser and name is
|
|
@@ -10031,18 +10755,18 @@ std::optional<errors> url_pattern_parser<F>::add_part(
|
|
|
10031
10755
|
parts, [&name](const auto& part) { return part.name == name; })) {
|
|
10032
10756
|
return errors::type_error;
|
|
10033
10757
|
}
|
|
10034
|
-
// Let encoded prefix be the result of running parser
|
|
10758
|
+
// Let encoded prefix be the result of running parser's encoding callback
|
|
10035
10759
|
// given prefix.
|
|
10036
10760
|
auto encoded_prefix = encoding_callback(prefix);
|
|
10037
10761
|
if (!encoded_prefix) return encoded_prefix.error();
|
|
10038
|
-
// Let encoded suffix be the result of running parser
|
|
10762
|
+
// Let encoded suffix be the result of running parser's encoding callback
|
|
10039
10763
|
// given suffix.
|
|
10040
10764
|
auto encoded_suffix = encoding_callback(suffix);
|
|
10041
10765
|
if (!encoded_suffix) return encoded_suffix.error();
|
|
10042
10766
|
// Let part be a new part whose type is type, value is regexp value,
|
|
10043
10767
|
// modifier is modifier, name is name, prefix is encoded prefix, and suffix
|
|
10044
10768
|
// is encoded suffix.
|
|
10045
|
-
// Append part to parser
|
|
10769
|
+
// Append part to parser's part list.
|
|
10046
10770
|
parts.emplace_back(type, std::move(regexp_value), modifier, std::move(name),
|
|
10047
10771
|
std::move(*encoded_prefix), std::move(*encoded_suffix));
|
|
10048
10772
|
return std::nullopt;
|
|
@@ -10058,7 +10782,7 @@ tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
|
|
|
10058
10782
|
// segment wildcard regexp given options.
|
|
10059
10783
|
auto parser = url_pattern_parser<F>(
|
|
10060
10784
|
encoding_callback, generate_segment_wildcard_regexp(options));
|
|
10061
|
-
// Set parser
|
|
10785
|
+
// Set parser's token list to the result of running tokenize given input and
|
|
10062
10786
|
// "strict".
|
|
10063
10787
|
auto tokenize_result = tokenize(input, token_policy::strict);
|
|
10064
10788
|
if (!tokenize_result) {
|
|
@@ -10067,7 +10791,7 @@ tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
|
|
|
10067
10791
|
}
|
|
10068
10792
|
parser.tokens = std::move(*tokenize_result);
|
|
10069
10793
|
|
|
10070
|
-
// While parser
|
|
10794
|
+
// While parser's index is less than parser's token list's size:
|
|
10071
10795
|
while (parser.can_continue()) {
|
|
10072
10796
|
// Let char token be the result of running try to consume a token given
|
|
10073
10797
|
// parser and "char".
|
|
@@ -10083,11 +10807,11 @@ tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
|
|
|
10083
10807
|
if (name_token || regexp_or_wildcard_token) {
|
|
10084
10808
|
// Let prefix be the empty string.
|
|
10085
10809
|
std::string prefix{};
|
|
10086
|
-
// If char token is not null then set prefix to char token
|
|
10810
|
+
// If char token is not null then set prefix to char token's value.
|
|
10087
10811
|
if (char_token) prefix = char_token->value;
|
|
10088
|
-
// If prefix is not the empty string and not options
|
|
10812
|
+
// If prefix is not the empty string and not options's prefix code point:
|
|
10089
10813
|
if (!prefix.empty() && prefix != options.get_prefix()) {
|
|
10090
|
-
// Append prefix to the end of parser
|
|
10814
|
+
// Append prefix to the end of parser's pending fixed value.
|
|
10091
10815
|
parser.pending_fixed_value.append(prefix);
|
|
10092
10816
|
// Set prefix to the empty string.
|
|
10093
10817
|
prefix.clear();
|
|
@@ -10120,7 +10844,7 @@ tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
|
|
|
10120
10844
|
fixed_token = parser.try_consume_token(token_type::ESCAPED_CHAR);
|
|
10121
10845
|
// If fixed token is not null:
|
|
10122
10846
|
if (fixed_token) {
|
|
10123
|
-
// Append fixed token
|
|
10847
|
+
// Append fixed token's value to parser's pending fixed value.
|
|
10124
10848
|
parser.pending_fixed_value.append(fixed_token->value);
|
|
10125
10849
|
// Continue.
|
|
10126
10850
|
continue;
|
|
@@ -10170,20 +10894,38 @@ tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
|
|
|
10170
10894
|
}
|
|
10171
10895
|
}
|
|
10172
10896
|
ada_log("parser.parts size is: ", parser.parts.size());
|
|
10173
|
-
// Return parser
|
|
10897
|
+
// Return parser's part list.
|
|
10174
10898
|
return parser.parts;
|
|
10175
10899
|
}
|
|
10176
10900
|
|
|
10177
10901
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
10178
10902
|
bool protocol_component_matches_special_scheme(
|
|
10179
10903
|
url_pattern_component<regex_provider>& component) {
|
|
10180
|
-
//
|
|
10181
|
-
|
|
10182
|
-
|
|
10183
|
-
|
|
10184
|
-
|
|
10185
|
-
|
|
10186
|
-
|
|
10904
|
+
// Optimization: Use fast_test for simple patterns to avoid regex overhead
|
|
10905
|
+
switch (component.type) {
|
|
10906
|
+
case url_pattern_component_type::EMPTY:
|
|
10907
|
+
// Empty pattern can't match any special scheme
|
|
10908
|
+
return false;
|
|
10909
|
+
case url_pattern_component_type::EXACT_MATCH:
|
|
10910
|
+
// Direct string comparison for exact match patterns
|
|
10911
|
+
return component.exact_match_value == "http" ||
|
|
10912
|
+
component.exact_match_value == "https" ||
|
|
10913
|
+
component.exact_match_value == "ws" ||
|
|
10914
|
+
component.exact_match_value == "wss" ||
|
|
10915
|
+
component.exact_match_value == "ftp";
|
|
10916
|
+
case url_pattern_component_type::FULL_WILDCARD:
|
|
10917
|
+
// Full wildcard matches everything including special schemes
|
|
10918
|
+
return true;
|
|
10919
|
+
case url_pattern_component_type::REGEXP:
|
|
10920
|
+
// Fall back to regex matching for complex patterns
|
|
10921
|
+
auto& regex = component.regexp;
|
|
10922
|
+
return regex_provider::regex_match("http", regex) ||
|
|
10923
|
+
regex_provider::regex_match("https", regex) ||
|
|
10924
|
+
regex_provider::regex_match("ws", regex) ||
|
|
10925
|
+
regex_provider::regex_match("wss", regex) ||
|
|
10926
|
+
regex_provider::regex_match("ftp", regex);
|
|
10927
|
+
}
|
|
10928
|
+
ada::unreachable();
|
|
10187
10929
|
}
|
|
10188
10930
|
|
|
10189
10931
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
@@ -10206,7 +10948,7 @@ inline std::optional<errors> constructor_string_parser<
|
|
|
10206
10948
|
return protocol_component.error();
|
|
10207
10949
|
}
|
|
10208
10950
|
// If the result of running protocol component matches a special scheme given
|
|
10209
|
-
// protocol component is true, then set parser
|
|
10951
|
+
// protocol component is true, then set parser's protocol matches a special
|
|
10210
10952
|
// scheme flag to true.
|
|
10211
10953
|
if (protocol_component_matches_special_scheme(*protocol_component)) {
|
|
10212
10954
|
protocol_matches_a_special_scheme_flag = true;
|
|
@@ -10226,14 +10968,14 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
|
|
|
10226
10968
|
}
|
|
10227
10969
|
auto parser = constructor_string_parser(input, std::move(*token_list));
|
|
10228
10970
|
|
|
10229
|
-
// While parser
|
|
10971
|
+
// While parser's token index is less than parser's token list size:
|
|
10230
10972
|
while (parser.token_index < parser.token_list.size()) {
|
|
10231
|
-
// Set parser
|
|
10973
|
+
// Set parser's token increment to 1.
|
|
10232
10974
|
parser.token_increment = 1;
|
|
10233
10975
|
|
|
10234
|
-
// If parser
|
|
10976
|
+
// If parser's token list[parser's token index]'s type is "end" then:
|
|
10235
10977
|
if (parser.token_list[parser.token_index].type == token_type::END) {
|
|
10236
|
-
// If parser
|
|
10978
|
+
// If parser's state is "init":
|
|
10237
10979
|
if (parser.state == State::INIT) {
|
|
10238
10980
|
// Run rewind given parser.
|
|
10239
10981
|
parser.rewind();
|
|
@@ -10249,18 +10991,18 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
|
|
|
10249
10991
|
// Run change state given parser, "pathname" and 0.
|
|
10250
10992
|
parser.change_state(State::PATHNAME, 0);
|
|
10251
10993
|
}
|
|
10252
|
-
// Increment parser
|
|
10994
|
+
// Increment parser's token index by parser's token increment.
|
|
10253
10995
|
parser.token_index += parser.token_increment;
|
|
10254
10996
|
// Continue.
|
|
10255
10997
|
continue;
|
|
10256
10998
|
}
|
|
10257
10999
|
|
|
10258
11000
|
if (parser.state == State::AUTHORITY) {
|
|
10259
|
-
// If parser
|
|
11001
|
+
// If parser's state is "authority":
|
|
10260
11002
|
// Run rewind and set state given parser, and "hostname".
|
|
10261
11003
|
parser.rewind();
|
|
10262
11004
|
parser.change_state(State::HOSTNAME, 0);
|
|
10263
|
-
// Increment parser
|
|
11005
|
+
// Increment parser's token index by parser's token increment.
|
|
10264
11006
|
parser.token_index += parser.token_increment;
|
|
10265
11007
|
// Continue.
|
|
10266
11008
|
continue;
|
|
@@ -10274,26 +11016,26 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
|
|
|
10274
11016
|
|
|
10275
11017
|
// If the result of running is a group open given parser is true:
|
|
10276
11018
|
if (parser.is_group_open()) {
|
|
10277
|
-
// Increment parser
|
|
11019
|
+
// Increment parser's group depth by 1.
|
|
10278
11020
|
parser.group_depth += 1;
|
|
10279
|
-
// Increment parser
|
|
11021
|
+
// Increment parser's token index by parser's token increment.
|
|
10280
11022
|
parser.token_index += parser.token_increment;
|
|
10281
11023
|
}
|
|
10282
11024
|
|
|
10283
|
-
// If parser
|
|
11025
|
+
// If parser's group depth is greater than 0:
|
|
10284
11026
|
if (parser.group_depth > 0) {
|
|
10285
11027
|
// If the result of running is a group close given parser is true, then
|
|
10286
|
-
// decrement parser
|
|
11028
|
+
// decrement parser's group depth by 1.
|
|
10287
11029
|
if (parser.is_group_close()) {
|
|
10288
11030
|
parser.group_depth -= 1;
|
|
10289
11031
|
} else {
|
|
10290
|
-
// Increment parser
|
|
11032
|
+
// Increment parser's token index by parser's token increment.
|
|
10291
11033
|
parser.token_index += parser.token_increment;
|
|
10292
11034
|
continue;
|
|
10293
11035
|
}
|
|
10294
11036
|
}
|
|
10295
11037
|
|
|
10296
|
-
// Switch on parser
|
|
11038
|
+
// Switch on parser's state and run the associated steps:
|
|
10297
11039
|
switch (parser.state) {
|
|
10298
11040
|
case State::INIT: {
|
|
10299
11041
|
// If the result of running is a protocol suffix given parser is true:
|
|
@@ -10325,7 +11067,7 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
|
|
|
10325
11067
|
// Set skip to 3.
|
|
10326
11068
|
skip = 3;
|
|
10327
11069
|
} else if (parser.protocol_matches_a_special_scheme_flag) {
|
|
10328
|
-
// Otherwise if parser
|
|
11070
|
+
// Otherwise if parser's protocol matches a special scheme flag is
|
|
10329
11071
|
// true, then set next state to "authority".
|
|
10330
11072
|
next_state = State::AUTHORITY;
|
|
10331
11073
|
}
|
|
@@ -10376,17 +11118,17 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
|
|
|
10376
11118
|
}
|
|
10377
11119
|
case State::HOSTNAME: {
|
|
10378
11120
|
// If the result of running is an IPv6 open given parser is true, then
|
|
10379
|
-
// increment parser
|
|
11121
|
+
// increment parser's hostname IPv6 bracket depth by 1.
|
|
10380
11122
|
if (parser.is_an_ipv6_open()) {
|
|
10381
11123
|
parser.hostname_ipv6_bracket_depth += 1;
|
|
10382
11124
|
} else if (parser.is_an_ipv6_close()) {
|
|
10383
11125
|
// Otherwise if the result of running is an IPv6 close given parser is
|
|
10384
|
-
// true, then decrement parser
|
|
11126
|
+
// true, then decrement parser's hostname IPv6 bracket depth by 1.
|
|
10385
11127
|
parser.hostname_ipv6_bracket_depth -= 1;
|
|
10386
11128
|
} else if (parser.is_port_prefix() &&
|
|
10387
11129
|
parser.hostname_ipv6_bracket_depth == 0) {
|
|
10388
11130
|
// Otherwise if the result of running is a port prefix given parser is
|
|
10389
|
-
// true and parser
|
|
11131
|
+
// true and parser's hostname IPv6 bracket depth is zero, then run
|
|
10390
11132
|
// change state given parser, "port", and 1.
|
|
10391
11133
|
parser.change_state(State::PORT, 1);
|
|
10392
11134
|
} else if (parser.is_pathname_start()) {
|
|
@@ -10439,6 +11181,7 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
|
|
|
10439
11181
|
if (parser.is_hash_prefix()) {
|
|
10440
11182
|
parser.change_state(State::HASH, 1);
|
|
10441
11183
|
}
|
|
11184
|
+
break;
|
|
10442
11185
|
}
|
|
10443
11186
|
case State::HASH: {
|
|
10444
11187
|
// Do nothing
|
|
@@ -10450,22 +11193,22 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
|
|
|
10450
11193
|
}
|
|
10451
11194
|
}
|
|
10452
11195
|
|
|
10453
|
-
// Increment parser
|
|
11196
|
+
// Increment parser's token index by parser's token increment.
|
|
10454
11197
|
parser.token_index += parser.token_increment;
|
|
10455
11198
|
}
|
|
10456
11199
|
|
|
10457
|
-
// If parser
|
|
11200
|
+
// If parser's result contains "hostname" and not "port", then set parser's
|
|
10458
11201
|
// result["port"] to the empty string.
|
|
10459
11202
|
if (parser.result.hostname && !parser.result.port) {
|
|
10460
11203
|
parser.result.port = "";
|
|
10461
11204
|
}
|
|
10462
11205
|
|
|
10463
|
-
// Return parser
|
|
11206
|
+
// Return parser's result.
|
|
10464
11207
|
return parser.result;
|
|
10465
11208
|
}
|
|
10466
11209
|
|
|
10467
11210
|
} // namespace ada::url_pattern_helpers
|
|
10468
|
-
|
|
11211
|
+
#endif // ADA_INCLUDE_URL_PATTERN
|
|
10469
11212
|
#endif
|
|
10470
11213
|
/* end file include/ada/url_pattern_helpers-inl.h */
|
|
10471
11214
|
|
|
@@ -10478,13 +11221,13 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
|
|
|
10478
11221
|
#ifndef ADA_ADA_VERSION_H
|
|
10479
11222
|
#define ADA_ADA_VERSION_H
|
|
10480
11223
|
|
|
10481
|
-
#define ADA_VERSION "3.
|
|
11224
|
+
#define ADA_VERSION "3.4.2"
|
|
10482
11225
|
|
|
10483
11226
|
namespace ada {
|
|
10484
11227
|
|
|
10485
11228
|
// Numeric counterparts of the ADA_VERSION string, usable in constant
// expressions.
enum {
  ADA_VERSION_MAJOR = 3,     // first component of the version
  ADA_VERSION_MINOR = 4,     // second component of the version
  ADA_VERSION_REVISION = 2,  // third component of the version
};
|
|
10490
11233
|
|
|
@@ -10500,19 +11243,22 @@ enum {
|
|
|
10500
11243
|
#define ADA_IMPLEMENTATION_INL_H
|
|
10501
11244
|
|
|
10502
11245
|
|
|
11246
|
+
|
|
10503
11247
|
#include <variant>
|
|
10504
11248
|
#include <string_view>
|
|
10505
11249
|
|
|
10506
11250
|
namespace ada {
|
|
10507
11251
|
|
|
11252
|
+
#if ADA_INCLUDE_URL_PATTERN
|
|
10508
11253
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
10509
11254
|
ada_warn_unused tl::expected<url_pattern<regex_provider>, errors>
|
|
10510
|
-
parse_url_pattern(std::variant<std::string_view, url_pattern_init
|
|
11255
|
+
parse_url_pattern(std::variant<std::string_view, url_pattern_init>&& input,
|
|
10511
11256
|
const std::string_view* base_url,
|
|
10512
11257
|
const url_pattern_options* options) {
|
|
10513
11258
|
return parser::parse_url_pattern_impl<regex_provider>(std::move(input),
|
|
10514
11259
|
base_url, options);
|
|
10515
11260
|
}
|
|
11261
|
+
#endif // ADA_INCLUDE_URL_PATTERN
|
|
10516
11262
|
|
|
10517
11263
|
} // namespace ada
|
|
10518
11264
|
|