ada-url 1.27.0__cp311-cp311-musllinux_1_2_aarch64.whl → 1.29.0__cp311-cp311-musllinux_1_2_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ada_url/__init__.py +2 -0
- ada_url/_ada_wrapper.abi3.so +0 -0
- ada_url/ada.cpp +2374 -1931
- ada_url/ada.h +1292 -586
- ada_url/ada_adapter.py +3 -0
- ada_url/ada_c.h +10 -0
- {ada_url-1.27.0.dist-info → ada_url-1.29.0.dist-info}/METADATA +3 -4
- ada_url-1.29.0.dist-info/RECORD +16 -0
- {ada_url-1.27.0.dist-info → ada_url-1.29.0.dist-info}/WHEEL +1 -1
- ada_url-1.27.0.dist-info/RECORD +0 -16
- {ada_url-1.27.0.dist-info → ada_url-1.29.0.dist-info}/licenses/LICENSE +0 -0
- {ada_url-1.27.0.dist-info → ada_url-1.29.0.dist-info}/top_level.txt +0 -0
ada_url/ada.h
CHANGED
|
@@ -1,14 +1,36 @@
|
|
|
1
|
-
/* auto-generated on
|
|
1
|
+
/* auto-generated on 2026-01-30 13:29:04 -0500. Do not edit! */
|
|
2
2
|
/* begin file include/ada.h */
|
|
3
3
|
/**
|
|
4
4
|
* @file ada.h
|
|
5
|
-
* @brief
|
|
5
|
+
* @brief Main header for the Ada URL parser library.
|
|
6
|
+
*
|
|
7
|
+
* This is the primary entry point for the Ada URL parser library. Including
|
|
8
|
+
* this single header provides access to the complete Ada API, including:
|
|
9
|
+
*
|
|
10
|
+
* - URL parsing via `ada::parse()` function
|
|
11
|
+
* - Two URL representations: `ada::url` and `ada::url_aggregator`
|
|
12
|
+
* - URL search parameters via `ada::url_search_params`
|
|
13
|
+
* - URL pattern matching via `ada::url_pattern` (URLPattern API)
|
|
14
|
+
* - IDNA (Internationalized Domain Names) support
|
|
15
|
+
*
|
|
16
|
+
* @example
|
|
17
|
+
* ```cpp
|
|
18
|
+
*
|
|
19
|
+
* // Parse a URL
|
|
20
|
+
* auto url = ada::parse("https://example.com/path?query=1");
|
|
21
|
+
* if (url) {
|
|
22
|
+
* std::cout << url->get_hostname(); // "example.com"
|
|
23
|
+
* }
|
|
24
|
+
* ```
|
|
25
|
+
*
|
|
26
|
+
* @see https://url.spec.whatwg.org/ - WHATWG URL Standard
|
|
27
|
+
* @see https://github.com/ada-url/ada - Ada URL Parser GitHub Repository
|
|
6
28
|
*/
|
|
7
29
|
#ifndef ADA_H
|
|
8
30
|
#define ADA_H
|
|
9
31
|
|
|
10
32
|
/* begin file include/ada/ada_idna.h */
|
|
11
|
-
/* auto-generated on
|
|
33
|
+
/* auto-generated on 2026-01-30 12:00:02 -0500. Do not edit! */
|
|
12
34
|
/* begin file include/idna.h */
|
|
13
35
|
#ifndef ADA_IDNA_H
|
|
14
36
|
#define ADA_IDNA_H
|
|
@@ -188,7 +210,11 @@ bool valid_name_code_point(char32_t code_point, bool first);
|
|
|
188
210
|
/* begin file include/ada/common_defs.h */
|
|
189
211
|
/**
|
|
190
212
|
* @file common_defs.h
|
|
191
|
-
* @brief
|
|
213
|
+
* @brief Cross-platform compiler macros and common definitions.
|
|
214
|
+
*
|
|
215
|
+
* This header provides compiler-specific macros for optimization hints,
|
|
216
|
+
* platform detection, SIMD support detection, and development/debug utilities.
|
|
217
|
+
* It ensures consistent behavior across different compilers (GCC, Clang, MSVC).
|
|
192
218
|
*/
|
|
193
219
|
#ifndef ADA_COMMON_DEFS_H
|
|
194
220
|
#define ADA_COMMON_DEFS_H
|
|
@@ -421,6 +447,10 @@ namespace ada {
|
|
|
421
447
|
} while (0)
|
|
422
448
|
#endif
|
|
423
449
|
|
|
450
|
+
#if defined(__SSSE3__)
|
|
451
|
+
#define ADA_SSSE3 1
|
|
452
|
+
#endif
|
|
453
|
+
|
|
424
454
|
#if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \
|
|
425
455
|
(defined(_M_AMD64) || defined(_M_X64) || \
|
|
426
456
|
(defined(_M_IX86_FP) && _M_IX86_FP == 2))
|
|
@@ -435,6 +465,11 @@ namespace ada {
|
|
|
435
465
|
#define ADA_LSX 1
|
|
436
466
|
#endif
|
|
437
467
|
|
|
468
|
+
#if defined(__riscv_v) && __riscv_v_intrinsic >= 11000
|
|
469
|
+
// Support RVV intrinsics v0.11 and above
|
|
470
|
+
#define ADA_RVV 1
|
|
471
|
+
#endif
|
|
472
|
+
|
|
438
473
|
#ifndef __has_cpp_attribute
|
|
439
474
|
#define ada_lifetime_bound
|
|
440
475
|
#elif __has_cpp_attribute(msvc::lifetimebound)
|
|
@@ -1007,6 +1042,140 @@ ada_really_inline constexpr bool bit_at(const uint8_t a[], const uint8_t i) {
|
|
|
1007
1042
|
|
|
1008
1043
|
#include <bit>
|
|
1009
1044
|
#include <string_view>
|
|
1045
|
+
/* begin file include/ada/checkers.h */
|
|
1046
|
+
/**
|
|
1047
|
+
* @file checkers.h
|
|
1048
|
+
* @brief Declarations for URL specific checkers used within Ada.
|
|
1049
|
+
*/
|
|
1050
|
+
#ifndef ADA_CHECKERS_H
|
|
1051
|
+
#define ADA_CHECKERS_H
|
|
1052
|
+
|
|
1053
|
+
|
|
1054
|
+
#include <cstring>
|
|
1055
|
+
#include <string_view>
|
|
1056
|
+
|
|
1057
|
+
/**
|
|
1058
|
+
* These functions are not part of our public API and may
|
|
1059
|
+
* change at any time.
|
|
1060
|
+
* @private
|
|
1061
|
+
* @namespace ada::checkers
|
|
1062
|
+
* @brief Includes the definitions for validation functions
|
|
1063
|
+
*/
|
|
1064
|
+
namespace ada::checkers {
|
|
1065
|
+
|
|
1066
|
+
/**
|
|
1067
|
+
* @private
|
|
1068
|
+
* Assuming that x is an ASCII letter, this function returns the lower case
|
|
1069
|
+
* equivalent.
|
|
1070
|
+
* @details More likely to be inlined by the compiler and constexpr.
|
|
1071
|
+
*/
|
|
1072
|
+
constexpr char to_lower(char x) noexcept;
|
|
1073
|
+
|
|
1074
|
+
/**
|
|
1075
|
+
* @private
|
|
1076
|
+
* Returns true if the character is an ASCII letter. Equivalent to std::isalpha
|
|
1077
|
+
* but more likely to be inlined by the compiler.
|
|
1078
|
+
*
|
|
1079
|
+
* @attention std::isalpha is not constexpr generally.
|
|
1080
|
+
*/
|
|
1081
|
+
constexpr bool is_alpha(char x) noexcept;
|
|
1082
|
+
|
|
1083
|
+
/**
|
|
1084
|
+
* @private
|
|
1085
|
+
* Check whether a string starts with 0x or 0X. The function is only
|
|
1086
|
+
* safe if input.size() >= 2.
|
|
1087
|
+
*
|
|
1088
|
+
* @see has_hex_prefix
|
|
1089
|
+
*/
|
|
1090
|
+
constexpr bool has_hex_prefix_unsafe(std::string_view input);
|
|
1091
|
+
/**
|
|
1092
|
+
* @private
|
|
1093
|
+
* Check whether a string starts with 0x or 0X.
|
|
1094
|
+
*/
|
|
1095
|
+
constexpr bool has_hex_prefix(std::string_view input);
|
|
1096
|
+
|
|
1097
|
+
/**
|
|
1098
|
+
* @private
|
|
1099
|
+
* Check whether x is an ASCII digit. More likely to be inlined than
|
|
1100
|
+
* std::isdigit.
|
|
1101
|
+
*/
|
|
1102
|
+
constexpr bool is_digit(char x) noexcept;
|
|
1103
|
+
|
|
1104
|
+
/**
|
|
1105
|
+
* @private
|
|
1106
|
+
* @details A string starts with a Windows drive letter if all of the following
|
|
1107
|
+
* are true:
|
|
1108
|
+
*
|
|
1109
|
+
* - its length is greater than or equal to 2
|
|
1110
|
+
* - its first two code points are a Windows drive letter
|
|
1111
|
+
* - its length is 2 or its third code point is U+002F (/), U+005C (\), U+003F
|
|
1112
|
+
* (?), or U+0023 (#).
|
|
1113
|
+
*
|
|
1114
|
+
* https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
|
|
1115
|
+
*/
|
|
1116
|
+
inline constexpr bool is_windows_drive_letter(std::string_view input) noexcept;
|
|
1117
|
+
|
|
1118
|
+
/**
|
|
1119
|
+
* @private
|
|
1120
|
+
* @details A normalized Windows drive letter is a Windows drive letter of which
|
|
1121
|
+
* the second code point is U+003A (:).
|
|
1122
|
+
*/
|
|
1123
|
+
inline constexpr bool is_normalized_windows_drive_letter(
|
|
1124
|
+
std::string_view input) noexcept;
|
|
1125
|
+
|
|
1126
|
+
/**
|
|
1127
|
+
* @private
|
|
1128
|
+
* Returns true if an input is an ipv4 address. It is assumed that the string
|
|
1129
|
+
* does not contain uppercase ASCII characters (the input should have been
|
|
1130
|
+
* lowercased before calling this function) and is not empty.
|
|
1131
|
+
*/
|
|
1132
|
+
ada_really_inline constexpr bool is_ipv4(std::string_view view) noexcept;
|
|
1133
|
+
|
|
1134
|
+
/**
|
|
1135
|
+
* @private
|
|
1136
|
+
* Returns a bitset. If the first bit is set, then at least one character needs
|
|
1137
|
+
* percent encoding. If the second bit is set, a \\ is found. If the third bit
|
|
1138
|
+
* is set then we have a dot. If the fourth bit is set, then we have a percent
|
|
1139
|
+
* character.
|
|
1140
|
+
*/
|
|
1141
|
+
ada_really_inline constexpr uint8_t path_signature(
|
|
1142
|
+
std::string_view input) noexcept;
|
|
1143
|
+
|
|
1144
|
+
/**
|
|
1145
|
+
* @private
|
|
1146
|
+
* Returns true if the length of the domain name and its labels are according to
|
|
1147
|
+
* the specifications. The length of the domain must be at most 255 octets (253
|
|
1148
|
+
* characters not including the last 2 which are the empty label reserved at the
|
|
1149
|
+
* end). When the empty label is included (a dot at the end), the domain name
|
|
1150
|
+
* can have 254 characters. The length of a label must be at least 1 and at most
|
|
1151
|
+
* 63 characters.
|
|
1152
|
+
* @see section 3.1. of https://www.rfc-editor.org/rfc/rfc1034
|
|
1153
|
+
* @see https://www.unicode.org/reports/tr46/#ToASCII
|
|
1154
|
+
*/
|
|
1155
|
+
ada_really_inline constexpr bool verify_dns_length(
|
|
1156
|
+
std::string_view input) noexcept;
|
|
1157
|
+
|
|
1158
|
+
/**
|
|
1159
|
+
* @private
|
|
1160
|
+
* Fast-path parser for pure decimal IPv4 addresses (e.g., "192.168.1.1").
|
|
1161
|
+
* Returns the packed 32-bit IPv4 address on success, or a value > 0xFFFFFFFF
|
|
1162
|
+
* to indicate failure (caller should fall back to general parser).
|
|
1163
|
+
* This is optimized for the common case where the input is a well-formed
|
|
1164
|
+
* decimal IPv4 address with exactly 4 octets.
|
|
1165
|
+
*/
|
|
1166
|
+
ada_really_inline constexpr uint64_t try_parse_ipv4_fast(
|
|
1167
|
+
std::string_view input) noexcept;
|
|
1168
|
+
|
|
1169
|
+
/**
|
|
1170
|
+
* Sentinel value indicating try_parse_ipv4_fast() did not succeed.
|
|
1171
|
+
* Any value > 0xFFFFFFFF indicates the fast path should not be used.
|
|
1172
|
+
*/
|
|
1173
|
+
constexpr uint64_t ipv4_fast_fail = uint64_t(1) << 32;
|
|
1174
|
+
|
|
1175
|
+
} // namespace ada::checkers
|
|
1176
|
+
|
|
1177
|
+
#endif // ADA_CHECKERS_H
|
|
1178
|
+
/* end file include/ada/checkers.h */
|
|
1010
1179
|
|
|
1011
1180
|
namespace ada::checkers {
|
|
1012
1181
|
|
|
@@ -1049,6 +1218,64 @@ constexpr bool is_normalized_windows_drive_letter(
|
|
|
1049
1218
|
return input.size() >= 2 && (is_alpha(input[0]) && (input[1] == ':'));
|
|
1050
1219
|
}
|
|
1051
1220
|
|
|
1221
|
+
ada_really_inline constexpr uint64_t try_parse_ipv4_fast(
|
|
1222
|
+
std::string_view input) noexcept {
|
|
1223
|
+
const char* p = input.data();
|
|
1224
|
+
const char* const pend = p + input.size();
|
|
1225
|
+
|
|
1226
|
+
uint32_t ipv4 = 0;
|
|
1227
|
+
|
|
1228
|
+
for (int i = 0; i < 4; ++i) {
|
|
1229
|
+
if (p == pend) {
|
|
1230
|
+
return ipv4_fast_fail;
|
|
1231
|
+
}
|
|
1232
|
+
|
|
1233
|
+
uint32_t val;
|
|
1234
|
+
char c = *p;
|
|
1235
|
+
if (c >= '0' && c <= '9') {
|
|
1236
|
+
val = c - '0';
|
|
1237
|
+
p++;
|
|
1238
|
+
} else {
|
|
1239
|
+
return ipv4_fast_fail;
|
|
1240
|
+
}
|
|
1241
|
+
|
|
1242
|
+
if (p < pend) {
|
|
1243
|
+
c = *p;
|
|
1244
|
+
if (c >= '0' && c <= '9') {
|
|
1245
|
+
if (val == 0) return ipv4_fast_fail;
|
|
1246
|
+
val = val * 10 + (c - '0');
|
|
1247
|
+
p++;
|
|
1248
|
+
if (p < pend) {
|
|
1249
|
+
c = *p;
|
|
1250
|
+
if (c >= '0' && c <= '9') {
|
|
1251
|
+
val = val * 10 + (c - '0');
|
|
1252
|
+
p++;
|
|
1253
|
+
if (val > 255) return ipv4_fast_fail;
|
|
1254
|
+
}
|
|
1255
|
+
}
|
|
1256
|
+
}
|
|
1257
|
+
}
|
|
1258
|
+
|
|
1259
|
+
ipv4 = (ipv4 << 8) | val;
|
|
1260
|
+
|
|
1261
|
+
if (i < 3) {
|
|
1262
|
+
if (p == pend || *p != '.') {
|
|
1263
|
+
return ipv4_fast_fail;
|
|
1264
|
+
}
|
|
1265
|
+
p++;
|
|
1266
|
+
}
|
|
1267
|
+
}
|
|
1268
|
+
|
|
1269
|
+
if (p != pend) {
|
|
1270
|
+
if (p == pend - 1 && *p == '.') {
|
|
1271
|
+
return ipv4;
|
|
1272
|
+
}
|
|
1273
|
+
return ipv4_fast_fail;
|
|
1274
|
+
}
|
|
1275
|
+
|
|
1276
|
+
return ipv4;
|
|
1277
|
+
}
|
|
1278
|
+
|
|
1052
1279
|
} // namespace ada::checkers
|
|
1053
1280
|
|
|
1054
1281
|
#endif // ADA_CHECKERS_INL_H
|
|
@@ -1102,7 +1329,11 @@ constexpr ada_really_inline void log([[maybe_unused]] Args... args) {
|
|
|
1102
1329
|
/* begin file include/ada/encoding_type.h */
|
|
1103
1330
|
/**
|
|
1104
1331
|
* @file encoding_type.h
|
|
1105
|
-
* @brief
|
|
1332
|
+
* @brief Character encoding type definitions.
|
|
1333
|
+
*
|
|
1334
|
+
* Defines the encoding types supported for URL processing.
|
|
1335
|
+
*
|
|
1336
|
+
* @see https://encoding.spec.whatwg.org/
|
|
1106
1337
|
*/
|
|
1107
1338
|
#ifndef ADA_ENCODING_TYPE_H
|
|
1108
1339
|
#define ADA_ENCODING_TYPE_H
|
|
@@ -1112,19 +1343,23 @@ constexpr ada_really_inline void log([[maybe_unused]] Args... args) {
|
|
|
1112
1343
|
namespace ada {
|
|
1113
1344
|
|
|
1114
1345
|
/**
|
|
1115
|
-
*
|
|
1116
|
-
*
|
|
1346
|
+
* @brief Character encoding types for URL processing.
|
|
1347
|
+
*
|
|
1348
|
+
* Specifies the character encoding used for percent-decoding and other
|
|
1349
|
+
* string operations. UTF-8 is the most commonly used encoding for URLs.
|
|
1117
1350
|
*
|
|
1118
1351
|
* @see https://encoding.spec.whatwg.org/#encodings
|
|
1119
1352
|
*/
|
|
1120
1353
|
enum class encoding_type {
|
|
1121
|
-
UTF8,
|
|
1122
|
-
UTF_16LE,
|
|
1123
|
-
UTF_16BE,
|
|
1354
|
+
UTF8, /**< UTF-8 encoding (default for URLs) */
|
|
1355
|
+
UTF_16LE, /**< UTF-16 Little Endian encoding */
|
|
1356
|
+
UTF_16BE, /**< UTF-16 Big Endian encoding */
|
|
1124
1357
|
};
|
|
1125
1358
|
|
|
1126
1359
|
/**
|
|
1127
|
-
*
|
|
1360
|
+
* Converts an encoding_type to its string representation.
|
|
1361
|
+
* @param type The encoding type to convert.
|
|
1362
|
+
* @return A string view of the encoding name.
|
|
1128
1363
|
*/
|
|
1129
1364
|
ada_warn_unused std::string_view to_string(encoding_type type);
|
|
1130
1365
|
|
|
@@ -1143,7 +1378,11 @@ ada_warn_unused std::string_view to_string(encoding_type type);
|
|
|
1143
1378
|
/* begin file include/ada/url_base.h */
|
|
1144
1379
|
/**
|
|
1145
1380
|
* @file url_base.h
|
|
1146
|
-
* @brief
|
|
1381
|
+
* @brief Base class and common definitions for URL types.
|
|
1382
|
+
*
|
|
1383
|
+
* This file defines the `url_base` abstract base class from which both
|
|
1384
|
+
* `ada::url` and `ada::url_aggregator` inherit. It also defines common
|
|
1385
|
+
* enumerations like `url_host_type`.
|
|
1147
1386
|
*/
|
|
1148
1387
|
#ifndef ADA_URL_BASE_H
|
|
1149
1388
|
#define ADA_URL_BASE_H
|
|
@@ -1151,7 +1390,13 @@ ada_warn_unused std::string_view to_string(encoding_type type);
|
|
|
1151
1390
|
/* begin file include/ada/scheme.h */
|
|
1152
1391
|
/**
|
|
1153
1392
|
* @file scheme.h
|
|
1154
|
-
* @brief
|
|
1393
|
+
* @brief URL scheme type definitions and utilities.
|
|
1394
|
+
*
|
|
1395
|
+
* This header defines the URL scheme types (http, https, etc.) and provides
|
|
1396
|
+
* functions to identify special schemes and their default ports according
|
|
1397
|
+
* to the WHATWG URL Standard.
|
|
1398
|
+
*
|
|
1399
|
+
* @see https://url.spec.whatwg.org/#special-scheme
|
|
1155
1400
|
*/
|
|
1156
1401
|
#ifndef ADA_SCHEME_H
|
|
1157
1402
|
#define ADA_SCHEME_H
|
|
@@ -1161,62 +1406,65 @@ ada_warn_unused std::string_view to_string(encoding_type type);
|
|
|
1161
1406
|
|
|
1162
1407
|
/**
|
|
1163
1408
|
* @namespace ada::scheme
|
|
1164
|
-
* @brief
|
|
1409
|
+
* @brief URL scheme utilities and constants.
|
|
1410
|
+
*
|
|
1411
|
+
* Provides functions for working with URL schemes, including identification
|
|
1412
|
+
* of special schemes and retrieval of default port numbers.
|
|
1165
1413
|
*/
|
|
1166
1414
|
namespace ada::scheme {
|
|
1167
1415
|
|
|
1168
1416
|
/**
|
|
1169
|
-
*
|
|
1170
|
-
*
|
|
1171
|
-
*
|
|
1172
|
-
*
|
|
1173
|
-
*
|
|
1174
|
-
*
|
|
1175
|
-
*
|
|
1176
|
-
*
|
|
1417
|
+
* @brief Enumeration of URL scheme types.
|
|
1418
|
+
*
|
|
1419
|
+
* Special schemes have specific parsing rules and default ports.
|
|
1420
|
+
* Using an enum allows efficient scheme comparisons without string operations.
|
|
1421
|
+
*
|
|
1422
|
+
* Default ports:
|
|
1423
|
+
* - HTTP: 80
|
|
1424
|
+
* - HTTPS: 443
|
|
1425
|
+
* - WS: 80
|
|
1426
|
+
* - WSS: 443
|
|
1427
|
+
* - FTP: 21
|
|
1428
|
+
* - FILE: (none)
|
|
1177
1429
|
*/
|
|
1178
1430
|
enum type : uint8_t {
|
|
1179
|
-
HTTP = 0,
|
|
1180
|
-
NOT_SPECIAL = 1,
|
|
1181
|
-
HTTPS = 2,
|
|
1182
|
-
WS = 3,
|
|
1183
|
-
FTP = 4,
|
|
1184
|
-
WSS = 5,
|
|
1185
|
-
FILE = 6
|
|
1431
|
+
HTTP = 0, /**< http:// scheme (port 80) */
|
|
1432
|
+
NOT_SPECIAL = 1, /**< Non-special scheme (no default port) */
|
|
1433
|
+
HTTPS = 2, /**< https:// scheme (port 443) */
|
|
1434
|
+
WS = 3, /**< ws:// WebSocket scheme (port 80) */
|
|
1435
|
+
FTP = 4, /**< ftp:// scheme (port 21) */
|
|
1436
|
+
WSS = 5, /**< wss:// secure WebSocket scheme (port 443) */
|
|
1437
|
+
FILE = 6 /**< file:// scheme (no default port) */
|
|
1186
1438
|
};
|
|
1187
1439
|
|
|
1188
1440
|
/**
|
|
1189
|
-
*
|
|
1190
|
-
*
|
|
1191
|
-
*
|
|
1192
|
-
*
|
|
1193
|
-
*
|
|
1194
|
-
* @see https://url.spec.whatwg.org/#url-miscellaneous
|
|
1195
|
-
* @param scheme
|
|
1196
|
-
* @return If scheme is a special scheme
|
|
1441
|
+
* Checks if a scheme string is a special scheme.
|
|
1442
|
+
* @param scheme The scheme string to check (e.g., "http", "https").
|
|
1443
|
+
* @return `true` if the scheme is special, `false` otherwise.
|
|
1444
|
+
* @see https://url.spec.whatwg.org/#special-scheme
|
|
1197
1445
|
*/
|
|
1198
1446
|
ada_really_inline constexpr bool is_special(std::string_view scheme);
|
|
1199
1447
|
|
|
1200
1448
|
/**
|
|
1201
|
-
*
|
|
1202
|
-
*
|
|
1203
|
-
*
|
|
1204
|
-
*
|
|
1205
|
-
*
|
|
1206
|
-
* @see https://url.spec.whatwg.org/#url-miscellaneous
|
|
1207
|
-
* @param scheme
|
|
1208
|
-
* @return The special port
|
|
1449
|
+
* Returns the default port for a special scheme string.
|
|
1450
|
+
* @param scheme The scheme string (e.g., "http", "https").
|
|
1451
|
+
* @return The default port number, or 0 if not a special scheme.
|
|
1452
|
+
* @see https://url.spec.whatwg.org/#special-scheme
|
|
1209
1453
|
*/
|
|
1210
1454
|
constexpr uint16_t get_special_port(std::string_view scheme) noexcept;
|
|
1211
1455
|
|
|
1212
1456
|
/**
|
|
1213
|
-
* Returns the port
|
|
1457
|
+
* Returns the default port for a scheme type.
|
|
1458
|
+
* @param type The scheme type enum value.
|
|
1459
|
+
* @return The default port number, or 0 if not applicable.
|
|
1214
1460
|
* @see https://url.spec.whatwg.org/#special-scheme
|
|
1215
1461
|
*/
|
|
1216
1462
|
constexpr uint16_t get_special_port(ada::scheme::type type) noexcept;
|
|
1463
|
+
|
|
1217
1464
|
/**
|
|
1218
|
-
*
|
|
1219
|
-
*
|
|
1465
|
+
* Converts a scheme string to its type enum.
|
|
1466
|
+
* @param scheme The scheme string to convert.
|
|
1467
|
+
* @return The corresponding scheme type, or NOT_SPECIAL if not recognized.
|
|
1220
1468
|
*/
|
|
1221
1469
|
constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept;
|
|
1222
1470
|
|
|
@@ -1231,112 +1479,112 @@ constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept;
|
|
|
1231
1479
|
namespace ada {
|
|
1232
1480
|
|
|
1233
1481
|
/**
|
|
1234
|
-
*
|
|
1482
|
+
* @brief Enum representing the type of host in a URL.
|
|
1483
|
+
*
|
|
1484
|
+
* Used to distinguish between regular domain names, IPv4 addresses,
|
|
1485
|
+
* and IPv6 addresses for proper parsing and serialization.
|
|
1235
1486
|
*/
|
|
1236
1487
|
enum url_host_type : uint8_t {
|
|
1237
|
-
/**
|
|
1238
|
-
* Represents common URLs such as "https://www.google.com"
|
|
1239
|
-
*/
|
|
1488
|
+
/** Regular domain name (e.g., "www.example.com") */
|
|
1240
1489
|
DEFAULT = 0,
|
|
1241
|
-
/**
|
|
1242
|
-
* Represents ipv4 addresses such as "http://127.0.0.1"
|
|
1243
|
-
*/
|
|
1490
|
+
/** IPv4 address (e.g., "127.0.0.1") */
|
|
1244
1491
|
IPV4 = 1,
|
|
1245
|
-
/**
|
|
1246
|
-
* Represents ipv6 addresses such as
|
|
1247
|
-
* "http://[2001:db8:3333:4444:5555:6666:7777:8888]"
|
|
1248
|
-
*/
|
|
1492
|
+
/** IPv6 address (e.g., "[::1]" or "[2001:db8::1]") */
|
|
1249
1493
|
IPV6 = 2,
|
|
1250
1494
|
};
|
|
1251
1495
|
|
|
1252
1496
|
/**
|
|
1253
|
-
* @brief
|
|
1497
|
+
* @brief Abstract base class for URL representations.
|
|
1498
|
+
*
|
|
1499
|
+
* The `url_base` class provides the common interface and state shared by
|
|
1500
|
+
* both `ada::url` and `ada::url_aggregator`. It contains basic URL attributes
|
|
1501
|
+
* like validity status and scheme type, but delegates component storage and
|
|
1502
|
+
* access to derived classes.
|
|
1254
1503
|
*
|
|
1255
|
-
* @
|
|
1256
|
-
*
|
|
1257
|
-
* ada::url and ada::url_aggregator.
|
|
1504
|
+
* @note This is an abstract class and cannot be instantiated directly.
|
|
1505
|
+
* Use `ada::url` or `ada::url_aggregator` instead.
|
|
1258
1506
|
*
|
|
1259
|
-
*
|
|
1507
|
+
* @see url
|
|
1508
|
+
* @see url_aggregator
|
|
1260
1509
|
*/
|
|
1261
1510
|
struct url_base {
|
|
1262
1511
|
virtual ~url_base() = default;
|
|
1263
1512
|
|
|
1264
1513
|
/**
|
|
1265
|
-
*
|
|
1514
|
+
* Indicates whether the URL was successfully parsed.
|
|
1515
|
+
* Set to `false` if parsing failed (e.g., invalid URL syntax).
|
|
1266
1516
|
*/
|
|
1267
1517
|
bool is_valid{true};
|
|
1268
1518
|
|
|
1269
1519
|
/**
|
|
1270
|
-
*
|
|
1520
|
+
* Indicates whether the URL has an opaque path (non-hierarchical).
|
|
1521
|
+
* Opaque paths occur in non-special URLs like `mailto:` or `javascript:`.
|
|
1271
1522
|
*/
|
|
1272
1523
|
bool has_opaque_path{false};
|
|
1273
1524
|
|
|
1274
1525
|
/**
|
|
1275
|
-
* URL
|
|
1526
|
+
* The type of the URL's host (domain, IPv4, or IPv6).
|
|
1276
1527
|
*/
|
|
1277
1528
|
url_host_type host_type = url_host_type::DEFAULT;
|
|
1278
1529
|
|
|
1279
1530
|
/**
|
|
1280
1531
|
* @private
|
|
1532
|
+
* Internal representation of the URL's scheme type.
|
|
1281
1533
|
*/
|
|
1282
1534
|
ada::scheme::type type{ada::scheme::type::NOT_SPECIAL};
|
|
1283
1535
|
|
|
1284
1536
|
/**
|
|
1285
|
-
*
|
|
1286
|
-
*
|
|
1537
|
+
* Checks if the URL has a special scheme (http, https, ws, wss, ftp, file).
|
|
1538
|
+
* Special schemes have specific parsing rules and default ports.
|
|
1539
|
+
* @return `true` if the scheme is special, `false` otherwise.
|
|
1287
1540
|
*/
|
|
1288
1541
|
[[nodiscard]] ada_really_inline constexpr bool is_special() const noexcept;
|
|
1289
1542
|
|
|
1290
1543
|
/**
|
|
1291
|
-
*
|
|
1292
|
-
* origin.
|
|
1293
|
-
* @return a newly allocated string.
|
|
1544
|
+
* Returns the URL's origin (scheme + host + port for special URLs).
|
|
1545
|
+
* @return A newly allocated string containing the serialized origin.
|
|
1294
1546
|
* @see https://url.spec.whatwg.org/#concept-url-origin
|
|
1295
1547
|
*/
|
|
1296
|
-
[[nodiscard]] virtual std::string get_origin() const
|
|
1548
|
+
[[nodiscard]] virtual std::string get_origin() const = 0;
|
|
1297
1549
|
|
|
1298
1550
|
/**
|
|
1299
|
-
*
|
|
1300
|
-
*
|
|
1301
|
-
*
|
|
1551
|
+
* Validates whether the hostname is a valid domain according to RFC 1034.
|
|
1552
|
+
* Checks that the domain and its labels have valid lengths.
|
|
1553
|
+
* @return `true` if the domain is valid, `false` otherwise.
|
|
1302
1554
|
*/
|
|
1303
1555
|
[[nodiscard]] virtual bool has_valid_domain() const noexcept = 0;
|
|
1304
1556
|
|
|
1305
1557
|
/**
|
|
1306
1558
|
* @private
|
|
1307
|
-
*
|
|
1308
|
-
*
|
|
1309
|
-
* Returns 0 otherwise.
|
|
1559
|
+
* Returns the default port for special schemes (e.g., 443 for https).
|
|
1560
|
+
* Returns 0 for file:// URLs or non-special schemes.
|
|
1310
1561
|
*/
|
|
1311
1562
|
[[nodiscard]] inline uint16_t get_special_port() const noexcept;
|
|
1312
1563
|
|
|
1313
1564
|
/**
|
|
1314
1565
|
* @private
|
|
1315
|
-
*
|
|
1316
|
-
* Get the default port if the url's scheme has one, returns 0 otherwise.
|
|
1566
|
+
* Returns the default port for the URL's scheme, or 0 if none.
|
|
1317
1567
|
*/
|
|
1318
1568
|
[[nodiscard]] ada_really_inline uint16_t scheme_default_port() const noexcept;
|
|
1319
1569
|
|
|
1320
1570
|
/**
|
|
1321
1571
|
* @private
|
|
1322
|
-
*
|
|
1323
|
-
*
|
|
1324
|
-
*
|
|
1325
|
-
*
|
|
1326
|
-
* It returns how many bytes were consumed when a number is successfully
|
|
1327
|
-
* parsed.
|
|
1328
|
-
* @return On failure, it returns zero.
|
|
1329
|
-
* @see https://url.spec.whatwg.org/#host-parsing
|
|
1572
|
+
* Parses a port number from the input string.
|
|
1573
|
+
* @param view The string containing the port to parse.
|
|
1574
|
+
* @param check_trailing_content Whether to validate no trailing characters.
|
|
1575
|
+
* @return Number of bytes consumed on success, 0 on failure.
|
|
1330
1576
|
*/
|
|
1331
1577
|
virtual size_t parse_port(std::string_view view,
|
|
1332
|
-
bool check_trailing_content)
|
|
1578
|
+
bool check_trailing_content) = 0;
|
|
1333
1579
|
|
|
1334
|
-
|
|
1580
|
+
/** @private */
|
|
1581
|
+
virtual ada_really_inline size_t parse_port(std::string_view view) {
|
|
1335
1582
|
return this->parse_port(view, false);
|
|
1336
1583
|
}
|
|
1337
1584
|
|
|
1338
1585
|
/**
|
|
1339
|
-
* Returns a JSON string representation of this URL.
|
|
1586
|
+
* Returns a JSON string representation of this URL for debugging.
|
|
1587
|
+
* @return A JSON-formatted string with URL information.
|
|
1340
1588
|
*/
|
|
1341
1589
|
[[nodiscard]] virtual std::string to_string() const = 0;
|
|
1342
1590
|
|
|
@@ -1405,8 +1653,7 @@ ada_really_inline std::optional<std::string_view> prune_hash(
|
|
|
1405
1653
|
* @see https://url.spec.whatwg.org/#shorten-a-urls-path
|
|
1406
1654
|
* @returns true if path is shortened.
|
|
1407
1655
|
*/
|
|
1408
|
-
ada_really_inline bool shorten_path(std::string& path,
|
|
1409
|
-
ada::scheme::type type) noexcept;
|
|
1656
|
+
ada_really_inline bool shorten_path(std::string& path, ada::scheme::type type);
|
|
1410
1657
|
|
|
1411
1658
|
/**
|
|
1412
1659
|
* @private
|
|
@@ -1415,7 +1662,7 @@ ada_really_inline bool shorten_path(std::string& path,
|
|
|
1415
1662
|
* @returns true if path is shortened.
|
|
1416
1663
|
*/
|
|
1417
1664
|
ada_really_inline bool shorten_path(std::string_view& path,
|
|
1418
|
-
ada::scheme::type type)
|
|
1665
|
+
ada::scheme::type type);
|
|
1419
1666
|
|
|
1420
1667
|
/**
|
|
1421
1668
|
* @private
|
|
@@ -1436,15 +1683,14 @@ ada_really_inline void parse_prepared_path(std::string_view input,
|
|
|
1436
1683
|
* @private
|
|
1437
1684
|
* Remove all ASCII tab or newline characters from the input, in place.
|
|
1438
1685
|
*/
|
|
1439
|
-
ada_really_inline void remove_ascii_tab_or_newline(std::string& input)
|
|
1686
|
+
ada_really_inline void remove_ascii_tab_or_newline(std::string& input);
|
|
1440
1687
|
|
|
1441
1688
|
/**
|
|
1442
1689
|
* @private
|
|
1443
1690
|
* Return the substring from input going from index pos to the end.
|
|
1444
|
-
* This function cannot throw.
|
|
1445
1691
|
*/
|
|
1446
1692
|
ada_really_inline constexpr std::string_view substring(std::string_view input,
|
|
1447
|
-
size_t pos)
|
|
1693
|
+
size_t pos);
|
|
1448
1694
|
|
|
1449
1695
|
/**
|
|
1450
1696
|
* @private
|
|
@@ -1459,7 +1705,7 @@ bool overlaps(std::string_view input1, const std::string& input2) noexcept;
|
|
|
1459
1705
|
*/
|
|
1460
1706
|
ada_really_inline constexpr std::string_view substring(std::string_view input,
|
|
1461
1707
|
size_t pos1,
|
|
1462
|
-
size_t pos2)
|
|
1708
|
+
size_t pos2) {
|
|
1463
1709
|
#if ADA_DEVELOPMENT_CHECKS
|
|
1464
1710
|
if (pos2 < pos1) {
|
|
1465
1711
|
std::cerr << "Negative-length substring: [" << pos1 << " to " << pos2 << ")"
|
|
@@ -1498,8 +1744,7 @@ void trim_c0_whitespace(std::string_view& input) noexcept;
|
|
|
1498
1744
|
* https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
|
|
1499
1745
|
*/
|
|
1500
1746
|
template <class url_type>
|
|
1501
|
-
ada_really_inline void strip_trailing_spaces_from_opaque_path(
|
|
1502
|
-
url_type& url) noexcept;
|
|
1747
|
+
ada_really_inline void strip_trailing_spaces_from_opaque_path(url_type& url);
|
|
1503
1748
|
|
|
1504
1749
|
/**
|
|
1505
1750
|
* @private
|
|
@@ -1589,7 +1834,13 @@ inline int fast_digit_count(uint32_t x) noexcept {
|
|
|
1589
1834
|
/* begin file include/ada/parser.h */
|
|
1590
1835
|
/**
|
|
1591
1836
|
* @file parser.h
|
|
1592
|
-
* @brief
|
|
1837
|
+
* @brief Low-level URL parsing functions.
|
|
1838
|
+
*
|
|
1839
|
+
* This header provides the internal URL parsing implementation. Most users
|
|
1840
|
+
* should use `ada::parse()` from implementation.h instead of these functions
|
|
1841
|
+
* directly.
|
|
1842
|
+
*
|
|
1843
|
+
* @see implementation.h for the recommended public API
|
|
1593
1844
|
*/
|
|
1594
1845
|
#ifndef ADA_PARSER_H
|
|
1595
1846
|
#define ADA_PARSER_H
|
|
@@ -2333,6 +2584,7 @@ struct expected_operations_base : expected_storage_base<T, E> {
|
|
|
2333
2584
|
}
|
|
2334
2585
|
|
|
2335
2586
|
template <class Rhs>
|
|
2587
|
+
// NOLINTNEXTLINE(bugprone-exception-escape)
|
|
2336
2588
|
void construct_with(Rhs &&rhs) noexcept {
|
|
2337
2589
|
new (std::addressof(this->m_val)) T(std::forward<Rhs>(rhs).get());
|
|
2338
2590
|
this->m_has_val = true;
|
|
@@ -4193,14 +4445,23 @@ class std_regex_provider final {
|
|
|
4193
4445
|
/* begin file include/ada/errors.h */
|
|
4194
4446
|
/**
|
|
4195
4447
|
* @file errors.h
|
|
4196
|
-
* @brief
|
|
4448
|
+
* @brief Error type definitions for URL parsing.
|
|
4449
|
+
*
|
|
4450
|
+
* Defines the error codes that can be returned when URL parsing fails.
|
|
4197
4451
|
*/
|
|
4198
4452
|
#ifndef ADA_ERRORS_H
|
|
4199
4453
|
#define ADA_ERRORS_H
|
|
4200
4454
|
|
|
4201
4455
|
#include <cstdint>
|
|
4202
4456
|
namespace ada {
|
|
4203
|
-
|
|
4457
|
+
/**
|
|
4458
|
+
* @brief Error codes for URL parsing operations.
|
|
4459
|
+
*
|
|
4460
|
+
* Used with `tl::expected` to indicate why a URL parsing operation failed.
|
|
4461
|
+
*/
|
|
4462
|
+
enum class errors : uint8_t {
|
|
4463
|
+
type_error /**< A type error occurred (e.g., invalid URL syntax). */
|
|
4464
|
+
};
|
|
4204
4465
|
} // namespace ada
|
|
4205
4466
|
#endif // ADA_ERRORS_H
|
|
4206
4467
|
/* end file include/ada/errors.h */
|
|
@@ -4333,9 +4594,7 @@ struct url_pattern_init {
|
|
|
4333
4594
|
#endif // ADA_URL_PATTERN_INIT_H
|
|
4334
4595
|
/* end file include/ada/url_pattern_init.h */
|
|
4335
4596
|
|
|
4336
|
-
/**
|
|
4337
|
-
* @private
|
|
4338
|
-
*/
|
|
4597
|
+
/** @private Forward declarations */
|
|
4339
4598
|
namespace ada {
|
|
4340
4599
|
struct url_aggregator;
|
|
4341
4600
|
struct url;
|
|
@@ -4349,14 +4608,24 @@ enum class errors : uint8_t;
|
|
|
4349
4608
|
|
|
4350
4609
|
/**
|
|
4351
4610
|
* @namespace ada::parser
|
|
4352
|
-
* @brief
|
|
4611
|
+
* @brief Internal URL parsing implementation.
|
|
4612
|
+
*
|
|
4613
|
+
* Contains the core URL parsing algorithm as specified by the WHATWG URL
|
|
4614
|
+
* Standard. These functions are used internally by `ada::parse()`.
|
|
4353
4615
|
*/
|
|
4354
4616
|
namespace ada::parser {
|
|
4355
4617
|
/**
|
|
4356
|
-
* Parses a
|
|
4357
|
-
*
|
|
4358
|
-
*
|
|
4359
|
-
*
|
|
4618
|
+
* Parses a URL string into a URL object.
|
|
4619
|
+
*
|
|
4620
|
+
* @tparam result_type The type of URL object to create (url or url_aggregator).
|
|
4621
|
+
*
|
|
4622
|
+
* @param user_input The URL string to parse (must be valid UTF-8).
|
|
4623
|
+
* @param base_url Optional base URL for resolving relative URLs.
|
|
4624
|
+
*
|
|
4625
|
+
* @return The parsed URL object. Check `is_valid` to determine if parsing
|
|
4626
|
+
* succeeded.
|
|
4627
|
+
*
|
|
4628
|
+
* @see https://url.spec.whatwg.org/#concept-basic-url-parser
|
|
4360
4629
|
*/
|
|
4361
4630
|
template <typename result_type = url_aggregator>
|
|
4362
4631
|
result_type parse_url(std::string_view user_input,
|
|
@@ -4397,7 +4666,14 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
4397
4666
|
/* begin file include/ada/url_pattern.h */
|
|
4398
4667
|
/**
|
|
4399
4668
|
* @file url_pattern.h
|
|
4400
|
-
* @brief
|
|
4669
|
+
* @brief URLPattern API implementation.
|
|
4670
|
+
*
|
|
4671
|
+
* This header provides the URLPattern API as specified by the WHATWG URL
|
|
4672
|
+
* Pattern Standard. URLPattern allows matching URLs against patterns with
|
|
4673
|
+
* wildcards and named groups, similar to how regular expressions match strings.
|
|
4674
|
+
*
|
|
4675
|
+
* @see https://urlpattern.spec.whatwg.org/
|
|
4676
|
+
* @see https://developer.mozilla.org/en-US/docs/Web/API/URL_Pattern_API
|
|
4401
4677
|
*/
|
|
4402
4678
|
#ifndef ADA_URL_PATTERN_H
|
|
4403
4679
|
#define ADA_URL_PATTERN_H
|
|
@@ -4405,8 +4681,13 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
4405
4681
|
/* begin file include/ada/implementation.h */
|
|
4406
4682
|
/**
|
|
4407
4683
|
* @file implementation.h
|
|
4408
|
-
* @brief
|
|
4409
|
-
*
|
|
4684
|
+
* @brief User-facing functions for URL parsing and manipulation.
|
|
4685
|
+
*
|
|
4686
|
+
* This header provides the primary public API for parsing URLs in Ada.
|
|
4687
|
+
* It includes the main `ada::parse()` function which is the recommended
|
|
4688
|
+
* entry point for most users.
|
|
4689
|
+
*
|
|
4690
|
+
* @see https://url.spec.whatwg.org/#api
|
|
4410
4691
|
*/
|
|
4411
4692
|
#ifndef ADA_IMPLEMENTATION_H
|
|
4412
4693
|
#define ADA_IMPLEMENTATION_H
|
|
@@ -4418,7 +4699,13 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
4418
4699
|
/* begin file include/ada/url.h */
|
|
4419
4700
|
/**
|
|
4420
4701
|
* @file url.h
|
|
4421
|
-
* @brief Declaration for the
|
|
4702
|
+
* @brief Declaration for the `ada::url` class.
|
|
4703
|
+
*
|
|
4704
|
+
* This file contains the `ada::url` struct which represents a parsed URL
|
|
4705
|
+
* using separate `std::string` instances for each component. This
|
|
4706
|
+
* representation is more flexible but uses more memory than `url_aggregator`.
|
|
4707
|
+
*
|
|
4708
|
+
* @see url_aggregator.h for a more memory-efficient alternative
|
|
4422
4709
|
*/
|
|
4423
4710
|
#ifndef ADA_URL_H
|
|
4424
4711
|
#define ADA_URL_H
|
|
@@ -4429,127 +4716,14 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
|
|
|
4429
4716
|
#include <string>
|
|
4430
4717
|
#include <string_view>
|
|
4431
4718
|
|
|
4432
|
-
/* begin file include/ada/checkers.h */
|
|
4433
|
-
/**
|
|
4434
|
-
* @file checkers.h
|
|
4435
|
-
* @brief Declarations for URL specific checkers used within Ada.
|
|
4436
|
-
*/
|
|
4437
|
-
#ifndef ADA_CHECKERS_H
|
|
4438
|
-
#define ADA_CHECKERS_H
|
|
4439
|
-
|
|
4440
|
-
|
|
4441
|
-
#include <cstring>
|
|
4442
|
-
#include <string_view>
|
|
4443
|
-
|
|
4444
|
-
/**
|
|
4445
|
-
* These functions are not part of our public API and may
|
|
4446
|
-
* change at any time.
|
|
4447
|
-
* @private
|
|
4448
|
-
* @namespace ada::checkers
|
|
4449
|
-
* @brief Includes the definitions for validation functions
|
|
4450
|
-
*/
|
|
4451
|
-
namespace ada::checkers {
|
|
4452
|
-
|
|
4453
|
-
/**
|
|
4454
|
-
* @private
|
|
4455
|
-
* Assuming that x is an ASCII letter, this function returns the lower case
|
|
4456
|
-
* equivalent.
|
|
4457
|
-
* @details More likely to be inlined by the compiler and constexpr.
|
|
4458
|
-
*/
|
|
4459
|
-
constexpr char to_lower(char x) noexcept;
|
|
4460
|
-
|
|
4461
|
-
/**
|
|
4462
|
-
* @private
|
|
4463
|
-
* Returns true if the character is an ASCII letter. Equivalent to std::isalpha
|
|
4464
|
-
* but more likely to be inlined by the compiler.
|
|
4465
|
-
*
|
|
4466
|
-
* @attention std::isalpha is not constexpr generally.
|
|
4467
|
-
*/
|
|
4468
|
-
constexpr bool is_alpha(char x) noexcept;
|
|
4469
|
-
|
|
4470
|
-
/**
|
|
4471
|
-
* @private
|
|
4472
|
-
* Check whether a string starts with 0x or 0X. The function is only
|
|
4473
|
-
* safe if input.size() >=2.
|
|
4474
|
-
*
|
|
4475
|
-
* @see has_hex_prefix
|
|
4476
|
-
*/
|
|
4477
|
-
constexpr bool has_hex_prefix_unsafe(std::string_view input);
|
|
4478
|
-
/**
|
|
4479
|
-
* @private
|
|
4480
|
-
* Check whether a string starts with 0x or 0X.
|
|
4481
|
-
*/
|
|
4482
|
-
constexpr bool has_hex_prefix(std::string_view input);
|
|
4483
|
-
|
|
4484
|
-
/**
|
|
4485
|
-
* @private
|
|
4486
|
-
* Check whether x is an ASCII digit. More likely to be inlined than
|
|
4487
|
-
* std::isdigit.
|
|
4488
|
-
*/
|
|
4489
|
-
constexpr bool is_digit(char x) noexcept;
|
|
4490
|
-
|
|
4491
|
-
/**
|
|
4492
|
-
* @private
|
|
4493
|
-
* @details A string starts with a Windows drive letter if all of the following
|
|
4494
|
-
* are true:
|
|
4495
|
-
*
|
|
4496
|
-
* - its length is greater than or equal to 2
|
|
4497
|
-
* - its first two code points are a Windows drive letter
|
|
4498
|
-
* - its length is 2 or its third code point is U+002F (/), U+005C (\), U+003F
|
|
4499
|
-
* (?), or U+0023 (#).
|
|
4500
|
-
*
|
|
4501
|
-
* https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
|
|
4502
|
-
*/
|
|
4503
|
-
inline constexpr bool is_windows_drive_letter(std::string_view input) noexcept;
|
|
4504
|
-
|
|
4505
|
-
/**
|
|
4506
|
-
* @private
|
|
4507
|
-
* @details A normalized Windows drive letter is a Windows drive letter of which
|
|
4508
|
-
* the second code point is U+003A (:).
|
|
4509
|
-
*/
|
|
4510
|
-
inline constexpr bool is_normalized_windows_drive_letter(
|
|
4511
|
-
std::string_view input) noexcept;
|
|
4512
|
-
|
|
4513
|
-
/**
|
|
4514
|
-
* @private
|
|
4515
|
-
* Returns true if an input is an ipv4 address. It is assumed that the string
|
|
4516
|
-
* does not contain uppercase ASCII characters (the input should have been
|
|
4517
|
-
* lowered cased before calling this function) and is not empty.
|
|
4518
|
-
*/
|
|
4519
|
-
ada_really_inline constexpr bool is_ipv4(std::string_view view) noexcept;
|
|
4520
|
-
|
|
4521
|
-
/**
|
|
4522
|
-
* @private
|
|
4523
|
-
* Returns a bitset. If the first bit is set, then at least one character needs
|
|
4524
|
-
* percent encoding. If the second bit is set, a \\ is found. If the third bit
|
|
4525
|
-
* is set then we have a dot. If the fourth bit is set, then we have a percent
|
|
4526
|
-
* character.
|
|
4527
|
-
*/
|
|
4528
|
-
ada_really_inline constexpr uint8_t path_signature(
|
|
4529
|
-
std::string_view input) noexcept;
|
|
4530
|
-
|
|
4531
|
-
/**
|
|
4532
|
-
* @private
|
|
4533
|
-
* Returns true if the length of the domain name and its labels are according to
|
|
4534
|
-
* the specifications. The length of the domain must be 255 octets (253
|
|
4535
|
-
* characters not including the last 2 which are the empty label reserved at the
|
|
4536
|
-
* end). When the empty label is included (a dot at the end), the domain name
|
|
4537
|
-
* can have 254 characters. The length of a label must be at least 1 and at most
|
|
4538
|
-
* 63 characters.
|
|
4539
|
-
* @see section 3.1. of https://www.rfc-editor.org/rfc/rfc1034
|
|
4540
|
-
* @see https://www.unicode.org/reports/tr46/#ToASCII
|
|
4541
|
-
*/
|
|
4542
|
-
ada_really_inline constexpr bool verify_dns_length(
|
|
4543
|
-
std::string_view input) noexcept;
|
|
4544
|
-
|
|
4545
|
-
} // namespace ada::checkers
|
|
4546
|
-
|
|
4547
|
-
#endif // ADA_CHECKERS_H
|
|
4548
|
-
/* end file include/ada/checkers.h */
|
|
4549
4719
|
/* begin file include/ada/url_components.h */
|
|
4550
4720
|
/**
|
|
4551
4721
|
* @file url_components.h
|
|
4552
|
-
* @brief
|
|
4722
|
+
* @brief URL component offset representation for url_aggregator.
|
|
4723
|
+
*
|
|
4724
|
+
* This file defines the `url_components` struct which stores byte offsets
|
|
4725
|
+
* into a URL string buffer. It is used internally by `url_aggregator` to
|
|
4726
|
+
* efficiently locate URL components without storing separate strings.
|
|
4553
4727
|
*/
|
|
4554
4728
|
#ifndef ADA_URL_COMPONENTS_H
|
|
4555
4729
|
#define ADA_URL_COMPONENTS_H
|
|
@@ -4557,14 +4731,32 @@ ada_really_inline constexpr bool verify_dns_length(
|
|
|
4557
4731
|
namespace ada {
|
|
4558
4732
|
|
|
4559
4733
|
/**
|
|
4560
|
-
* @brief URL
|
|
4734
|
+
* @brief Stores byte offsets for URL components within a buffer.
|
|
4735
|
+
*
|
|
4736
|
+
* The `url_components` struct uses 32-bit offsets to track the boundaries
|
|
4737
|
+
* of each URL component within a single string buffer. This enables efficient
|
|
4738
|
+
* component extraction without additional memory allocations.
|
|
4561
4739
|
*
|
|
4562
|
-
*
|
|
4563
|
-
*
|
|
4740
|
+
* Component layout in a URL:
|
|
4741
|
+
* ```
|
|
4742
|
+
* https://user:pass@example.com:1234/foo/bar?baz#quux
|
|
4743
|
+
* | | | | ^^^^| | |
|
|
4744
|
+
* | | | | | | | `----- hash_start
|
|
4745
|
+
* | | | | | | `--------- search_start
|
|
4746
|
+
* | | | | | `----------------- pathname_start
|
|
4747
|
+
* | | | | `--------------------- port
|
|
4748
|
+
* | | | `----------------------- host_end
|
|
4749
|
+
* | | `---------------------------------- host_start
|
|
4750
|
+
* | `--------------------------------------- username_end
|
|
4751
|
+
* `--------------------------------------------- protocol_end
|
|
4752
|
+
* ```
|
|
4564
4753
|
*
|
|
4565
|
-
*
|
|
4754
|
+
* @note The 32-bit offsets limit URLs to 4GB in length.
|
|
4755
|
+
* @note A value of `omitted` (UINT32_MAX) indicates the component is not
|
|
4756
|
+
* present.
|
|
4566
4757
|
*/
|
|
4567
4758
|
struct url_components {
|
|
4759
|
+
/** Sentinel value indicating a component is not present. */
|
|
4568
4760
|
constexpr static uint32_t omitted = uint32_t(-1);
|
|
4569
4761
|
|
|
4570
4762
|
url_components() = default;
|
|
@@ -4574,47 +4766,43 @@ struct url_components {
|
|
|
4574
4766
|
url_components &operator=(const url_components &u) = default;
|
|
4575
4767
|
~url_components() = default;
|
|
4576
4768
|
|
|
4577
|
-
|
|
4578
|
-
* By using 32-bit integers, we implicitly assume that the URL string
|
|
4579
|
-
* cannot exceed 4 GB.
|
|
4580
|
-
*
|
|
4581
|
-
* https://user:pass@example.com:1234/foo/bar?baz#quux
|
|
4582
|
-
* | | | | ^^^^| | |
|
|
4583
|
-
* | | | | | | | `----- hash_start
|
|
4584
|
-
* | | | | | | `--------- search_start
|
|
4585
|
-
* | | | | | `----------------- pathname_start
|
|
4586
|
-
* | | | | `--------------------- port
|
|
4587
|
-
* | | | `----------------------- host_end
|
|
4588
|
-
* | | `---------------------------------- host_start
|
|
4589
|
-
* | `--------------------------------------- username_end
|
|
4590
|
-
* `--------------------------------------------- protocol_end
|
|
4591
|
-
*/
|
|
4769
|
+
/** Offset of the end of the protocol/scheme (position of ':'). */
|
|
4592
4770
|
uint32_t protocol_end{0};
|
|
4771
|
+
|
|
4593
4772
|
/**
|
|
4594
|
-
*
|
|
4595
|
-
*
|
|
4773
|
+
* Offset of the end of the username.
|
|
4774
|
+
* Initialized to 0 (not `omitted`) to simplify username/password getters.
|
|
4596
4775
|
*/
|
|
4597
4776
|
uint32_t username_end{0};
|
|
4777
|
+
|
|
4778
|
+
/** Offset of the start of the host. */
|
|
4598
4779
|
uint32_t host_start{0};
|
|
4780
|
+
|
|
4781
|
+
/** Offset of the end of the host. */
|
|
4599
4782
|
uint32_t host_end{0};
|
|
4783
|
+
|
|
4784
|
+
/** Port number, or `omitted` if no port is specified. */
|
|
4600
4785
|
uint32_t port{omitted};
|
|
4786
|
+
|
|
4787
|
+
/** Offset of the start of the pathname. */
|
|
4601
4788
|
uint32_t pathname_start{0};
|
|
4789
|
+
|
|
4790
|
+
/** Offset of the '?' starting the query, or `omitted` if no query. */
|
|
4602
4791
|
uint32_t search_start{omitted};
|
|
4792
|
+
|
|
4793
|
+
/** Offset of the '#' starting the fragment, or `omitted` if no fragment. */
|
|
4603
4794
|
uint32_t hash_start{omitted};
|
|
4604
4795
|
|
|
4605
4796
|
/**
|
|
4606
|
-
*
|
|
4607
|
-
*
|
|
4608
|
-
*
|
|
4609
|
-
* a lower bound on the possible string length that may match these
|
|
4610
|
-
* offsets.
|
|
4611
|
-
* @return true if the offset values are
|
|
4612
|
-
* consistent with a possible URL string
|
|
4797
|
+
* Validates that offsets are in ascending order and consistent.
|
|
4798
|
+
* Useful for debugging to detect internal corruption.
|
|
4799
|
+
* @return `true` if offsets are consistent, `false` otherwise.
|
|
4613
4800
|
*/
|
|
4614
4801
|
[[nodiscard]] constexpr bool check_offset_consistency() const noexcept;
|
|
4615
4802
|
|
|
4616
4803
|
/**
|
|
4617
|
-
*
|
|
4804
|
+
* Returns a JSON string representation of the offsets for debugging.
|
|
4805
|
+
* @return A JSON-formatted string with all offset values.
|
|
4618
4806
|
*/
|
|
4619
4807
|
[[nodiscard]] std::string to_string() const;
|
|
4620
4808
|
|
|
@@ -4637,15 +4825,26 @@ struct url_aggregator;
|
|
|
4637
4825
|
// }
|
|
4638
4826
|
|
|
4639
4827
|
/**
|
|
4640
|
-
* @brief
|
|
4828
|
+
* @brief Represents a parsed URL with individual string components.
|
|
4641
4829
|
*
|
|
4642
|
-
*
|
|
4643
|
-
*
|
|
4644
|
-
*
|
|
4645
|
-
*
|
|
4646
|
-
* structure heavier and more reliant on memory allocations. When getting
|
|
4647
|
-
* components from the parsed URL, a new std::string is typically constructed.
|
|
4830
|
+
* The `url` struct stores each URL component (scheme, username, password,
|
|
4831
|
+
* host, port, path, query, fragment) as a separate `std::string`. This
|
|
4832
|
+
* provides flexibility but incurs more memory allocations compared to
|
|
4833
|
+
* `url_aggregator`.
|
|
4648
4834
|
*
|
|
4835
|
+
* **When to use `ada::url`:**
|
|
4836
|
+
* - When you need to frequently modify individual URL components
|
|
4837
|
+
* - When you want independent ownership of component strings
|
|
4838
|
+
*
|
|
4839
|
+
* **When to use `ada::url_aggregator` instead:**
|
|
4840
|
+
* - For read-mostly operations on parsed URLs
|
|
4841
|
+
* - When memory efficiency is important
|
|
4842
|
+
* - When you only need string_view access to components
|
|
4843
|
+
*
|
|
4844
|
+
* @note This type is returned when parsing with `ada::parse<ada::url>()`.
|
|
4845
|
+
* By default, `ada::parse()` returns `ada::url_aggregator`.
|
|
4846
|
+
*
|
|
4847
|
+
* @see url_aggregator For a more memory-efficient URL representation
|
|
4649
4848
|
* @see https://url.spec.whatwg.org/#url-representation
|
|
4650
4849
|
*/
|
|
4651
4850
|
struct url : url_base {
|
|
@@ -4704,177 +4903,217 @@ struct url : url_base {
|
|
|
4704
4903
|
*/
|
|
4705
4904
|
std::optional<std::string> hash{};
|
|
4706
4905
|
|
|
4707
|
-
/**
|
|
4906
|
+
/**
|
|
4907
|
+
* Checks if the URL has an empty hostname (host is set but empty string).
|
|
4908
|
+
* @return `true` if host exists but is empty, `false` otherwise.
|
|
4909
|
+
*/
|
|
4708
4910
|
[[nodiscard]] inline bool has_empty_hostname() const noexcept;
|
|
4709
|
-
|
|
4911
|
+
|
|
4912
|
+
/**
|
|
4913
|
+
* Checks if the URL has a non-default port explicitly specified.
|
|
4914
|
+
* @return `true` if a port is present, `false` otherwise.
|
|
4915
|
+
*/
|
|
4710
4916
|
[[nodiscard]] inline bool has_port() const noexcept;
|
|
4711
|
-
|
|
4917
|
+
|
|
4918
|
+
/**
|
|
4919
|
+
* Checks if the URL has a hostname (including empty hostnames).
|
|
4920
|
+
* @return `true` if host is present, `false` otherwise.
|
|
4921
|
+
*/
|
|
4712
4922
|
[[nodiscard]] inline bool has_hostname() const noexcept;
|
|
4923
|
+
|
|
4924
|
+
/**
|
|
4925
|
+
* Validates whether the hostname is a valid domain according to RFC 1034.
|
|
4926
|
+
* Checks that the domain and its labels have valid lengths (max 255 octets
|
|
4927
|
+
* total, max 63 octets per label).
|
|
4928
|
+
* @return `true` if the domain is valid, `false` otherwise.
|
|
4929
|
+
*/
|
|
4713
4930
|
[[nodiscard]] bool has_valid_domain() const noexcept override;
|
|
4714
4931
|
|
|
4715
4932
|
/**
|
|
4716
|
-
* Returns a JSON string representation of this URL.
|
|
4933
|
+
* Returns a JSON string representation of this URL for debugging.
|
|
4934
|
+
* @return A JSON-formatted string with all URL components.
|
|
4717
4935
|
*/
|
|
4718
4936
|
[[nodiscard]] std::string to_string() const override;
|
|
4719
4937
|
|
|
4720
4938
|
/**
|
|
4939
|
+
* Returns the full serialized URL (the href).
|
|
4940
|
+
* @return The complete URL string (allocates a new string).
|
|
4721
4941
|
* @see https://url.spec.whatwg.org/#dom-url-href
|
|
4722
|
-
* @see https://url.spec.whatwg.org/#concept-url-serializer
|
|
4723
4942
|
*/
|
|
4724
|
-
[[nodiscard]] ada_really_inline std::string get_href() const
|
|
4943
|
+
[[nodiscard]] ada_really_inline std::string get_href() const;
|
|
4725
4944
|
|
|
4726
4945
|
/**
|
|
4727
|
-
*
|
|
4728
|
-
*
|
|
4729
|
-
* @return
|
|
4946
|
+
* Returns the URL's origin as a string (scheme + host + port for special
|
|
4947
|
+
* URLs).
|
|
4948
|
+
* @return A newly allocated string containing the serialized origin.
|
|
4730
4949
|
* @see https://url.spec.whatwg.org/#concept-url-origin
|
|
4731
4950
|
*/
|
|
4732
|
-
[[nodiscard]] std::string get_origin() const
|
|
4951
|
+
[[nodiscard]] std::string get_origin() const override;
|
|
4733
4952
|
|
|
4734
4953
|
/**
|
|
4735
|
-
*
|
|
4736
|
-
*
|
|
4737
|
-
* @return a newly allocated string.
|
|
4954
|
+
* Returns the URL's scheme followed by a colon (e.g., "https:").
|
|
4955
|
+
* @return A newly allocated string with the protocol.
|
|
4738
4956
|
* @see https://url.spec.whatwg.org/#dom-url-protocol
|
|
4739
4957
|
*/
|
|
4740
|
-
[[nodiscard]] std::string get_protocol() const
|
|
4958
|
+
[[nodiscard]] std::string get_protocol() const;
|
|
4741
4959
|
|
|
4742
4960
|
/**
|
|
4743
|
-
*
|
|
4744
|
-
*
|
|
4745
|
-
*
|
|
4746
|
-
* @return a newly allocated string.
|
|
4961
|
+
* Returns the URL's host and port (e.g., "example.com:8080").
|
|
4962
|
+
* If no port is set, returns just the host. Returns empty string if no host.
|
|
4963
|
+
* @return A newly allocated string with host:port.
|
|
4747
4964
|
* @see https://url.spec.whatwg.org/#dom-url-host
|
|
4748
4965
|
*/
|
|
4749
|
-
[[nodiscard]] std::string get_host() const
|
|
4966
|
+
[[nodiscard]] std::string get_host() const;
|
|
4750
4967
|
|
|
4751
4968
|
/**
|
|
4752
|
-
*
|
|
4753
|
-
*
|
|
4754
|
-
* @return
|
|
4969
|
+
* Returns the URL's hostname (without port).
|
|
4970
|
+
* Returns empty string if no host is set.
|
|
4971
|
+
* @return A newly allocated string with the hostname.
|
|
4755
4972
|
* @see https://url.spec.whatwg.org/#dom-url-hostname
|
|
4756
4973
|
*/
|
|
4757
|
-
[[nodiscard]] std::string get_hostname() const
|
|
4974
|
+
[[nodiscard]] std::string get_hostname() const;
|
|
4758
4975
|
|
|
4759
4976
|
/**
|
|
4760
|
-
*
|
|
4761
|
-
*
|
|
4762
|
-
* @return a newly allocated string.
|
|
4977
|
+
* Returns the URL's path component.
|
|
4978
|
+
* @return A string_view pointing to the path.
|
|
4763
4979
|
* @see https://url.spec.whatwg.org/#dom-url-pathname
|
|
4764
4980
|
*/
|
|
4765
4981
|
[[nodiscard]] constexpr std::string_view get_pathname() const noexcept;
|
|
4766
4982
|
|
|
4767
4983
|
/**
|
|
4768
|
-
*
|
|
4769
|
-
*
|
|
4770
|
-
* @return size of the pathname in bytes
|
|
4984
|
+
* Returns the byte length of the pathname without creating a string.
|
|
4985
|
+
* @return Size of the pathname in bytes.
|
|
4771
4986
|
* @see https://url.spec.whatwg.org/#dom-url-pathname
|
|
4772
4987
|
*/
|
|
4773
4988
|
[[nodiscard]] ada_really_inline size_t get_pathname_length() const noexcept;
|
|
4774
4989
|
|
|
4775
4990
|
/**
|
|
4776
|
-
*
|
|
4777
|
-
*
|
|
4991
|
+
* Returns the URL's query string prefixed with '?' (e.g., "?foo=bar").
|
|
4992
|
+
* Returns empty string if no query is set.
|
|
4993
|
+
* @return A newly allocated string with the search/query.
|
|
4778
4994
|
* @see https://url.spec.whatwg.org/#dom-url-search
|
|
4779
4995
|
*/
|
|
4780
|
-
[[nodiscard]] std::string get_search() const
|
|
4996
|
+
[[nodiscard]] std::string get_search() const;
|
|
4781
4997
|
|
|
4782
4998
|
/**
|
|
4783
|
-
*
|
|
4784
|
-
* @return
|
|
4999
|
+
* Returns the URL's username component.
|
|
5000
|
+
* @return A constant reference to the username string.
|
|
4785
5001
|
* @see https://url.spec.whatwg.org/#dom-url-username
|
|
4786
5002
|
*/
|
|
4787
5003
|
[[nodiscard]] const std::string &get_username() const noexcept;
|
|
4788
5004
|
|
|
4789
5005
|
/**
|
|
4790
|
-
*
|
|
5006
|
+
* Sets the URL's username, percent-encoding special characters.
|
|
5007
|
+
* @param input The new username value.
|
|
5008
|
+
* @return `true` on success, `false` if the URL cannot have credentials.
|
|
4791
5009
|
* @see https://url.spec.whatwg.org/#dom-url-username
|
|
4792
5010
|
*/
|
|
4793
5011
|
bool set_username(std::string_view input);
|
|
4794
5012
|
|
|
4795
5013
|
/**
|
|
4796
|
-
*
|
|
5014
|
+
* Sets the URL's password, percent-encoding special characters.
|
|
5015
|
+
* @param input The new password value.
|
|
5016
|
+
* @return `true` on success, `false` if the URL cannot have credentials.
|
|
4797
5017
|
* @see https://url.spec.whatwg.org/#dom-url-password
|
|
4798
5018
|
*/
|
|
4799
5019
|
bool set_password(std::string_view input);
|
|
4800
5020
|
|
|
4801
5021
|
/**
|
|
4802
|
-
*
|
|
5022
|
+
* Sets the URL's port from a string (e.g., "8080").
|
|
5023
|
+
* @param input The port string. Empty string removes the port.
|
|
5024
|
+
* @return `true` on success, `false` if the URL cannot have a port.
|
|
4803
5025
|
* @see https://url.spec.whatwg.org/#dom-url-port
|
|
4804
5026
|
*/
|
|
4805
5027
|
bool set_port(std::string_view input);
|
|
4806
5028
|
|
|
4807
5029
|
/**
|
|
4808
|
-
*
|
|
5030
|
+
* Sets the URL's fragment/hash (the part after '#').
|
|
5031
|
+
* @param input The new hash value (with or without leading '#').
|
|
4809
5032
|
* @see https://url.spec.whatwg.org/#dom-url-hash
|
|
4810
5033
|
*/
|
|
4811
5034
|
void set_hash(std::string_view input);
|
|
4812
5035
|
|
|
4813
5036
|
/**
|
|
4814
|
-
*
|
|
5037
|
+
* Sets the URL's query string (the part after '?').
|
|
5038
|
+
* @param input The new query value (with or without leading '?').
|
|
4815
5039
|
* @see https://url.spec.whatwg.org/#dom-url-search
|
|
4816
5040
|
*/
|
|
4817
5041
|
void set_search(std::string_view input);
|
|
4818
5042
|
|
|
4819
5043
|
/**
|
|
4820
|
-
*
|
|
4821
|
-
* @
|
|
5044
|
+
* Sets the URL's pathname.
|
|
5045
|
+
* @param input The new path value.
|
|
5046
|
+
* @return `true` on success, `false` if the URL has an opaque path.
|
|
5047
|
+
* @see https://url.spec.whatwg.org/#dom-url-pathname
|
|
4822
5048
|
*/
|
|
4823
5049
|
bool set_pathname(std::string_view input);
|
|
4824
5050
|
|
|
4825
5051
|
/**
|
|
4826
|
-
*
|
|
5052
|
+
* Sets the URL's host (hostname and optionally port).
|
|
5053
|
+
* @param input The new host value (e.g., "example.com:8080").
|
|
5054
|
+
* @return `true` on success, `false` if parsing fails.
|
|
4827
5055
|
* @see https://url.spec.whatwg.org/#dom-url-host
|
|
4828
5056
|
*/
|
|
4829
5057
|
bool set_host(std::string_view input);
|
|
4830
5058
|
|
|
4831
5059
|
/**
|
|
4832
|
-
*
|
|
5060
|
+
* Sets the URL's hostname (without port).
|
|
5061
|
+
* @param input The new hostname value.
|
|
5062
|
+
* @return `true` on success, `false` if parsing fails.
|
|
4833
5063
|
* @see https://url.spec.whatwg.org/#dom-url-hostname
|
|
4834
5064
|
*/
|
|
4835
5065
|
bool set_hostname(std::string_view input);
|
|
4836
5066
|
|
|
4837
5067
|
/**
|
|
4838
|
-
*
|
|
5068
|
+
* Sets the URL's protocol/scheme.
|
|
5069
|
+
* @param input The new protocol (with or without trailing ':').
|
|
5070
|
+
* @return `true` on success, `false` if the scheme is invalid.
|
|
4839
5071
|
* @see https://url.spec.whatwg.org/#dom-url-protocol
|
|
4840
5072
|
*/
|
|
4841
5073
|
bool set_protocol(std::string_view input);
|
|
4842
5074
|
|
|
4843
5075
|
/**
|
|
5076
|
+
* Replaces the entire URL by parsing a new href string.
|
|
5077
|
+
* @param input The new URL string to parse.
|
|
5078
|
+
* @return `true` on success, `false` if parsing fails.
|
|
4844
5079
|
* @see https://url.spec.whatwg.org/#dom-url-href
|
|
4845
5080
|
*/
|
|
4846
5081
|
bool set_href(std::string_view input);
|
|
4847
5082
|
|
|
4848
5083
|
/**
|
|
4849
|
-
*
|
|
4850
|
-
* @return
|
|
5084
|
+
* Returns the URL's password component.
|
|
5085
|
+
* @return A constant reference to the password string.
|
|
4851
5086
|
* @see https://url.spec.whatwg.org/#dom-url-password
|
|
4852
5087
|
*/
|
|
4853
5088
|
[[nodiscard]] const std::string &get_password() const noexcept;
|
|
4854
5089
|
|
|
4855
5090
|
/**
|
|
4856
|
-
*
|
|
4857
|
-
*
|
|
5091
|
+
* Returns the URL's port as a string (e.g., "8080").
|
|
5092
|
+
* Returns empty string if no port is set.
|
|
5093
|
+
* @return A newly allocated string with the port.
|
|
4858
5094
|
* @see https://url.spec.whatwg.org/#dom-url-port
|
|
4859
5095
|
*/
|
|
4860
|
-
[[nodiscard]] std::string get_port() const
|
|
5096
|
+
[[nodiscard]] std::string get_port() const;
|
|
4861
5097
|
|
|
4862
5098
|
/**
|
|
4863
|
-
*
|
|
4864
|
-
*
|
|
5099
|
+
* Returns the URL's fragment prefixed with '#' (e.g., "#section").
|
|
5100
|
+
* Returns empty string if no fragment is set.
|
|
5101
|
+
* @return A newly allocated string with the hash.
|
|
4865
5102
|
* @see https://url.spec.whatwg.org/#dom-url-hash
|
|
4866
5103
|
*/
|
|
4867
|
-
[[nodiscard]] std::string get_hash() const
|
|
5104
|
+
[[nodiscard]] std::string get_hash() const;
|
|
4868
5105
|
|
|
4869
5106
|
/**
|
|
4870
|
-
*
|
|
4871
|
-
*
|
|
5107
|
+
* Checks if the URL has credentials (non-empty username or password).
|
|
5108
|
+
* @return `true` if username or password is non-empty, `false` otherwise.
|
|
4872
5109
|
*/
|
|
4873
5110
|
[[nodiscard]] ada_really_inline bool has_credentials() const noexcept;
|
|
4874
5111
|
|
|
4875
5112
|
/**
|
|
4876
|
-
*
|
|
5113
|
+
* Returns the URL component offsets for efficient serialization.
|
|
4877
5114
|
*
|
|
5115
|
+
* The components represent byte offsets into the serialized URL:
|
|
5116
|
+
* ```
|
|
4878
5117
|
* https://user:pass@example.com:1234/foo/bar?baz#quux
|
|
4879
5118
|
* | | | | ^^^^| | |
|
|
4880
5119
|
* | | | | | | | `----- hash_start
|
|
@@ -4885,19 +5124,23 @@ struct url : url_base {
|
|
|
4885
5124
|
* | | `---------------------------------- host_start
|
|
4886
5125
|
* | `--------------------------------------- username_end
|
|
4887
5126
|
* `--------------------------------------------- protocol_end
|
|
4888
|
-
*
|
|
4889
|
-
*
|
|
4890
|
-
*
|
|
4891
|
-
* @return a newly constructed component.
|
|
4892
|
-
*
|
|
4893
|
-
* @see
|
|
4894
|
-
* https://github.com/servo/rust-url/blob/b65a45515c10713f6d212e6726719a020203cc98/url/src/quirks.rs#L31
|
|
5127
|
+
* ```
|
|
5128
|
+
* @return A newly constructed url_components struct.
|
|
5129
|
+
* @see https://github.com/servo/rust-url
|
|
4895
5130
|
*/
|
|
4896
5131
|
[[nodiscard]] ada_really_inline ada::url_components get_components()
|
|
4897
5132
|
const noexcept;
|
|
4898
|
-
|
|
5133
|
+
|
|
5134
|
+
/**
|
|
5135
|
+
* Checks if the URL has a fragment/hash component.
|
|
5136
|
+
* @return `true` if hash is present, `false` otherwise.
|
|
5137
|
+
*/
|
|
4899
5138
|
[[nodiscard]] constexpr bool has_hash() const noexcept override;
|
|
4900
|
-
|
|
5139
|
+
|
|
5140
|
+
/**
|
|
5141
|
+
* Checks if the URL has a query/search component.
|
|
5142
|
+
* @return `true` if query is present, `false` otherwise.
|
|
5143
|
+
*/
|
|
4901
5144
|
[[nodiscard]] constexpr bool has_search() const noexcept override;
|
|
4902
5145
|
|
|
4903
5146
|
private:
|
|
@@ -4906,7 +5149,7 @@ struct url : url_base {
|
|
|
4906
5149
|
friend ada::url_aggregator ada::parser::parse_url<ada::url_aggregator>(
|
|
4907
5150
|
std::string_view, const ada::url_aggregator *);
|
|
4908
5151
|
friend void ada::helpers::strip_trailing_spaces_from_opaque_path<ada::url>(
|
|
4909
|
-
ada::url &url)
|
|
5152
|
+
ada::url &url);
|
|
4910
5153
|
|
|
4911
5154
|
friend ada::url ada::parser::parse_url_impl<ada::url, true>(std::string_view,
|
|
4912
5155
|
const ada::url *);
|
|
@@ -5013,7 +5256,7 @@ struct url : url_base {
|
|
|
5013
5256
|
* Take the scheme from another URL. The scheme string is moved from the
|
|
5014
5257
|
* provided url.
|
|
5015
5258
|
*/
|
|
5016
|
-
constexpr void copy_scheme(ada::url &&u)
|
|
5259
|
+
constexpr void copy_scheme(ada::url &&u);
|
|
5017
5260
|
|
|
5018
5261
|
/**
|
|
5019
5262
|
* Take the scheme from another URL. The scheme string is copied from the
|
|
@@ -5031,17 +5274,70 @@ inline std::ostream &operator<<(std::ostream &out, const ada::url &u);
|
|
|
5031
5274
|
|
|
5032
5275
|
namespace ada {
|
|
5033
5276
|
|
|
5277
|
+
/**
|
|
5278
|
+
* Result type for URL parsing operations.
|
|
5279
|
+
*
|
|
5280
|
+
* Uses `tl::expected` to represent either a successfully parsed URL or an
|
|
5281
|
+
* error. This allows for exception-free error handling.
|
|
5282
|
+
*
|
|
5283
|
+
* @tparam result_type The URL type to return (default: `ada::url_aggregator`)
|
|
5284
|
+
*
|
|
5285
|
+
* @example
|
|
5286
|
+
* ```cpp
|
|
5287
|
+
* ada::result<ada::url_aggregator> result = ada::parse("https://example.com");
|
|
5288
|
+
* if (result) {
|
|
5289
|
+
* // Success: use result.value() or *result
|
|
5290
|
+
* } else {
|
|
5291
|
+
* // Error: handle result.error()
|
|
5292
|
+
* }
|
|
5293
|
+
* ```
|
|
5294
|
+
*/
|
|
5034
5295
|
template <class result_type = ada::url_aggregator>
|
|
5035
5296
|
using result = tl::expected<result_type, ada::errors>;
|
|
5036
5297
|
|
|
5037
5298
|
/**
|
|
5038
|
-
*
|
|
5039
|
-
*
|
|
5040
|
-
*
|
|
5299
|
+
* Parses a URL string according to the WHATWG URL Standard.
|
|
5300
|
+
*
|
|
5301
|
+
* This is the main entry point for URL parsing in Ada. The function takes
|
|
5302
|
+
* a string input and optionally a base URL for resolving relative URLs.
|
|
5303
|
+
*
|
|
5304
|
+
* @tparam result_type The URL type to return. Can be either `ada::url` or
|
|
5305
|
+
* `ada::url_aggregator` (default). The `url_aggregator` type is more
|
|
5306
|
+
* memory-efficient as it stores components as offsets into a single
|
|
5307
|
+
* buffer.
|
|
5041
5308
|
*
|
|
5042
|
-
* @param input
|
|
5043
|
-
*
|
|
5044
|
-
* @
|
|
5309
|
+
* @param input The URL string to parse. Must be valid ASCII or UTF-8 encoded.
|
|
5310
|
+
* Leading and trailing whitespace is automatically trimmed.
|
|
5311
|
+
* @param base_url Optional pointer to a base URL for resolving relative URLs.
|
|
5312
|
+
* If nullptr (default), only absolute URLs can be parsed successfully.
|
|
5313
|
+
*
|
|
5314
|
+
* @return A `result<result_type>` containing either the parsed URL on success,
|
|
5315
|
+
* or an error code on failure. Use the boolean conversion or
|
|
5316
|
+
* `has_value()` to check for success.
|
|
5317
|
+
*
|
|
5318
|
+
* @note The parser is fully compliant with the WHATWG URL Standard.
|
|
5319
|
+
*
|
|
5320
|
+
* @example
|
|
5321
|
+
* ```cpp
|
|
5322
|
+
* // Parse an absolute URL
|
|
5323
|
+
* auto url = ada::parse("https://user:pass@example.com:8080/path?query#hash");
|
|
5324
|
+
* if (url) {
|
|
5325
|
+
* std::cout << url->get_hostname(); // "example.com"
|
|
5326
|
+
* std::cout << url->get_pathname(); // "/path"
|
|
5327
|
+
* }
|
|
5328
|
+
*
|
|
5329
|
+
* // Parse a relative URL with a base
|
|
5330
|
+
* auto base = ada::parse("https://example.com/dir/");
|
|
5331
|
+
* if (base) {
|
|
5332
|
+
* auto relative = ada::parse("../other/page", &*base);
|
|
5333
|
+
* if (relative) {
|
|
5334
|
+
* std::cout << relative->get_href();
|
|
5335
|
+
* // Prints "https://example.com/other/page"
|
|
5336
|
+
* }
|
|
5337
|
+
* }
|
|
5338
|
+
* ```
|
|
5339
|
+
*
|
|
5340
|
+
* @see https://url.spec.whatwg.org/#url-parsing
|
|
5045
5341
|
*/
|
|
5046
5342
|
template <class result_type = ada::url_aggregator>
|
|
5047
5343
|
ada_warn_unused ada::result<result_type> parse(
|
|
@@ -5053,23 +5349,56 @@ extern template ada::result<url_aggregator> parse<url_aggregator>(
|
|
|
5053
5349
|
std::string_view input, const url_aggregator* base_url);
|
|
5054
5350
|
|
|
5055
5351
|
/**
|
|
5056
|
-
*
|
|
5057
|
-
*
|
|
5352
|
+
* Checks whether a URL string can be successfully parsed.
|
|
5353
|
+
*
|
|
5354
|
+
* This is a fast validation function that checks if a URL string is valid
|
|
5355
|
+
* according to the WHATWG URL Standard without fully constructing a URL
|
|
5356
|
+
* object. Use this when you only need to validate URLs without needing
|
|
5357
|
+
* their parsed components.
|
|
5358
|
+
*
|
|
5359
|
+
* @param input The URL string to validate. Must be valid ASCII or UTF-8.
|
|
5360
|
+
* @param base_input Optional pointer to a base URL string for resolving
|
|
5361
|
+
* relative URLs. If nullptr (default), the input is validated as
|
|
5362
|
+
* an absolute URL.
|
|
5363
|
+
*
|
|
5364
|
+
* @return `true` if the URL can be parsed successfully, `false` otherwise.
|
|
5365
|
+
*
|
|
5366
|
+
* @example
|
|
5367
|
+
* ```cpp
|
|
5368
|
+
* // Check absolute URL
|
|
5369
|
+
* bool valid = ada::can_parse("https://example.com"); // true
|
|
5370
|
+
* bool invalid = ada::can_parse("not a url"); // false
|
|
5371
|
+
*
|
|
5372
|
+
* // Check relative URL with base
|
|
5373
|
+
* std::string_view base = "https://example.com/";
|
|
5374
|
+
* bool relative_valid = ada::can_parse("../path", &base); // true
|
|
5375
|
+
* ```
|
|
5376
|
+
*
|
|
5058
5377
|
* @see https://url.spec.whatwg.org/#dom-url-canparse
|
|
5059
|
-
* @return If URL can be parsed or not.
|
|
5060
5378
|
*/
|
|
5061
5379
|
bool can_parse(std::string_view input,
|
|
5062
5380
|
const std::string_view* base_input = nullptr);
|
|
5063
5381
|
|
|
5064
5382
|
#if ADA_INCLUDE_URL_PATTERN
|
|
5065
5383
|
/**
|
|
5066
|
-
*
|
|
5067
|
-
*
|
|
5384
|
+
* Parses a URL pattern according to the URLPattern specification.
|
|
5385
|
+
*
|
|
5386
|
+
* URL patterns provide a syntax for matching URLs against patterns, similar
|
|
5387
|
+
* to how regular expressions match strings. This is useful for routing and
|
|
5388
|
+
* URL-based dispatching.
|
|
5389
|
+
*
|
|
5390
|
+
* @tparam regex_provider The regex implementation to use for pattern matching.
|
|
5391
|
+
*
|
|
5392
|
+
* @param input Either a URL pattern string (valid UTF-8) or a URLPatternInit
|
|
5393
|
+
* struct specifying individual component patterns.
|
|
5394
|
+
* @param base_url Optional pointer to a base URL string (valid UTF-8) for
|
|
5395
|
+
* resolving relative patterns.
|
|
5396
|
+
* @param options Optional pointer to configuration options (e.g., ignore_case).
|
|
5068
5397
|
*
|
|
5069
|
-
* @
|
|
5070
|
-
*
|
|
5071
|
-
*
|
|
5072
|
-
* @
|
|
5398
|
+
* @return A `tl::expected` containing either the parsed url_pattern on success,
|
|
5399
|
+
* or an error code on failure.
|
|
5400
|
+
*
|
|
5401
|
+
* @see https://urlpattern.spec.whatwg.org
|
|
5073
5402
|
*/
|
|
5074
5403
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
5075
5404
|
ada_warn_unused tl::expected<url_pattern<regex_provider>, errors>
|
|
@@ -5079,9 +5408,14 @@ parse_url_pattern(std::variant<std::string_view, url_pattern_init>&& input,
|
|
|
5079
5408
|
#endif // ADA_INCLUDE_URL_PATTERN
|
|
5080
5409
|
|
|
5081
5410
|
/**
|
|
5082
|
-
*
|
|
5083
|
-
*
|
|
5084
|
-
*
|
|
5411
|
+
* Converts a file system path to a file:// URL.
|
|
5412
|
+
*
|
|
5413
|
+
* Creates a properly formatted file URL from a local file system path.
|
|
5414
|
+
* Handles platform-specific path separators and percent-encoding.
|
|
5415
|
+
*
|
|
5416
|
+
* @param path The file system path to convert. Must be valid ASCII or UTF-8.
|
|
5417
|
+
*
|
|
5418
|
+
* @return A file:// URL string representing the given path.
|
|
5085
5419
|
*/
|
|
5086
5420
|
std::string href_from_file(std::string_view path);
|
|
5087
5421
|
} // namespace ada
|
|
@@ -5117,6 +5451,19 @@ enum class url_pattern_part_type : uint8_t {
|
|
|
5117
5451
|
FULL_WILDCARD,
|
|
5118
5452
|
};
|
|
5119
5453
|
|
|
5454
|
+
// Pattern type for fast-path matching optimization.
|
|
5455
|
+
// This allows skipping expensive regex evaluation for common simple patterns.
|
|
5456
|
+
enum class url_pattern_component_type : uint8_t {
|
|
5457
|
+
// Pattern is "^$" - only matches empty string
|
|
5458
|
+
EMPTY,
|
|
5459
|
+
// Pattern is "^<literal>$" - exact string match (no regex needed)
|
|
5460
|
+
EXACT_MATCH,
|
|
5461
|
+
// Pattern is "^(.*)$" - matches anything (full wildcard)
|
|
5462
|
+
FULL_WILDCARD,
|
|
5463
|
+
// Pattern requires actual regex evaluation
|
|
5464
|
+
REGEXP,
|
|
5465
|
+
};
|
|
5466
|
+
|
|
5120
5467
|
enum class url_pattern_part_modifier : uint8_t {
|
|
5121
5468
|
// The part does not have a modifier.
|
|
5122
5469
|
none,
|
|
@@ -5236,11 +5583,15 @@ class url_pattern_component {
|
|
|
5236
5583
|
url_pattern_component(std::string&& new_pattern,
|
|
5237
5584
|
typename regex_provider::regex_type&& new_regexp,
|
|
5238
5585
|
std::vector<std::string>&& new_group_name_list,
|
|
5239
|
-
bool new_has_regexp_groups
|
|
5586
|
+
bool new_has_regexp_groups,
|
|
5587
|
+
url_pattern_component_type new_type,
|
|
5588
|
+
std::string&& new_exact_match_value = {})
|
|
5240
5589
|
: regexp(std::move(new_regexp)),
|
|
5241
5590
|
pattern(std::move(new_pattern)),
|
|
5242
5591
|
group_name_list(std::move(new_group_name_list)),
|
|
5243
|
-
|
|
5592
|
+
exact_match_value(std::move(new_exact_match_value)),
|
|
5593
|
+
has_regexp_groups(new_has_regexp_groups),
|
|
5594
|
+
type(new_type) {}
|
|
5244
5595
|
|
|
5245
5596
|
// @see https://urlpattern.spec.whatwg.org/#compile-a-component
|
|
5246
5597
|
template <url_pattern_encoding_callback F>
|
|
@@ -5253,6 +5604,16 @@ class url_pattern_component {
|
|
|
5253
5604
|
std::string&& input,
|
|
5254
5605
|
std::vector<std::optional<std::string>>&& exec_result);
|
|
5255
5606
|
|
|
5607
|
+
// Fast path test that returns true/false without constructing result groups.
|
|
5608
|
+
// Uses cached pattern type to skip regex evaluation for simple patterns.
|
|
5609
|
+
bool fast_test(std::string_view input) const noexcept;
|
|
5610
|
+
|
|
5611
|
+
// Fast path match that returns capture groups without regex for simple
|
|
5612
|
+
// patterns. Returns nullopt if pattern doesn't match, otherwise returns
|
|
5613
|
+
// capture groups.
|
|
5614
|
+
std::optional<std::vector<std::optional<std::string>>> fast_match(
|
|
5615
|
+
std::string_view input) const;
|
|
5616
|
+
|
|
5256
5617
|
#if ADA_TESTING
|
|
5257
5618
|
friend void PrintTo(const url_pattern_component& component,
|
|
5258
5619
|
std::ostream* os) {
|
|
@@ -5268,7 +5629,11 @@ class url_pattern_component {
|
|
|
5268
5629
|
typename regex_provider::regex_type regexp{};
|
|
5269
5630
|
std::string pattern{};
|
|
5270
5631
|
std::vector<std::string> group_name_list{};
|
|
5632
|
+
// For EXACT_MATCH type: the literal string to compare against
|
|
5633
|
+
std::string exact_match_value{};
|
|
5271
5634
|
bool has_regexp_groups = false;
|
|
5635
|
+
// Cached pattern type for fast-path optimization
|
|
5636
|
+
url_pattern_component_type type = url_pattern_component_type::REGEXP;
|
|
5272
5637
|
};
|
|
5273
5638
|
|
|
5274
5639
|
// A URLPattern input can be either a string or a URLPatternInit object.
|
|
@@ -5300,14 +5665,28 @@ struct url_pattern_options {
|
|
|
5300
5665
|
#endif // ADA_TESTING
|
|
5301
5666
|
};
|
|
5302
5667
|
|
|
5303
|
-
|
|
5304
|
-
|
|
5305
|
-
|
|
5306
|
-
|
|
5307
|
-
|
|
5308
|
-
|
|
5309
|
-
|
|
5310
|
-
|
|
5668
|
+
/**
|
|
5669
|
+
* @brief URL pattern matching class implementing the URLPattern API.
|
|
5670
|
+
*
|
|
5671
|
+
* URLPattern provides a way to match URLs against patterns with wildcards
|
|
5672
|
+
* and named capture groups. It's useful for routing, URL-based dispatching,
|
|
5673
|
+
* and URL validation.
|
|
5674
|
+
*
|
|
5675
|
+
* Pattern syntax supports:
|
|
5676
|
+
* - Literal text matching
|
|
5677
|
+
* - Named groups: `:name` (matches up to the next separator)
|
|
5678
|
+
* - Wildcards: `*` (matches everything)
|
|
5679
|
+
* - Custom regex: `(pattern)`
|
|
5680
|
+
* - Optional segments: `:name?`
|
|
5681
|
+
* - Repeated segments: `:name+`, `:name*`
|
|
5682
|
+
*
|
|
5683
|
+
* @tparam regex_provider The regex implementation to use for pattern matching.
|
|
5684
|
+
* Must satisfy the url_pattern_regex::regex_concept.
|
|
5685
|
+
*
|
|
5686
|
+
* @note All string inputs must be valid UTF-8.
|
|
5687
|
+
*
|
|
5688
|
+
* @see https://urlpattern.spec.whatwg.org/
|
|
5689
|
+
*/
|
|
5311
5690
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
5312
5691
|
class url_pattern {
|
|
5313
5692
|
public:
|
|
@@ -5360,6 +5739,13 @@ class url_pattern {
|
|
|
5360
5739
|
// @see https://urlpattern.spec.whatwg.org/#url-pattern-has-regexp-groups
|
|
5361
5740
|
[[nodiscard]] bool has_regexp_groups() const;
|
|
5362
5741
|
|
|
5742
|
+
// Helper to test all components at once. Returns true if all match.
|
|
5743
|
+
[[nodiscard]] bool test_components(
|
|
5744
|
+
std::string_view protocol, std::string_view username,
|
|
5745
|
+
std::string_view password, std::string_view hostname,
|
|
5746
|
+
std::string_view port, std::string_view pathname, std::string_view search,
|
|
5747
|
+
std::string_view hash) const;
|
|
5748
|
+
|
|
5363
5749
|
#if ADA_TESTING
|
|
5364
5750
|
friend void PrintTo(const url_pattern& c, std::ostream* os) {
|
|
5365
5751
|
*os << "protocol_component: '" << c.get_protocol() << ", ";
|
|
@@ -5484,8 +5870,8 @@ enum class token_policy {
|
|
|
5484
5870
|
// @see https://urlpattern.spec.whatwg.org/#tokens
|
|
5485
5871
|
class token {
|
|
5486
5872
|
public:
|
|
5487
|
-
token(token_type _type, size_t _index, std::
|
|
5488
|
-
: type(_type), index(_index), value(
|
|
5873
|
+
token(token_type _type, size_t _index, std::string_view _value)
|
|
5874
|
+
: type(_type), index(_index), value(_value) {}
|
|
5489
5875
|
|
|
5490
5876
|
// A token has an associated type, a string, initially "invalid-char".
|
|
5491
5877
|
token_type type = token_type::INVALID_CHAR;
|
|
@@ -5496,7 +5882,7 @@ class token {
|
|
|
5496
5882
|
|
|
5497
5883
|
// A token has an associated value, a string, initially the empty string. It
|
|
5498
5884
|
// contains the code points from the pattern string represented by the token.
|
|
5499
|
-
std::
|
|
5885
|
+
std::string_view value{};
|
|
5500
5886
|
};
|
|
5501
5887
|
|
|
5502
5888
|
// @see https://urlpattern.spec.whatwg.org/#pattern-parser
|
|
@@ -5574,7 +5960,7 @@ class Tokenizer {
|
|
|
5574
5960
|
|
|
5575
5961
|
private:
|
|
5576
5962
|
// has an associated input, a pattern string, initially the empty string.
|
|
5577
|
-
std::
|
|
5963
|
+
std::string_view input;
|
|
5578
5964
|
// has an associated policy, a tokenize policy, initially "strict".
|
|
5579
5965
|
token_policy policy;
|
|
5580
5966
|
// has an associated token list, a token list, initially an empty list.
|
|
@@ -5668,7 +6054,7 @@ struct constructor_string_parser {
|
|
|
5668
6054
|
// @see https://urlpattern.spec.whatwg.org/#make-a-component-string
|
|
5669
6055
|
std::string make_component_string();
|
|
5670
6056
|
// has an associated input, a string, which must be set upon creation.
|
|
5671
|
-
std::
|
|
6057
|
+
std::string_view input;
|
|
5672
6058
|
// has an associated token list, a token list, which must be set upon
|
|
5673
6059
|
// creation.
|
|
5674
6060
|
std::vector<token> token_list;
|
|
@@ -5775,7 +6161,7 @@ bool protocol_component_matches_special_scheme(
|
|
|
5775
6161
|
ada::url_pattern_component<regex_provider>& input);
|
|
5776
6162
|
|
|
5777
6163
|
// @see https://urlpattern.spec.whatwg.org/#convert-a-modifier-to-a-string
|
|
5778
|
-
std::
|
|
6164
|
+
std::string_view convert_modifier_to_string(url_pattern_part_modifier modifier);
|
|
5779
6165
|
|
|
5780
6166
|
// @see https://urlpattern.spec.whatwg.org/#generate-a-segment-wildcard-regexp
|
|
5781
6167
|
std::string generate_segment_wildcard_regexp(
|
|
@@ -6141,7 +6527,10 @@ constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept {
|
|
|
6141
6527
|
/* begin file include/ada/serializers.h */
|
|
6142
6528
|
/**
|
|
6143
6529
|
* @file serializers.h
|
|
6144
|
-
* @brief
|
|
6530
|
+
* @brief IP address serialization utilities.
|
|
6531
|
+
*
|
|
6532
|
+
* This header provides functions for converting IP addresses to their
|
|
6533
|
+
* string representations according to the WHATWG URL Standard.
|
|
6145
6534
|
*/
|
|
6146
6535
|
#ifndef ADA_SERIALIZERS_H
|
|
6147
6536
|
#define ADA_SERIALIZERS_H
|
|
@@ -6152,32 +6541,41 @@ constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept {
|
|
|
6152
6541
|
|
|
6153
6542
|
/**
|
|
6154
6543
|
* @namespace ada::serializers
|
|
6155
|
-
* @brief
|
|
6544
|
+
* @brief IP address serialization functions.
|
|
6545
|
+
*
|
|
6546
|
+
* Contains utilities for serializing IPv4 and IPv6 addresses to strings.
|
|
6156
6547
|
*/
|
|
6157
6548
|
namespace ada::serializers {
|
|
6158
6549
|
|
|
6159
6550
|
/**
|
|
6160
|
-
* Finds
|
|
6551
|
+
* Finds the longest consecutive sequence of zero pieces in an IPv6 address.
|
|
6552
|
+
* Used for :: compression in IPv6 serialization.
|
|
6553
|
+
*
|
|
6554
|
+
* @param address The 8 16-bit pieces of the IPv6 address.
|
|
6555
|
+
* @param[out] compress Index of the start of the longest zero sequence.
|
|
6556
|
+
* @param[out] compress_length Length of the longest zero sequence.
|
|
6161
6557
|
*/
|
|
6162
6558
|
void find_longest_sequence_of_ipv6_pieces(
|
|
6163
6559
|
const std::array<uint16_t, 8>& address, size_t& compress,
|
|
6164
6560
|
size_t& compress_length) noexcept;
|
|
6165
6561
|
|
|
6166
6562
|
/**
|
|
6167
|
-
* Serializes an
|
|
6168
|
-
*
|
|
6169
|
-
*
|
|
6563
|
+
* Serializes an IPv6 address to its string representation.
|
|
6564
|
+
*
|
|
6565
|
+
* @param address The 8 16-bit pieces of the IPv6 address.
|
|
6566
|
+
* @return The serialized IPv6 string (e.g., "2001:db8::1").
|
|
6170
6567
|
* @see https://url.spec.whatwg.org/#concept-ipv6-serializer
|
|
6171
6568
|
*/
|
|
6172
|
-
std::string ipv6(const std::array<uint16_t, 8>& address)
|
|
6569
|
+
std::string ipv6(const std::array<uint16_t, 8>& address);
|
|
6173
6570
|
|
|
6174
6571
|
/**
|
|
6175
|
-
* Serializes an
|
|
6176
|
-
*
|
|
6177
|
-
*
|
|
6572
|
+
* Serializes an IPv4 address to its dotted-decimal string representation.
|
|
6573
|
+
*
|
|
6574
|
+
* @param address The 32-bit IPv4 address as an integer.
|
|
6575
|
+
* @return The serialized IPv4 string (e.g., "192.168.1.1").
|
|
6178
6576
|
* @see https://url.spec.whatwg.org/#concept-ipv4-serializer
|
|
6179
6577
|
*/
|
|
6180
|
-
std::string ipv4(uint64_t address)
|
|
6578
|
+
std::string ipv4(uint64_t address);
|
|
6181
6579
|
|
|
6182
6580
|
} // namespace ada::serializers
|
|
6183
6581
|
|
|
@@ -6186,7 +6584,12 @@ std::string ipv4(uint64_t address) noexcept;
|
|
|
6186
6584
|
/* begin file include/ada/state.h */
|
|
6187
6585
|
/**
|
|
6188
6586
|
* @file state.h
|
|
6189
|
-
* @brief
|
|
6587
|
+
* @brief URL parser state machine states.
|
|
6588
|
+
*
|
|
6589
|
+
* Defines the states used by the URL parsing state machine as specified
|
|
6590
|
+
* in the WHATWG URL Standard.
|
|
6591
|
+
*
|
|
6592
|
+
* @see https://url.spec.whatwg.org/#url-parsing
|
|
6190
6593
|
*/
|
|
6191
6594
|
#ifndef ADA_STATE_H
|
|
6192
6595
|
#define ADA_STATE_H
|
|
@@ -6197,6 +6600,11 @@ std::string ipv4(uint64_t address) noexcept;
|
|
|
6197
6600
|
namespace ada {
|
|
6198
6601
|
|
|
6199
6602
|
/**
|
|
6603
|
+
* @brief States in the URL parsing state machine.
|
|
6604
|
+
*
|
|
6605
|
+
* The URL parser processes input through a sequence of states, each handling
|
|
6606
|
+
* a specific part of the URL syntax.
|
|
6607
|
+
*
|
|
6200
6608
|
* @see https://url.spec.whatwg.org/#url-parsing
|
|
6201
6609
|
*/
|
|
6202
6610
|
enum class state {
|
|
@@ -6302,7 +6710,9 @@ enum class state {
|
|
|
6302
6710
|
};
|
|
6303
6711
|
|
|
6304
6712
|
/**
|
|
6305
|
-
*
|
|
6713
|
+
* Converts a parser state to its string name for debugging.
|
|
6714
|
+
* @param s The state to convert.
|
|
6715
|
+
* @return A string representation of the state.
|
|
6306
6716
|
*/
|
|
6307
6717
|
ada_warn_unused std::string to_string(ada::state s);
|
|
6308
6718
|
|
|
@@ -6761,7 +7171,7 @@ inline void url::set_scheme(std::string &&new_scheme) noexcept {
|
|
|
6761
7171
|
}
|
|
6762
7172
|
}
|
|
6763
7173
|
|
|
6764
|
-
constexpr void url::copy_scheme(ada::url &&u)
|
|
7174
|
+
constexpr void url::copy_scheme(ada::url &&u) {
|
|
6765
7175
|
non_special_scheme = u.non_special_scheme;
|
|
6766
7176
|
type = u.type;
|
|
6767
7177
|
}
|
|
@@ -6771,7 +7181,7 @@ constexpr void url::copy_scheme(const ada::url &u) {
|
|
|
6771
7181
|
type = u.type;
|
|
6772
7182
|
}
|
|
6773
7183
|
|
|
6774
|
-
[[nodiscard]] ada_really_inline std::string url::get_href() const
|
|
7184
|
+
[[nodiscard]] ada_really_inline std::string url::get_href() const {
|
|
6775
7185
|
std::string output = get_protocol();
|
|
6776
7186
|
|
|
6777
7187
|
if (host.has_value()) {
|
|
@@ -6930,7 +7340,13 @@ namespace ada {
|
|
|
6930
7340
|
/* begin file include/ada/url_aggregator.h */
|
|
6931
7341
|
/**
|
|
6932
7342
|
* @file url_aggregator.h
|
|
6933
|
-
* @brief Declaration for the
|
|
7343
|
+
* @brief Declaration for the `ada::url_aggregator` class.
|
|
7344
|
+
*
|
|
7345
|
+
* This file contains the `ada::url_aggregator` struct which represents a parsed
|
|
7346
|
+
* URL using a single buffer with component offsets. This is the default and
|
|
7347
|
+
* most memory-efficient URL representation in Ada.
|
|
7348
|
+
*
|
|
7349
|
+
* @see url.h for an alternative representation using separate strings
|
|
6934
7350
|
*/
|
|
6935
7351
|
#ifndef ADA_URL_AGGREGATOR_H
|
|
6936
7352
|
#define ADA_URL_AGGREGATOR_H
|
|
@@ -6946,12 +7362,23 @@ namespace ada {
|
|
|
6946
7362
|
namespace parser {}
|
|
6947
7363
|
|
|
6948
7364
|
/**
|
|
6949
|
-
* @brief
|
|
7365
|
+
* @brief Memory-efficient URL representation using a single buffer.
|
|
7366
|
+
*
|
|
7367
|
+
* The `url_aggregator` stores the entire normalized URL in a single string
|
|
7368
|
+
* buffer and tracks component boundaries using offsets. This design minimizes
|
|
7369
|
+
* memory allocations and is ideal for read-mostly access patterns.
|
|
7370
|
+
*
|
|
7371
|
+
* Getter methods return `std::string_view` pointing into the internal buffer.
|
|
7372
|
+
* These views are lightweight (no allocation) but become invalid if the
|
|
7373
|
+
* url_aggregator is modified or destroyed.
|
|
6950
7374
|
*
|
|
6951
|
-
* @
|
|
6952
|
-
*
|
|
6953
|
-
*
|
|
6954
|
-
*
|
|
7375
|
+
* @warning Views returned by getters (e.g., `get_pathname()`) are invalidated
|
|
7376
|
+
* when any setter is called. Do not use a getter's result as input to a
|
|
7377
|
+
* setter on the same object without copying first.
|
|
7378
|
+
*
|
|
7379
|
+
* @note This is the default URL type returned by `ada::parse()`.
|
|
7380
|
+
*
|
|
7381
|
+
* @see url For an alternative using separate std::string instances
|
|
6955
7382
|
*/
|
|
6956
7383
|
struct url_aggregator : url_base {
|
|
6957
7384
|
url_aggregator() = default;
|
|
@@ -6961,6 +7388,25 @@ struct url_aggregator : url_base {
|
|
|
6961
7388
|
url_aggregator &operator=(const url_aggregator &u) = default;
|
|
6962
7389
|
~url_aggregator() override = default;
|
|
6963
7390
|
|
|
7391
|
+
/**
|
|
7392
|
+
* The setter functions follow the steps defined in the URL Standard.
|
|
7393
|
+
*
|
|
7394
|
+
* The url_aggregator has a single buffer that contains the entire normalized
|
|
7395
|
+
* URL. The various components are represented as offsets into that buffer.
|
|
7396
|
+
* When you call get_pathname(), for example, you get a std::string_view that
|
|
7397
|
+
* points into that buffer. If the url_aggregator is modified, the buffer may
|
|
7398
|
+
* be reallocated, and the std::string_view you obtained earlier may become
|
|
7399
|
+
* invalid. In particular, this implies that you cannot modify the URL using
|
|
7400
|
+
* a setter function with a std::string_view that points into the
|
|
7401
|
+
* url_aggregator. E.g., the following is incorrect:
|
|
7402
|
+
* url->set_hostname(url->get_pathname()).
|
|
7403
|
+
* You must first copy the pathname to a separate string.
|
|
7404
|
+
* std::string pathname(url->get_pathname());
|
|
7405
|
+
* url->set_hostname(pathname);
|
|
7406
|
+
*
|
|
7407
|
+
* The caller is responsible for ensuring that the url_aggregator is not
|
|
7408
|
+
* modified while any std::string_view obtained from it is in use.
|
|
7409
|
+
*/
|
|
6964
7410
|
bool set_href(std::string_view input);
|
|
6965
7411
|
bool set_host(std::string_view input);
|
|
6966
7412
|
bool set_hostname(std::string_view input);
|
|
@@ -6972,115 +7418,130 @@ struct url_aggregator : url_base {
|
|
|
6972
7418
|
void set_search(std::string_view input);
|
|
6973
7419
|
void set_hash(std::string_view input);
|
|
6974
7420
|
|
|
7421
|
+
/**
|
|
7422
|
+
* Validates whether the hostname is a valid domain according to RFC 1034.
|
|
7423
|
+
* @return `true` if the domain is valid, `false` otherwise.
|
|
7424
|
+
*/
|
|
6975
7425
|
[[nodiscard]] bool has_valid_domain() const noexcept override;
|
|
7426
|
+
|
|
6976
7427
|
/**
|
|
6977
|
-
*
|
|
6978
|
-
* origin.
|
|
6979
|
-
* @return a newly allocated string.
|
|
7428
|
+
* Returns the URL's origin (scheme + host + port for special URLs).
|
|
7429
|
+
* @return A newly allocated string containing the serialized origin.
|
|
6980
7430
|
* @see https://url.spec.whatwg.org/#concept-url-origin
|
|
6981
7431
|
*/
|
|
6982
|
-
[[nodiscard]] std::string get_origin() const
|
|
7432
|
+
[[nodiscard]] std::string get_origin() const override;
|
|
7433
|
+
|
|
6983
7434
|
/**
|
|
6984
|
-
*
|
|
6985
|
-
*
|
|
6986
|
-
*
|
|
6987
|
-
* @return
|
|
7435
|
+
* Returns the full serialized URL (the href) as a string_view.
|
|
7436
|
+
* Does not allocate memory. The returned view becomes invalid if this
|
|
7437
|
+
* url_aggregator is modified or destroyed.
|
|
7438
|
+
* @return A string_view into the internal buffer.
|
|
6988
7439
|
* @see https://url.spec.whatwg.org/#dom-url-href
|
|
6989
|
-
* @see https://url.spec.whatwg.org/#concept-url-serializer
|
|
6990
7440
|
*/
|
|
6991
7441
|
[[nodiscard]] constexpr std::string_view get_href() const noexcept
|
|
6992
7442
|
ada_lifetime_bound;
|
|
7443
|
+
|
|
6993
7444
|
/**
|
|
6994
|
-
*
|
|
6995
|
-
*
|
|
6996
|
-
*
|
|
7445
|
+
* Returns the URL's username component.
|
|
7446
|
+
* Does not allocate memory. The returned view becomes invalid if this
|
|
7447
|
+
* url_aggregator is modified or destroyed.
|
|
7448
|
+
* @return A string_view of the username.
|
|
6997
7449
|
* @see https://url.spec.whatwg.org/#dom-url-username
|
|
6998
7450
|
*/
|
|
6999
|
-
[[nodiscard]] std::string_view get_username() const
|
|
7000
|
-
|
|
7451
|
+
[[nodiscard]] std::string_view get_username() const ada_lifetime_bound;
|
|
7452
|
+
|
|
7001
7453
|
/**
|
|
7002
|
-
*
|
|
7003
|
-
*
|
|
7004
|
-
*
|
|
7454
|
+
* Returns the URL's password component.
|
|
7455
|
+
* Does not allocate memory. The returned view becomes invalid if this
|
|
7456
|
+
* url_aggregator is modified or destroyed.
|
|
7457
|
+
* @return A string_view of the password.
|
|
7005
7458
|
* @see https://url.spec.whatwg.org/#dom-url-password
|
|
7006
7459
|
*/
|
|
7007
|
-
[[nodiscard]] std::string_view get_password() const
|
|
7008
|
-
|
|
7460
|
+
[[nodiscard]] std::string_view get_password() const ada_lifetime_bound;
|
|
7461
|
+
|
|
7009
7462
|
/**
|
|
7010
|
-
*
|
|
7011
|
-
*
|
|
7012
|
-
*
|
|
7463
|
+
* Returns the URL's port as a string (e.g., "8080").
|
|
7464
|
+
* Does not allocate memory. Returns empty view if no port is set.
|
|
7465
|
+
* The returned view becomes invalid if this url_aggregator is modified.
|
|
7466
|
+
* @return A string_view of the port.
|
|
7013
7467
|
* @see https://url.spec.whatwg.org/#dom-url-port
|
|
7014
7468
|
*/
|
|
7015
|
-
[[nodiscard]] std::string_view get_port() const
|
|
7469
|
+
[[nodiscard]] std::string_view get_port() const ada_lifetime_bound;
|
|
7470
|
+
|
|
7016
7471
|
/**
|
|
7017
|
-
*
|
|
7018
|
-
*
|
|
7019
|
-
*
|
|
7472
|
+
* Returns the URL's fragment prefixed with '#' (e.g., "#section").
|
|
7473
|
+
* Does not allocate memory. Returns empty view if no fragment is set.
|
|
7474
|
+
* The returned view becomes invalid if this url_aggregator is modified.
|
|
7475
|
+
* @return A string_view of the hash.
|
|
7020
7476
|
* @see https://url.spec.whatwg.org/#dom-url-hash
|
|
7021
7477
|
*/
|
|
7022
|
-
[[nodiscard]] std::string_view get_hash() const
|
|
7478
|
+
[[nodiscard]] std::string_view get_hash() const ada_lifetime_bound;
|
|
7479
|
+
|
|
7023
7480
|
/**
|
|
7024
|
-
*
|
|
7025
|
-
*
|
|
7026
|
-
*
|
|
7027
|
-
*
|
|
7028
|
-
* @return a lightweight std::string_view.
|
|
7481
|
+
* Returns the URL's host and port (e.g., "example.com:8080").
|
|
7482
|
+
* Does not allocate memory. Returns empty view if no host is set.
|
|
7483
|
+
* The returned view becomes invalid if this url_aggregator is modified.
|
|
7484
|
+
* @return A string_view of host:port.
|
|
7029
7485
|
* @see https://url.spec.whatwg.org/#dom-url-host
|
|
7030
7486
|
*/
|
|
7031
|
-
[[nodiscard]] std::string_view get_host() const
|
|
7487
|
+
[[nodiscard]] std::string_view get_host() const ada_lifetime_bound;
|
|
7488
|
+
|
|
7032
7489
|
/**
|
|
7033
|
-
*
|
|
7034
|
-
*
|
|
7035
|
-
*
|
|
7036
|
-
* @return
|
|
7490
|
+
* Returns the URL's hostname (without port).
|
|
7491
|
+
* Does not allocate memory. Returns empty view if no host is set.
|
|
7492
|
+
* The returned view becomes invalid if this url_aggregator is modified.
|
|
7493
|
+
* @return A string_view of the hostname.
|
|
7037
7494
|
* @see https://url.spec.whatwg.org/#dom-url-hostname
|
|
7038
7495
|
*/
|
|
7039
|
-
[[nodiscard]] std::string_view get_hostname() const
|
|
7040
|
-
|
|
7496
|
+
[[nodiscard]] std::string_view get_hostname() const ada_lifetime_bound;
|
|
7497
|
+
|
|
7041
7498
|
/**
|
|
7042
|
-
*
|
|
7043
|
-
*
|
|
7044
|
-
*
|
|
7045
|
-
* @return
|
|
7499
|
+
* Returns the URL's path component.
|
|
7500
|
+
* Does not allocate memory. The returned view becomes invalid if this
|
|
7501
|
+
* url_aggregator is modified or destroyed.
|
|
7502
|
+
* @return A string_view of the pathname.
|
|
7046
7503
|
* @see https://url.spec.whatwg.org/#dom-url-pathname
|
|
7047
7504
|
*/
|
|
7048
|
-
[[nodiscard]] constexpr std::string_view get_pathname() const
|
|
7505
|
+
[[nodiscard]] constexpr std::string_view get_pathname() const
|
|
7049
7506
|
ada_lifetime_bound;
|
|
7507
|
+
|
|
7050
7508
|
/**
|
|
7051
|
-
*
|
|
7052
|
-
*
|
|
7053
|
-
* @return size of the pathname in bytes
|
|
7509
|
+
* Returns the byte length of the pathname without creating a string.
|
|
7510
|
+
* @return Size of the pathname in bytes.
|
|
7054
7511
|
* @see https://url.spec.whatwg.org/#dom-url-pathname
|
|
7055
7512
|
*/
|
|
7056
7513
|
[[nodiscard]] ada_really_inline uint32_t get_pathname_length() const noexcept;
|
|
7514
|
+
|
|
7057
7515
|
/**
|
|
7058
|
-
*
|
|
7059
|
-
*
|
|
7060
|
-
*
|
|
7516
|
+
* Returns the URL's query string prefixed with '?' (e.g., "?foo=bar").
|
|
7517
|
+
* Does not allocate memory. Returns empty view if no query is set.
|
|
7518
|
+
* The returned view becomes invalid if this url_aggregator is modified.
|
|
7519
|
+
* @return A string_view of the search/query.
|
|
7061
7520
|
* @see https://url.spec.whatwg.org/#dom-url-search
|
|
7062
7521
|
*/
|
|
7063
|
-
[[nodiscard]] std::string_view get_search() const
|
|
7522
|
+
[[nodiscard]] std::string_view get_search() const ada_lifetime_bound;
|
|
7523
|
+
|
|
7064
7524
|
/**
|
|
7065
|
-
*
|
|
7066
|
-
*
|
|
7067
|
-
*
|
|
7068
|
-
* @return
|
|
7525
|
+
* Returns the URL's scheme followed by a colon (e.g., "https:").
|
|
7526
|
+
* Does not allocate memory. The returned view becomes invalid if this
|
|
7527
|
+
* url_aggregator is modified or destroyed.
|
|
7528
|
+
* @return A string_view of the protocol.
|
|
7069
7529
|
* @see https://url.spec.whatwg.org/#dom-url-protocol
|
|
7070
7530
|
*/
|
|
7071
|
-
[[nodiscard]] std::string_view get_protocol() const
|
|
7072
|
-
ada_lifetime_bound;
|
|
7531
|
+
[[nodiscard]] std::string_view get_protocol() const ada_lifetime_bound;
|
|
7073
7532
|
|
|
7074
7533
|
/**
|
|
7075
|
-
*
|
|
7076
|
-
*
|
|
7534
|
+
* Checks if the URL has credentials (non-empty username or password).
|
|
7535
|
+
* @return `true` if username or password is non-empty, `false` otherwise.
|
|
7077
7536
|
*/
|
|
7078
7537
|
[[nodiscard]] ada_really_inline constexpr bool has_credentials()
|
|
7079
7538
|
const noexcept;
|
|
7080
7539
|
|
|
7081
7540
|
/**
|
|
7082
|
-
*
|
|
7541
|
+
* Returns the URL component offsets for efficient serialization.
|
|
7083
7542
|
*
|
|
7543
|
+
* The components represent byte offsets into the serialized URL:
|
|
7544
|
+
* ```
|
|
7084
7545
|
* https://user:pass@example.com:1234/foo/bar?baz#quux
|
|
7085
7546
|
* | | | | ^^^^| | |
|
|
7086
7547
|
* | | | | | | | `----- hash_start
|
|
@@ -7091,57 +7552,99 @@ struct url_aggregator : url_base {
|
|
|
7091
7552
|
* | | `---------------------------------- host_start
|
|
7092
7553
|
* | `--------------------------------------- username_end
|
|
7093
7554
|
* `--------------------------------------------- protocol_end
|
|
7094
|
-
*
|
|
7095
|
-
*
|
|
7096
|
-
*
|
|
7097
|
-
* @return a constant reference to the underlying component attribute.
|
|
7098
|
-
*
|
|
7099
|
-
* @see
|
|
7100
|
-
* https://github.com/servo/rust-url/blob/b65a45515c10713f6d212e6726719a020203cc98/url/src/quirks.rs#L31
|
|
7555
|
+
* ```
|
|
7556
|
+
* @return A constant reference to the url_components struct.
|
|
7557
|
+
* @see https://github.com/servo/rust-url
|
|
7101
7558
|
*/
|
|
7102
7559
|
[[nodiscard]] ada_really_inline const url_components &get_components()
|
|
7103
7560
|
const noexcept;
|
|
7561
|
+
|
|
7104
7562
|
/**
|
|
7105
|
-
* Returns a string representation of this URL.
|
|
7563
|
+
* Returns a JSON string representation of this URL for debugging.
|
|
7564
|
+
* @return A JSON-formatted string with all URL components.
|
|
7106
7565
|
*/
|
|
7107
7566
|
[[nodiscard]] std::string to_string() const override;
|
|
7567
|
+
|
|
7108
7568
|
/**
|
|
7109
|
-
* Returns a
|
|
7569
|
+
* Returns a visual diagram showing component boundaries in the URL.
|
|
7570
|
+
* Useful for debugging and understanding URL structure.
|
|
7571
|
+
* @return A multi-line string diagram.
|
|
7110
7572
|
*/
|
|
7111
7573
|
[[nodiscard]] std::string to_diagram() const;
|
|
7112
7574
|
|
|
7113
7575
|
/**
|
|
7114
|
-
*
|
|
7115
|
-
* @return true if
|
|
7116
|
-
* possible.
|
|
7576
|
+
* Validates internal consistency of component offsets (for debugging).
|
|
7577
|
+
* @return `true` if offsets are consistent, `false` if corrupted.
|
|
7117
7578
|
*/
|
|
7118
7579
|
[[nodiscard]] constexpr bool validate() const noexcept;
|
|
7119
7580
|
|
|
7120
|
-
/**
|
|
7581
|
+
/**
|
|
7582
|
+
* Checks if the URL has an empty hostname (host is set but empty string).
|
|
7583
|
+
* @return `true` if host exists but is empty, `false` otherwise.
|
|
7584
|
+
*/
|
|
7121
7585
|
[[nodiscard]] constexpr bool has_empty_hostname() const noexcept;
|
|
7122
|
-
|
|
7586
|
+
|
|
7587
|
+
/**
|
|
7588
|
+
* Checks if the URL has a hostname (including empty hostnames).
|
|
7589
|
+
* @return `true` if host is present, `false` otherwise.
|
|
7590
|
+
*/
|
|
7123
7591
|
[[nodiscard]] constexpr bool has_hostname() const noexcept;
|
|
7124
|
-
|
|
7592
|
+
|
|
7593
|
+
/**
|
|
7594
|
+
* Checks if the URL has a non-empty username.
|
|
7595
|
+
* @return `true` if username is non-empty, `false` otherwise.
|
|
7596
|
+
*/
|
|
7125
7597
|
[[nodiscard]] constexpr bool has_non_empty_username() const noexcept;
|
|
7126
|
-
|
|
7598
|
+
|
|
7599
|
+
/**
|
|
7600
|
+
* Checks if the URL has a non-empty password.
|
|
7601
|
+
* @return `true` if password is non-empty, `false` otherwise.
|
|
7602
|
+
*/
|
|
7127
7603
|
[[nodiscard]] constexpr bool has_non_empty_password() const noexcept;
|
|
7128
|
-
|
|
7604
|
+
|
|
7605
|
+
/**
|
|
7606
|
+
* Checks if the URL has a non-default port explicitly specified.
|
|
7607
|
+
* @return `true` if a port is present, `false` otherwise.
|
|
7608
|
+
*/
|
|
7129
7609
|
[[nodiscard]] constexpr bool has_port() const noexcept;
|
|
7130
|
-
|
|
7610
|
+
|
|
7611
|
+
/**
|
|
7612
|
+
* Checks if the URL has a password component (may be empty).
|
|
7613
|
+
* @return `true` if password is present, `false` otherwise.
|
|
7614
|
+
*/
|
|
7131
7615
|
[[nodiscard]] constexpr bool has_password() const noexcept;
|
|
7132
|
-
|
|
7616
|
+
|
|
7617
|
+
/**
|
|
7618
|
+
* Checks if the URL has a fragment/hash component.
|
|
7619
|
+
* @return `true` if hash is present, `false` otherwise.
|
|
7620
|
+
*/
|
|
7133
7621
|
[[nodiscard]] constexpr bool has_hash() const noexcept override;
|
|
7134
|
-
|
|
7622
|
+
|
|
7623
|
+
/**
|
|
7624
|
+
* Checks if the URL has a query/search component.
|
|
7625
|
+
* @return `true` if query is present, `false` otherwise.
|
|
7626
|
+
*/
|
|
7135
7627
|
[[nodiscard]] constexpr bool has_search() const noexcept override;
|
|
7136
7628
|
|
|
7629
|
+
/**
|
|
7630
|
+
* Removes the port from the URL.
|
|
7631
|
+
*/
|
|
7137
7632
|
inline void clear_port();
|
|
7633
|
+
|
|
7634
|
+
/**
|
|
7635
|
+
* Removes the hash/fragment from the URL.
|
|
7636
|
+
*/
|
|
7138
7637
|
inline void clear_hash();
|
|
7638
|
+
|
|
7639
|
+
/**
|
|
7640
|
+
* Removes the query/search string from the URL.
|
|
7641
|
+
*/
|
|
7139
7642
|
inline void clear_search() override;
|
|
7140
7643
|
|
|
7141
7644
|
private:
|
|
7142
7645
|
// helper methods
|
|
7143
7646
|
friend void helpers::strip_trailing_spaces_from_opaque_path<url_aggregator>(
|
|
7144
|
-
url_aggregator &url)
|
|
7647
|
+
url_aggregator &url);
|
|
7145
7648
|
// parse_url methods
|
|
7146
7649
|
friend url_aggregator parser::parse_url<url_aggregator>(
|
|
7147
7650
|
std::string_view, const url_aggregator *);
|
|
@@ -7170,7 +7673,7 @@ struct url_aggregator : url_base {
|
|
|
7170
7673
|
*/
|
|
7171
7674
|
[[nodiscard]] ada_really_inline bool is_at_path() const noexcept;
|
|
7172
7675
|
|
|
7173
|
-
inline void add_authority_slashes_if_needed()
|
|
7676
|
+
inline void add_authority_slashes_if_needed();
|
|
7174
7677
|
|
|
7175
7678
|
/**
|
|
7176
7679
|
* To optimize performance, you may indicate how much memory to allocate
|
|
@@ -7178,10 +7681,10 @@ struct url_aggregator : url_base {
|
|
|
7178
7681
|
*/
|
|
7179
7682
|
constexpr void reserve(uint32_t capacity);
|
|
7180
7683
|
|
|
7181
|
-
ada_really_inline size_t parse_port(
|
|
7182
|
-
|
|
7684
|
+
ada_really_inline size_t parse_port(std::string_view view,
|
|
7685
|
+
bool check_trailing_content) override;
|
|
7183
7686
|
|
|
7184
|
-
ada_really_inline size_t parse_port(std::string_view view)
|
|
7687
|
+
ada_really_inline size_t parse_port(std::string_view view) override {
|
|
7185
7688
|
return this->parse_port(view, false);
|
|
7186
7689
|
}
|
|
7187
7690
|
|
|
@@ -7246,16 +7749,16 @@ struct url_aggregator : url_base {
|
|
|
7246
7749
|
std::string_view input);
|
|
7247
7750
|
[[nodiscard]] constexpr bool has_authority() const noexcept;
|
|
7248
7751
|
constexpr void set_protocol_as_file();
|
|
7249
|
-
inline void set_scheme(std::string_view new_scheme)
|
|
7752
|
+
inline void set_scheme(std::string_view new_scheme);
|
|
7250
7753
|
/**
|
|
7251
7754
|
* Fast function to set the scheme from a view with a colon in the
|
|
7252
7755
|
* buffer, does not change type.
|
|
7253
7756
|
*/
|
|
7254
7757
|
inline void set_scheme_from_view_with_colon(
|
|
7255
|
-
std::string_view new_scheme_with_colon)
|
|
7256
|
-
inline void copy_scheme(const url_aggregator &u)
|
|
7758
|
+
std::string_view new_scheme_with_colon);
|
|
7759
|
+
inline void copy_scheme(const url_aggregator &u);
|
|
7257
7760
|
|
|
7258
|
-
inline void update_host_to_base_host(const std::string_view input)
|
|
7761
|
+
inline void update_host_to_base_host(const std::string_view input);
|
|
7259
7762
|
|
|
7260
7763
|
}; // url_aggregator
|
|
7261
7764
|
|
|
@@ -8047,7 +8550,7 @@ url_aggregator::get_components() const noexcept {
|
|
|
8047
8550
|
components.protocol_end + 2) == "//";
|
|
8048
8551
|
}
|
|
8049
8552
|
|
|
8050
|
-
inline void ada::url_aggregator::add_authority_slashes_if_needed()
|
|
8553
|
+
inline void ada::url_aggregator::add_authority_slashes_if_needed() {
|
|
8051
8554
|
ada_log("url_aggregator::add_authority_slashes_if_needed");
|
|
8052
8555
|
ADA_ASSERT_TRUE(validate());
|
|
8053
8556
|
// Protocol setter will insert `http:` to the URL. It is up to hostname setter
|
|
@@ -8084,7 +8587,7 @@ constexpr bool url_aggregator::has_non_empty_username() const noexcept {
|
|
|
8084
8587
|
|
|
8085
8588
|
constexpr bool url_aggregator::has_non_empty_password() const noexcept {
|
|
8086
8589
|
ada_log("url_aggregator::has_non_empty_password");
|
|
8087
|
-
return components.host_start
|
|
8590
|
+
return components.host_start > components.username_end;
|
|
8088
8591
|
}
|
|
8089
8592
|
|
|
8090
8593
|
constexpr bool url_aggregator::has_password() const noexcept {
|
|
@@ -8156,8 +8659,8 @@ constexpr bool url_aggregator::has_port() const noexcept {
|
|
|
8156
8659
|
return buffer;
|
|
8157
8660
|
}
|
|
8158
8661
|
|
|
8159
|
-
ada_really_inline size_t
|
|
8160
|
-
|
|
8662
|
+
ada_really_inline size_t
|
|
8663
|
+
url_aggregator::parse_port(std::string_view view, bool check_trailing_content) {
|
|
8161
8664
|
ada_log("url_aggregator::parse_port('", view, "') ", view.size());
|
|
8162
8665
|
if (!view.empty() && view[0] == '-') {
|
|
8163
8666
|
ada_log("parse_port: view[0] == '0' && view.size() > 1");
|
|
@@ -8395,8 +8898,8 @@ constexpr void url_aggregator::set_protocol_as_file() {
|
|
|
8395
8898
|
return true;
|
|
8396
8899
|
}
|
|
8397
8900
|
|
|
8398
|
-
[[nodiscard]] constexpr std::string_view url_aggregator::get_pathname()
|
|
8399
|
-
|
|
8901
|
+
[[nodiscard]] constexpr std::string_view url_aggregator::get_pathname() const
|
|
8902
|
+
ada_lifetime_bound {
|
|
8400
8903
|
ada_log("url_aggregator::get_pathname pathname_start = ",
|
|
8401
8904
|
components.pathname_start, " buffer.size() = ", buffer.size(),
|
|
8402
8905
|
" components.search_start = ", components.search_start,
|
|
@@ -8415,8 +8918,7 @@ inline std::ostream &operator<<(std::ostream &out,
|
|
|
8415
8918
|
return out << u.to_string();
|
|
8416
8919
|
}
|
|
8417
8920
|
|
|
8418
|
-
void url_aggregator::update_host_to_base_host(
|
|
8419
|
-
const std::string_view input) noexcept {
|
|
8921
|
+
void url_aggregator::update_host_to_base_host(const std::string_view input) {
|
|
8420
8922
|
ada_log("url_aggregator::update_host_to_base_host ", input);
|
|
8421
8923
|
ADA_ASSERT_TRUE(validate());
|
|
8422
8924
|
ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
|
|
@@ -8443,7 +8945,13 @@ void url_aggregator::update_host_to_base_host(
|
|
|
8443
8945
|
/* begin file include/ada/url_search_params.h */
|
|
8444
8946
|
/**
|
|
8445
8947
|
* @file url_search_params.h
|
|
8446
|
-
* @brief
|
|
8948
|
+
* @brief URL query string parameter manipulation.
|
|
8949
|
+
*
|
|
8950
|
+
* This file provides the `url_search_params` class for parsing, manipulating,
|
|
8951
|
+
* and serializing URL query strings. It implements the URLSearchParams API
|
|
8952
|
+
* from the WHATWG URL Standard.
|
|
8953
|
+
*
|
|
8954
|
+
* @see https://url.spec.whatwg.org/#interface-urlsearchparams
|
|
8447
8955
|
*/
|
|
8448
8956
|
#ifndef ADA_URL_SEARCH_PARAMS_H
|
|
8449
8957
|
#define ADA_URL_SEARCH_PARAMS_H
|
|
@@ -8455,37 +8963,51 @@ void url_aggregator::update_host_to_base_host(
|
|
|
8455
8963
|
|
|
8456
8964
|
namespace ada {
|
|
8457
8965
|
|
|
8966
|
+
/**
|
|
8967
|
+
* @brief Iterator types for url_search_params iteration.
|
|
8968
|
+
*/
|
|
8458
8969
|
enum class url_search_params_iter_type {
|
|
8459
|
-
KEYS,
|
|
8460
|
-
VALUES,
|
|
8461
|
-
ENTRIES,
|
|
8970
|
+
KEYS, /**< Iterate over parameter keys only */
|
|
8971
|
+
VALUES, /**< Iterate over parameter values only */
|
|
8972
|
+
ENTRIES, /**< Iterate over key-value pairs */
|
|
8462
8973
|
};
|
|
8463
8974
|
|
|
8464
8975
|
template <typename T, url_search_params_iter_type Type>
|
|
8465
8976
|
struct url_search_params_iter;
|
|
8466
8977
|
|
|
8978
|
+
/** Type alias for a key-value pair of string views. */
|
|
8467
8979
|
typedef std::pair<std::string_view, std::string_view> key_value_view_pair;
|
|
8468
8980
|
|
|
8981
|
+
/** Iterator over search parameter keys. */
|
|
8469
8982
|
using url_search_params_keys_iter =
|
|
8470
8983
|
url_search_params_iter<std::string_view, url_search_params_iter_type::KEYS>;
|
|
8984
|
+
/** Iterator over search parameter values. */
|
|
8471
8985
|
using url_search_params_values_iter =
|
|
8472
8986
|
url_search_params_iter<std::string_view,
|
|
8473
8987
|
url_search_params_iter_type::VALUES>;
|
|
8988
|
+
/** Iterator over search parameter key-value pairs. */
|
|
8474
8989
|
using url_search_params_entries_iter =
|
|
8475
8990
|
url_search_params_iter<key_value_view_pair,
|
|
8476
8991
|
url_search_params_iter_type::ENTRIES>;
|
|
8477
8992
|
|
|
8478
8993
|
/**
|
|
8479
|
-
*
|
|
8480
|
-
*
|
|
8994
|
+
* @brief Class for parsing and manipulating URL query strings.
|
|
8995
|
+
*
|
|
8996
|
+
* The `url_search_params` class provides methods to parse, modify, and
|
|
8997
|
+
* serialize URL query parameters (the part after '?' in a URL). It handles
|
|
8998
|
+
* percent-encoding and decoding automatically.
|
|
8999
|
+
*
|
|
9000
|
+
* All string inputs must be valid UTF-8. The caller is responsible for
|
|
9001
|
+
* ensuring UTF-8 validity.
|
|
9002
|
+
*
|
|
8481
9003
|
* @see https://url.spec.whatwg.org/#interface-urlsearchparams
|
|
8482
9004
|
*/
|
|
8483
9005
|
struct url_search_params {
|
|
8484
9006
|
url_search_params() = default;
|
|
8485
9007
|
|
|
8486
9008
|
/**
|
|
8487
|
-
*
|
|
8488
|
-
*
|
|
9009
|
+
* Constructs url_search_params by parsing a query string.
|
|
9010
|
+
* @param input A query string (with or without leading '?'). Must be UTF-8.
|
|
8489
9011
|
*/
|
|
8490
9012
|
explicit url_search_params(const std::string_view input) {
|
|
8491
9013
|
initialize(input);
|
|
@@ -8497,75 +9019,106 @@ struct url_search_params {
|
|
|
8497
9019
|
url_search_params &operator=(const url_search_params &u) = default;
|
|
8498
9020
|
~url_search_params() = default;
|
|
8499
9021
|
|
|
9022
|
+
/**
|
|
9023
|
+
* Returns the number of key-value pairs.
|
|
9024
|
+
* @return The total count of parameters.
|
|
9025
|
+
*/
|
|
8500
9026
|
[[nodiscard]] inline size_t size() const noexcept;
|
|
8501
9027
|
|
|
8502
9028
|
/**
|
|
8503
|
-
*
|
|
9029
|
+
* Appends a new key-value pair to the parameter list.
|
|
9030
|
+
* @param key The parameter name (must be valid UTF-8).
|
|
9031
|
+
* @param value The parameter value (must be valid UTF-8).
|
|
8504
9032
|
* @see https://url.spec.whatwg.org/#dom-urlsearchparams-append
|
|
8505
9033
|
*/
|
|
8506
9034
|
inline void append(std::string_view key, std::string_view value);
|
|
8507
9035
|
|
|
8508
9036
|
/**
|
|
9037
|
+
* Removes all pairs with the given key.
|
|
9038
|
+
* @param key The parameter name to remove.
|
|
8509
9039
|
* @see https://url.spec.whatwg.org/#dom-urlsearchparams-delete
|
|
8510
9040
|
*/
|
|
8511
9041
|
inline void remove(std::string_view key);
|
|
9042
|
+
|
|
9043
|
+
/**
|
|
9044
|
+
* Removes all pairs with the given key and value.
|
|
9045
|
+
* @param key The parameter name.
|
|
9046
|
+
* @param value The parameter value to match.
|
|
9047
|
+
*/
|
|
8512
9048
|
inline void remove(std::string_view key, std::string_view value);
|
|
8513
9049
|
|
|
8514
9050
|
/**
|
|
9051
|
+
* Returns the value of the first pair with the given key.
|
|
9052
|
+
* @param key The parameter name to search for.
|
|
9053
|
+
* @return The value if found, or std::nullopt if not present.
|
|
8515
9054
|
* @see https://url.spec.whatwg.org/#dom-urlsearchparams-get
|
|
8516
9055
|
*/
|
|
8517
9056
|
inline std::optional<std::string_view> get(std::string_view key);
|
|
8518
9057
|
|
|
8519
9058
|
/**
|
|
9059
|
+
* Returns all values for pairs with the given key.
|
|
9060
|
+
* @param key The parameter name to search for.
|
|
9061
|
+
* @return A vector of all matching values (may be empty).
|
|
8520
9062
|
* @see https://url.spec.whatwg.org/#dom-urlsearchparams-getall
|
|
8521
9063
|
*/
|
|
8522
9064
|
inline std::vector<std::string> get_all(std::string_view key);
|
|
8523
9065
|
|
|
8524
9066
|
/**
|
|
9067
|
+
* Checks if any pair has the given key.
|
|
9068
|
+
* @param key The parameter name to search for.
|
|
9069
|
+
* @return `true` if at least one pair has this key.
|
|
8525
9070
|
* @see https://url.spec.whatwg.org/#dom-urlsearchparams-has
|
|
8526
9071
|
*/
|
|
8527
9072
|
inline bool has(std::string_view key) noexcept;
|
|
9073
|
+
|
|
9074
|
+
/**
|
|
9075
|
+
* Checks if any pair matches the given key and value.
|
|
9076
|
+
* @param key The parameter name to search for.
|
|
9077
|
+
* @param value The parameter value to match.
|
|
9078
|
+
* @return `true` if a matching pair exists.
|
|
9079
|
+
*/
|
|
8528
9080
|
inline bool has(std::string_view key, std::string_view value) noexcept;
|
|
8529
9081
|
|
|
8530
9082
|
/**
|
|
8531
|
-
*
|
|
9083
|
+
* Sets a parameter value, replacing any existing pairs with the same key.
|
|
9084
|
+
* @param key The parameter name (must be valid UTF-8).
|
|
9085
|
+
* @param value The parameter value (must be valid UTF-8).
|
|
8532
9086
|
* @see https://url.spec.whatwg.org/#dom-urlsearchparams-set
|
|
8533
9087
|
*/
|
|
8534
9088
|
inline void set(std::string_view key, std::string_view value);
|
|
8535
9089
|
|
|
8536
9090
|
/**
|
|
9091
|
+
* Sorts all key-value pairs by their keys using code unit comparison.
|
|
8537
9092
|
* @see https://url.spec.whatwg.org/#dom-urlsearchparams-sort
|
|
8538
9093
|
*/
|
|
8539
9094
|
inline void sort();
|
|
8540
9095
|
|
|
8541
9096
|
/**
|
|
9097
|
+
* Serializes the parameters to a query string (without leading '?').
|
|
9098
|
+
* @return The percent-encoded query string.
|
|
8542
9099
|
* @see https://url.spec.whatwg.org/#urlsearchparams-stringification-behavior
|
|
8543
9100
|
*/
|
|
8544
9101
|
inline std::string to_string() const;
|
|
8545
9102
|
|
|
8546
9103
|
/**
|
|
8547
|
-
* Returns
|
|
8548
|
-
*
|
|
8549
|
-
*
|
|
8550
|
-
*
|
|
8551
|
-
* @see https://url.spec.whatwg.org/#interface-urlsearchparams
|
|
9104
|
+
* Returns an iterator over all parameter keys.
|
|
9105
|
+
* Keys may repeat if there are duplicate parameters.
|
|
9106
|
+
* @return An iterator yielding string_view keys.
|
|
9107
|
+
* @note The iterator is invalidated if this object is modified.
|
|
8552
9108
|
*/
|
|
8553
9109
|
inline url_search_params_keys_iter get_keys();
|
|
8554
9110
|
|
|
8555
9111
|
/**
|
|
8556
|
-
* Returns
|
|
8557
|
-
*
|
|
8558
|
-
*
|
|
8559
|
-
* @see https://url.spec.whatwg.org/#interface-urlsearchparams
|
|
9112
|
+
* Returns an iterator over all parameter values.
|
|
9113
|
+
* @return An iterator yielding string_view values.
|
|
9114
|
+
* @note The iterator is invalidated if this object is modified.
|
|
8560
9115
|
*/
|
|
8561
9116
|
inline url_search_params_values_iter get_values();
|
|
8562
9117
|
|
|
8563
9118
|
/**
|
|
8564
|
-
* Returns
|
|
8565
|
-
*
|
|
8566
|
-
* The
|
|
8567
|
-
* iterator must be freed when you're done with it.
|
|
8568
|
-
* @see https://url.spec.whatwg.org/#interface-urlsearchparams
|
|
9119
|
+
* Returns an iterator over all key-value pairs.
|
|
9120
|
+
* @return An iterator yielding key-value pair views.
|
|
9121
|
+
* @note The iterator is invalidated if this object is modified.
|
|
8569
9122
|
*/
|
|
8570
9123
|
inline url_search_params_entries_iter get_entries();
|
|
8571
9124
|
|
|
@@ -8602,8 +9155,13 @@ struct url_search_params {
|
|
|
8602
9155
|
}; // url_search_params
|
|
8603
9156
|
|
|
8604
9157
|
/**
|
|
8605
|
-
*
|
|
8606
|
-
*
|
|
9158
|
+
* @brief JavaScript-style iterator for url_search_params.
|
|
9159
|
+
*
|
|
9160
|
+
* Provides a `next()` method that returns successive values until exhausted.
|
|
9161
|
+
* This matches the iterator pattern used in the Web Platform.
|
|
9162
|
+
*
|
|
9163
|
+
* @tparam T The type of value returned by the iterator.
|
|
9164
|
+
* @tparam Type The type of iteration (KEYS, VALUES, or ENTRIES).
|
|
8607
9165
|
*
|
|
8608
9166
|
* @see https://webidl.spec.whatwg.org/#idl-iterable
|
|
8609
9167
|
*/
|
|
@@ -8618,10 +9176,15 @@ struct url_search_params_iter {
|
|
|
8618
9176
|
~url_search_params_iter() = default;
|
|
8619
9177
|
|
|
8620
9178
|
/**
|
|
8621
|
-
*
|
|
9179
|
+
* Returns the next value in the iteration sequence.
|
|
9180
|
+
* @return The next value, or std::nullopt if iteration is complete.
|
|
8622
9181
|
*/
|
|
8623
9182
|
inline std::optional<T> next();
|
|
8624
9183
|
|
|
9184
|
+
/**
|
|
9185
|
+
* Checks if more values are available.
|
|
9186
|
+
* @return `true` if `next()` will return a value, `false` if exhausted.
|
|
9187
|
+
*/
|
|
8625
9188
|
inline bool has_next() const;
|
|
8626
9189
|
|
|
8627
9190
|
private:
|
|
@@ -8974,10 +9537,8 @@ url_pattern_component<regex_provider>::create_component_match_result(
|
|
|
8974
9537
|
// says we should start from 1. This case is handled by the
|
|
8975
9538
|
// std_regex_provider.
|
|
8976
9539
|
for (size_t index = 0; index < exec_result.size(); index++) {
|
|
8977
|
-
result.groups.
|
|
8978
|
-
|
|
8979
|
-
std::move(exec_result[index]),
|
|
8980
|
-
});
|
|
9540
|
+
result.groups.emplace(group_name_list[index],
|
|
9541
|
+
std::move(exec_result[index]));
|
|
8981
9542
|
}
|
|
8982
9543
|
return result;
|
|
8983
9544
|
}
|
|
@@ -9083,43 +9644,113 @@ url_pattern_component<regex_provider>::compile(
|
|
|
9083
9644
|
return tl::unexpected(part_list.error());
|
|
9084
9645
|
}
|
|
9085
9646
|
|
|
9086
|
-
//
|
|
9087
|
-
|
|
9647
|
+
// Detect pattern type early to potentially skip expensive regex compilation
|
|
9648
|
+
const auto has_regexp = [](const auto& part) { return part.is_regexp(); };
|
|
9649
|
+
const bool has_regexp_groups = std::ranges::any_of(*part_list, has_regexp);
|
|
9650
|
+
|
|
9651
|
+
url_pattern_component_type component_type =
|
|
9652
|
+
url_pattern_component_type::REGEXP;
|
|
9653
|
+
std::string exact_match_value{};
|
|
9654
|
+
|
|
9655
|
+
if (part_list->empty()) {
|
|
9656
|
+
component_type = url_pattern_component_type::EMPTY;
|
|
9657
|
+
} else if (part_list->size() == 1) {
|
|
9658
|
+
const auto& part = (*part_list)[0];
|
|
9659
|
+
if (part.type == url_pattern_part_type::FIXED_TEXT &&
|
|
9660
|
+
part.modifier == url_pattern_part_modifier::none &&
|
|
9661
|
+
!options.ignore_case) {
|
|
9662
|
+
component_type = url_pattern_component_type::EXACT_MATCH;
|
|
9663
|
+
exact_match_value = part.value;
|
|
9664
|
+
} else if (part.type == url_pattern_part_type::FULL_WILDCARD &&
|
|
9665
|
+
part.modifier == url_pattern_part_modifier::none &&
|
|
9666
|
+
part.prefix.empty() && part.suffix.empty()) {
|
|
9667
|
+
component_type = url_pattern_component_type::FULL_WILDCARD;
|
|
9668
|
+
}
|
|
9669
|
+
}
|
|
9670
|
+
|
|
9671
|
+
// For simple patterns, skip regex generation and compilation entirely
|
|
9672
|
+
if (component_type != url_pattern_component_type::REGEXP) {
|
|
9673
|
+
auto pattern_string =
|
|
9674
|
+
url_pattern_helpers::generate_pattern_string(*part_list, options);
|
|
9675
|
+
// For FULL_WILDCARD, we need the group name from
|
|
9676
|
+
// generate_regular_expression
|
|
9677
|
+
std::vector<std::string> name_list;
|
|
9678
|
+
if (component_type == url_pattern_component_type::FULL_WILDCARD &&
|
|
9679
|
+
!part_list->empty()) {
|
|
9680
|
+
name_list.push_back((*part_list)[0].name);
|
|
9681
|
+
}
|
|
9682
|
+
return url_pattern_component<regex_provider>(
|
|
9683
|
+
std::move(pattern_string), typename regex_provider::regex_type{},
|
|
9684
|
+
std::move(name_list), has_regexp_groups, component_type,
|
|
9685
|
+
std::move(exact_match_value));
|
|
9686
|
+
}
|
|
9687
|
+
|
|
9688
|
+
// Generate regex for complex patterns
|
|
9088
9689
|
auto [regular_expression_string, name_list] =
|
|
9089
9690
|
url_pattern_helpers::generate_regular_expression_and_name_list(*part_list,
|
|
9090
9691
|
options);
|
|
9091
|
-
|
|
9092
|
-
ada_log("regular expression string: ", regular_expression_string);
|
|
9093
|
-
|
|
9094
|
-
// Let pattern string be the result of running generate a pattern
|
|
9095
|
-
// string given part list and options.
|
|
9096
9692
|
auto pattern_string =
|
|
9097
9693
|
url_pattern_helpers::generate_pattern_string(*part_list, options);
|
|
9098
9694
|
|
|
9099
|
-
// Let regular expression be RegExpCreate(regular expression string,
|
|
9100
|
-
// flags). If this throws an exception, catch it, and throw a
|
|
9101
|
-
// TypeError.
|
|
9102
9695
|
std::optional<typename regex_provider::regex_type> regular_expression =
|
|
9103
9696
|
regex_provider::create_instance(regular_expression_string,
|
|
9104
9697
|
options.ignore_case);
|
|
9105
|
-
|
|
9106
9698
|
if (!regular_expression) {
|
|
9107
9699
|
return tl::unexpected(errors::type_error);
|
|
9108
9700
|
}
|
|
9109
9701
|
|
|
9110
|
-
// For each part of part list:
|
|
9111
|
-
// - If part's type is "regexp", then set has regexp groups to true.
|
|
9112
|
-
const auto has_regexp = [](const auto& part) { return part.is_regexp(); };
|
|
9113
|
-
const bool has_regexp_groups = std::ranges::any_of(*part_list, has_regexp);
|
|
9114
|
-
|
|
9115
|
-
ada_log("has regexp groups: ", has_regexp_groups);
|
|
9116
|
-
|
|
9117
|
-
// Return a new component whose pattern string is pattern string, regular
|
|
9118
|
-
// expression is regular expression, group name list is name list, and has
|
|
9119
|
-
// regexp groups is has regexp groups.
|
|
9120
9702
|
return url_pattern_component<regex_provider>(
|
|
9121
9703
|
std::move(pattern_string), std::move(*regular_expression),
|
|
9122
|
-
std::move(name_list), has_regexp_groups
|
|
9704
|
+
std::move(name_list), has_regexp_groups, component_type,
|
|
9705
|
+
std::move(exact_match_value));
|
|
9706
|
+
}
|
|
9707
|
+
|
|
9708
|
+
template <url_pattern_regex::regex_concept regex_provider>
|
|
9709
|
+
bool url_pattern_component<regex_provider>::fast_test(
|
|
9710
|
+
std::string_view input) const noexcept {
|
|
9711
|
+
// Fast path for simple patterns - avoid regex evaluation
|
|
9712
|
+
// Using if-else for better branch prediction on common cases
|
|
9713
|
+
if (type == url_pattern_component_type::FULL_WILDCARD) {
|
|
9714
|
+
return true;
|
|
9715
|
+
}
|
|
9716
|
+
if (type == url_pattern_component_type::EXACT_MATCH) {
|
|
9717
|
+
return input == exact_match_value;
|
|
9718
|
+
}
|
|
9719
|
+
if (type == url_pattern_component_type::EMPTY) {
|
|
9720
|
+
return input.empty();
|
|
9721
|
+
}
|
|
9722
|
+
// type == REGEXP
|
|
9723
|
+
return regex_provider::regex_match(input, regexp);
|
|
9724
|
+
}
|
|
9725
|
+
|
|
9726
|
+
template <url_pattern_regex::regex_concept regex_provider>
|
|
9727
|
+
std::optional<std::vector<std::optional<std::string>>>
|
|
9728
|
+
url_pattern_component<regex_provider>::fast_match(
|
|
9729
|
+
std::string_view input) const {
|
|
9730
|
+
// Handle each type directly without redundant checks
|
|
9731
|
+
if (type == url_pattern_component_type::FULL_WILDCARD) {
|
|
9732
|
+
// FULL_WILDCARD always matches - capture the input (even if empty)
|
|
9733
|
+
// If there's no group name, return empty groups
|
|
9734
|
+
if (group_name_list.empty()) {
|
|
9735
|
+
return std::vector<std::optional<std::string>>{};
|
|
9736
|
+
}
|
|
9737
|
+
// Capture the matched input (including empty strings)
|
|
9738
|
+
return std::vector<std::optional<std::string>>{std::string(input)};
|
|
9739
|
+
}
|
|
9740
|
+
if (type == url_pattern_component_type::EXACT_MATCH) {
|
|
9741
|
+
if (input == exact_match_value) {
|
|
9742
|
+
return std::vector<std::optional<std::string>>{};
|
|
9743
|
+
}
|
|
9744
|
+
return std::nullopt;
|
|
9745
|
+
}
|
|
9746
|
+
if (type == url_pattern_component_type::EMPTY) {
|
|
9747
|
+
if (input.empty()) {
|
|
9748
|
+
return std::vector<std::optional<std::string>>{};
|
|
9749
|
+
}
|
|
9750
|
+
return std::nullopt;
|
|
9751
|
+
}
|
|
9752
|
+
// type == REGEXP - use regex
|
|
9753
|
+
return regex_provider::regex_search(input, regexp);
|
|
9123
9754
|
}
|
|
9124
9755
|
|
|
9125
9756
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
@@ -9130,18 +9761,88 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::exec(
|
|
|
9130
9761
|
return match(input, base_url);
|
|
9131
9762
|
}
|
|
9132
9763
|
|
|
9764
|
+
template <url_pattern_regex::regex_concept regex_provider>
|
|
9765
|
+
bool url_pattern<regex_provider>::test_components(
|
|
9766
|
+
std::string_view protocol, std::string_view username,
|
|
9767
|
+
std::string_view password, std::string_view hostname, std::string_view port,
|
|
9768
|
+
std::string_view pathname, std::string_view search,
|
|
9769
|
+
std::string_view hash) const {
|
|
9770
|
+
return protocol_component.fast_test(protocol) &&
|
|
9771
|
+
username_component.fast_test(username) &&
|
|
9772
|
+
password_component.fast_test(password) &&
|
|
9773
|
+
hostname_component.fast_test(hostname) &&
|
|
9774
|
+
port_component.fast_test(port) &&
|
|
9775
|
+
pathname_component.fast_test(pathname) &&
|
|
9776
|
+
search_component.fast_test(search) && hash_component.fast_test(hash);
|
|
9777
|
+
}
|
|
9778
|
+
|
|
9133
9779
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
9134
9780
|
result<bool> url_pattern<regex_provider>::test(
|
|
9135
|
-
const url_pattern_input& input, const std::string_view*
|
|
9136
|
-
//
|
|
9137
|
-
|
|
9138
|
-
|
|
9139
|
-
|
|
9140
|
-
|
|
9141
|
-
|
|
9142
|
-
|
|
9781
|
+
const url_pattern_input& input, const std::string_view* base_url_string) {
|
|
9782
|
+
// If input is a URLPatternInit
|
|
9783
|
+
if (std::holds_alternative<url_pattern_init>(input)) {
|
|
9784
|
+
if (base_url_string) {
|
|
9785
|
+
return tl::unexpected(errors::type_error);
|
|
9786
|
+
}
|
|
9787
|
+
|
|
9788
|
+
std::string protocol{}, username{}, password{}, hostname{};
|
|
9789
|
+
std::string port{}, pathname{}, search{}, hash{};
|
|
9790
|
+
|
|
9791
|
+
auto apply_result = url_pattern_init::process(
|
|
9792
|
+
std::get<url_pattern_init>(input), url_pattern_init::process_type::url,
|
|
9793
|
+
protocol, username, password, hostname, port, pathname, search, hash);
|
|
9794
|
+
|
|
9795
|
+
if (!apply_result) {
|
|
9796
|
+
return false;
|
|
9797
|
+
}
|
|
9798
|
+
|
|
9799
|
+
std::string_view search_view = *apply_result->search;
|
|
9800
|
+
if (search_view.starts_with("?")) {
|
|
9801
|
+
search_view.remove_prefix(1);
|
|
9802
|
+
}
|
|
9803
|
+
|
|
9804
|
+
return test_components(*apply_result->protocol, *apply_result->username,
|
|
9805
|
+
*apply_result->password, *apply_result->hostname,
|
|
9806
|
+
*apply_result->port, *apply_result->pathname,
|
|
9807
|
+
search_view, *apply_result->hash);
|
|
9808
|
+
}
|
|
9809
|
+
|
|
9810
|
+
// URL string input path
|
|
9811
|
+
result<url_aggregator> base_url;
|
|
9812
|
+
if (base_url_string) {
|
|
9813
|
+
base_url = ada::parse<url_aggregator>(*base_url_string, nullptr);
|
|
9814
|
+
if (!base_url) {
|
|
9815
|
+
return false;
|
|
9816
|
+
}
|
|
9817
|
+
}
|
|
9818
|
+
|
|
9819
|
+
auto url =
|
|
9820
|
+
ada::parse<url_aggregator>(std::get<std::string_view>(input),
|
|
9821
|
+
base_url.has_value() ? &*base_url : nullptr);
|
|
9822
|
+
if (!url) {
|
|
9823
|
+
return false;
|
|
9824
|
+
}
|
|
9825
|
+
|
|
9826
|
+
// Extract components as string_view
|
|
9827
|
+
auto protocol_view = url->get_protocol();
|
|
9828
|
+
if (protocol_view.ends_with(":")) {
|
|
9829
|
+
protocol_view.remove_suffix(1);
|
|
9830
|
+
}
|
|
9831
|
+
|
|
9832
|
+
auto search_view = url->get_search();
|
|
9833
|
+
if (search_view.starts_with("?")) {
|
|
9834
|
+
search_view.remove_prefix(1);
|
|
9835
|
+
}
|
|
9836
|
+
|
|
9837
|
+
auto hash_view = url->get_hash();
|
|
9838
|
+
if (hash_view.starts_with("#")) {
|
|
9839
|
+
hash_view.remove_prefix(1);
|
|
9143
9840
|
}
|
|
9144
|
-
|
|
9841
|
+
|
|
9842
|
+
return test_components(protocol_view, url->get_username(),
|
|
9843
|
+
url->get_password(), url->get_hostname(),
|
|
9844
|
+
url->get_port(), url->get_pathname(), search_view,
|
|
9845
|
+
hash_view);
|
|
9145
9846
|
}
|
|
9146
9847
|
|
|
9147
9848
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
@@ -9290,74 +9991,61 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
|
|
|
9290
9991
|
}
|
|
9291
9992
|
}
|
|
9292
9993
|
|
|
9994
|
+
// Use fast_match which skips regex for simple patterns (EMPTY, EXACT_MATCH,
|
|
9995
|
+
// FULL_WILDCARD) and only falls back to regex for complex REGEXP patterns.
|
|
9996
|
+
|
|
9293
9997
|
// Let protocolExecResult be RegExpBuiltinExec(urlPattern's protocol
|
|
9294
9998
|
// component's regular expression, protocol).
|
|
9295
|
-
auto protocol_exec_result =
|
|
9296
|
-
regex_provider::regex_search(protocol, protocol_component.regexp);
|
|
9297
|
-
|
|
9999
|
+
auto protocol_exec_result = protocol_component.fast_match(protocol);
|
|
9298
10000
|
if (!protocol_exec_result) {
|
|
9299
10001
|
return std::nullopt;
|
|
9300
10002
|
}
|
|
9301
10003
|
|
|
9302
10004
|
// Let usernameExecResult be RegExpBuiltinExec(urlPattern's username
|
|
9303
10005
|
// component's regular expression, username).
|
|
9304
|
-
auto username_exec_result =
|
|
9305
|
-
regex_provider::regex_search(username, username_component.regexp);
|
|
9306
|
-
|
|
10006
|
+
auto username_exec_result = username_component.fast_match(username);
|
|
9307
10007
|
if (!username_exec_result) {
|
|
9308
10008
|
return std::nullopt;
|
|
9309
10009
|
}
|
|
9310
10010
|
|
|
9311
10011
|
// Let passwordExecResult be RegExpBuiltinExec(urlPattern's password
|
|
9312
10012
|
// component's regular expression, password).
|
|
9313
|
-
auto password_exec_result =
|
|
9314
|
-
regex_provider::regex_search(password, password_component.regexp);
|
|
9315
|
-
|
|
10013
|
+
auto password_exec_result = password_component.fast_match(password);
|
|
9316
10014
|
if (!password_exec_result) {
|
|
9317
10015
|
return std::nullopt;
|
|
9318
10016
|
}
|
|
9319
10017
|
|
|
9320
10018
|
// Let hostnameExecResult be RegExpBuiltinExec(urlPattern's hostname
|
|
9321
10019
|
// component's regular expression, hostname).
|
|
9322
|
-
auto hostname_exec_result =
|
|
9323
|
-
regex_provider::regex_search(hostname, hostname_component.regexp);
|
|
9324
|
-
|
|
10020
|
+
auto hostname_exec_result = hostname_component.fast_match(hostname);
|
|
9325
10021
|
if (!hostname_exec_result) {
|
|
9326
10022
|
return std::nullopt;
|
|
9327
10023
|
}
|
|
9328
10024
|
|
|
9329
10025
|
// Let portExecResult be RegExpBuiltinExec(urlPattern's port component's
|
|
9330
10026
|
// regular expression, port).
|
|
9331
|
-
auto port_exec_result =
|
|
9332
|
-
regex_provider::regex_search(port, port_component.regexp);
|
|
9333
|
-
|
|
10027
|
+
auto port_exec_result = port_component.fast_match(port);
|
|
9334
10028
|
if (!port_exec_result) {
|
|
9335
10029
|
return std::nullopt;
|
|
9336
10030
|
}
|
|
9337
10031
|
|
|
9338
10032
|
// Let pathnameExecResult be RegExpBuiltinExec(urlPattern's pathname
|
|
9339
10033
|
// component's regular expression, pathname).
|
|
9340
|
-
auto pathname_exec_result =
|
|
9341
|
-
regex_provider::regex_search(pathname, pathname_component.regexp);
|
|
9342
|
-
|
|
10034
|
+
auto pathname_exec_result = pathname_component.fast_match(pathname);
|
|
9343
10035
|
if (!pathname_exec_result) {
|
|
9344
10036
|
return std::nullopt;
|
|
9345
10037
|
}
|
|
9346
10038
|
|
|
9347
10039
|
// Let searchExecResult be RegExpBuiltinExec(urlPattern's search component's
|
|
9348
10040
|
// regular expression, search).
|
|
9349
|
-
auto search_exec_result =
|
|
9350
|
-
regex_provider::regex_search(search, search_component.regexp);
|
|
9351
|
-
|
|
10041
|
+
auto search_exec_result = search_component.fast_match(search);
|
|
9352
10042
|
if (!search_exec_result) {
|
|
9353
10043
|
return std::nullopt;
|
|
9354
10044
|
}
|
|
9355
10045
|
|
|
9356
10046
|
// Let hashExecResult be RegExpBuiltinExec(urlPattern's hash component's
|
|
9357
10047
|
// regular expression, hash).
|
|
9358
|
-
auto hash_exec_result =
|
|
9359
|
-
regex_provider::regex_search(hash, hash_component.regexp);
|
|
9360
|
-
|
|
10048
|
+
auto hash_exec_result = hash_component.fast_match(hash);
|
|
9361
10049
|
if (!hash_exec_result) {
|
|
9362
10050
|
return std::nullopt;
|
|
9363
10051
|
}
|
|
@@ -9706,8 +10394,8 @@ std::string constructor_string_parser<regex_provider>::make_component_string() {
|
|
|
9706
10394
|
const auto component_start_input_index = component_start_token->index;
|
|
9707
10395
|
// Return the code point substring from component start input index to end
|
|
9708
10396
|
// index within parser's input.
|
|
9709
|
-
return input.substr(component_start_input_index,
|
|
9710
|
-
|
|
10397
|
+
return std::string(input.substr(component_start_input_index,
|
|
10398
|
+
end_index - component_start_input_index));
|
|
9711
10399
|
}
|
|
9712
10400
|
|
|
9713
10401
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
@@ -10213,13 +10901,31 @@ tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
|
|
|
10213
10901
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
10214
10902
|
bool protocol_component_matches_special_scheme(
|
|
10215
10903
|
url_pattern_component<regex_provider>& component) {
|
|
10216
|
-
//
|
|
10217
|
-
|
|
10218
|
-
|
|
10219
|
-
|
|
10220
|
-
|
|
10221
|
-
|
|
10222
|
-
|
|
10904
|
+
// Optimization: Use fast_test for simple patterns to avoid regex overhead
|
|
10905
|
+
switch (component.type) {
|
|
10906
|
+
case url_pattern_component_type::EMPTY:
|
|
10907
|
+
// Empty pattern can't match any special scheme
|
|
10908
|
+
return false;
|
|
10909
|
+
case url_pattern_component_type::EXACT_MATCH:
|
|
10910
|
+
// Direct string comparison for exact match patterns
|
|
10911
|
+
return component.exact_match_value == "http" ||
|
|
10912
|
+
component.exact_match_value == "https" ||
|
|
10913
|
+
component.exact_match_value == "ws" ||
|
|
10914
|
+
component.exact_match_value == "wss" ||
|
|
10915
|
+
component.exact_match_value == "ftp";
|
|
10916
|
+
case url_pattern_component_type::FULL_WILDCARD:
|
|
10917
|
+
// Full wildcard matches everything including special schemes
|
|
10918
|
+
return true;
|
|
10919
|
+
case url_pattern_component_type::REGEXP:
|
|
10920
|
+
// Fall back to regex matching for complex patterns
|
|
10921
|
+
auto& regex = component.regexp;
|
|
10922
|
+
return regex_provider::regex_match("http", regex) ||
|
|
10923
|
+
regex_provider::regex_match("https", regex) ||
|
|
10924
|
+
regex_provider::regex_match("ws", regex) ||
|
|
10925
|
+
regex_provider::regex_match("wss", regex) ||
|
|
10926
|
+
regex_provider::regex_match("ftp", regex);
|
|
10927
|
+
}
|
|
10928
|
+
ada::unreachable();
|
|
10223
10929
|
}
|
|
10224
10930
|
|
|
10225
10931
|
template <url_pattern_regex::regex_concept regex_provider>
|
|
@@ -10515,14 +11221,14 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
|
|
|
10515
11221
|
#ifndef ADA_ADA_VERSION_H
|
|
10516
11222
|
#define ADA_ADA_VERSION_H
|
|
10517
11223
|
|
|
10518
|
-
#define ADA_VERSION "3.
|
|
11224
|
+
#define ADA_VERSION "3.4.2"
|
|
10519
11225
|
|
|
10520
11226
|
namespace ada {
|
|
10521
11227
|
|
|
10522
11228
|
enum {
|
|
10523
11229
|
ADA_VERSION_MAJOR = 3,
|
|
10524
|
-
ADA_VERSION_MINOR =
|
|
10525
|
-
ADA_VERSION_REVISION =
|
|
11230
|
+
ADA_VERSION_MINOR = 4,
|
|
11231
|
+
ADA_VERSION_REVISION = 2,
|
|
10526
11232
|
};
|
|
10527
11233
|
|
|
10528
11234
|
} // namespace ada
|