ada-url 1.19.0__cp312-cp312-macosx_11_0_arm64.whl → 1.29.0__cp312-cp312-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ada_url/ada.h CHANGED
@@ -1,14 +1,36 @@
1
- /* auto-generated on 2025-02-25 13:08:18 -0500. Do not edit! */
1
+ /* auto-generated on 2026-01-30 13:29:04 -0500. Do not edit! */
2
2
  /* begin file include/ada.h */
3
3
  /**
4
4
  * @file ada.h
5
- * @brief Includes all definitions for Ada.
5
+ * @brief Main header for the Ada URL parser library.
6
+ *
7
+ * This is the primary entry point for the Ada URL parser library. Including
8
+ * this single header provides access to the complete Ada API, including:
9
+ *
10
+ * - URL parsing via `ada::parse()` function
11
+ * - Two URL representations: `ada::url` and `ada::url_aggregator`
12
+ * - URL search parameters via `ada::url_search_params`
13
+ * - URL pattern matching via `ada::url_pattern` (URLPattern API)
14
+ * - IDNA (Internationalized Domain Names) support
15
+ *
16
+ * @example
17
+ * ```cpp
18
+ *
19
+ * // Parse a URL
20
+ * auto url = ada::parse("https://example.com/path?query=1");
21
+ * if (url) {
22
+ * std::cout << url->get_hostname(); // "example.com"
23
+ * }
24
+ * ```
25
+ *
26
+ * @see https://url.spec.whatwg.org/ - WHATWG URL Standard
27
+ * @see https://github.com/ada-url/ada - Ada URL Parser GitHub Repository
6
28
  */
7
29
  #ifndef ADA_H
8
30
  #define ADA_H
9
31
 
10
32
  /* begin file include/ada/ada_idna.h */
11
- /* auto-generated on 2024-12-18 09:44:34 -0500. Do not edit! */
33
+ /* auto-generated on 2026-01-30 12:00:02 -0500. Do not edit! */
12
34
  /* begin file include/idna.h */
13
35
  #ifndef ADA_IDNA_H
14
36
  #define ADA_IDNA_H
@@ -45,8 +67,6 @@ namespace ada::idna {
45
67
 
46
68
  // If the input is ascii, then the mapping is just -> lower case.
47
69
  void ascii_map(char* input, size_t length);
48
- // check whether an ascii string needs mapping
49
- bool ascii_has_upper_case(char* input, size_t length);
50
70
  // Map the characters according to IDNA, returning the empty string on error.
51
71
  std::u32string map(std::u32string_view input);
52
72
 
@@ -160,7 +180,6 @@ std::string to_unicode(std::string_view input);
160
180
 
161
181
  namespace ada::idna {
162
182
 
163
- // Access the first code point of the input string.
164
183
  // Verify if it is valid name code point given a Unicode code point and a
165
184
  // boolean first: If first is true return the result of checking if code point
166
185
  // is contained in the IdentifierStart set of code points. Otherwise return the
@@ -168,7 +187,7 @@ namespace ada::idna {
168
187
  // code points. Returns false if the input is empty or the code point is not
169
188
  // valid. There is minimal Unicode error handling: the input should be valid
170
189
  // UTF-8. https://urlpattern.spec.whatwg.org/#is-a-valid-name-code-point
171
- bool valid_name_code_point(char32_t input, bool first);
190
+ bool valid_name_code_point(char32_t code_point, bool first);
172
191
 
173
192
  } // namespace ada::idna
174
193
 
@@ -191,7 +210,11 @@ bool valid_name_code_point(char32_t input, bool first);
191
210
  /* begin file include/ada/common_defs.h */
192
211
  /**
193
212
  * @file common_defs.h
194
- * @brief Common definitions for cross-platform compiler support.
213
+ * @brief Cross-platform compiler macros and common definitions.
214
+ *
215
+ * This header provides compiler-specific macros for optimization hints,
216
+ * platform detection, SIMD support detection, and development/debug utilities.
217
+ * It ensures consistent behavior across different compilers (GCC, Clang, MSVC).
195
218
  */
196
219
  #ifndef ADA_COMMON_DEFS_H
197
220
  #define ADA_COMMON_DEFS_H
@@ -424,6 +447,10 @@ namespace ada {
424
447
  } while (0)
425
448
  #endif
426
449
 
450
+ #if defined(__SSSE3__)
451
+ #define ADA_SSSE3 1
452
+ #endif
453
+
427
454
  #if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \
428
455
  (defined(_M_AMD64) || defined(_M_X64) || \
429
456
  (defined(_M_IX86_FP) && _M_IX86_FP == 2))
@@ -434,6 +461,15 @@ namespace ada {
434
461
  #define ADA_NEON 1
435
462
  #endif
436
463
 
464
+ #if defined(__loongarch_sx)
465
+ #define ADA_LSX 1
466
+ #endif
467
+
468
+ #if defined(__riscv_v) && __riscv_v_intrinsic >= 11000
469
+ // Support RVV intrinsics v0.11 and above
470
+ #define ADA_RVV 1
471
+ #endif
472
+
437
473
  #ifndef __has_cpp_attribute
438
474
  #define ada_lifetime_bound
439
475
  #elif __has_cpp_attribute(msvc::lifetimebound)
@@ -453,6 +489,10 @@ namespace ada {
453
489
  #endif
454
490
  #endif
455
491
 
492
+ #ifndef ADA_INCLUDE_URL_PATTERN
493
+ #define ADA_INCLUDE_URL_PATTERN 1
494
+ #endif // ADA_INCLUDE_URL_PATTERN
495
+
456
496
  #endif // ADA_COMMON_DEFS_H
457
497
  /* end file include/ada/common_defs.h */
458
498
  #include <cstdint>
@@ -876,7 +916,7 @@ constexpr uint8_t PATH_PERCENT_ENCODE[32] = {
876
916
  // 50 51 52 53 54 55 56 57
877
917
  0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
878
918
  // 58 59 5A 5B 5C 5D 5E 5F
879
- 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
919
+ 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x40 | 0x00,
880
920
  // 60 61 62 63 64 65 66 67
881
921
  0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
882
922
  // 68 69 6A 6B 6C 6D 6E 6F
@@ -942,7 +982,7 @@ constexpr uint8_t WWW_FORM_URLENCODED_PERCENT_ENCODE[32] = {
942
982
  // 50 51 52 53 54 55 56 57
943
983
  0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
944
984
  // 58 59 5A 5B 5C 5D 5E 5F
945
- 0x00 | 0x00 | 0x00 | 0x08 | 0x00 | 0x20 | 0x40 | 0x00,
985
+ 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x40 | 0x00,
946
986
  // 60 61 62 63 64 65 66 67
947
987
  0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
948
988
  // 68 69 6A 6B 6C 6D 6E 6F
@@ -1002,6 +1042,140 @@ ada_really_inline constexpr bool bit_at(const uint8_t a[], const uint8_t i) {
1002
1042
 
1003
1043
  #include <bit>
1004
1044
  #include <string_view>
1045
+ /* begin file include/ada/checkers.h */
1046
+ /**
1047
+ * @file checkers.h
1048
+ * @brief Declarations for URL specific checkers used within Ada.
1049
+ */
1050
+ #ifndef ADA_CHECKERS_H
1051
+ #define ADA_CHECKERS_H
1052
+
1053
+
1054
+ #include <cstring>
1055
+ #include <string_view>
1056
+
1057
+ /**
1058
+ * These functions are not part of our public API and may
1059
+ * change at any time.
1060
+ * @private
1061
+ * @namespace ada::checkers
1062
+ * @brief Includes the definitions for validation functions
1063
+ */
1064
+ namespace ada::checkers {
1065
+
1066
+ /**
1067
+ * @private
1068
+ * Assuming that x is an ASCII letter, this function returns the lower case
1069
+ * equivalent.
1070
+ * @details More likely to be inlined by the compiler and constexpr.
1071
+ */
1072
+ constexpr char to_lower(char x) noexcept;
1073
+
1074
+ /**
1075
+ * @private
1076
+ * Returns true if the character is an ASCII letter. Equivalent to std::isalpha
1077
+ * but more likely to be inlined by the compiler.
1078
+ *
1079
+ * @attention std::isalpha is not constexpr generally.
1080
+ */
1081
+ constexpr bool is_alpha(char x) noexcept;
1082
+
1083
+ /**
1084
+ * @private
1085
+ * Check whether a string starts with 0x or 0X. The function is only
1086
+ * safe if input.size() >=2.
1087
+ *
1088
+ * @see has_hex_prefix
1089
+ */
1090
+ constexpr bool has_hex_prefix_unsafe(std::string_view input);
1091
+ /**
1092
+ * @private
1093
+ * Check whether a string starts with 0x or 0X.
1094
+ */
1095
+ constexpr bool has_hex_prefix(std::string_view input);
1096
+
1097
+ /**
1098
+ * @private
1099
+ * Check whether x is an ASCII digit. More likely to be inlined than
1100
+ * std::isdigit.
1101
+ */
1102
+ constexpr bool is_digit(char x) noexcept;
1103
+
1104
+ /**
1105
+ * @private
1106
+ * @details A string starts with a Windows drive letter if all of the following
1107
+ * are true:
1108
+ *
1109
+ * - its length is greater than or equal to 2
1110
+ * - its first two code points are a Windows drive letter
1111
+ * - its length is 2 or its third code point is U+002F (/), U+005C (\), U+003F
1112
+ * (?), or U+0023 (#).
1113
+ *
1114
+ * https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
1115
+ */
1116
+ inline constexpr bool is_windows_drive_letter(std::string_view input) noexcept;
1117
+
1118
+ /**
1119
+ * @private
1120
+ * @details A normalized Windows drive letter is a Windows drive letter of which
1121
+ * the second code point is U+003A (:).
1122
+ */
1123
+ inline constexpr bool is_normalized_windows_drive_letter(
1124
+ std::string_view input) noexcept;
1125
+
1126
+ /**
1127
+ * @private
1128
+ * Returns true if an input is an ipv4 address. It is assumed that the string
1129
+ * does not contain uppercase ASCII characters (the input should have been
1130
+ * lowered cased before calling this function) and is not empty.
1131
+ */
1132
+ ada_really_inline constexpr bool is_ipv4(std::string_view view) noexcept;
1133
+
1134
+ /**
1135
+ * @private
1136
+ * Returns a bitset. If the first bit is set, then at least one character needs
1137
+ * percent encoding. If the second bit is set, a \\ is found. If the third bit
1138
+ * is set then we have a dot. If the fourth bit is set, then we have a percent
1139
+ * character.
1140
+ */
1141
+ ada_really_inline constexpr uint8_t path_signature(
1142
+ std::string_view input) noexcept;
1143
+
1144
+ /**
1145
+ * @private
1146
+ * Returns true if the length of the domain name and its labels are according to
1147
+ * the specifications. The length of the domain must be 255 octets (253
1148
+ * characters not including the last 2 which are the empty label reserved at the
1149
+ * end). When the empty label is included (a dot at the end), the domain name
1150
+ * can have 254 characters. The length of a label must be at least 1 and at most
1151
+ * 63 characters.
1152
+ * @see section 3.1. of https://www.rfc-editor.org/rfc/rfc1034
1153
+ * @see https://www.unicode.org/reports/tr46/#ToASCII
1154
+ */
1155
+ ada_really_inline constexpr bool verify_dns_length(
1156
+ std::string_view input) noexcept;
1157
+
1158
+ /**
1159
+ * @private
1160
+ * Fast-path parser for pure decimal IPv4 addresses (e.g., "192.168.1.1").
1161
+ * Returns the packed 32-bit IPv4 address on success, or a value > 0xFFFFFFFF
1162
+ * to indicate failure (caller should fall back to general parser).
1163
+ * This is optimized for the common case where the input is a well-formed
1164
+ * decimal IPv4 address with exactly 4 octets.
1165
+ */
1166
+ ada_really_inline constexpr uint64_t try_parse_ipv4_fast(
1167
+ std::string_view input) noexcept;
1168
+
1169
+ /**
1170
+ * Sentinel value indicating try_parse_ipv4_fast() did not succeed.
1171
+ * Any value > 0xFFFFFFFF indicates the fast path should not be used.
1172
+ */
1173
+ constexpr uint64_t ipv4_fast_fail = uint64_t(1) << 32;
1174
+
1175
+ } // namespace ada::checkers
1176
+
1177
+ #endif // ADA_CHECKERS_H
1178
+ /* end file include/ada/checkers.h */
1005
1179
 
1006
1180
  namespace ada::checkers {
1007
1181
 
@@ -1044,6 +1218,64 @@ constexpr bool is_normalized_windows_drive_letter(
1044
1218
  return input.size() >= 2 && (is_alpha(input[0]) && (input[1] == ':'));
1045
1219
  }
1046
1220
 
1221
+ ada_really_inline constexpr uint64_t try_parse_ipv4_fast(
1222
+ std::string_view input) noexcept {
1223
+ const char* p = input.data();
1224
+ const char* const pend = p + input.size();
1225
+
1226
+ uint32_t ipv4 = 0;
1227
+
1228
+ for (int i = 0; i < 4; ++i) {
1229
+ if (p == pend) {
1230
+ return ipv4_fast_fail;
1231
+ }
1232
+
1233
+ uint32_t val;
1234
+ char c = *p;
1235
+ if (c >= '0' && c <= '9') {
1236
+ val = c - '0';
1237
+ p++;
1238
+ } else {
1239
+ return ipv4_fast_fail;
1240
+ }
1241
+
1242
+ if (p < pend) {
1243
+ c = *p;
1244
+ if (c >= '0' && c <= '9') {
1245
+ if (val == 0) return ipv4_fast_fail;
1246
+ val = val * 10 + (c - '0');
1247
+ p++;
1248
+ if (p < pend) {
1249
+ c = *p;
1250
+ if (c >= '0' && c <= '9') {
1251
+ val = val * 10 + (c - '0');
1252
+ p++;
1253
+ if (val > 255) return ipv4_fast_fail;
1254
+ }
1255
+ }
1256
+ }
1257
+ }
1258
+
1259
+ ipv4 = (ipv4 << 8) | val;
1260
+
1261
+ if (i < 3) {
1262
+ if (p == pend || *p != '.') {
1263
+ return ipv4_fast_fail;
1264
+ }
1265
+ p++;
1266
+ }
1267
+ }
1268
+
1269
+ if (p != pend) {
1270
+ if (p == pend - 1 && *p == '.') {
1271
+ return ipv4;
1272
+ }
1273
+ return ipv4_fast_fail;
1274
+ }
1275
+
1276
+ return ipv4;
1277
+ }
1278
+
1047
1279
  } // namespace ada::checkers
1048
1280
 
1049
1281
  #endif // ADA_CHECKERS_INL_H
@@ -1097,7 +1329,11 @@ constexpr ada_really_inline void log([[maybe_unused]] Args... args) {
1097
1329
  /* begin file include/ada/encoding_type.h */
1098
1330
  /**
1099
1331
  * @file encoding_type.h
1100
- * @brief Definition for supported encoding types.
1332
+ * @brief Character encoding type definitions.
1333
+ *
1334
+ * Defines the encoding types supported for URL processing.
1335
+ *
1336
+ * @see https://encoding.spec.whatwg.org/
1101
1337
  */
1102
1338
  #ifndef ADA_ENCODING_TYPE_H
1103
1339
  #define ADA_ENCODING_TYPE_H
@@ -1107,21 +1343,25 @@ constexpr ada_really_inline void log([[maybe_unused]] Args... args) {
1107
1343
  namespace ada {
1108
1344
 
1109
1345
  /**
1110
- * This specification defines three encodings with the same names as encoding
1111
- * schemes defined in the Unicode standard: UTF-8, UTF-16LE, and UTF-16BE.
1346
+ * @brief Character encoding types for URL processing.
1347
+ *
1348
+ * Specifies the character encoding used for percent-decoding and other
1349
+ * string operations. UTF-8 is the most commonly used encoding for URLs.
1112
1350
  *
1113
1351
  * @see https://encoding.spec.whatwg.org/#encodings
1114
1352
  */
1115
1353
  enum class encoding_type {
1116
- UTF8,
1117
- UTF_16LE,
1118
- UTF_16BE,
1354
+ UTF8, /**< UTF-8 encoding (default for URLs) */
1355
+ UTF_16LE, /**< UTF-16 Little Endian encoding */
1356
+ UTF_16BE, /**< UTF-16 Big Endian encoding */
1119
1357
  };
1120
1358
 
1121
1359
  /**
1122
- * Convert a encoding_type to string.
1360
+ * Converts an encoding_type to its string representation.
1361
+ * @param type The encoding type to convert.
1362
+ * @return A string view of the encoding name.
1123
1363
  */
1124
- ada_warn_unused std::string to_string(encoding_type type);
1364
+ ada_warn_unused std::string_view to_string(encoding_type type);
1125
1365
 
1126
1366
  } // namespace ada
1127
1367
 
@@ -1138,7 +1378,11 @@ ada_warn_unused std::string to_string(encoding_type type);
1138
1378
  /* begin file include/ada/url_base.h */
1139
1379
  /**
1140
1380
  * @file url_base.h
1141
- * @brief Declaration for the basic URL definitions
1381
+ * @brief Base class and common definitions for URL types.
1382
+ *
1383
+ * This file defines the `url_base` abstract base class from which both
1384
+ * `ada::url` and `ada::url_aggregator` inherit. It also defines common
1385
+ * enumerations like `url_host_type`.
1142
1386
  */
1143
1387
  #ifndef ADA_URL_BASE_H
1144
1388
  #define ADA_URL_BASE_H
@@ -1146,7 +1390,13 @@ ada_warn_unused std::string to_string(encoding_type type);
1146
1390
  /* begin file include/ada/scheme.h */
1147
1391
  /**
1148
1392
  * @file scheme.h
1149
- * @brief Declarations for the URL scheme.
1393
+ * @brief URL scheme type definitions and utilities.
1394
+ *
1395
+ * This header defines the URL scheme types (http, https, etc.) and provides
1396
+ * functions to identify special schemes and their default ports according
1397
+ * to the WHATWG URL Standard.
1398
+ *
1399
+ * @see https://url.spec.whatwg.org/#special-scheme
1150
1400
  */
1151
1401
  #ifndef ADA_SCHEME_H
1152
1402
  #define ADA_SCHEME_H
@@ -1156,62 +1406,65 @@ ada_warn_unused std::string to_string(encoding_type type);
1156
1406
 
1157
1407
  /**
1158
1408
  * @namespace ada::scheme
1159
- * @brief Includes the scheme declarations
1409
+ * @brief URL scheme utilities and constants.
1410
+ *
1411
+ * Provides functions for working with URL schemes, including identification
1412
+ * of special schemes and retrieval of default port numbers.
1160
1413
  */
1161
1414
  namespace ada::scheme {
1162
1415
 
1163
1416
  /**
1164
- * Type of the scheme as an enum.
1165
- * Using strings to represent a scheme type is not ideal because
1166
- * checking for types involves string comparisons. It is faster to use
1167
- * a simple integer.
1168
- * In C++11, we are allowed to specify the underlying type of the enum.
1169
- * We pick an 8-bit integer (which allows up to 256 types). Specifying the
1170
- * type of the enum may help integration with other systems if the type
1171
- * variable is exposed (since its value will not depend on the compiler).
1417
+ * @brief Enumeration of URL scheme types.
1418
+ *
1419
+ * Special schemes have specific parsing rules and default ports.
1420
+ * Using an enum allows efficient scheme comparisons without string operations.
1421
+ *
1422
+ * Default ports:
1423
+ * - HTTP: 80
1424
+ * - HTTPS: 443
1425
+ * - WS: 80
1426
+ * - WSS: 443
1427
+ * - FTP: 21
1428
+ * - FILE: (none)
1172
1429
  */
1173
1430
  enum type : uint8_t {
1174
- HTTP = 0,
1175
- NOT_SPECIAL = 1,
1176
- HTTPS = 2,
1177
- WS = 3,
1178
- FTP = 4,
1179
- WSS = 5,
1180
- FILE = 6
1431
+ HTTP = 0, /**< http:// scheme (port 80) */
1432
+ NOT_SPECIAL = 1, /**< Non-special scheme (no default port) */
1433
+ HTTPS = 2, /**< https:// scheme (port 443) */
1434
+ WS = 3, /**< ws:// WebSocket scheme (port 80) */
1435
+ FTP = 4, /**< ftp:// scheme (port 21) */
1436
+ WSS = 5, /**< wss:// secure WebSocket scheme (port 443) */
1437
+ FILE = 6 /**< file:// scheme (no default port) */
1181
1438
  };
1182
1439
 
1183
1440
  /**
1184
- * A special scheme is an ASCII string that is listed in the first column of the
1185
- * following table. The default port for a special scheme is listed in the
1186
- * second column on the same row. The default port for any other ASCII string is
1187
- * null.
1188
- *
1189
- * @see https://url.spec.whatwg.org/#url-miscellaneous
1190
- * @param scheme
1191
- * @return If scheme is a special scheme
1441
+ * Checks if a scheme string is a special scheme.
1442
+ * @param scheme The scheme string to check (e.g., "http", "https").
1443
+ * @return `true` if the scheme is special, `false` otherwise.
1444
+ * @see https://url.spec.whatwg.org/#special-scheme
1192
1445
  */
1193
1446
  ada_really_inline constexpr bool is_special(std::string_view scheme);
1194
1447
 
1195
1448
  /**
1196
- * A special scheme is an ASCII string that is listed in the first column of the
1197
- * following table. The default port for a special scheme is listed in the
1198
- * second column on the same row. The default port for any other ASCII string is
1199
- * null.
1200
- *
1201
- * @see https://url.spec.whatwg.org/#url-miscellaneous
1202
- * @param scheme
1203
- * @return The special port
1449
+ * Returns the default port for a special scheme string.
1450
+ * @param scheme The scheme string (e.g., "http", "https").
1451
+ * @return The default port number, or 0 if not a special scheme.
1452
+ * @see https://url.spec.whatwg.org/#special-scheme
1204
1453
  */
1205
1454
  constexpr uint16_t get_special_port(std::string_view scheme) noexcept;
1206
1455
 
1207
1456
  /**
1208
- * Returns the port number of a special scheme.
1457
+ * Returns the default port for a scheme type.
1458
+ * @param type The scheme type enum value.
1459
+ * @return The default port number, or 0 if not applicable.
1209
1460
  * @see https://url.spec.whatwg.org/#special-scheme
1210
1461
  */
1211
1462
  constexpr uint16_t get_special_port(ada::scheme::type type) noexcept;
1463
+
1212
1464
  /**
1213
- * Returns the scheme of an input, or NOT_SPECIAL if it's not a special scheme
1214
- * defined by the spec.
1465
+ * Converts a scheme string to its type enum.
1466
+ * @param scheme The scheme string to convert.
1467
+ * @return The corresponding scheme type, or NOT_SPECIAL if not recognized.
1215
1468
  */
1216
1469
  constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept;
1217
1470
 
@@ -1226,112 +1479,112 @@ constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept;
1226
1479
  namespace ada {
1227
1480
 
1228
1481
  /**
1229
- * Type of URL host as an enum.
1482
+ * @brief Enum representing the type of host in a URL.
1483
+ *
1484
+ * Used to distinguish between regular domain names, IPv4 addresses,
1485
+ * and IPv6 addresses for proper parsing and serialization.
1230
1486
  */
1231
1487
  enum url_host_type : uint8_t {
1232
- /**
1233
- * Represents common URLs such as "https://www.google.com"
1234
- */
1488
+ /** Regular domain name (e.g., "www.example.com") */
1235
1489
  DEFAULT = 0,
1236
- /**
1237
- * Represents ipv4 addresses such as "http://127.0.0.1"
1238
- */
1490
+ /** IPv4 address (e.g., "127.0.0.1") */
1239
1491
  IPV4 = 1,
1240
- /**
1241
- * Represents ipv6 addresses such as
1242
- * "http://[2001:db8:3333:4444:5555:6666:7777:8888]"
1243
- */
1492
+ /** IPv6 address (e.g., "[::1]" or "[2001:db8::1]") */
1244
1493
  IPV6 = 2,
1245
1494
  };
1246
1495
 
1247
1496
  /**
1248
- * @brief Base class of URL implementations
1497
+ * @brief Abstract base class for URL representations.
1249
1498
  *
1250
- * @details A url_base contains a few attributes: is_valid, has_opaque_path and
1251
- * type. All non-trivial implementation details are in derived classes such as
1252
- * ada::url and ada::url_aggregator.
1499
+ * The `url_base` class provides the common interface and state shared by
1500
+ * both `ada::url` and `ada::url_aggregator`. It contains basic URL attributes
1501
+ * like validity status and scheme type, but delegates component storage and
1502
+ * access to derived classes.
1253
1503
  *
1254
- * It is an abstract class that cannot be instantiated directly.
1504
+ * @note This is an abstract class and cannot be instantiated directly.
1505
+ * Use `ada::url` or `ada::url_aggregator` instead.
1506
+ *
1507
+ * @see url
1508
+ * @see url_aggregator
1255
1509
  */
1256
1510
  struct url_base {
1257
1511
  virtual ~url_base() = default;
1258
1512
 
1259
1513
  /**
1260
- * Used for returning the validity from the result of the URL parser.
1514
+ * Indicates whether the URL was successfully parsed.
1515
+ * Set to `false` if parsing failed (e.g., invalid URL syntax).
1261
1516
  */
1262
1517
  bool is_valid{true};
1263
1518
 
1264
1519
  /**
1265
- * A URL has an opaque path if its path is a string.
1520
+ * Indicates whether the URL has an opaque path (non-hierarchical).
1521
+ * Opaque paths occur in non-special URLs like `mailto:` or `javascript:`.
1266
1522
  */
1267
1523
  bool has_opaque_path{false};
1268
1524
 
1269
1525
  /**
1270
- * URL hosts type
1526
+ * The type of the URL's host (domain, IPv4, or IPv6).
1271
1527
  */
1272
1528
  url_host_type host_type = url_host_type::DEFAULT;
1273
1529
 
1274
1530
  /**
1275
1531
  * @private
1532
+ * Internal representation of the URL's scheme type.
1276
1533
  */
1277
1534
  ada::scheme::type type{ada::scheme::type::NOT_SPECIAL};
1278
1535
 
1279
1536
  /**
1280
- * A URL is special if its scheme is a special scheme. A URL is not special if
1281
- * its scheme is not a special scheme.
1537
+ * Checks if the URL has a special scheme (http, https, ws, wss, ftp, file).
1538
+ * Special schemes have specific parsing rules and default ports.
1539
+ * @return `true` if the scheme is special, `false` otherwise.
1282
1540
  */
1283
1541
  [[nodiscard]] ada_really_inline constexpr bool is_special() const noexcept;
1284
1542
 
1285
1543
  /**
1286
- * The origin getter steps are to return the serialization of this's URL's
1287
- * origin. [HTML]
1288
- * @return a newly allocated string.
1544
+ * Returns the URL's origin (scheme + host + port for special URLs).
1545
+ * @return A newly allocated string containing the serialized origin.
1289
1546
  * @see https://url.spec.whatwg.org/#concept-url-origin
1290
1547
  */
1291
- [[nodiscard]] virtual std::string get_origin() const noexcept = 0;
1548
+ [[nodiscard]] virtual std::string get_origin() const = 0;
1292
1549
 
1293
1550
  /**
1294
- * Returns true if this URL has a valid domain as per RFC 1034 and
1295
- * corresponding specifications. Among other things, it requires
1296
- * that the domain string has fewer than 255 octets.
1551
+ * Validates whether the hostname is a valid domain according to RFC 1034.
1552
+ * Checks that the domain and its labels have valid lengths.
1553
+ * @return `true` if the domain is valid, `false` otherwise.
1297
1554
  */
1298
1555
  [[nodiscard]] virtual bool has_valid_domain() const noexcept = 0;
1299
1556
 
1300
1557
  /**
1301
1558
  * @private
1302
- *
1303
- * Return the 'special port' if the URL is special and not 'file'.
1304
- * Returns 0 otherwise.
1559
+ * Returns the default port for special schemes (e.g., 443 for https).
1560
+ * Returns 0 for file:// URLs or non-special schemes.
1305
1561
  */
1306
1562
  [[nodiscard]] inline uint16_t get_special_port() const noexcept;
1307
1563
 
1308
1564
  /**
1309
1565
  * @private
1310
- *
1311
- * Get the default port if the url's scheme has one, returns 0 otherwise.
1566
+ * Returns the default port for the URL's scheme, or 0 if none.
1312
1567
  */
1313
1568
  [[nodiscard]] ada_really_inline uint16_t scheme_default_port() const noexcept;
1314
1569
 
1315
1570
  /**
1316
1571
  * @private
1317
- *
1318
- * Parse a port (16-bit decimal digit) from the provided input.
1319
- * We assume that the input does not contain spaces or tabs
1320
- * within the ASCII digits.
1321
- * It returns how many bytes were consumed when a number is successfully
1322
- * parsed.
1323
- * @return On failure, it returns zero.
1324
- * @see https://url.spec.whatwg.org/#host-parsing
1572
+ * Parses a port number from the input string.
1573
+ * @param view The string containing the port to parse.
1574
+ * @param check_trailing_content Whether to validate no trailing characters.
1575
+ * @return Number of bytes consumed on success, 0 on failure.
1325
1576
  */
1326
1577
  virtual size_t parse_port(std::string_view view,
1327
- bool check_trailing_content) noexcept = 0;
1578
+ bool check_trailing_content) = 0;
1328
1579
 
1329
- virtual ada_really_inline size_t parse_port(std::string_view view) noexcept {
1580
+ /** @private */
1581
+ virtual ada_really_inline size_t parse_port(std::string_view view) {
1330
1582
  return this->parse_port(view, false);
1331
1583
  }
1332
1584
 
1333
1585
  /**
1334
- * Returns a JSON string representation of this URL.
1586
+ * Returns a JSON string representation of this URL for debugging.
1587
+ * @return A JSON-formatted string with URL information.
1335
1588
  */
1336
1589
  [[nodiscard]] virtual std::string to_string() const = 0;
1337
1590
 
@@ -1400,8 +1653,7 @@ ada_really_inline std::optional<std::string_view> prune_hash(
1400
1653
  * @see https://url.spec.whatwg.org/#shorten-a-urls-path
1401
1654
  * @returns Returns true if path is shortened.
1402
1655
  */
1403
- ada_really_inline bool shorten_path(std::string& path,
1404
- ada::scheme::type type) noexcept;
1656
+ ada_really_inline bool shorten_path(std::string& path, ada::scheme::type type);
1405
1657
 
1406
1658
  /**
1407
1659
  * @private
@@ -1410,7 +1662,7 @@ ada_really_inline bool shorten_path(std::string& path,
1410
1662
  * @returns Returns true if path is shortened.
1411
1663
  */
1412
1664
  ada_really_inline bool shorten_path(std::string_view& path,
1413
- ada::scheme::type type) noexcept;
1665
+ ada::scheme::type type);
1414
1666
 
1415
1667
  /**
1416
1668
  * @private
@@ -1431,15 +1683,14 @@ ada_really_inline void parse_prepared_path(std::string_view input,
1431
1683
  * @private
1432
1684
  * Remove and mutate all ASCII tab or newline characters from an input.
1433
1685
  */
1434
- ada_really_inline void remove_ascii_tab_or_newline(std::string& input) noexcept;
1686
+ ada_really_inline void remove_ascii_tab_or_newline(std::string& input);
1435
1687
 
1436
1688
  /**
1437
1689
  * @private
1438
1690
  * Return the substring from input going from index pos to the end.
1439
- * This function cannot throw.
1440
1691
  */
1441
1692
  ada_really_inline constexpr std::string_view substring(std::string_view input,
1442
- size_t pos) noexcept;
1693
+ size_t pos);
1443
1694
 
1444
1695
  /**
1445
1696
  * @private
@@ -1454,7 +1705,7 @@ bool overlaps(std::string_view input1, const std::string& input2) noexcept;
1454
1705
  */
1455
1706
  ada_really_inline constexpr std::string_view substring(std::string_view input,
1456
1707
  size_t pos1,
1457
- size_t pos2) noexcept {
1708
+ size_t pos2) {
1458
1709
  #if ADA_DEVELOPMENT_CHECKS
1459
1710
  if (pos2 < pos1) {
1460
1711
  std::cerr << "Negative-length substring: [" << pos1 << " to " << pos2 << ")"
@@ -1493,8 +1744,7 @@ void trim_c0_whitespace(std::string_view& input) noexcept;
1493
1744
  * https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
1494
1745
  */
1495
1746
  template <class url_type>
1496
- ada_really_inline void strip_trailing_spaces_from_opaque_path(
1497
- url_type& url) noexcept;
1747
+ ada_really_inline void strip_trailing_spaces_from_opaque_path(url_type& url);
1498
1748
 
1499
1749
  /**
1500
1750
  * @private
@@ -1584,7 +1834,13 @@ inline int fast_digit_count(uint32_t x) noexcept {
1584
1834
  /* begin file include/ada/parser.h */
1585
1835
  /**
1586
1836
  * @file parser.h
1587
- * @brief Definitions for the parser.
1837
+ * @brief Low-level URL parsing functions.
1838
+ *
1839
+ * This header provides the internal URL parsing implementation. Most users
1840
+ * should use `ada::parse()` from implementation.h instead of these functions
1841
+ * directly.
1842
+ *
1843
+ * @see implementation.h for the recommended public API
1588
1844
  */
1589
1845
  #ifndef ADA_PARSER_H
1590
1846
  #define ADA_PARSER_H
@@ -2328,6 +2584,7 @@ struct expected_operations_base : expected_storage_base<T, E> {
2328
2584
  }
2329
2585
 
2330
2586
  template <class Rhs>
2587
+ // NOLINTNEXTLINE(bugprone-exception-escape)
2331
2588
  void construct_with(Rhs &&rhs) noexcept {
2332
2589
  new (std::addressof(this->m_val)) T(std::forward<Rhs>(rhs).get());
2333
2590
  this->m_has_val = true;
@@ -4113,6 +4370,7 @@ void swap(expected<T, E> &lhs,
4113
4370
 
4114
4371
  #endif
4115
4372
  /* end file include/ada/expected.h */
4373
+
4116
4374
  /* begin file include/ada/url_pattern_regex.h */
4117
4375
  /**
4118
4376
  * @file url_search_params.h
@@ -4128,6 +4386,7 @@ void swap(expected<T, E> &lhs,
4128
4386
  #include <regex>
4129
4387
  #endif // ADA_USE_UNSAFE_STD_REGEX_PROVIDER
4130
4388
 
4389
+ #if ADA_INCLUDE_URL_PATTERN
4131
4390
  namespace ada::url_pattern_regex {
4132
4391
 
4133
4392
  template <typename T>
@@ -4172,7 +4431,7 @@ class std_regex_provider final {
4172
4431
  #endif // ADA_USE_UNSAFE_STD_REGEX_PROVIDER
4173
4432
 
4174
4433
  } // namespace ada::url_pattern_regex
4175
-
4434
+ #endif // ADA_INCLUDE_URL_PATTERN
4176
4435
  #endif // ADA_URL_PATTERN_REGEX_H
4177
4436
  /* end file include/ada/url_pattern_regex.h */
4178
4437
  /* begin file include/ada/url_pattern_init.h */
@@ -4186,14 +4445,23 @@ class std_regex_provider final {
4186
4445
  /* begin file include/ada/errors.h */
4187
4446
  /**
4188
4447
  * @file errors.h
4189
- * @brief Definitions for the errors.
4448
+ * @brief Error type definitions for URL parsing.
4449
+ *
4450
+ * Defines the error codes that can be returned when URL parsing fails.
4190
4451
  */
4191
4452
  #ifndef ADA_ERRORS_H
4192
4453
  #define ADA_ERRORS_H
4193
4454
 
4194
4455
  #include <cstdint>
4195
4456
  namespace ada {
4196
- enum class errors : uint8_t { type_error };
4457
+ /**
4458
+ * @brief Error codes for URL parsing operations.
4459
+ *
4460
+ * Used with `tl::expected` to indicate why a URL parsing operation failed.
4461
+ */
4462
+ enum class errors : uint8_t {
4463
+ type_error /**< A type error occurred (e.g., invalid URL syntax). */
4464
+ };
4197
4465
  } // namespace ada
4198
4466
  #endif // ADA_ERRORS_H
4199
4467
  /* end file include/ada/errors.h */
@@ -4201,11 +4469,13 @@ enum class errors : uint8_t { type_error };
4201
4469
  #include <string_view>
4202
4470
  #include <string>
4203
4471
  #include <optional>
4472
+ #include <iostream>
4204
4473
 
4205
4474
  #if ADA_TESTING
4206
4475
  #include <iostream>
4207
4476
  #endif // ADA_TESTING
4208
4477
 
4478
+ #if ADA_INCLUDE_URL_PATTERN
4209
4479
  namespace ada {
4210
4480
 
4211
4481
  // Important: C++20 allows us to use concept rather than `using` or `typedef
@@ -4229,10 +4499,21 @@ struct url_pattern_init {
4229
4499
  pattern,
4230
4500
  };
4231
4501
 
4502
+ friend std::ostream& operator<<(std::ostream& os, process_type type) {
4503
+ switch (type) {
4504
+ case process_type::url:
4505
+ return os << "url";
4506
+ case process_type::pattern:
4507
+ return os << "pattern";
4508
+ default:
4509
+ return os << "unknown";
4510
+ }
4511
+ }
4512
+
4232
4513
  // All strings must be valid UTF-8.
4233
4514
  // @see https://urlpattern.spec.whatwg.org/#process-a-urlpatterninit
4234
4515
  static tl::expected<url_pattern_init, errors> process(
4235
- url_pattern_init init, process_type type,
4516
+ const url_pattern_init& init, process_type type,
4236
4517
  std::optional<std::string_view> protocol = std::nullopt,
4237
4518
  std::optional<std::string_view> username = std::nullopt,
4238
4519
  std::optional<std::string_view> password = std::nullopt,
@@ -4309,32 +4590,42 @@ struct url_pattern_init {
4309
4590
  std::optional<std::string> base_url{};
4310
4591
  };
4311
4592
  } // namespace ada
4312
-
4593
+ #endif // ADA_INCLUDE_URL_PATTERN
4313
4594
  #endif // ADA_URL_PATTERN_INIT_H
4314
4595
  /* end file include/ada/url_pattern_init.h */
4315
4596
 
4316
- /**
4317
- * @private
4318
- */
4597
+ /** @private Forward declarations */
4319
4598
  namespace ada {
4320
4599
  struct url_aggregator;
4321
4600
  struct url;
4601
+ #if ADA_INCLUDE_URL_PATTERN
4322
4602
  template <url_pattern_regex::regex_concept regex_provider>
4323
4603
  class url_pattern;
4324
4604
  struct url_pattern_options;
4605
+ #endif // ADA_INCLUDE_URL_PATTERN
4325
4606
  enum class errors : uint8_t;
4326
4607
  } // namespace ada
4327
4608
 
4328
4609
  /**
4329
4610
  * @namespace ada::parser
4330
- * @brief Includes the definitions for supported parsers
4611
+ * @brief Internal URL parsing implementation.
4612
+ *
4613
+ * Contains the core URL parsing algorithm as specified by the WHATWG URL
4614
+ * Standard. These functions are used internally by `ada::parse()`.
4331
4615
  */
4332
4616
  namespace ada::parser {
4333
4617
  /**
4334
- * Parses a url. The parameter user_input is the input to be parsed:
4335
- * it should be a valid UTF-8 string. The parameter base_url is an optional
4336
- * parameter that can be used to resolve relative URLs. If the base_url is
4337
- * provided, the user_input is resolved against the base_url.
4618
+ * Parses a URL string into a URL object.
4619
+ *
4620
+ * @tparam result_type The type of URL object to create (url or url_aggregator).
4621
+ *
4622
+ * @param user_input The URL string to parse (must be valid UTF-8).
4623
+ * @param base_url Optional base URL for resolving relative URLs.
4624
+ *
4625
+ * @return The parsed URL object. Check `is_valid` to determine if parsing
4626
+ * succeeded.
4627
+ *
4628
+ * @see https://url.spec.whatwg.org/#concept-basic-url-parser
4338
4629
  */
4339
4630
  template <typename result_type = url_aggregator>
4340
4631
  result_type parse_url(std::string_view user_input,
@@ -4354,10 +4645,12 @@ extern template url_aggregator parse_url_impl<url_aggregator>(
4354
4645
  extern template url parse_url_impl<url>(std::string_view user_input,
4355
4646
  const url* base_url);
4356
4647
 
4648
+ #if ADA_INCLUDE_URL_PATTERN
4357
4649
  template <url_pattern_regex::regex_concept regex_provider>
4358
4650
  tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
4359
- std::variant<std::string_view, url_pattern_init> input,
4651
+ std::variant<std::string_view, url_pattern_init>&& input,
4360
4652
  const std::string_view* base_url, const url_pattern_options* options);
4653
+ #endif // ADA_INCLUDE_URL_PATTERN
4361
4654
 
4362
4655
  } // namespace ada::parser
4363
4656
 
@@ -4373,7 +4666,14 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
4373
4666
  /* begin file include/ada/url_pattern.h */
4374
4667
  /**
4375
4668
  * @file url_pattern.h
4376
- * @brief Declaration for the URLPattern implementation.
4669
+ * @brief URLPattern API implementation.
4670
+ *
4671
+ * This header provides the URLPattern API as specified by the WHATWG URL
4672
+ * Pattern Standard. URLPattern allows matching URLs against patterns with
4673
+ * wildcards and named groups, similar to how regular expressions match strings.
4674
+ *
4675
+ * @see https://urlpattern.spec.whatwg.org/
4676
+ * @see https://developer.mozilla.org/en-US/docs/Web/API/URL_Pattern_API
4377
4677
  */
4378
4678
  #ifndef ADA_URL_PATTERN_H
4379
4679
  #define ADA_URL_PATTERN_H
@@ -4381,8 +4681,13 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
4381
4681
  /* begin file include/ada/implementation.h */
4382
4682
  /**
4383
4683
  * @file implementation.h
4384
- * @brief Definitions for user facing functions for parsing URL and it's
4385
- * components.
4684
+ * @brief User-facing functions for URL parsing and manipulation.
4685
+ *
4686
+ * This header provides the primary public API for parsing URLs in Ada.
4687
+ * It includes the main `ada::parse()` function which is the recommended
4688
+ * entry point for most users.
4689
+ *
4690
+ * @see https://url.spec.whatwg.org/#api
4386
4691
  */
4387
4692
  #ifndef ADA_IMPLEMENTATION_H
4388
4693
  #define ADA_IMPLEMENTATION_H
@@ -4394,7 +4699,13 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
4394
4699
  /* begin file include/ada/url.h */
4395
4700
  /**
4396
4701
  * @file url.h
4397
- * @brief Declaration for the URL
4702
+ * @brief Declaration for the `ada::url` class.
4703
+ *
4704
+ * This file contains the `ada::url` struct which represents a parsed URL
4705
+ * using separate `std::string` instances for each component. This
4706
+ * representation is more flexible but uses more memory than `url_aggregator`.
4707
+ *
4708
+ * @see url_aggregator.h for a more memory-efficient alternative
4398
4709
  */
4399
4710
  #ifndef ADA_URL_H
4400
4711
  #define ADA_URL_H
@@ -4405,127 +4716,14 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
4405
4716
  #include <string>
4406
4717
  #include <string_view>
4407
4718
 
4408
- /* begin file include/ada/checkers.h */
4409
- /**
4410
- * @file checkers.h
4411
- * @brief Declarations for URL specific checkers used within Ada.
4412
- */
4413
- #ifndef ADA_CHECKERS_H
4414
- #define ADA_CHECKERS_H
4415
-
4416
-
4417
- #include <cstring>
4418
- #include <string_view>
4419
-
4420
- /**
4421
- * These functions are not part of our public API and may
4422
- * change at any time.
4423
- * @private
4424
- * @namespace ada::checkers
4425
- * @brief Includes the definitions for validation functions
4426
- */
4427
- namespace ada::checkers {
4428
-
4429
- /**
4430
- * @private
4431
- * Assuming that x is an ASCII letter, this function returns the lower case
4432
- * equivalent.
4433
- * @details More likely to be inlined by the compiler and constexpr.
4434
- */
4435
- constexpr char to_lower(char x) noexcept;
4436
-
4437
- /**
4438
- * @private
4439
- * Returns true if the character is an ASCII letter. Equivalent to std::isalpha
4440
- * but more likely to be inlined by the compiler.
4441
- *
4442
- * @attention std::isalpha is not constexpr generally.
4443
- */
4444
- constexpr bool is_alpha(char x) noexcept;
4445
-
4446
- /**
4447
- * @private
4448
- * Check whether a string starts with 0x or 0X. The function is only
4449
- * safe if input.size() >=2.
4450
- *
4451
- * @see has_hex_prefix
4452
- */
4453
- constexpr bool has_hex_prefix_unsafe(std::string_view input);
4454
- /**
4455
- * @private
4456
- * Check whether a string starts with 0x or 0X.
4457
- */
4458
- constexpr bool has_hex_prefix(std::string_view input);
4459
-
4460
- /**
4461
- * @private
4462
- * Check whether x is an ASCII digit. More likely to be inlined than
4463
- * std::isdigit.
4464
- */
4465
- constexpr bool is_digit(char x) noexcept;
4466
-
4467
- /**
4468
- * @private
4469
- * @details A string starts with a Windows drive letter if all of the following
4470
- * are true:
4471
- *
4472
- * - its length is greater than or equal to 2
4473
- * - its first two code points are a Windows drive letter
4474
- * - its length is 2 or its third code point is U+002F (/), U+005C (\), U+003F
4475
- * (?), or U+0023 (#).
4476
- *
4477
- * https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
4478
- */
4479
- inline constexpr bool is_windows_drive_letter(std::string_view input) noexcept;
4480
-
4481
- /**
4482
- * @private
4483
- * @details A normalized Windows drive letter is a Windows drive letter of which
4484
- * the second code point is U+003A (:).
4485
- */
4486
- inline constexpr bool is_normalized_windows_drive_letter(
4487
- std::string_view input) noexcept;
4488
-
4489
- /**
4490
- * @private
4491
- * Returns true if an input is an ipv4 address. It is assumed that the string
4492
- * does not contain uppercase ASCII characters (the input should have been
4493
- * lowered cased before calling this function) and is not empty.
4494
- */
4495
- ada_really_inline constexpr bool is_ipv4(std::string_view view) noexcept;
4496
-
4497
- /**
4498
- * @private
4499
- * Returns a bitset. If the first bit is set, then at least one character needs
4500
- * percent encoding. If the second bit is set, a \\ is found. If the third bit
4501
- * is set then we have a dot. If the fourth bit is set, then we have a percent
4502
- * character.
4503
- */
4504
- ada_really_inline constexpr uint8_t path_signature(
4505
- std::string_view input) noexcept;
4506
-
4507
- /**
4508
- * @private
4509
- * Returns true if the length of the domain name and its labels are according to
4510
- * the specifications. The length of the domain must be 255 octets (253
4511
- * characters not including the last 2 which are the empty label reserved at the
4512
- * end). When the empty label is included (a dot at the end), the domain name
4513
- * can have 254 characters. The length of a label must be at least 1 and at most
4514
- * 63 characters.
4515
- * @see section 3.1. of https://www.rfc-editor.org/rfc/rfc1034
4516
- * @see https://www.unicode.org/reports/tr46/#ToASCII
4517
- */
4518
- ada_really_inline constexpr bool verify_dns_length(
4519
- std::string_view input) noexcept;
4520
-
4521
- } // namespace ada::checkers
4522
-
4523
- #endif // ADA_CHECKERS_H
4524
- /* end file include/ada/checkers.h */
4525
4719
  /* begin file include/ada/url_components.h */
4526
4720
  /**
4527
4721
  * @file url_components.h
4528
- * @brief Declaration for the URL Components
4722
+ * @brief URL component offset representation for url_aggregator.
4723
+ *
4724
+ * This file defines the `url_components` struct which stores byte offsets
4725
+ * into a URL string buffer. It is used internally by `url_aggregator` to
4726
+ * efficiently locate URL components without storing separate strings.
4529
4727
  */
4530
4728
  #ifndef ADA_URL_COMPONENTS_H
4531
4729
  #define ADA_URL_COMPONENTS_H
@@ -4533,14 +4731,32 @@ ada_really_inline constexpr bool verify_dns_length(
4533
4731
  namespace ada {
4534
4732
 
4535
4733
  /**
4536
- * @brief URL Component representations using offsets.
4734
+ * @brief Stores byte offsets for URL components within a buffer.
4537
4735
  *
4538
- * @details We design the url_components struct so that it is as small
4539
- * and simple as possible. This version uses 32 bytes.
4736
+ * The `url_components` struct uses 32-bit offsets to track the boundaries
4737
+ * of each URL component within a single string buffer. This enables efficient
4738
+ * component extraction without additional memory allocations.
4540
4739
  *
4541
- * This struct is used to extract components from a single 'href'.
4740
+ * Component layout in a URL:
4741
+ * ```
4742
+ * https://user:pass@example.com:1234/foo/bar?baz#quux
4743
+ *       |     |    |          | ^^^^|       |   |
4744
+ *       |     |    |          | |   |       |   `----- hash_start
4745
+ *       |     |    |          | |   |       `--------- search_start
4746
+ *       |     |    |          | |   `----------------- pathname_start
4747
+ *       |     |    |          | `--------------------- port
4748
+ *       |     |    |          `----------------------- host_end
4749
+ *       |     |    `---------------------------------- host_start
4750
+ *       |     `--------------------------------------- username_end
4751
+ *       `--------------------------------------------- protocol_end
4752
+ * ```
4753
+ *
4754
+ * @note The 32-bit offsets limit URLs to 4GB in length.
4755
+ * @note A value of `omitted` (UINT32_MAX) indicates the component is not
4756
+ * present.
4542
4757
  */
4543
4758
  struct url_components {
4759
+ /** Sentinel value indicating a component is not present. */
4544
4760
  constexpr static uint32_t omitted = uint32_t(-1);
4545
4761
 
4546
4762
  url_components() = default;
@@ -4550,47 +4766,43 @@ struct url_components {
4550
4766
  url_components &operator=(const url_components &u) = default;
4551
4767
  ~url_components() = default;
4552
4768
 
4553
- /*
4554
- * By using 32-bit integers, we implicitly assume that the URL string
4555
- * cannot exceed 4 GB.
4556
- *
4557
- * https://user:pass@example.com:1234/foo/bar?baz#quux
4558
- *       |     |    |          | ^^^^|       |   |
4559
- *       |     |    |          | |   |       |   `----- hash_start
4560
- *       |     |    |          | |   |       `--------- search_start
4561
- *       |     |    |          | |   `----------------- pathname_start
4562
- *       |     |    |          | `--------------------- port
4563
- *       |     |    |          `----------------------- host_end
4564
- *       |     |    `---------------------------------- host_start
4565
- *       |     `--------------------------------------- username_end
4566
- *       `--------------------------------------------- protocol_end
4567
- */
4769
+ /** Offset of the end of the protocol/scheme (position of ':'). */
4568
4770
  uint32_t protocol_end{0};
4771
+
4569
4772
  /**
4570
- * Username end is not `omitted` by default to make username and password
4571
- * getters less costly to implement.
4773
+ * Offset of the end of the username.
4774
+ * Initialized to 0 (not `omitted`) to simplify username/password getters.
4572
4775
  */
4573
4776
  uint32_t username_end{0};
4777
+
4778
+ /** Offset of the start of the host. */
4574
4779
  uint32_t host_start{0};
4780
+
4781
+ /** Offset of the end of the host. */
4575
4782
  uint32_t host_end{0};
4783
+
4784
+ /** Port number, or `omitted` if no port is specified. */
4576
4785
  uint32_t port{omitted};
4786
+
4787
+ /** Offset of the start of the pathname. */
4577
4788
  uint32_t pathname_start{0};
4789
+
4790
+ /** Offset of the '?' starting the query, or `omitted` if no query. */
4578
4791
  uint32_t search_start{omitted};
4792
+
4793
+ /** Offset of the '#' starting the fragment, or `omitted` if no fragment. */
4579
4794
  uint32_t hash_start{omitted};
4580
4795
 
4581
4796
  /**
4582
- * Check the following conditions:
4583
- * protocol_end < username_end < ... < hash_start,
4584
- * expect when a value is omitted. It also computes
4585
- * a lower bound on the possible string length that may match these
4586
- * offsets.
4587
- * @return true if the offset values are
4588
- * consistent with a possible URL string
4797
+ * Validates that offsets are in ascending order and consistent.
4798
+ * Useful for debugging to detect internal corruption.
4799
+ * @return `true` if offsets are consistent, `false` otherwise.
4589
4800
  */
4590
4801
  [[nodiscard]] constexpr bool check_offset_consistency() const noexcept;
4591
4802
 
4592
4803
  /**
4593
- * Converts a url_components to JSON stringified version.
4804
+ * Returns a JSON string representation of the offsets for debugging.
4805
+ * @return A JSON-formatted string with all offset values.
4594
4806
  */
4595
4807
  [[nodiscard]] std::string to_string() const;
4596
4808
 
@@ -4613,15 +4825,26 @@ struct url_aggregator;
4613
4825
  // }
4614
4826
 
4615
4827
  /**
4616
- * @brief Generic URL struct reliant on std::string instantiation.
4828
+ * @brief Represents a parsed URL with individual string components.
4617
4829
  *
4618
- * @details To disambiguate from a valid URL string it can also be referred to
4619
- * as a URL record. A URL is a struct that represents a universal identifier.
4620
- * Unlike the url_aggregator, the ada::url represents the different components
4621
- * of a parsed URL as independent std::string instances. This makes the
4622
- * structure heavier and more reliant on memory allocations. When getting
4623
- * components from the parsed URL, a new std::string is typically constructed.
4830
+ * The `url` struct stores each URL component (scheme, username, password,
4831
+ * host, port, path, query, fragment) as a separate `std::string`. This
4832
+ * provides flexibility but incurs more memory allocations compared to
4833
+ * `url_aggregator`.
4624
4834
  *
4835
+ * **When to use `ada::url`:**
4836
+ * - When you need to frequently modify individual URL components
4837
+ * - When you want independent ownership of component strings
4838
+ *
4839
+ * **When to use `ada::url_aggregator` instead:**
4840
+ * - For read-mostly operations on parsed URLs
4841
+ * - When memory efficiency is important
4842
+ * - When you only need string_view access to components
4843
+ *
4844
+ * @note This type is returned when parsing with `ada::parse<ada::url>()`.
4845
+ * By default, `ada::parse()` returns `ada::url_aggregator`.
4846
+ *
4847
+ * @see url_aggregator For a more memory-efficient URL representation
4625
4848
  * @see https://url.spec.whatwg.org/#url-representation
4626
4849
  */
4627
4850
  struct url : url_base {
@@ -4680,177 +4903,217 @@ struct url : url_base {
4680
4903
  */
4681
4904
  std::optional<std::string> hash{};
4682
4905
 
4683
- /** @return true if it has an host but it is the empty string */
4906
+ /**
4907
+ * Checks if the URL has an empty hostname (host is set but empty string).
4908
+ * @return `true` if host exists but is empty, `false` otherwise.
4909
+ */
4684
4910
  [[nodiscard]] inline bool has_empty_hostname() const noexcept;
4685
- /** @return true if the URL has a (non default) port */
4911
+
4912
+ /**
4913
+ * Checks if the URL has a non-default port explicitly specified.
4914
+ * @return `true` if a port is present, `false` otherwise.
4915
+ */
4686
4916
  [[nodiscard]] inline bool has_port() const noexcept;
4687
- /** @return true if it has a host (included an empty host) */
4917
+
4918
+ /**
4919
+ * Checks if the URL has a hostname (including empty hostnames).
4920
+ * @return `true` if host is present, `false` otherwise.
4921
+ */
4688
4922
  [[nodiscard]] inline bool has_hostname() const noexcept;
4923
+
4924
+ /**
4925
+ * Validates whether the hostname is a valid domain according to RFC 1034.
4926
+ * Checks that the domain and its labels have valid lengths (max 255 octets
4927
+ * total, max 63 octets per label).
4928
+ * @return `true` if the domain is valid, `false` otherwise.
4929
+ */
4689
4930
  [[nodiscard]] bool has_valid_domain() const noexcept override;
4690
4931
 
4691
4932
  /**
4692
- * Returns a JSON string representation of this URL.
4933
+ * Returns a JSON string representation of this URL for debugging.
4934
+ * @return A JSON-formatted string with all URL components.
4693
4935
  */
4694
4936
  [[nodiscard]] std::string to_string() const override;
4695
4937
 
4696
4938
  /**
4939
+ * Returns the full serialized URL (the href).
4940
+ * @return The complete URL string (allocates a new string).
4697
4941
  * @see https://url.spec.whatwg.org/#dom-url-href
4698
- * @see https://url.spec.whatwg.org/#concept-url-serializer
4699
4942
  */
4700
- [[nodiscard]] ada_really_inline std::string get_href() const noexcept;
4943
+ [[nodiscard]] ada_really_inline std::string get_href() const;
4701
4944
 
4702
4945
  /**
4703
- * The origin getter steps are to return the serialization of this's URL's
4704
- * origin. [HTML]
4705
- * @return a newly allocated string.
4946
+ * Returns the URL's origin as a string (scheme + host + port for special
4947
+ * URLs).
4948
+ * @return A newly allocated string containing the serialized origin.
4706
4949
  * @see https://url.spec.whatwg.org/#concept-url-origin
4707
4950
  */
4708
- [[nodiscard]] std::string get_origin() const noexcept override;
4951
+ [[nodiscard]] std::string get_origin() const override;
4709
4952
 
4710
4953
  /**
4711
- * The protocol getter steps are to return this's URL's scheme, followed by
4712
- * U+003A (:).
4713
- * @return a newly allocated string.
4954
+ * Returns the URL's scheme followed by a colon (e.g., "https:").
4955
+ * @return A newly allocated string with the protocol.
4714
4956
  * @see https://url.spec.whatwg.org/#dom-url-protocol
4715
4957
  */
4716
- [[nodiscard]] std::string get_protocol() const noexcept;
4958
+ [[nodiscard]] std::string get_protocol() const;
4717
4959
 
4718
4960
  /**
4719
- * Return url's host, serialized, followed by U+003A (:) and url's port,
4720
- * serialized.
4721
- * When there is no host, this function returns the empty string.
4722
- * @return a newly allocated string.
4961
+ * Returns the URL's host and port (e.g., "example.com:8080").
4962
+ * If no port is set, returns just the host. Returns empty string if no host.
4963
+ * @return A newly allocated string with host:port.
4723
4964
  * @see https://url.spec.whatwg.org/#dom-url-host
4724
4965
  */
4725
- [[nodiscard]] std::string get_host() const noexcept;
4966
+ [[nodiscard]] std::string get_host() const;
4726
4967
 
4727
4968
  /**
4728
- * Return this's URL's host, serialized.
4729
- * When there is no host, this function returns the empty string.
4730
- * @return a newly allocated string.
4969
+ * Returns the URL's hostname (without port).
4970
+ * Returns empty string if no host is set.
4971
+ * @return A newly allocated string with the hostname.
4731
4972
  * @see https://url.spec.whatwg.org/#dom-url-hostname
4732
4973
  */
4733
- [[nodiscard]] std::string get_hostname() const noexcept;
4974
+ [[nodiscard]] std::string get_hostname() const;
4734
4975
 
4735
4976
  /**
4736
- * The pathname getter steps are to return the result of URL path serializing
4737
- * this's URL.
4738
- * @return a newly allocated string.
4977
+ * Returns the URL's path component.
4978
+ * @return A string_view pointing to the path.
4739
4979
  * @see https://url.spec.whatwg.org/#dom-url-pathname
4740
4980
  */
4741
4981
  [[nodiscard]] constexpr std::string_view get_pathname() const noexcept;
4742
4982
 
4743
4983
  /**
4744
- * Compute the pathname length in bytes without instantiating a view or a
4745
- * string.
4746
- * @return size of the pathname in bytes
4984
+ * Returns the byte length of the pathname without creating a string.
4985
+ * @return Size of the pathname in bytes.
4747
4986
  * @see https://url.spec.whatwg.org/#dom-url-pathname
4748
4987
  */
4749
4988
  [[nodiscard]] ada_really_inline size_t get_pathname_length() const noexcept;
4750
4989
 
4751
4990
  /**
4752
- * Return U+003F (?), followed by this's URL's query.
4753
- * @return a newly allocated string.
4991
+ * Returns the URL's query string prefixed with '?' (e.g., "?foo=bar").
4992
+ * Returns empty string if no query is set.
4993
+ * @return A newly allocated string with the search/query.
4754
4994
  * @see https://url.spec.whatwg.org/#dom-url-search
4755
4995
  */
4756
- [[nodiscard]] std::string get_search() const noexcept;
4996
+ [[nodiscard]] std::string get_search() const;
4757
4997
 
4758
4998
  /**
4759
- * The username getter steps are to return this's URL's username.
4760
- * @return a constant reference to the underlying string.
4999
+ * Returns the URL's username component.
5000
+ * @return A constant reference to the username string.
4761
5001
  * @see https://url.spec.whatwg.org/#dom-url-username
4762
5002
  */
4763
5003
  [[nodiscard]] const std::string &get_username() const noexcept;
4764
5004
 
4765
5005
  /**
4766
- * @return Returns true on successful operation.
5006
+ * Sets the URL's username, percent-encoding special characters.
5007
+ * @param input The new username value.
5008
+ * @return `true` on success, `false` if the URL cannot have credentials.
4767
5009
  * @see https://url.spec.whatwg.org/#dom-url-username
4768
5010
  */
4769
5011
  bool set_username(std::string_view input);
4770
5012
 
4771
5013
  /**
4772
- * @return Returns true on success.
5014
+ * Sets the URL's password, percent-encoding special characters.
5015
+ * @param input The new password value.
5016
+ * @return `true` on success, `false` if the URL cannot have credentials.
4773
5017
  * @see https://url.spec.whatwg.org/#dom-url-password
4774
5018
  */
4775
5019
  bool set_password(std::string_view input);
4776
5020
 
4777
5021
  /**
4778
- * @return Returns true on success.
5022
+ * Sets the URL's port from a string (e.g., "8080").
5023
+ * @param input The port string. Empty string removes the port.
5024
+ * @return `true` on success, `false` if the URL cannot have a port.
4779
5025
  * @see https://url.spec.whatwg.org/#dom-url-port
4780
5026
  */
4781
5027
  bool set_port(std::string_view input);
4782
5028
 
4783
5029
  /**
4784
- * This function always succeeds.
5030
+ * Sets the URL's fragment/hash (the part after '#').
5031
+ * @param input The new hash value (with or without leading '#').
4785
5032
  * @see https://url.spec.whatwg.org/#dom-url-hash
4786
5033
  */
4787
5034
  void set_hash(std::string_view input);
4788
5035
 
4789
5036
  /**
4790
- * This function always succeeds.
5037
+ * Sets the URL's query string (the part after '?').
5038
+ * @param input The new query value (with or without leading '?').
4791
5039
  * @see https://url.spec.whatwg.org/#dom-url-search
4792
5040
  */
4793
5041
  void set_search(std::string_view input);
4794
5042
 
4795
5043
  /**
4796
- * @return Returns true on success.
4797
- * @see https://url.spec.whatwg.org/#dom-url-search
5044
+ * Sets the URL's pathname.
5045
+ * @param input The new path value.
5046
+ * @return `true` on success, `false` if the URL has an opaque path.
5047
+ * @see https://url.spec.whatwg.org/#dom-url-pathname
4798
5048
  */
4799
5049
  bool set_pathname(std::string_view input);
4800
5050
 
4801
5051
  /**
4802
- * @return Returns true on success.
5052
+ * Sets the URL's host (hostname and optionally port).
5053
+ * @param input The new host value (e.g., "example.com:8080").
5054
+ * @return `true` on success, `false` if parsing fails.
4803
5055
  * @see https://url.spec.whatwg.org/#dom-url-host
4804
5056
  */
4805
5057
  bool set_host(std::string_view input);
4806
5058
 
4807
5059
  /**
4808
- * @return Returns true on success.
5060
+ * Sets the URL's hostname (without port).
5061
+ * @param input The new hostname value.
5062
+ * @return `true` on success, `false` if parsing fails.
4809
5063
  * @see https://url.spec.whatwg.org/#dom-url-hostname
4810
5064
  */
4811
5065
  bool set_hostname(std::string_view input);
4812
5066
 
4813
5067
  /**
4814
- * @return Returns true on success.
5068
+ * Sets the URL's protocol/scheme.
5069
+ * @param input The new protocol (with or without trailing ':').
5070
+ * @return `true` on success, `false` if the scheme is invalid.
4815
5071
  * @see https://url.spec.whatwg.org/#dom-url-protocol
4816
5072
  */
4817
5073
  bool set_protocol(std::string_view input);
4818
5074
 
4819
5075
  /**
5076
+ * Replaces the entire URL by parsing a new href string.
5077
+ * @param input The new URL string to parse.
5078
+ * @return `true` on success, `false` if parsing fails.
4820
5079
  * @see https://url.spec.whatwg.org/#dom-url-href
4821
5080
  */
4822
5081
  bool set_href(std::string_view input);
4823
5082
 
4824
5083
  /**
4825
- * The password getter steps are to return this's URL's password.
4826
- * @return a constant reference to the underlying string.
5084
+ * Returns the URL's password component.
5085
+ * @return A constant reference to the password string.
4827
5086
  * @see https://url.spec.whatwg.org/#dom-url-password
4828
5087
  */
4829
5088
  [[nodiscard]] const std::string &get_password() const noexcept;
4830
5089
 
4831
5090
  /**
4832
- * Return this's URL's port, serialized.
4833
- * @return a newly constructed string representing the port.
5091
+ * Returns the URL's port as a string (e.g., "8080").
5092
+ * Returns empty string if no port is set.
5093
+ * @return A newly allocated string with the port.
4834
5094
  * @see https://url.spec.whatwg.org/#dom-url-port
4835
5095
  */
4836
- [[nodiscard]] std::string get_port() const noexcept;
5096
+ [[nodiscard]] std::string get_port() const;
4837
5097
 
4838
5098
  /**
4839
- * Return U+0023 (#), followed by this's URL's fragment.
4840
- * @return a newly constructed string representing the hash.
5099
+ * Returns the URL's fragment prefixed with '#' (e.g., "#section").
5100
+ * Returns empty string if no fragment is set.
5101
+ * @return A newly allocated string with the hash.
4841
5102
  * @see https://url.spec.whatwg.org/#dom-url-hash
4842
5103
  */
4843
- [[nodiscard]] std::string get_hash() const noexcept;
5104
+ [[nodiscard]] std::string get_hash() const;
4844
5105
 
4845
5106
  /**
4846
- * A URL includes credentials if its username or password is not the empty
4847
- * string.
5107
+ * Checks if the URL has credentials (non-empty username or password).
5108
+ * @return `true` if username or password is non-empty, `false` otherwise.
4848
5109
  */
4849
5110
  [[nodiscard]] ada_really_inline bool has_credentials() const noexcept;
4850
5111
 
4851
5112
  /**
4852
- * Useful for implementing efficient serialization for the URL.
5113
+ * Returns the URL component offsets for efficient serialization.
4853
5114
  *
5115
+ * The components represent byte offsets into the serialized URL:
5116
+ * ```
4854
5117
  * https://user:pass@example.com:1234/foo/bar?baz#quux
4855
5118
  *       |     |    |          | ^^^^|       |   |
4856
5119
  *       |     |    |          | |   |       |   `----- hash_start
@@ -4861,19 +5124,23 @@ struct url : url_base {
4861
5124
  *       |     |    `---------------------------------- host_start
4862
5125
  *       |     `--------------------------------------- username_end
4863
5126
  *       `--------------------------------------------- protocol_end
4864
- *
4865
- * Inspired after servo/url
4866
- *
4867
- * @return a newly constructed component.
4868
- *
4869
- * @see
4870
- * https://github.com/servo/rust-url/blob/b65a45515c10713f6d212e6726719a020203cc98/url/src/quirks.rs#L31
5127
+ * ```
5128
+ * @return A newly constructed url_components struct.
5129
+ * @see https://github.com/servo/rust-url
4871
5130
  */
4872
5131
  [[nodiscard]] ada_really_inline ada::url_components get_components()
4873
5132
  const noexcept;
4874
- /** @return true if the URL has a hash component */
5133
+
5134
+ /**
5135
+ * Checks if the URL has a fragment/hash component.
5136
+ * @return `true` if hash is present, `false` otherwise.
5137
+ */
4875
5138
  [[nodiscard]] constexpr bool has_hash() const noexcept override;
4876
- /** @return true if the URL has a search component */
5139
+
5140
+ /**
5141
+ * Checks if the URL has a query/search component.
5142
+ * @return `true` if query is present, `false` otherwise.
5143
+ */
4877
5144
  [[nodiscard]] constexpr bool has_search() const noexcept override;
4878
5145
 
4879
5146
  private:
@@ -4882,7 +5149,7 @@ struct url : url_base {
4882
5149
  friend ada::url_aggregator ada::parser::parse_url<ada::url_aggregator>(
4883
5150
  std::string_view, const ada::url_aggregator *);
4884
5151
  friend void ada::helpers::strip_trailing_spaces_from_opaque_path<ada::url>(
4885
- ada::url &url) noexcept;
5152
+ ada::url &url);
4886
5153
 
4887
5154
  friend ada::url ada::parser::parse_url_impl<ada::url, true>(std::string_view,
4888
5155
  const ada::url *);
@@ -4989,7 +5256,7 @@ struct url : url_base {
4989
5256
  * Take the scheme from another URL. The scheme string is moved from the
4990
5257
  * provided url.
4991
5258
  */
4992
- constexpr void copy_scheme(ada::url &&u) noexcept;
5259
+ constexpr void copy_scheme(ada::url &&u);
4993
5260
 
4994
5261
  /**
4995
5262
  * Take the scheme from another URL. The scheme string is copied from the
@@ -5007,17 +5274,70 @@ inline std::ostream &operator<<(std::ostream &out, const ada::url &u);
5007
5274
 
5008
5275
  namespace ada {
5009
5276
 
5277
+ /**
5278
+ * Result type for URL parsing operations.
5279
+ *
5280
+ * Uses `tl::expected` to represent either a successfully parsed URL or an
5281
+ * error. This allows for exception-free error handling.
5282
+ *
5283
+ * @tparam result_type The URL type to return (default: `ada::url_aggregator`)
5284
+ *
5285
+ * @example
5286
+ * ```cpp
5287
+ * ada::result<ada::url_aggregator> result = ada::parse("https://example.com");
5288
+ * if (result) {
5289
+ * // Success: use result.value() or *result
5290
+ * } else {
5291
+ * // Error: handle result.error()
5292
+ * }
5293
+ * ```
5294
+ */
5010
5295
  template <class result_type = ada::url_aggregator>
5011
5296
  using result = tl::expected<result_type, ada::errors>;
5012
5297
 
5013
5298
  /**
5014
- * The URL parser takes a scalar value string input, with an optional null or
5015
- * base URL base (default null). The parser assumes the input is a valid ASCII
5016
- * or UTF-8 string.
5299
+ * Parses a URL string according to the WHATWG URL Standard.
5300
+ *
5301
+ * This is the main entry point for URL parsing in Ada. The function takes
5302
+ * a string input and optionally a base URL for resolving relative URLs.
5303
+ *
5304
+ * @tparam result_type The URL type to return. Can be either `ada::url` or
5305
+ * `ada::url_aggregator` (default). The `url_aggregator` type is more
5306
+ * memory-efficient as it stores components as offsets into a single
5307
+ * buffer.
5308
+ *
5309
+ * @param input The URL string to parse. Must be valid ASCII or UTF-8 encoded.
5310
+ * Leading and trailing whitespace is automatically trimmed.
5311
+ * @param base_url Optional pointer to a base URL for resolving relative URLs.
5312
+ * If nullptr (default), only absolute URLs can be parsed successfully.
5313
+ *
5314
+ * @return A `result<result_type>` containing either the parsed URL on success,
5315
+ * or an error code on failure. Use the boolean conversion or
5316
+ * `has_value()` to check for success.
5317
+ *
5318
+ * @note The parser is fully compliant with the WHATWG URL Standard.
5017
5319
  *
5018
- * @param input the string input to analyze (must be valid ASCII or UTF-8)
5019
- * @param base_url the optional URL input to use as a base url.
5020
- * @return a parsed URL.
5320
+ * @example
5321
+ * ```cpp
5322
+ * // Parse an absolute URL
5323
+ * auto url = ada::parse("https://user:pass@example.com:8080/path?query#hash");
5324
+ * if (url) {
5325
+ * std::cout << url->get_hostname(); // "example.com"
5326
+ * std::cout << url->get_pathname(); // "/path"
5327
+ * }
5328
+ *
5329
+ * // Parse a relative URL with a base
5330
+ * auto base = ada::parse("https://example.com/dir/");
5331
+ * if (base) {
5332
+ * auto relative = ada::parse("../other/page", &*base);
5333
+ * if (relative) {
5334
+ * std::cout << relative->get_href();
5335
+ * // -> "https://example.com/other/page"
5336
+ * }
5337
+ * }
5338
+ * ```
5339
+ *
5340
+ * @see https://url.spec.whatwg.org/#url-parsing
5021
5341
  */
5022
5342
  template <class result_type = ada::url_aggregator>
5023
5343
  ada_warn_unused ada::result<result_type> parse(
@@ -5029,33 +5349,73 @@ extern template ada::result<url_aggregator> parse<url_aggregator>(
5029
5349
  std::string_view input, const url_aggregator* base_url);
5030
5350
 
5031
5351
  /**
5032
- * Verifies whether the URL strings can be parsed. The function assumes
5033
- * that the inputs are valid ASCII or UTF-8 strings.
5352
+ * Checks whether a URL string can be successfully parsed.
5353
+ *
5354
+ * This is a fast validation function that checks if a URL string is valid
5355
+ * according to the WHATWG URL Standard without fully constructing a URL
5356
+ * object. Use this when you only need to validate URLs without needing
5357
+ * their parsed components.
5358
+ *
5359
+ * @param input The URL string to validate. Must be valid ASCII or UTF-8.
5360
+ * @param base_input Optional pointer to a base URL string for resolving
5361
+ * relative URLs. If nullptr (default), the input is validated as
5362
+ * an absolute URL.
5363
+ *
5364
+ * @return `true` if the URL can be parsed successfully, `false` otherwise.
5365
+ *
5366
+ * @example
5367
+ * ```cpp
5368
+ * // Check absolute URL
5369
+ * bool valid = ada::can_parse("https://example.com"); // true
5370
+ * bool invalid = ada::can_parse("not a url"); // false
5371
+ *
5372
+ * // Check relative URL with base
5373
+ * std::string_view base = "https://example.com/";
5374
+ * bool relative_valid = ada::can_parse("../path", &base); // true
5375
+ * ```
5376
+ *
5034
5377
  * @see https://url.spec.whatwg.org/#dom-url-canparse
5035
- * @return If URL can be parsed or not.
5036
5378
  */
5037
5379
  bool can_parse(std::string_view input,
5038
5380
  const std::string_view* base_input = nullptr);
5039
5381
 
5382
+ #if ADA_INCLUDE_URL_PATTERN
5040
5383
  /**
5041
- * Implementation of the URL pattern parsing algorithm.
5042
- * @see https://urlpattern.spec.whatwg.org
5384
+ * Parses a URL pattern according to the URLPattern specification.
5385
+ *
5386
+ * URL patterns provide a syntax for matching URLs against patterns, similar
5387
+ * to how regular expressions match strings. This is useful for routing and
5388
+ * URL-based dispatching.
5389
+ *
5390
+ * @tparam regex_provider The regex implementation to use for pattern matching.
5043
5391
  *
5044
- * @param input valid UTF-8 string or URLPatternInit struct
5045
- * @param base_url an optional valid UTF-8 string
5046
- * @param options an optional url_pattern_options struct
5047
- * @return url_pattern instance
5392
+ * @param input Either a URL pattern string (valid UTF-8) or a URLPatternInit
5393
+ * struct specifying individual component patterns.
5394
+ * @param base_url Optional pointer to a base URL string (valid UTF-8) for
5395
+ * resolving relative patterns.
5396
+ * @param options Optional pointer to configuration options (e.g., ignore_case).
5397
+ *
5398
+ * @return A `tl::expected` containing either the parsed url_pattern on success,
5399
+ * or an error code on failure.
5400
+ *
5401
+ * @see https://urlpattern.spec.whatwg.org
5048
5402
  */
5049
5403
  template <url_pattern_regex::regex_concept regex_provider>
5050
5404
  ada_warn_unused tl::expected<url_pattern<regex_provider>, errors>
5051
- parse_url_pattern(std::variant<std::string_view, url_pattern_init> input,
5405
+ parse_url_pattern(std::variant<std::string_view, url_pattern_init>&& input,
5052
5406
  const std::string_view* base_url = nullptr,
5053
5407
  const url_pattern_options* options = nullptr);
5408
+ #endif // ADA_INCLUDE_URL_PATTERN
5054
5409
 
5055
5410
  /**
5056
- * Computes a href string from a file path. The function assumes
5057
- * that the input is a valid ASCII or UTF-8 string.
5058
- * @return a href string (starts with file:://)
5411
+ * Converts a file system path to a file:// URL.
5412
+ *
5413
+ * Creates a properly formatted file URL from a local file system path.
5414
+ * Handles platform-specific path separators and percent-encoding.
5415
+ *
5416
+ * @param path The file system path to convert. Must be valid ASCII or UTF-8.
5417
+ *
5418
+ * @return A file:// URL string representing the given path.
5059
5419
  */
5060
5420
  std::string href_from_file(std::string_view path);
5061
5421
  } // namespace ada
@@ -5074,6 +5434,7 @@ std::string href_from_file(std::string_view path);
5074
5434
  #include <iostream>
5075
5435
  #endif // ADA_TESTING
5076
5436
 
5437
+ #if ADA_INCLUDE_URL_PATTERN
5077
5438
  namespace ada {
5078
5439
 
5079
5440
  enum class url_pattern_part_type : uint8_t {
@@ -5090,6 +5451,19 @@ enum class url_pattern_part_type : uint8_t {
5090
5451
  FULL_WILDCARD,
5091
5452
  };
5092
5453
 
5454
+ // Pattern type for fast-path matching optimization.
5455
+ // This allows skipping expensive regex evaluation for common simple patterns.
5456
+ enum class url_pattern_component_type : uint8_t {
5457
+ // Pattern is "^$" - only matches empty string
5458
+ EMPTY,
5459
+ // Pattern is "^<literal>$" - exact string match (no regex needed)
5460
+ EXACT_MATCH,
5461
+ // Pattern is "^(.*)$" - matches anything (full wildcard)
5462
+ FULL_WILDCARD,
5463
+ // Pattern requires actual regex evaluation
5464
+ REGEXP,
5465
+ };
5466
+
5093
5467
  enum class url_pattern_part_modifier : uint8_t {
5094
5468
  // The part does not have a modifier.
5095
5469
  none,
@@ -5108,17 +5482,17 @@ class url_pattern_part {
5108
5482
  public:
5109
5483
  url_pattern_part(url_pattern_part_type _type, std::string&& _value,
5110
5484
  url_pattern_part_modifier _modifier)
5111
- : type(_type), value(_value), modifier(_modifier) {}
5485
+ : type(_type), value(std::move(_value)), modifier(_modifier) {}
5112
5486
 
5113
5487
  url_pattern_part(url_pattern_part_type _type, std::string&& _value,
5114
5488
  url_pattern_part_modifier _modifier, std::string&& _name,
5115
5489
  std::string&& _prefix, std::string&& _suffix)
5116
5490
  : type(_type),
5117
- value(_value),
5491
+ value(std::move(_value)),
5118
5492
  modifier(_modifier),
5119
- name(_name),
5120
- prefix(_prefix),
5121
- suffix(_suffix) {}
5493
+ name(std::move(_name)),
5494
+ prefix(std::move(_prefix)),
5495
+ suffix(std::move(_suffix)) {}
5122
5496
  // A part has an associated type, a string, which must be set upon creation.
5123
5497
  url_pattern_part_type type;
5124
5498
  // A part has an associated value, a string, which must be set upon creation.
@@ -5209,11 +5583,15 @@ class url_pattern_component {
5209
5583
  url_pattern_component(std::string&& new_pattern,
5210
5584
  typename regex_provider::regex_type&& new_regexp,
5211
5585
  std::vector<std::string>&& new_group_name_list,
5212
- bool new_has_regexp_groups)
5586
+ bool new_has_regexp_groups,
5587
+ url_pattern_component_type new_type,
5588
+ std::string&& new_exact_match_value = {})
5213
5589
  : regexp(std::move(new_regexp)),
5214
5590
  pattern(std::move(new_pattern)),
5215
- group_name_list(new_group_name_list),
5216
- has_regexp_groups(new_has_regexp_groups) {}
5591
+ group_name_list(std::move(new_group_name_list)),
5592
+ exact_match_value(std::move(new_exact_match_value)),
5593
+ has_regexp_groups(new_has_regexp_groups),
5594
+ type(new_type) {}
5217
5595
 
5218
5596
  // @see https://urlpattern.spec.whatwg.org/#compile-a-component
5219
5597
  template <url_pattern_encoding_callback F>
@@ -5226,6 +5604,16 @@ class url_pattern_component {
5226
5604
  std::string&& input,
5227
5605
  std::vector<std::optional<std::string>>&& exec_result);
5228
5606
 
5607
+ // Fast path test that returns true/false without constructing result groups.
5608
+ // Uses cached pattern type to skip regex evaluation for simple patterns.
5609
+ bool fast_test(std::string_view input) const noexcept;
5610
+
5611
+ // Fast path match that returns capture groups without regex for simple
5612
+ // patterns. Returns nullopt if pattern doesn't match, otherwise returns
5613
+ // capture groups.
5614
+ std::optional<std::vector<std::optional<std::string>>> fast_match(
5615
+ std::string_view input) const;
5616
+
5229
5617
  #if ADA_TESTING
5230
5618
  friend void PrintTo(const url_pattern_component& component,
5231
5619
  std::ostream* os) {
@@ -5241,7 +5629,11 @@ class url_pattern_component {
5241
5629
  typename regex_provider::regex_type regexp{};
5242
5630
  std::string pattern{};
5243
5631
  std::vector<std::string> group_name_list{};
5632
+ // For EXACT_MATCH type: the literal string to compare against
5633
+ std::string exact_match_value{};
5244
5634
  bool has_regexp_groups = false;
5635
+ // Cached pattern type for fast-path optimization
5636
+ url_pattern_component_type type = url_pattern_component_type::REGEXP;
5245
5637
  };
5246
5638
 
5247
5639
  // A URLPattern input can be either a string or a URLPatternInit object.
@@ -5273,14 +5665,28 @@ struct url_pattern_options {
5273
5665
  #endif // ADA_TESTING
5274
5666
  };
5275
5667
 
5276
- // URLPattern is a Web Platform standard API for matching URLs against a
5277
- // pattern syntax (think of it as a regular expression for URLs). It is
5278
- // defined in https://wicg.github.io/urlpattern.
5279
- // More information about the URL Pattern syntax can be found at
5280
- // https://developer.mozilla.org/en-US/docs/Web/API/URL_Pattern_API
5281
- //
5282
- // We require all strings to be valid UTF-8: it is the user's responsibility
5283
- // to ensure that the provided strings are valid UTF-8.
5668
+ /**
5669
+ * @brief URL pattern matching class implementing the URLPattern API.
5670
+ *
5671
+ * URLPattern provides a way to match URLs against patterns with wildcards
5672
+ * and named capture groups. It's useful for routing, URL-based dispatching,
5673
+ * and URL validation.
5674
+ *
5675
+ * Pattern syntax supports:
5676
+ * - Literal text matching
5677
+ * - Named groups: `:name` (matches up to the next separator)
5678
+ * - Wildcards: `*` (matches everything)
5679
+ * - Custom regex: `(pattern)`
5680
+ * - Optional segments: `:name?`
5681
+ * - Repeated segments: `:name+`, `:name*`
5682
+ *
5683
+ * @tparam regex_provider The regex implementation to use for pattern matching.
5684
+ * Must satisfy the url_pattern_regex::regex_concept.
5685
+ *
5686
+ * @note All string inputs must be valid UTF-8.
5687
+ *
5688
+ * @see https://urlpattern.spec.whatwg.org/
5689
+ */
5284
5690
  template <url_pattern_regex::regex_concept regex_provider>
5285
5691
  class url_pattern {
5286
5692
  public:
@@ -5333,6 +5739,13 @@ class url_pattern {
5333
5739
  // @see https://urlpattern.spec.whatwg.org/#url-pattern-has-regexp-groups
5334
5740
  [[nodiscard]] bool has_regexp_groups() const;
5335
5741
 
5742
+ // Helper to test all components at once. Returns true if all match.
5743
+ [[nodiscard]] bool test_components(
5744
+ std::string_view protocol, std::string_view username,
5745
+ std::string_view password, std::string_view hostname,
5746
+ std::string_view port, std::string_view pathname, std::string_view search,
5747
+ std::string_view hash) const;
5748
+
5336
5749
  #if ADA_TESTING
5337
5750
  friend void PrintTo(const url_pattern& c, std::ostream* os) {
5338
5751
  *os << "protocol_component: '" << c.get_protocol() << ", ";
@@ -5348,7 +5761,7 @@ class url_pattern {
5348
5761
 
5349
5762
  template <url_pattern_regex::regex_concept P>
5350
5763
  friend tl::expected<url_pattern<P>, errors> parser::parse_url_pattern_impl(
5351
- std::variant<std::string_view, url_pattern_init> input,
5764
+ std::variant<std::string_view, url_pattern_init>&& input,
5352
5765
  const std::string_view* base_url, const url_pattern_options* options);
5353
5766
 
5354
5767
  /**
@@ -5406,9 +5819,8 @@ class url_pattern {
5406
5819
  */
5407
5820
  bool ignore_case_ = false;
5408
5821
  };
5409
-
5410
5822
  } // namespace ada
5411
-
5823
+ #endif // ADA_INCLUDE_URL_PATTERN
5412
5824
  #endif
5413
5825
  /* end file include/ada/url_pattern.h */
5414
5826
  /* begin file include/ada/url_pattern_helpers.h */
@@ -5424,6 +5836,7 @@ class url_pattern {
5424
5836
  #include <tuple>
5425
5837
  #include <vector>
5426
5838
 
5839
+ #if ADA_INCLUDE_URL_PATTERN
5427
5840
  namespace ada {
5428
5841
  enum class errors : uint8_t;
5429
5842
  }
@@ -5457,8 +5870,8 @@ enum class token_policy {
5457
5870
  // @see https://urlpattern.spec.whatwg.org/#tokens
5458
5871
  class token {
5459
5872
  public:
5460
- token(token_type _type, size_t _index, std::string&& _value)
5461
- : type(_type), index(_index), value(std::move(_value)) {}
5873
+ token(token_type _type, size_t _index, std::string_view _value)
5874
+ : type(_type), index(_index), value(_value) {}
5462
5875
 
5463
5876
  // A token has an associated type, a string, initially "invalid-char".
5464
5877
  token_type type = token_type::INVALID_CHAR;
@@ -5469,7 +5882,7 @@ class token {
5469
5882
 
5470
5883
  // A token has an associated value, a string, initially the empty string. It
5471
5884
  // contains the code points from the pattern string represented by the token.
5472
- std::string value{};
5885
+ std::string_view value{};
5473
5886
  };
5474
5887
 
5475
5888
  // @see https://urlpattern.spec.whatwg.org/#pattern-parser
@@ -5547,7 +5960,7 @@ class Tokenizer {
5547
5960
 
5548
5961
  private:
5549
5962
  // has an associated input, a pattern string, initially the empty string.
5550
- std::string input;
5963
+ std::string_view input;
5551
5964
  // has an associated policy, a tokenize policy, initially "strict".
5552
5965
  token_policy policy;
5553
5966
  // has an associated token list, a token list, initially an empty list.
@@ -5641,7 +6054,7 @@ struct constructor_string_parser {
5641
6054
  // @see https://urlpattern.spec.whatwg.org/#make-a-component-string
5642
6055
  std::string make_component_string();
5643
6056
  // has an associated input, a string, which must be set upon creation.
5644
- std::string input;
6057
+ std::string_view input;
5645
6058
  // has an associated token list, a token list, which must be set upon
5646
6059
  // creation.
5647
6060
  std::vector<token> token_list;
@@ -5748,14 +6161,14 @@ bool protocol_component_matches_special_scheme(
5748
6161
  ada::url_pattern_component<regex_provider>& input);
5749
6162
 
5750
6163
  // @see https://urlpattern.spec.whatwg.org/#convert-a-modifier-to-a-string
5751
- std::string convert_modifier_to_string(url_pattern_part_modifier modifier);
6164
+ std::string_view convert_modifier_to_string(url_pattern_part_modifier modifier);
5752
6165
 
5753
6166
  // @see https://urlpattern.spec.whatwg.org/#generate-a-segment-wildcard-regexp
5754
6167
  std::string generate_segment_wildcard_regexp(
5755
6168
  url_pattern_compile_component_options options);
5756
6169
 
5757
6170
  } // namespace ada::url_pattern_helpers
5758
-
6171
+ #endif // ADA_INCLUDE_URL_PATTERN
5759
6172
  #endif
5760
6173
  /* end file include/ada/url_pattern_helpers.h */
5761
6174
 
@@ -5764,9 +6177,10 @@ std::string generate_segment_wildcard_regexp(
5764
6177
  #include <variant>
5765
6178
 
5766
6179
  namespace ada::parser {
6180
+ #if ADA_INCLUDE_URL_PATTERN
5767
6181
  template <url_pattern_regex::regex_concept regex_provider>
5768
6182
  tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
5769
- std::variant<std::string_view, url_pattern_init> input,
6183
+ std::variant<std::string_view, url_pattern_init>&& input,
5770
6184
  const std::string_view* base_url, const url_pattern_options* options) {
5771
6185
  // Let init be null.
5772
6186
  url_pattern_init init;
@@ -5815,7 +6229,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
5815
6229
  return tl::unexpected(processed_init.error());
5816
6230
  }
5817
6231
 
5818
- // For each componentName of « "protocol", "username", "password", "hostname",
6232
+ // For each componentName of "protocol", "username", "password", "hostname",
5819
6233
  // "port", "pathname", "search", "hash" If processedInit[componentName] does
5820
6234
  // not exist, then set processedInit[componentName] to "*".
5821
6235
  ADA_ASSERT_TRUE(processed_init.has_value());
@@ -5843,7 +6257,6 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
5843
6257
  // TODO: Optimization opportunity.
5844
6258
  if (scheme::is_special(*processed_init->protocol)) {
5845
6259
  std::string_view port = processed_init->port.value();
5846
- helpers::trim_c0_whitespace(port);
5847
6260
  if (std::to_string(scheme::get_special_port(*processed_init->protocol)) ==
5848
6261
  port) {
5849
6262
  processed_init->port->clear();
@@ -5853,7 +6266,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
5853
6266
  // Let urlPattern be a new URL pattern.
5854
6267
  url_pattern<regex_provider> url_pattern_{};
5855
6268
 
5856
- // Set urlPatterns protocol component to the result of compiling a component
6269
+ // Set urlPattern's protocol component to the result of compiling a component
5857
6270
  // given processedInit["protocol"], canonicalize a protocol, and default
5858
6271
  // options.
5859
6272
  auto protocol_component = url_pattern_component<regex_provider>::compile(
@@ -5867,7 +6280,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
5867
6280
  }
5868
6281
  url_pattern_.protocol_component = std::move(*protocol_component);
5869
6282
 
5870
- // Set urlPatterns username component to the result of compiling a component
6283
+ // Set urlPattern's username component to the result of compiling a component
5871
6284
  // given processedInit["username"], canonicalize a username, and default
5872
6285
  // options.
5873
6286
  auto username_component = url_pattern_component<regex_provider>::compile(
@@ -5881,7 +6294,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
5881
6294
  }
5882
6295
  url_pattern_.username_component = std::move(*username_component);
5883
6296
 
5884
- // Set urlPatterns password component to the result of compiling a component
6297
+ // Set urlPattern's password component to the result of compiling a component
5885
6298
  // given processedInit["password"], canonicalize a password, and default
5886
6299
  // options.
5887
6300
  auto password_component = url_pattern_component<regex_provider>::compile(
@@ -5898,12 +6311,12 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
5898
6311
  // TODO: Optimization opportunity. The following if statement can be
5899
6312
  // simplified.
5900
6313
  // If the result running hostname pattern is an IPv6 address given
5901
- // processedInit["hostname"] is true, then set urlPatterns hostname component
6314
+ // processedInit["hostname"] is true, then set urlPattern's hostname component
5902
6315
  // to the result of compiling a component given processedInit["hostname"],
5903
6316
  // canonicalize an IPv6 hostname, and hostname options.
5904
6317
  if (url_pattern_helpers::is_ipv6_address(processed_init->hostname.value())) {
5905
6318
  ada_log("processed_init->hostname is ipv6 address");
5906
- // then set urlPatterns hostname component to the result of compiling a
6319
+ // then set urlPattern's hostname component to the result of compiling a
5907
6320
  // component given processedInit["hostname"], canonicalize an IPv6 hostname,
5908
6321
  // and hostname options.
5909
6322
  auto hostname_component = url_pattern_component<regex_provider>::compile(
@@ -5917,7 +6330,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
5917
6330
  }
5918
6331
  url_pattern_.hostname_component = std::move(*hostname_component);
5919
6332
  } else {
5920
- // Otherwise, set urlPatterns hostname component to the result of compiling
6333
+ // Otherwise, set urlPattern's hostname component to the result of compiling
5921
6334
  // a component given processedInit["hostname"], canonicalize a hostname, and
5922
6335
  // hostname options.
5923
6336
  auto hostname_component = url_pattern_component<regex_provider>::compile(
@@ -5932,7 +6345,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
5932
6345
  url_pattern_.hostname_component = std::move(*hostname_component);
5933
6346
  }
5934
6347
 
5935
- // Set urlPatterns port component to the result of compiling a component
6348
+ // Set urlPattern's port component to the result of compiling a component
5936
6349
  // given processedInit["port"], canonicalize a port, and default options.
5937
6350
  auto port_component = url_pattern_component<regex_provider>::compile(
5938
6351
  processed_init->port.value(), url_pattern_helpers::canonicalize_port,
@@ -5953,7 +6366,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
5953
6366
 
5954
6367
  // TODO: Optimization opportunity: Simplify this if statement.
5955
6368
  // If the result of running protocol component matches a special scheme given
5956
- // urlPatterns protocol component is true, then:
6369
+ // urlPattern's protocol component is true, then:
5957
6370
  if (url_pattern_helpers::protocol_component_matches_special_scheme<
5958
6371
  regex_provider>(url_pattern_.protocol_component)) {
5959
6372
  // Let pathCompileOptions be copy of the pathname options with the ignore
@@ -5963,7 +6376,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
5963
6376
  path_compile_options.ignore_case = options->ignore_case;
5964
6377
  }
5965
6378
 
5966
- // Set urlPatterns pathname component to the result of compiling a
6379
+ // Set urlPattern's pathname component to the result of compiling a
5967
6380
  // component given processedInit["pathname"], canonicalize a pathname, and
5968
6381
  // pathCompileOptions.
5969
6382
  auto pathname_component = url_pattern_component<regex_provider>::compile(
@@ -5976,7 +6389,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
5976
6389
  }
5977
6390
  url_pattern_.pathname_component = std::move(*pathname_component);
5978
6391
  } else {
5979
- // Otherwise set urlPatterns pathname component to the result of compiling
6392
+ // Otherwise set urlPattern's pathname component to the result of compiling
5980
6393
  // a component given processedInit["pathname"], canonicalize an opaque
5981
6394
  // pathname, and compileOptions.
5982
6395
  auto pathname_component = url_pattern_component<regex_provider>::compile(
@@ -5990,7 +6403,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
5990
6403
  url_pattern_.pathname_component = std::move(*pathname_component);
5991
6404
  }
5992
6405
 
5993
- // Set urlPatterns search component to the result of compiling a component
6406
+ // Set urlPattern's search component to the result of compiling a component
5994
6407
  // given processedInit["search"], canonicalize a search, and compileOptions.
5995
6408
  auto search_component = url_pattern_component<regex_provider>::compile(
5996
6409
  processed_init->search.value(), url_pattern_helpers::canonicalize_search,
@@ -6002,7 +6415,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
6002
6415
  }
6003
6416
  url_pattern_.search_component = std::move(*search_component);
6004
6417
 
6005
- // Set urlPatterns hash component to the result of compiling a component
6418
+ // Set urlPattern's hash component to the result of compiling a component
6006
6419
  // given processedInit["hash"], canonicalize a hash, and compileOptions.
6007
6420
  auto hash_component = url_pattern_component<regex_provider>::compile(
6008
6421
  processed_init->hash.value(), url_pattern_helpers::canonicalize_hash,
@@ -6017,6 +6430,7 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
6017
6430
  // Return urlPattern.
6018
6431
  return url_pattern_;
6019
6432
  }
6433
+ #endif // ADA_INCLUDE_URL_PATTERN
6020
6434
 
6021
6435
  } // namespace ada::parser
6022
6436
 
@@ -6113,7 +6527,10 @@ constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept {
6113
6527
  /* begin file include/ada/serializers.h */
6114
6528
  /**
6115
6529
  * @file serializers.h
6116
- * @brief Definitions for the URL serializers.
6530
+ * @brief IP address serialization utilities.
6531
+ *
6532
+ * This header provides functions for converting IP addresses to their
6533
+ * string representations according to the WHATWG URL Standard.
6117
6534
  */
6118
6535
  #ifndef ADA_SERIALIZERS_H
6119
6536
  #define ADA_SERIALIZERS_H
@@ -6124,32 +6541,41 @@ constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept {
6124
6541
 
6125
6542
  /**
6126
6543
  * @namespace ada::serializers
6127
- * @brief Includes the definitions for URL serializers
6544
+ * @brief IP address serialization functions.
6545
+ *
6546
+ * Contains utilities for serializing IPv4 and IPv6 addresses to strings.
6128
6547
  */
6129
6548
  namespace ada::serializers {
6130
6549
 
6131
6550
  /**
6132
- * Finds and returns the longest sequence of 0 values in a ipv6 input.
6551
+ * Finds the longest consecutive sequence of zero pieces in an IPv6 address.
6552
+ * Used for :: compression in IPv6 serialization.
6553
+ *
6554
+ * @param address The 8 16-bit pieces of the IPv6 address.
6555
+ * @param[out] compress Index of the start of the longest zero sequence.
6556
+ * @param[out] compress_length Length of the longest zero sequence.
6133
6557
  */
6134
6558
  void find_longest_sequence_of_ipv6_pieces(
6135
6559
  const std::array<uint16_t, 8>& address, size_t& compress,
6136
6560
  size_t& compress_length) noexcept;
6137
6561
 
6138
6562
  /**
6139
- * Serializes an ipv6 address.
6140
- * @details An IPv6 address is a 128-bit unsigned integer that identifies a
6141
- * network address.
6563
+ * Serializes an IPv6 address to its string representation.
6564
+ *
6565
+ * @param address The 8 16-bit pieces of the IPv6 address.
6566
+ * @return The serialized IPv6 string (e.g., "2001:db8::1").
6142
6567
  * @see https://url.spec.whatwg.org/#concept-ipv6-serializer
6143
6568
  */
6144
- std::string ipv6(const std::array<uint16_t, 8>& address) noexcept;
6569
+ std::string ipv6(const std::array<uint16_t, 8>& address);
6145
6570
 
6146
6571
  /**
6147
- * Serializes an ipv4 address.
6148
- * @details An IPv4 address is a 32-bit unsigned integer that identifies a
6149
- * network address.
6572
+ * Serializes an IPv4 address to its dotted-decimal string representation.
6573
+ *
6574
+ * @param address The IPv4 address as an integer (a 32-bit value carried in a `uint64_t`).
6575
+ * @return The serialized IPv4 string (e.g., "192.168.1.1").
6150
6576
  * @see https://url.spec.whatwg.org/#concept-ipv4-serializer
6151
6577
  */
6152
- std::string ipv4(uint64_t address) noexcept;
6578
+ std::string ipv4(uint64_t address);
6153
6579
 
6154
6580
  } // namespace ada::serializers
6155
6581
 
@@ -6158,7 +6584,12 @@ std::string ipv4(uint64_t address) noexcept;
6158
6584
  /* begin file include/ada/state.h */
6159
6585
  /**
6160
6586
  * @file state.h
6161
- * @brief Definitions for the states of the URL state machine.
6587
+ * @brief URL parser state machine states.
6588
+ *
6589
+ * Defines the states used by the URL parsing state machine as specified
6590
+ * in the WHATWG URL Standard.
6591
+ *
6592
+ * @see https://url.spec.whatwg.org/#url-parsing
6162
6593
  */
6163
6594
  #ifndef ADA_STATE_H
6164
6595
  #define ADA_STATE_H
@@ -6169,6 +6600,11 @@ std::string ipv4(uint64_t address) noexcept;
6169
6600
  namespace ada {
6170
6601
 
6171
6602
  /**
6603
+ * @brief States in the URL parsing state machine.
6604
+ *
6605
+ * The URL parser processes input through a sequence of states, each handling
6606
+ * a specific part of the URL syntax.
6607
+ *
6172
6608
  * @see https://url.spec.whatwg.org/#url-parsing
6173
6609
  */
6174
6610
  enum class state {
@@ -6274,7 +6710,9 @@ enum class state {
6274
6710
  };
6275
6711
 
6276
6712
  /**
6277
- * Stringify a URL state machine state.
6713
+ * Converts a parser state to its string name for debugging.
6714
+ * @param s The state to convert.
6715
+ * @return A string representation of the state.
6278
6716
  */
6279
6717
  ada_warn_unused std::string to_string(ada::state s);
6280
6718
 
@@ -6613,6 +7051,7 @@ inline std::ostream &operator<<(std::ostream &out, const ada::url &u) {
6613
7051
  out.protocol_end = uint32_t(get_protocol().size());
6614
7052
 
6615
7053
  // Trailing index is always the next character of the current one.
7054
+ // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
6616
7055
  size_t running_index = out.protocol_end;
6617
7056
 
6618
7057
  if (host.has_value()) {
@@ -6732,7 +7171,7 @@ inline void url::set_scheme(std::string &&new_scheme) noexcept {
6732
7171
  }
6733
7172
  }
6734
7173
 
6735
- constexpr void url::copy_scheme(ada::url &&u) noexcept {
7174
+ constexpr void url::copy_scheme(ada::url &&u) {
6736
7175
  non_special_scheme = u.non_special_scheme;
6737
7176
  type = u.type;
6738
7177
  }
@@ -6742,7 +7181,7 @@ constexpr void url::copy_scheme(const ada::url &u) {
6742
7181
  type = u.type;
6743
7182
  }
6744
7183
 
6745
- [[nodiscard]] ada_really_inline std::string url::get_href() const noexcept {
7184
+ [[nodiscard]] ada_really_inline std::string url::get_href() const {
6746
7185
  std::string output = get_protocol();
6747
7186
 
6748
7187
  if (host.has_value()) {
@@ -6901,7 +7340,13 @@ namespace ada {
6901
7340
  /* begin file include/ada/url_aggregator.h */
6902
7341
  /**
6903
7342
  * @file url_aggregator.h
6904
- * @brief Declaration for the basic URL definitions
7343
+ * @brief Declaration for the `ada::url_aggregator` class.
7344
+ *
7345
+ * This file contains the `ada::url_aggregator` struct which represents a parsed
7346
+ * URL using a single buffer with component offsets. This is the default and
7347
+ * most memory-efficient URL representation in Ada.
7348
+ *
7349
+ * @see url.h for an alternative representation using separate strings
6905
7350
  */
6906
7351
  #ifndef ADA_URL_AGGREGATOR_H
6907
7352
  #define ADA_URL_AGGREGATOR_H
@@ -6917,12 +7362,23 @@ namespace ada {
6917
7362
  namespace parser {}
6918
7363
 
6919
7364
  /**
6920
- * @brief Lightweight URL struct.
7365
+ * @brief Memory-efficient URL representation using a single buffer.
7366
+ *
7367
+ * The `url_aggregator` stores the entire normalized URL in a single string
7368
+ * buffer and tracks component boundaries using offsets. This design minimizes
7369
+ * memory allocations and is ideal for read-mostly access patterns.
7370
+ *
7371
+ * Getter methods return `std::string_view` pointing into the internal buffer.
7372
+ * These views are lightweight (no allocation) but become invalid if the
7373
+ * url_aggregator is modified or destroyed.
6921
7374
  *
6922
- * @details The url_aggregator class aims to minimize temporary memory
6923
- * allocation while representing a parsed URL. Internally, it contains a single
6924
- * normalized URL (the href), and it makes available the components, mostly
6925
- * using std::string_view.
7375
+ * @warning Views returned by getters (e.g., `get_pathname()`) are invalidated
7376
+ * when any setter is called. Do not use a getter's result as input to a
7377
+ * setter on the same object without copying first.
7378
+ *
7379
+ * @note This is the default URL type returned by `ada::parse()`.
7380
+ *
7381
+ * @see url For an alternative using separate std::string instances
6926
7382
  */
6927
7383
  struct url_aggregator : url_base {
6928
7384
  url_aggregator() = default;
@@ -6932,6 +7388,25 @@ struct url_aggregator : url_base {
6932
7388
  url_aggregator &operator=(const url_aggregator &u) = default;
6933
7389
  ~url_aggregator() override = default;
6934
7390
 
7391
+ /**
7392
+ * The setter functions follow the steps defined in the URL Standard.
7393
+ *
7394
+ * The url_aggregator has a single buffer that contains the entire normalized
7395
+ * URL. The various components are represented as offsets into that buffer.
7396
+ * When you call get_pathname(), for example, you get a std::string_view that
7397
+ * points into that buffer. If the url_aggregator is modified, the buffer may
7398
+ * be reallocated, and the std::string_view you obtained earlier may become
7399
+ * invalid. In particular, this implies that you cannot modify the URL using
7400
+ * a setter function with a std::string_view that points into the
7401
+ * url_aggregator. E.g., the following is incorrect:
7402
+ * url->set_hostname(url->get_pathname()).
7403
+ * You must first copy the pathname to a separate string.
7404
+ * std::string pathname(url->get_pathname());
7405
+ * url->set_hostname(pathname);
7406
+ *
7407
+ * The caller is responsible for ensuring that the url_aggregator is not
7408
+ * modified while any std::string_view obtained from it is in use.
7409
+ */
6935
7410
  bool set_href(std::string_view input);
6936
7411
  bool set_host(std::string_view input);
6937
7412
  bool set_hostname(std::string_view input);
@@ -6943,115 +7418,130 @@ struct url_aggregator : url_base {
6943
7418
  void set_search(std::string_view input);
6944
7419
  void set_hash(std::string_view input);
6945
7420
 
7421
+ /**
7422
+ * Validates whether the hostname is a valid domain according to RFC 1034.
7423
+ * @return `true` if the domain is valid, `false` otherwise.
7424
+ */
6946
7425
  [[nodiscard]] bool has_valid_domain() const noexcept override;
7426
+
6947
7427
  /**
6948
- * The origin getter steps are to return the serialization of this's URL's
6949
- * origin. [HTML]
6950
- * @return a newly allocated string.
7428
+ * Returns the URL's origin (scheme + host + port for special URLs).
7429
+ * @return A newly allocated string containing the serialized origin.
6951
7430
  * @see https://url.spec.whatwg.org/#concept-url-origin
6952
7431
  */
6953
- [[nodiscard]] std::string get_origin() const noexcept override;
7432
+ [[nodiscard]] std::string get_origin() const override;
7433
+
6954
7434
  /**
6955
- * Return the normalized string.
6956
- * This function does not allocate memory.
6957
- * It is highly efficient.
6958
- * @return a constant reference to the underlying normalized URL.
7435
+ * Returns the full serialized URL (the href) as a string_view.
7436
+ * Does not allocate memory. The returned view becomes invalid if this
7437
+ * url_aggregator is modified or destroyed.
7438
+ * @return A string_view into the internal buffer.
6959
7439
  * @see https://url.spec.whatwg.org/#dom-url-href
6960
- * @see https://url.spec.whatwg.org/#concept-url-serializer
6961
7440
  */
6962
7441
  [[nodiscard]] constexpr std::string_view get_href() const noexcept
6963
7442
  ada_lifetime_bound;
7443
+
6964
7444
  /**
6965
- * The username getter steps are to return this's URL's username.
6966
- * This function does not allocate memory.
6967
- * @return a lightweight std::string_view.
7445
+ * Returns the URL's username component.
7446
+ * Does not allocate memory. The returned view becomes invalid if this
7447
+ * url_aggregator is modified or destroyed.
7448
+ * @return A string_view of the username.
6968
7449
  * @see https://url.spec.whatwg.org/#dom-url-username
6969
7450
  */
6970
- [[nodiscard]] std::string_view get_username() const noexcept
6971
- ada_lifetime_bound;
7451
+ [[nodiscard]] std::string_view get_username() const ada_lifetime_bound;
7452
+
6972
7453
  /**
6973
- * The password getter steps are to return this's URL's password.
6974
- * This function does not allocate memory.
6975
- * @return a lightweight std::string_view.
7454
+ * Returns the URL's password component.
7455
+ * Does not allocate memory. The returned view becomes invalid if this
7456
+ * url_aggregator is modified or destroyed.
7457
+ * @return A string_view of the password.
6976
7458
  * @see https://url.spec.whatwg.org/#dom-url-password
6977
7459
  */
6978
- [[nodiscard]] std::string_view get_password() const noexcept
6979
- ada_lifetime_bound;
7460
+ [[nodiscard]] std::string_view get_password() const ada_lifetime_bound;
7461
+
6980
7462
  /**
6981
- * Return this's URL's port, serialized.
6982
- * This function does not allocate memory.
6983
- * @return a lightweight std::string_view.
7463
+ * Returns the URL's port as a string (e.g., "8080").
7464
+ * Does not allocate memory. Returns empty view if no port is set.
7465
+ * The returned view becomes invalid if this url_aggregator is modified.
7466
+ * @return A string_view of the port.
6984
7467
  * @see https://url.spec.whatwg.org/#dom-url-port
6985
7468
  */
6986
- [[nodiscard]] std::string_view get_port() const noexcept ada_lifetime_bound;
7469
+ [[nodiscard]] std::string_view get_port() const ada_lifetime_bound;
7470
+
6987
7471
  /**
6988
- * Return U+0023 (#), followed by this's URL's fragment.
6989
- * This function does not allocate memory.
6990
- * @return a lightweight std::string_view..
7472
+ * Returns the URL's fragment prefixed with '#' (e.g., "#section").
7473
+ * Does not allocate memory. Returns empty view if no fragment is set.
7474
+ * The returned view becomes invalid if this url_aggregator is modified.
7475
+ * @return A string_view of the hash.
6991
7476
  * @see https://url.spec.whatwg.org/#dom-url-hash
6992
7477
  */
6993
- [[nodiscard]] std::string_view get_hash() const noexcept ada_lifetime_bound;
7478
+ [[nodiscard]] std::string_view get_hash() const ada_lifetime_bound;
7479
+
6994
7480
  /**
6995
- * Return url's host, serialized, followed by U+003A (:) and url's port,
6996
- * serialized.
6997
- * This function does not allocate memory.
6998
- * When there is no host, this function returns the empty view.
6999
- * @return a lightweight std::string_view.
7481
+ * Returns the URL's host and port (e.g., "example.com:8080").
7482
+ * Does not allocate memory. Returns empty view if no host is set.
7483
+ * The returned view becomes invalid if this url_aggregator is modified.
7484
+ * @return A string_view of host:port.
7000
7485
  * @see https://url.spec.whatwg.org/#dom-url-host
7001
7486
  */
7002
- [[nodiscard]] std::string_view get_host() const noexcept ada_lifetime_bound;
7487
+ [[nodiscard]] std::string_view get_host() const ada_lifetime_bound;
7488
+
7003
7489
  /**
7004
- * Return this's URL's host, serialized.
7005
- * This function does not allocate memory.
7006
- * When there is no host, this function returns the empty view.
7007
- * @return a lightweight std::string_view.
7490
+ * Returns the URL's hostname (without port).
7491
+ * Does not allocate memory. Returns empty view if no host is set.
7492
+ * The returned view becomes invalid if this url_aggregator is modified.
7493
+ * @return A string_view of the hostname.
7008
7494
  * @see https://url.spec.whatwg.org/#dom-url-hostname
7009
7495
  */
7010
- [[nodiscard]] std::string_view get_hostname() const noexcept
7011
- ada_lifetime_bound;
7496
+ [[nodiscard]] std::string_view get_hostname() const ada_lifetime_bound;
7497
+
7012
7498
  /**
7013
- * The pathname getter steps are to return the result of URL path serializing
7014
- * this's URL.
7015
- * This function does not allocate memory.
7016
- * @return a lightweight std::string_view.
7499
+ * Returns the URL's path component.
7500
+ * Does not allocate memory. The returned view becomes invalid if this
7501
+ * url_aggregator is modified or destroyed.
7502
+ * @return A string_view of the pathname.
7017
7503
  * @see https://url.spec.whatwg.org/#dom-url-pathname
7018
7504
  */
7019
- [[nodiscard]] constexpr std::string_view get_pathname() const noexcept
7505
+ [[nodiscard]] constexpr std::string_view get_pathname() const
7020
7506
  ada_lifetime_bound;
7507
+
7021
7508
  /**
7022
- * Compute the pathname length in bytes without instantiating a view or a
7023
- * string.
7024
- * @return size of the pathname in bytes
7509
+ * Returns the byte length of the pathname without creating a string.
7510
+ * @return Size of the pathname in bytes.
7025
7511
  * @see https://url.spec.whatwg.org/#dom-url-pathname
7026
7512
  */
7027
7513
  [[nodiscard]] ada_really_inline uint32_t get_pathname_length() const noexcept;
7514
+
7028
7515
  /**
7029
- * Return U+003F (?), followed by this's URL's query.
7030
- * This function does not allocate memory.
7031
- * @return a lightweight std::string_view.
7516
+ * Returns the URL's query string prefixed with '?' (e.g., "?foo=bar").
7517
+ * Does not allocate memory. Returns empty view if no query is set.
7518
+ * The returned view becomes invalid if this url_aggregator is modified.
7519
+ * @return A string_view of the search/query.
7032
7520
  * @see https://url.spec.whatwg.org/#dom-url-search
7033
7521
  */
7034
- [[nodiscard]] std::string_view get_search() const noexcept ada_lifetime_bound;
7522
+ [[nodiscard]] std::string_view get_search() const ada_lifetime_bound;
7523
+
7035
7524
  /**
7036
- * The protocol getter steps are to return this's URL's scheme, followed by
7037
- * U+003A (:).
7038
- * This function does not allocate memory.
7039
- * @return a lightweight std::string_view.
7525
+ * Returns the URL's scheme followed by a colon (e.g., "https:").
7526
+ * Does not allocate memory. The returned view becomes invalid if this
7527
+ * url_aggregator is modified or destroyed.
7528
+ * @return A string_view of the protocol.
7040
7529
  * @see https://url.spec.whatwg.org/#dom-url-protocol
7041
7530
  */
7042
- [[nodiscard]] std::string_view get_protocol() const noexcept
7043
- ada_lifetime_bound;
7531
+ [[nodiscard]] std::string_view get_protocol() const ada_lifetime_bound;
7044
7532
 
7045
7533
  /**
7046
- * A URL includes credentials if its username or password is not the empty
7047
- * string.
7534
+ * Checks if the URL has credentials (non-empty username or password).
7535
+ * @return `true` if username or password is non-empty, `false` otherwise.
7048
7536
  */
7049
7537
  [[nodiscard]] ada_really_inline constexpr bool has_credentials()
7050
7538
  const noexcept;
7051
7539
 
7052
7540
  /**
7053
- * Useful for implementing efficient serialization for the URL.
7541
+ * Returns the URL component offsets for efficient serialization.
7054
7542
  *
7543
+ * The components represent byte offsets into the serialized URL:
7544
+ * ```
7055
7545
  * https://user:pass@example.com:1234/foo/bar?baz#quux
7056
7546
  * | | | | ^^^^| | |
7057
7547
  * | | | | | | | `----- hash_start
@@ -7062,57 +7552,99 @@ struct url_aggregator : url_base {
7062
7552
  * | | `---------------------------------- host_start
7063
7553
  * | `--------------------------------------- username_end
7064
7554
  * `--------------------------------------------- protocol_end
7065
- *
7066
- * Inspired after servo/url
7067
- *
7068
- * @return a constant reference to the underlying component attribute.
7069
- *
7070
- * @see
7071
- * https://github.com/servo/rust-url/blob/b65a45515c10713f6d212e6726719a020203cc98/url/src/quirks.rs#L31
7555
+ * ```
7556
+ * @return A constant reference to the url_components struct.
7557
+ * @see https://github.com/servo/rust-url
7072
7558
  */
7073
7559
  [[nodiscard]] ada_really_inline const url_components &get_components()
7074
7560
  const noexcept;
7561
+
7075
7562
  /**
7076
- * Returns a string representation of this URL.
7563
+ * Returns a JSON string representation of this URL for debugging.
7564
+ * @return A JSON-formatted string with all URL components.
7077
7565
  */
7078
7566
  [[nodiscard]] std::string to_string() const override;
7567
+
7079
7568
  /**
7080
- * Returns a string diagram of this URL.
7569
+ * Returns a visual diagram showing component boundaries in the URL.
7570
+ * Useful for debugging and understanding URL structure.
7571
+ * @return A multi-line string diagram.
7081
7572
  */
7082
7573
  [[nodiscard]] std::string to_diagram() const;
7083
7574
 
7084
7575
  /**
7085
- * Verifies that the parsed URL could be valid. Useful for debugging purposes.
7086
- * @return true if the URL is valid, otherwise return true of the offsets are
7087
- * possible.
7576
+ * Validates internal consistency of component offsets (for debugging).
7577
+ * @return `true` if offsets are consistent, `false` if corrupted.
7088
7578
  */
7089
7579
  [[nodiscard]] constexpr bool validate() const noexcept;
7090
7580
 
7091
- /** @return true if it has an host but it is the empty string */
7581
+ /**
7582
+ * Checks if the URL has an empty hostname (host is set but empty string).
7583
+ * @return `true` if host exists but is empty, `false` otherwise.
7584
+ */
7092
7585
  [[nodiscard]] constexpr bool has_empty_hostname() const noexcept;
7093
- /** @return true if it has a host (included an empty host) */
7586
+
7587
+ /**
7588
+ * Checks if the URL has a hostname (including empty hostnames).
7589
+ * @return `true` if host is present, `false` otherwise.
7590
+ */
7094
7591
  [[nodiscard]] constexpr bool has_hostname() const noexcept;
7095
- /** @return true if the URL has a non-empty username */
7592
+
7593
+ /**
7594
+ * Checks if the URL has a non-empty username.
7595
+ * @return `true` if username is non-empty, `false` otherwise.
7596
+ */
7096
7597
  [[nodiscard]] constexpr bool has_non_empty_username() const noexcept;
7097
- /** @return true if the URL has a non-empty password */
7598
+
7599
+ /**
7600
+ * Checks if the URL has a non-empty password.
7601
+ * @return `true` if password is non-empty, `false` otherwise.
7602
+ */
7098
7603
  [[nodiscard]] constexpr bool has_non_empty_password() const noexcept;
7099
- /** @return true if the URL has a (non default) port */
7604
+
7605
+ /**
7606
+ * Checks if the URL has a non-default port explicitly specified.
7607
+ * @return `true` if a port is present, `false` otherwise.
7608
+ */
7100
7609
  [[nodiscard]] constexpr bool has_port() const noexcept;
7101
- /** @return true if the URL has a password */
7610
+
7611
+ /**
7612
+ * Checks if the URL has a password component (may be empty).
7613
+ * @return `true` if password is present, `false` otherwise.
7614
+ */
7102
7615
  [[nodiscard]] constexpr bool has_password() const noexcept;
7103
- /** @return true if the URL has a hash component */
7616
+
7617
+ /**
7618
+ * Checks if the URL has a fragment/hash component.
7619
+ * @return `true` if hash is present, `false` otherwise.
7620
+ */
7104
7621
  [[nodiscard]] constexpr bool has_hash() const noexcept override;
7105
- /** @return true if the URL has a search component */
7622
+
7623
+ /**
7624
+ * Checks if the URL has a query/search component.
7625
+ * @return `true` if query is present, `false` otherwise.
7626
+ */
7106
7627
  [[nodiscard]] constexpr bool has_search() const noexcept override;
7107
7628
 
7629
+ /**
7630
+ * Removes the port from the URL.
7631
+ */
7108
7632
  inline void clear_port();
7633
+
7634
+ /**
7635
+ * Removes the hash/fragment from the URL.
7636
+ */
7109
7637
  inline void clear_hash();
7638
+
7639
+ /**
7640
+ * Removes the query/search string from the URL.
7641
+ */
7110
7642
  inline void clear_search() override;
7111
7643
 
7112
7644
  private:
7113
7645
  // helper methods
7114
7646
  friend void helpers::strip_trailing_spaces_from_opaque_path<url_aggregator>(
7115
- url_aggregator &url) noexcept;
7647
+ url_aggregator &url);
7116
7648
  // parse_url methods
7117
7649
  friend url_aggregator parser::parse_url<url_aggregator>(
7118
7650
  std::string_view, const url_aggregator *);
@@ -7121,12 +7653,15 @@ struct url_aggregator : url_base {
7121
7653
  std::string_view, const url_aggregator *);
7122
7654
  friend url_aggregator parser::parse_url_impl<url_aggregator, false>(
7123
7655
  std::string_view, const url_aggregator *);
7656
+
7657
+ #if ADA_INCLUDE_URL_PATTERN
7124
7658
  // url_pattern methods
7125
7659
  template <url_pattern_regex::regex_concept regex_provider>
7126
7660
  friend tl::expected<url_pattern<regex_provider>, errors>
7127
- parse_url_pattern_impl(std::variant<std::string_view, url_pattern_init> input,
7128
- const std::string_view *base_url,
7129
- const url_pattern_options *options);
7661
+ parse_url_pattern_impl(
7662
+ std::variant<std::string_view, url_pattern_init> &&input,
7663
+ const std::string_view *base_url, const url_pattern_options *options);
7664
+ #endif // ADA_INCLUDE_URL_PATTERN
7130
7665
 
7131
7666
  std::string buffer{};
7132
7667
  url_components components{};
@@ -7138,7 +7673,7 @@ struct url_aggregator : url_base {
7138
7673
  */
7139
7674
  [[nodiscard]] ada_really_inline bool is_at_path() const noexcept;
7140
7675
 
7141
- inline void add_authority_slashes_if_needed() noexcept;
7676
+ inline void add_authority_slashes_if_needed();
7142
7677
 
7143
7678
  /**
7144
7679
  * To optimize performance, you may indicate how much memory to allocate
@@ -7146,10 +7681,10 @@ struct url_aggregator : url_base {
7146
7681
  */
7147
7682
  constexpr void reserve(uint32_t capacity);
7148
7683
 
7149
- ada_really_inline size_t parse_port(
7150
- std::string_view view, bool check_trailing_content) noexcept override;
7684
+ ada_really_inline size_t parse_port(std::string_view view,
7685
+ bool check_trailing_content) override;
7151
7686
 
7152
- ada_really_inline size_t parse_port(std::string_view view) noexcept override {
7687
+ ada_really_inline size_t parse_port(std::string_view view) override {
7153
7688
  return this->parse_port(view, false);
7154
7689
  }
7155
7690
 
@@ -7214,16 +7749,16 @@ struct url_aggregator : url_base {
7214
7749
  std::string_view input);
7215
7750
  [[nodiscard]] constexpr bool has_authority() const noexcept;
7216
7751
  constexpr void set_protocol_as_file();
7217
- inline void set_scheme(std::string_view new_scheme) noexcept;
7752
+ inline void set_scheme(std::string_view new_scheme);
7218
7753
  /**
7219
7754
  * Fast function to set the scheme from a view with a colon in the
7220
7755
  * buffer, does not change type.
7221
7756
  */
7222
7757
  inline void set_scheme_from_view_with_colon(
7223
- std::string_view new_scheme_with_colon) noexcept;
7224
- inline void copy_scheme(const url_aggregator &u) noexcept;
7758
+ std::string_view new_scheme_with_colon);
7759
+ inline void copy_scheme(const url_aggregator &u);
7225
7760
 
7226
- inline void update_host_to_base_host(const std::string_view input) noexcept;
7761
+ inline void update_host_to_base_host(const std::string_view input);
7227
7762
 
7228
7763
  }; // url_aggregator
7229
7764
 
@@ -8015,7 +8550,7 @@ url_aggregator::get_components() const noexcept {
8015
8550
  components.protocol_end + 2) == "//";
8016
8551
  }
8017
8552
 
8018
- inline void ada::url_aggregator::add_authority_slashes_if_needed() noexcept {
8553
+ inline void ada::url_aggregator::add_authority_slashes_if_needed() {
8019
8554
  ada_log("url_aggregator::add_authority_slashes_if_needed");
8020
8555
  ADA_ASSERT_TRUE(validate());
8021
8556
  // Protocol setter will insert `http:` to the URL. It is up to hostname setter
@@ -8052,7 +8587,7 @@ constexpr bool url_aggregator::has_non_empty_username() const noexcept {
8052
8587
 
8053
8588
  constexpr bool url_aggregator::has_non_empty_password() const noexcept {
8054
8589
  ada_log("url_aggregator::has_non_empty_password");
8055
- return components.host_start - components.username_end > 0;
8590
+ return components.host_start > components.username_end;
8056
8591
  }
8057
8592
 
8058
8593
  constexpr bool url_aggregator::has_password() const noexcept {
@@ -8124,8 +8659,8 @@ constexpr bool url_aggregator::has_port() const noexcept {
8124
8659
  return buffer;
8125
8660
  }
8126
8661
 
8127
- ada_really_inline size_t url_aggregator::parse_port(
8128
- std::string_view view, bool check_trailing_content) noexcept {
8662
+ ada_really_inline size_t
8663
+ url_aggregator::parse_port(std::string_view view, bool check_trailing_content) {
8129
8664
  ada_log("url_aggregator::parse_port('", view, "') ", view.size());
8130
8665
  if (!view.empty() && view[0] == '-') {
8131
8666
  ada_log("parse_port: view[0] == '0' && view.size() > 1");
@@ -8363,8 +8898,8 @@ constexpr void url_aggregator::set_protocol_as_file() {
8363
8898
  return true;
8364
8899
  }
8365
8900
 
8366
- [[nodiscard]] constexpr std::string_view url_aggregator::get_pathname()
8367
- const noexcept ada_lifetime_bound {
8901
+ [[nodiscard]] constexpr std::string_view url_aggregator::get_pathname() const
8902
+ ada_lifetime_bound {
8368
8903
  ada_log("url_aggregator::get_pathname pathname_start = ",
8369
8904
  components.pathname_start, " buffer.size() = ", buffer.size(),
8370
8905
  " components.search_start = ", components.search_start,
@@ -8383,8 +8918,7 @@ inline std::ostream &operator<<(std::ostream &out,
8383
8918
  return out << u.to_string();
8384
8919
  }
8385
8920
 
8386
- void url_aggregator::update_host_to_base_host(
8387
- const std::string_view input) noexcept {
8921
+ void url_aggregator::update_host_to_base_host(const std::string_view input) {
8388
8922
  ada_log("url_aggregator::update_host_to_base_host ", input);
8389
8923
  ADA_ASSERT_TRUE(validate());
8390
8924
  ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
@@ -8411,7 +8945,13 @@ void url_aggregator::update_host_to_base_host(
8411
8945
  /* begin file include/ada/url_search_params.h */
8412
8946
  /**
8413
8947
  * @file url_search_params.h
8414
- * @brief Declaration for the URL Search Params
8948
+ * @brief URL query string parameter manipulation.
8949
+ *
8950
+ * This file provides the `url_search_params` class for parsing, manipulating,
8951
+ * and serializing URL query strings. It implements the URLSearchParams API
8952
+ * from the WHATWG URL Standard.
8953
+ *
8954
+ * @see https://url.spec.whatwg.org/#interface-urlsearchparams
8415
8955
  */
8416
8956
  #ifndef ADA_URL_SEARCH_PARAMS_H
8417
8957
  #define ADA_URL_SEARCH_PARAMS_H
@@ -8423,37 +8963,51 @@ void url_aggregator::update_host_to_base_host(
8423
8963
 
8424
8964
  namespace ada {
8425
8965
 
8966
+ /**
8967
+ * @brief Iterator types for url_search_params iteration.
8968
+ */
8426
8969
  enum class url_search_params_iter_type {
8427
- KEYS,
8428
- VALUES,
8429
- ENTRIES,
8970
+ KEYS, /**< Iterate over parameter keys only */
8971
+ VALUES, /**< Iterate over parameter values only */
8972
+ ENTRIES, /**< Iterate over key-value pairs */
8430
8973
  };
8431
8974
 
8432
8975
  template <typename T, url_search_params_iter_type Type>
8433
8976
  struct url_search_params_iter;
8434
8977
 
8978
+ /** Type alias for a key-value pair of string views. */
8435
8979
  typedef std::pair<std::string_view, std::string_view> key_value_view_pair;
8436
8980
 
8981
+ /** Iterator over search parameter keys. */
8437
8982
  using url_search_params_keys_iter =
8438
8983
  url_search_params_iter<std::string_view, url_search_params_iter_type::KEYS>;
8984
+ /** Iterator over search parameter values. */
8439
8985
  using url_search_params_values_iter =
8440
8986
  url_search_params_iter<std::string_view,
8441
8987
  url_search_params_iter_type::VALUES>;
8988
+ /** Iterator over search parameter key-value pairs. */
8442
8989
  using url_search_params_entries_iter =
8443
8990
  url_search_params_iter<key_value_view_pair,
8444
8991
  url_search_params_iter_type::ENTRIES>;
8445
8992
 
8446
8993
  /**
8447
- * We require all strings to be valid UTF-8. It is the user's responsibility to
8448
- * ensure that the provided strings are valid UTF-8.
8994
+ * @brief Class for parsing and manipulating URL query strings.
8995
+ *
8996
+ * The `url_search_params` class provides methods to parse, modify, and
8997
+ * serialize URL query parameters (the part after '?' in a URL). It handles
8998
+ * percent-encoding and decoding automatically.
8999
+ *
9000
+ * All string inputs must be valid UTF-8. The caller is responsible for
9001
+ * ensuring UTF-8 validity.
9002
+ *
8449
9003
  * @see https://url.spec.whatwg.org/#interface-urlsearchparams
8450
9004
  */
8451
9005
  struct url_search_params {
8452
9006
  url_search_params() = default;
8453
9007
 
8454
9008
  /**
8455
- * @see
8456
- * https://github.com/web-platform-tests/wpt/blob/master/url/urlsearchparams-constructor.any.js
9009
+ * Constructs url_search_params by parsing a query string.
9010
+ * @param input A query string (with or without leading '?'). Must be UTF-8.
8457
9011
  */
8458
9012
  explicit url_search_params(const std::string_view input) {
8459
9013
  initialize(input);
@@ -8465,75 +9019,106 @@ struct url_search_params {
8465
9019
  url_search_params &operator=(const url_search_params &u) = default;
8466
9020
  ~url_search_params() = default;
8467
9021
 
9022
+ /**
9023
+ * Returns the number of key-value pairs.
9024
+ * @return The total count of parameters.
9025
+ */
8468
9026
  [[nodiscard]] inline size_t size() const noexcept;
8469
9027
 
8470
9028
  /**
8471
- * Both key and value must be valid UTF-8.
9029
+ * Appends a new key-value pair to the parameter list.
9030
+ * @param key The parameter name (must be valid UTF-8).
9031
+ * @param value The parameter value (must be valid UTF-8).
8472
9032
  * @see https://url.spec.whatwg.org/#dom-urlsearchparams-append
8473
9033
  */
8474
9034
  inline void append(std::string_view key, std::string_view value);
8475
9035
 
8476
9036
  /**
9037
+ * Removes all pairs with the given key.
9038
+ * @param key The parameter name to remove.
8477
9039
  * @see https://url.spec.whatwg.org/#dom-urlsearchparams-delete
8478
9040
  */
8479
9041
  inline void remove(std::string_view key);
9042
+
9043
+ /**
9044
+ * Removes all pairs with the given key and value.
9045
+ * @param key The parameter name.
9046
+ * @param value The parameter value to match.
9047
+ */
8480
9048
  inline void remove(std::string_view key, std::string_view value);
8481
9049
 
8482
9050
  /**
9051
+ * Returns the value of the first pair with the given key.
9052
+ * @param key The parameter name to search for.
9053
+ * @return The value if found, or std::nullopt if not present.
8483
9054
  * @see https://url.spec.whatwg.org/#dom-urlsearchparams-get
8484
9055
  */
8485
9056
  inline std::optional<std::string_view> get(std::string_view key);
8486
9057
 
8487
9058
  /**
9059
+ * Returns all values for pairs with the given key.
9060
+ * @param key The parameter name to search for.
9061
+ * @return A vector of all matching values (may be empty).
8488
9062
  * @see https://url.spec.whatwg.org/#dom-urlsearchparams-getall
8489
9063
  */
8490
9064
  inline std::vector<std::string> get_all(std::string_view key);
8491
9065
 
8492
9066
  /**
9067
+ * Checks if any pair has the given key.
9068
+ * @param key The parameter name to search for.
9069
+ * @return `true` if at least one pair has this key.
8493
9070
  * @see https://url.spec.whatwg.org/#dom-urlsearchparams-has
8494
9071
  */
8495
9072
  inline bool has(std::string_view key) noexcept;
9073
+
9074
+ /**
9075
+ * Checks if any pair matches the given key and value.
9076
+ * @param key The parameter name to search for.
9077
+ * @param value The parameter value to match.
9078
+ * @return `true` if a matching pair exists.
9079
+ */
8496
9080
  inline bool has(std::string_view key, std::string_view value) noexcept;
8497
9081
 
8498
9082
  /**
8499
- * Both key and value must be valid UTF-8.
9083
+ * Sets a parameter value, replacing any existing pairs with the same key.
9084
+ * @param key The parameter name (must be valid UTF-8).
9085
+ * @param value The parameter value (must be valid UTF-8).
8500
9086
  * @see https://url.spec.whatwg.org/#dom-urlsearchparams-set
8501
9087
  */
8502
9088
  inline void set(std::string_view key, std::string_view value);
8503
9089
 
8504
9090
  /**
9091
+ * Sorts all key-value pairs by their keys using code unit comparison.
8505
9092
  * @see https://url.spec.whatwg.org/#dom-urlsearchparams-sort
8506
9093
  */
8507
9094
  inline void sort();
8508
9095
 
8509
9096
  /**
9097
+ * Serializes the parameters to a query string (without leading '?').
9098
+ * @return The percent-encoded query string.
8510
9099
  * @see https://url.spec.whatwg.org/#urlsearchparams-stringification-behavior
8511
9100
  */
8512
9101
  inline std::string to_string() const;
8513
9102
 
8514
9103
  /**
8515
- * Returns a simple JS-style iterator over all of the keys in this
8516
- * url_search_params. The keys in the iterator are not unique. The valid
8517
- * lifespan of the iterator is tied to the url_search_params. The iterator
8518
- * must be freed when you're done with it.
8519
- * @see https://url.spec.whatwg.org/#interface-urlsearchparams
9104
+ * Returns an iterator over all parameter keys.
9105
+ * Keys may repeat if there are duplicate parameters.
9106
+ * @return An iterator yielding string_view keys.
9107
+ * @note The iterator is invalidated if this object is modified.
8520
9108
  */
8521
9109
  inline url_search_params_keys_iter get_keys();
8522
9110
 
8523
9111
  /**
8524
- * Returns a simple JS-style iterator over all of the values in this
8525
- * url_search_params. The valid lifespan of the iterator is tied to the
8526
- * url_search_params. The iterator must be freed when you're done with it.
8527
- * @see https://url.spec.whatwg.org/#interface-urlsearchparams
9112
+ * Returns an iterator over all parameter values.
9113
+ * @return An iterator yielding string_view values.
9114
+ * @note The iterator is invalidated if this object is modified.
8528
9115
  */
8529
9116
  inline url_search_params_values_iter get_values();
8530
9117
 
8531
9118
  /**
8532
- * Returns a simple JS-style iterator over all of the entries in this
8533
- * url_search_params. The entries are pairs of keys and corresponding values.
8534
- * The valid lifespan of the iterator is tied to the url_search_params. The
8535
- * iterator must be freed when you're done with it.
8536
- * @see https://url.spec.whatwg.org/#interface-urlsearchparams
9119
+ * Returns an iterator over all key-value pairs.
9120
+ * @return An iterator yielding key-value pair views.
9121
+ * @note The iterator is invalidated if this object is modified.
8537
9122
  */
8538
9123
  inline url_search_params_entries_iter get_entries();
8539
9124
 
@@ -8570,8 +9155,13 @@ struct url_search_params {
8570
9155
  }; // url_search_params
8571
9156
 
8572
9157
  /**
8573
- * Implements a non-conventional iterator pattern that is closer in style to
8574
- * JavaScript's definition of an iterator.
9158
+ * @brief JavaScript-style iterator for url_search_params.
9159
+ *
9160
+ * Provides a `next()` method that returns successive values until exhausted.
9161
+ * This matches the iterator pattern used in the Web Platform.
9162
+ *
9163
+ * @tparam T The type of value returned by the iterator.
9164
+ * @tparam Type The type of iteration (KEYS, VALUES, or ENTRIES).
8575
9165
  *
8576
9166
  * @see https://webidl.spec.whatwg.org/#idl-iterable
8577
9167
  */
@@ -8586,10 +9176,15 @@ struct url_search_params_iter {
8586
9176
  ~url_search_params_iter() = default;
8587
9177
 
8588
9178
  /**
8589
- * Return the next item in the iterator or std::nullopt if done.
9179
+ * Returns the next value in the iteration sequence.
9180
+ * @return The next value, or std::nullopt if iteration is complete.
8590
9181
  */
8591
9182
  inline std::optional<T> next();
8592
9183
 
9184
+ /**
9185
+ * Checks if more values are available.
9186
+ * @return `true` if `next()` will return a value, `false` if exhausted.
9187
+ */
8593
9188
  inline bool has_next() const;
8594
9189
 
8595
9190
  private:
@@ -8895,6 +9490,7 @@ url_search_params_entries_iter::next() {
8895
9490
 
8896
9491
  #endif // ADA_URL_SEARCH_PARAMS_INL_H
8897
9492
  /* end file include/ada/url_search_params-inl.h */
9493
+
8898
9494
  /* begin file include/ada/url_pattern-inl.h */
8899
9495
  /**
8900
9496
  * @file url_pattern-inl.h
@@ -8908,6 +9504,7 @@ url_search_params_entries_iter::next() {
8908
9504
  #include <string_view>
8909
9505
  #include <utility>
8910
9506
 
9507
+ #if ADA_INCLUDE_URL_PATTERN
8911
9508
  namespace ada {
8912
9509
 
8913
9510
  inline bool url_pattern_init::operator==(const url_pattern_init& other) const {
@@ -8940,10 +9537,8 @@ url_pattern_component<regex_provider>::create_component_match_result(
8940
9537
  // says we should start from 1. This case is handled by the
8941
9538
  // std_regex_provider.
8942
9539
  for (size_t index = 0; index < exec_result.size(); index++) {
8943
- result.groups.insert({
8944
- group_name_list[index],
8945
- std::move(exec_result[index]),
8946
- });
9540
+ result.groups.emplace(group_name_list[index],
9541
+ std::move(exec_result[index]));
8947
9542
  }
8948
9543
  return result;
8949
9544
  }
@@ -9049,43 +9644,113 @@ url_pattern_component<regex_provider>::compile(
9049
9644
  return tl::unexpected(part_list.error());
9050
9645
  }
9051
9646
 
9052
- // Let (regular expression string, name list) be the result of running
9053
- // generate a regular expression and name list given part list and options.
9647
+ // Detect pattern type early to potentially skip expensive regex compilation
9648
+ const auto has_regexp = [](const auto& part) { return part.is_regexp(); };
9649
+ const bool has_regexp_groups = std::ranges::any_of(*part_list, has_regexp);
9650
+
9651
+ url_pattern_component_type component_type =
9652
+ url_pattern_component_type::REGEXP;
9653
+ std::string exact_match_value{};
9654
+
9655
+ if (part_list->empty()) {
9656
+ component_type = url_pattern_component_type::EMPTY;
9657
+ } else if (part_list->size() == 1) {
9658
+ const auto& part = (*part_list)[0];
9659
+ if (part.type == url_pattern_part_type::FIXED_TEXT &&
9660
+ part.modifier == url_pattern_part_modifier::none &&
9661
+ !options.ignore_case) {
9662
+ component_type = url_pattern_component_type::EXACT_MATCH;
9663
+ exact_match_value = part.value;
9664
+ } else if (part.type == url_pattern_part_type::FULL_WILDCARD &&
9665
+ part.modifier == url_pattern_part_modifier::none &&
9666
+ part.prefix.empty() && part.suffix.empty()) {
9667
+ component_type = url_pattern_component_type::FULL_WILDCARD;
9668
+ }
9669
+ }
9670
+
9671
+ // For simple patterns, skip regex generation and compilation entirely
9672
+ if (component_type != url_pattern_component_type::REGEXP) {
9673
+ auto pattern_string =
9674
+ url_pattern_helpers::generate_pattern_string(*part_list, options);
9675
+ // For FULL_WILDCARD, record the wildcard part's name directly, because
9676
+ // generate_regular_expression_and_name_list is skipped on this path.
9677
+ std::vector<std::string> name_list;
9678
+ if (component_type == url_pattern_component_type::FULL_WILDCARD &&
9679
+ !part_list->empty()) {
9680
+ name_list.push_back((*part_list)[0].name);
9681
+ }
9682
+ return url_pattern_component<regex_provider>(
9683
+ std::move(pattern_string), typename regex_provider::regex_type{},
9684
+ std::move(name_list), has_regexp_groups, component_type,
9685
+ std::move(exact_match_value));
9686
+ }
9687
+
9688
+ // Generate regex for complex patterns
9054
9689
  auto [regular_expression_string, name_list] =
9055
9690
  url_pattern_helpers::generate_regular_expression_and_name_list(*part_list,
9056
9691
  options);
9057
-
9058
- ada_log("regular expression string: ", regular_expression_string);
9059
-
9060
- // Let pattern string be the result of running generate a pattern
9061
- // string given part list and options.
9062
9692
  auto pattern_string =
9063
9693
  url_pattern_helpers::generate_pattern_string(*part_list, options);
9064
9694
 
9065
- // Let regular expression be RegExpCreate(regular expression string,
9066
- // flags). If this throws an exception, catch it, and throw a
9067
- // TypeError.
9068
9695
  std::optional<typename regex_provider::regex_type> regular_expression =
9069
9696
  regex_provider::create_instance(regular_expression_string,
9070
9697
  options.ignore_case);
9071
-
9072
9698
  if (!regular_expression) {
9073
9699
  return tl::unexpected(errors::type_error);
9074
9700
  }
9075
9701
 
9076
- // For each part of part list:
9077
- // - If part’s type is "regexp", then set has regexp groups to true.
9078
- const auto has_regexp = [](const auto& part) { return part.is_regexp(); };
9079
- const bool has_regexp_groups = std::ranges::any_of(*part_list, has_regexp);
9080
-
9081
- ada_log("has regexp groups: ", has_regexp_groups);
9082
-
9083
- // Return a new component whose pattern string is pattern string, regular
9084
- // expression is regular expression, group name list is name list, and has
9085
- // regexp groups is has regexp groups.
9086
9702
  return url_pattern_component<regex_provider>(
9087
9703
  std::move(pattern_string), std::move(*regular_expression),
9088
- std::move(name_list), has_regexp_groups);
9704
+ std::move(name_list), has_regexp_groups, component_type,
9705
+ std::move(exact_match_value));
9706
+ }
9707
+
9708
+ template <url_pattern_regex::regex_concept regex_provider>
9709
+ bool url_pattern_component<regex_provider>::fast_test(
9710
+ std::string_view input) const noexcept {
9711
+ // Fast path for simple patterns - avoid regex evaluation
9712
+ // Checked with early returns: the simple pattern types first, regex last
9713
+ if (type == url_pattern_component_type::FULL_WILDCARD) {
9714
+ return true;
9715
+ }
9716
+ if (type == url_pattern_component_type::EXACT_MATCH) {
9717
+ return input == exact_match_value;
9718
+ }
9719
+ if (type == url_pattern_component_type::EMPTY) {
9720
+ return input.empty();
9721
+ }
9722
+ // type == REGEXP
9723
+ return regex_provider::regex_match(input, regexp);
9724
+ }
9725
+
9726
+ template <url_pattern_regex::regex_concept regex_provider>
9727
+ std::optional<std::vector<std::optional<std::string>>>
9728
+ url_pattern_component<regex_provider>::fast_match(
9729
+ std::string_view input) const {
9730
+ // Handle each type directly without redundant checks
9731
+ if (type == url_pattern_component_type::FULL_WILDCARD) {
9732
+ // FULL_WILDCARD always matches - capture the input (even if empty)
9733
+ // If there's no group name, return empty groups
9734
+ if (group_name_list.empty()) {
9735
+ return std::vector<std::optional<std::string>>{};
9736
+ }
9737
+ // Capture the matched input (including empty strings)
9738
+ return std::vector<std::optional<std::string>>{std::string(input)};
9739
+ }
9740
+ if (type == url_pattern_component_type::EXACT_MATCH) {
9741
+ if (input == exact_match_value) {
9742
+ return std::vector<std::optional<std::string>>{};
9743
+ }
9744
+ return std::nullopt;
9745
+ }
9746
+ if (type == url_pattern_component_type::EMPTY) {
9747
+ if (input.empty()) {
9748
+ return std::vector<std::optional<std::string>>{};
9749
+ }
9750
+ return std::nullopt;
9751
+ }
9752
+ // type == REGEXP - use regex
9753
+ return regex_provider::regex_search(input, regexp);
9089
9754
  }
9090
9755
 
9091
9756
  template <url_pattern_regex::regex_concept regex_provider>
@@ -9096,18 +9761,88 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::exec(
9096
9761
  return match(input, base_url);
9097
9762
  }
9098
9763
 
9764
+ template <url_pattern_regex::regex_concept regex_provider>
9765
+ bool url_pattern<regex_provider>::test_components(
9766
+ std::string_view protocol, std::string_view username,
9767
+ std::string_view password, std::string_view hostname, std::string_view port,
9768
+ std::string_view pathname, std::string_view search,
9769
+ std::string_view hash) const {
9770
+ return protocol_component.fast_test(protocol) &&
9771
+ username_component.fast_test(username) &&
9772
+ password_component.fast_test(password) &&
9773
+ hostname_component.fast_test(hostname) &&
9774
+ port_component.fast_test(port) &&
9775
+ pathname_component.fast_test(pathname) &&
9776
+ search_component.fast_test(search) && hash_component.fast_test(hash);
9777
+ }
9778
+
9099
9779
  template <url_pattern_regex::regex_concept regex_provider>
9100
9780
  result<bool> url_pattern<regex_provider>::test(
9101
- const url_pattern_input& input, const std::string_view* base_url) {
9102
- // TODO: Optimization opportunity. Rather than returning `url_pattern_result`
9103
- // Implement a fast path just like `can_parse()` in ada_url.
9104
- // Let result be the result of match given this's associated URL pattern,
9105
- // input, and baseURL if given.
9106
- // If result is null, return false.
9107
- if (auto result = match(input, base_url); result.has_value()) {
9108
- return result->has_value();
9781
+ const url_pattern_input& input, const std::string_view* base_url_string) {
9782
+ // If input is a URLPatternInit
9783
+ if (std::holds_alternative<url_pattern_init>(input)) {
9784
+ if (base_url_string) {
9785
+ return tl::unexpected(errors::type_error);
9786
+ }
9787
+
9788
+ std::string protocol{}, username{}, password{}, hostname{};
9789
+ std::string port{}, pathname{}, search{}, hash{};
9790
+
9791
+ auto apply_result = url_pattern_init::process(
9792
+ std::get<url_pattern_init>(input), url_pattern_init::process_type::url,
9793
+ protocol, username, password, hostname, port, pathname, search, hash);
9794
+
9795
+ if (!apply_result) {
9796
+ return false;
9797
+ }
9798
+
9799
+ std::string_view search_view = *apply_result->search;
9800
+ if (search_view.starts_with("?")) {
9801
+ search_view.remove_prefix(1);
9802
+ }
9803
+
9804
+ return test_components(*apply_result->protocol, *apply_result->username,
9805
+ *apply_result->password, *apply_result->hostname,
9806
+ *apply_result->port, *apply_result->pathname,
9807
+ search_view, *apply_result->hash);
9808
+ }
9809
+
9810
+ // URL string input path
9811
+ result<url_aggregator> base_url;
9812
+ if (base_url_string) {
9813
+ base_url = ada::parse<url_aggregator>(*base_url_string, nullptr);
9814
+ if (!base_url) {
9815
+ return false;
9816
+ }
9817
+ }
9818
+
9819
+ auto url =
9820
+ ada::parse<url_aggregator>(std::get<std::string_view>(input),
9821
+ base_url.has_value() ? &*base_url : nullptr);
9822
+ if (!url) {
9823
+ return false;
9824
+ }
9825
+
9826
+ // Extract components as string_view
9827
+ auto protocol_view = url->get_protocol();
9828
+ if (protocol_view.ends_with(":")) {
9829
+ protocol_view.remove_suffix(1);
9830
+ }
9831
+
9832
+ auto search_view = url->get_search();
9833
+ if (search_view.starts_with("?")) {
9834
+ search_view.remove_prefix(1);
9835
+ }
9836
+
9837
+ auto hash_view = url->get_hash();
9838
+ if (hash_view.starts_with("#")) {
9839
+ hash_view.remove_prefix(1);
9109
9840
  }
9110
- return tl::unexpected(errors::type_error);
9841
+
9842
+ return test_components(protocol_view, url->get_username(),
9843
+ url->get_password(), url->get_hostname(),
9844
+ url->get_port(), url->get_pathname(), search_view,
9845
+ hash_view);
9111
9846
  }
9112
9847
 
9113
9848
  template <url_pattern_regex::regex_concept regex_provider>
@@ -9221,24 +9956,24 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
9221
9956
  return std::nullopt;
9222
9957
  }
9223
9958
 
9224
- // Set protocol to urls scheme.
9959
+ // Set protocol to url's scheme.
9225
9960
  // IMPORTANT: Not documented on the URLPattern spec, but protocol suffix ':'
9226
9961
  // is removed. Similar work was done on workerd:
9227
9962
  // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2038
9228
9963
  protocol = url->get_protocol().substr(0, url->get_protocol().size() - 1);
9229
- // Set username to urls username.
9964
+ // Set username to url's username.
9230
9965
  username = url->get_username();
9231
- // Set password to urls password.
9966
+ // Set password to url's password.
9232
9967
  password = url->get_password();
9233
- // Set hostname to urls host, serialized, or the empty string if the value
9968
+ // Set hostname to url's host, serialized, or the empty string if the value
9234
9969
  // is null.
9235
9970
  hostname = url->get_hostname();
9236
- // Set port to urls port, serialized, or the empty string if the value is
9971
+ // Set port to url's port, serialized, or the empty string if the value is
9237
9972
  // null.
9238
9973
  port = url->get_port();
9239
9974
  // Set pathname to the result of URL path serializing url.
9240
9975
  pathname = url->get_pathname();
9241
- // Set search to urls query or the empty string if the value is null.
9976
+ // Set search to url's query or the empty string if the value is null.
9242
9977
  // IMPORTANT: Not documented on the URLPattern spec, but search prefix '?'
9243
9978
  // is removed. Similar work was done on workerd:
9244
9979
  // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2232
@@ -9246,7 +9981,7 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
9246
9981
  auto view = url->get_search();
9247
9982
  search = view.starts_with("?") ? url->get_search().substr(1) : view;
9248
9983
  }
9249
- // Set hash to urls fragment or the empty string if the value is null.
9984
+ // Set hash to url's fragment or the empty string if the value is null.
9250
9985
  // IMPORTANT: Not documented on the URLPattern spec, but hash prefix '#' is
9251
9986
  // removed. Similar work was done on workerd:
9252
9987
  // https://github.com/cloudflare/workerd/blob/8620d14012513a6ce04d079e401d3becac3c67bd/src/workerd/jsg/url.c%2B%2B#L2242
@@ -9256,74 +9991,61 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
9256
9991
  }
9257
9992
  }
9258
9993
 
9259
- // Let protocolExecResult be RegExpBuiltinExec(urlPattern’s protocol
9260
- // component's regular expression, protocol).
9261
- auto protocol_exec_result =
9262
- regex_provider::regex_search(protocol, protocol_component.regexp);
9994
+ // Use fast_match which skips regex for simple patterns (EMPTY, EXACT_MATCH,
9995
+ // FULL_WILDCARD) and only falls back to regex for complex REGEXP patterns.
9263
9996
 
9997
+ // Let protocolExecResult be RegExpBuiltinExec(urlPattern's protocol
9998
+ // component's regular expression, protocol).
9999
+ auto protocol_exec_result = protocol_component.fast_match(protocol);
9264
10000
  if (!protocol_exec_result) {
9265
10001
  return std::nullopt;
9266
10002
  }
9267
10003
 
9268
- // Let usernameExecResult be RegExpBuiltinExec(urlPatterns username
10004
+ // Let usernameExecResult be RegExpBuiltinExec(urlPattern's username
9269
10005
  // component's regular expression, username).
9270
- auto username_exec_result =
9271
- regex_provider::regex_search(username, username_component.regexp);
9272
-
10006
+ auto username_exec_result = username_component.fast_match(username);
9273
10007
  if (!username_exec_result) {
9274
10008
  return std::nullopt;
9275
10009
  }
9276
10010
 
9277
- // Let passwordExecResult be RegExpBuiltinExec(urlPatterns password
10011
+ // Let passwordExecResult be RegExpBuiltinExec(urlPattern's password
9278
10012
  // component's regular expression, password).
9279
- auto password_exec_result =
9280
- regex_provider::regex_search(password, password_component.regexp);
9281
-
10013
+ auto password_exec_result = password_component.fast_match(password);
9282
10014
  if (!password_exec_result) {
9283
10015
  return std::nullopt;
9284
10016
  }
9285
10017
 
9286
- // Let hostnameExecResult be RegExpBuiltinExec(urlPatterns hostname
10018
+ // Let hostnameExecResult be RegExpBuiltinExec(urlPattern's hostname
9287
10019
  // component's regular expression, hostname).
9288
- auto hostname_exec_result =
9289
- regex_provider::regex_search(hostname, hostname_component.regexp);
9290
-
10020
+ auto hostname_exec_result = hostname_component.fast_match(hostname);
9291
10021
  if (!hostname_exec_result) {
9292
10022
  return std::nullopt;
9293
10023
  }
9294
10024
 
9295
- // Let portExecResult be RegExpBuiltinExec(urlPatterns port component's
10025
+ // Let portExecResult be RegExpBuiltinExec(urlPattern's port component's
9296
10026
  // regular expression, port).
9297
- auto port_exec_result =
9298
- regex_provider::regex_search(port, port_component.regexp);
9299
-
10027
+ auto port_exec_result = port_component.fast_match(port);
9300
10028
  if (!port_exec_result) {
9301
10029
  return std::nullopt;
9302
10030
  }
9303
10031
 
9304
- // Let pathnameExecResult be RegExpBuiltinExec(urlPatterns pathname
10032
+ // Let pathnameExecResult be RegExpBuiltinExec(urlPattern's pathname
9305
10033
  // component's regular expression, pathname).
9306
- auto pathname_exec_result =
9307
- regex_provider::regex_search(pathname, pathname_component.regexp);
9308
-
10034
+ auto pathname_exec_result = pathname_component.fast_match(pathname);
9309
10035
  if (!pathname_exec_result) {
9310
10036
  return std::nullopt;
9311
10037
  }
9312
10038
 
9313
- // Let searchExecResult be RegExpBuiltinExec(urlPatterns search component's
10039
+ // Let searchExecResult be RegExpBuiltinExec(urlPattern's search component's
9314
10040
  // regular expression, search).
9315
- auto search_exec_result =
9316
- regex_provider::regex_search(search, search_component.regexp);
9317
-
10041
+ auto search_exec_result = search_component.fast_match(search);
9318
10042
  if (!search_exec_result) {
9319
10043
  return std::nullopt;
9320
10044
  }
9321
10045
 
9322
- // Let hashExecResult be RegExpBuiltinExec(urlPatterns hash component's
10046
+ // Let hashExecResult be RegExpBuiltinExec(urlPattern's hash component's
9323
10047
  // regular expression, hash).
9324
- auto hash_exec_result =
9325
- regex_provider::regex_search(hash, hash_component.regexp);
9326
-
10048
+ auto hash_exec_result = hash_component.fast_match(hash);
9327
10049
  if (!hash_exec_result) {
9328
10050
  return std::nullopt;
9329
10051
  }
@@ -9333,42 +10055,42 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
9333
10055
  // Set result["inputs"] to inputs.
9334
10056
  result.inputs = std::move(inputs);
9335
10057
  // Set result["protocol"] to the result of creating a component match result
9336
- // given urlPatterns protocol component, protocol, and protocolExecResult.
10058
+ // given urlPattern's protocol component, protocol, and protocolExecResult.
9337
10059
  result.protocol = protocol_component.create_component_match_result(
9338
10060
  std::move(protocol), std::move(*protocol_exec_result));
9339
10061
 
9340
10062
  // Set result["username"] to the result of creating a component match result
9341
- // given urlPatterns username component, username, and usernameExecResult.
10063
+ // given urlPattern's username component, username, and usernameExecResult.
9342
10064
  result.username = username_component.create_component_match_result(
9343
10065
  std::move(username), std::move(*username_exec_result));
9344
10066
 
9345
10067
  // Set result["password"] to the result of creating a component match result
9346
- // given urlPatterns password component, password, and passwordExecResult.
10068
+ // given urlPattern's password component, password, and passwordExecResult.
9347
10069
  result.password = password_component.create_component_match_result(
9348
10070
  std::move(password), std::move(*password_exec_result));
9349
10071
 
9350
10072
  // Set result["hostname"] to the result of creating a component match result
9351
- // given urlPatterns hostname component, hostname, and hostnameExecResult.
10073
+ // given urlPattern's hostname component, hostname, and hostnameExecResult.
9352
10074
  result.hostname = hostname_component.create_component_match_result(
9353
10075
  std::move(hostname), std::move(*hostname_exec_result));
9354
10076
 
9355
10077
  // Set result["port"] to the result of creating a component match result given
9356
- // urlPatterns port component, port, and portExecResult.
10078
+ // urlPattern's port component, port, and portExecResult.
9357
10079
  result.port = port_component.create_component_match_result(
9358
10080
  std::move(port), std::move(*port_exec_result));
9359
10081
 
9360
10082
  // Set result["pathname"] to the result of creating a component match result
9361
- // given urlPatterns pathname component, pathname, and pathnameExecResult.
10083
+ // given urlPattern's pathname component, pathname, and pathnameExecResult.
9362
10084
  result.pathname = pathname_component.create_component_match_result(
9363
10085
  std::move(pathname), std::move(*pathname_exec_result));
9364
10086
 
9365
10087
  // Set result["search"] to the result of creating a component match result
9366
- // given urlPatterns search component, search, and searchExecResult.
10088
+ // given urlPattern's search component, search, and searchExecResult.
9367
10089
  result.search = search_component.create_component_match_result(
9368
10090
  std::move(search), std::move(*search_exec_result));
9369
10091
 
9370
10092
  // Set result["hash"] to the result of creating a component match result given
9371
- // urlPatterns hash component, hash, and hashExecResult.
10093
+ // urlPattern's hash component, hash, and hashExecResult.
9372
10094
  result.hash = hash_component.create_component_match_result(
9373
10095
  std::move(hash), std::move(*hash_exec_result));
9374
10096
 
@@ -9376,7 +10098,7 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
9376
10098
  }
9377
10099
 
9378
10100
  } // namespace ada
9379
-
10101
+ #endif // ADA_INCLUDE_URL_PATTERN
9380
10102
  #endif
9381
10103
  /* end file include/ada/url_pattern-inl.h */
9382
10104
  /* begin file include/ada/url_pattern_helpers-inl.h */
@@ -9391,8 +10113,9 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
9391
10113
  #include <string_view>
9392
10114
 
9393
10115
 
10116
+ #if ADA_INCLUDE_URL_PATTERN
9394
10117
  namespace ada::url_pattern_helpers {
9395
- #ifdef ADA_TESTING
10118
+ #if defined(ADA_TESTING) || defined(ADA_LOGGING)
9396
10119
  inline std::string to_string(token_type type) {
9397
10120
  switch (type) {
9398
10121
  case token_type::INVALID_CHAR:
@@ -9419,32 +10142,32 @@ inline std::string to_string(token_type type) {
9419
10142
  ada::unreachable();
9420
10143
  }
9421
10144
  }
9422
- #endif // ADA_TESTING
10145
+ #endif // defined(ADA_TESTING) || defined(ADA_LOGGING)
9423
10146
 
9424
10147
  template <url_pattern_regex::regex_concept regex_provider>
9425
10148
  constexpr void constructor_string_parser<regex_provider>::rewind() {
9426
- // Set parsers token index to parsers component start.
10149
+ // Set parser's token index to parser's component start.
9427
10150
  token_index = component_start;
9428
- // Set parsers token increment to 0.
10151
+ // Set parser's token increment to 0.
9429
10152
  token_increment = 0;
9430
10153
  }
9431
10154
 
9432
10155
  template <url_pattern_regex::regex_concept regex_provider>
9433
10156
  constexpr bool constructor_string_parser<regex_provider>::is_hash_prefix() {
9434
10157
  // Return the result of running is a non-special pattern char given parser,
9435
- // parsers token index and "#".
10158
+ // parser's token index and "#".
9436
10159
  return is_non_special_pattern_char(token_index, '#');
9437
10160
  }
9438
10161
 
9439
10162
  template <url_pattern_regex::regex_concept regex_provider>
9440
10163
  constexpr bool constructor_string_parser<regex_provider>::is_search_prefix() {
9441
- // If result of running is a non-special pattern char given parser, parsers
10164
+ // If result of running is a non-special pattern char given parser, parser's
9442
10165
  // token index and "?" is true, then return true.
9443
10166
  if (is_non_special_pattern_char(token_index, '?')) {
9444
10167
  return true;
9445
10168
  }
9446
10169
 
9447
- // If parsers token list[parsers token index]'s value is not "?", then
10170
+ // If parser's token list[parser's token index]'s value is not "?", then
9448
10171
  // return false.
9449
10172
  if (token_list[token_index].value != "?") {
9450
10173
  return false;
@@ -9452,17 +10175,17 @@ constexpr bool constructor_string_parser<regex_provider>::is_search_prefix() {
9452
10175
 
9453
10176
  // If previous index is less than 0, then return true.
9454
10177
  if (token_index == 0) return true;
9455
- // Let previous index be parsers token index 1.
10178
+ // Let previous index be parser's token index - 1.
9456
10179
  auto previous_index = token_index - 1;
9457
10180
  // Let previous token be the result of running get a safe token given parser
9458
10181
  // and previous index.
9459
10182
  auto previous_token = get_safe_token(previous_index);
9460
10183
  ADA_ASSERT_TRUE(previous_token);
9461
10184
  // If any of the following are true, then return false:
9462
- // - previous tokens type is "name".
9463
- // - previous tokens type is "regexp".
9464
- // - previous tokens type is "close".
9465
- // - previous tokens type is "asterisk".
10185
+ // - previous token's type is "name".
10186
+ // - previous token's type is "regexp".
10187
+ // - previous token's type is "close".
10188
+ // - previous token's type is "asterisk".
9466
10189
  return !(previous_token->type == token_type::NAME ||
9467
10190
  previous_token->type == token_type::REGEXP ||
9468
10191
  previous_token->type == token_type::CLOSE ||
@@ -9477,17 +10200,18 @@ constructor_string_parser<regex_provider>::is_non_special_pattern_char(
9477
10200
  auto token = get_safe_token(index);
9478
10201
  ADA_ASSERT_TRUE(token);
9479
10202
 
9480
- // If tokens value is not value, then return false.
10203
+ // If token's value is not value, then return false.
9481
10204
  // TODO: Remove this once we make sure get_safe_token returns a non-empty
9482
10205
  // string.
9483
- if (!token->value.empty() && token->value[0] != value) {
10206
+ if (!token->value.empty() &&
10207
+ static_cast<uint32_t>(token->value[0]) != value) {
9484
10208
  return false;
9485
10209
  }
9486
10210
 
9487
10211
  // If any of the following are true:
9488
- // - tokens type is "char";
9489
- // - tokens type is "escaped-char"; or
9490
- // - tokens type is "invalid-char",
10212
+ // - token's type is "char";
10213
+ // - token's type is "escaped-char"; or
10214
+ // - token's type is "invalid-char",
9491
10215
  // - then return true.
9492
10216
  return token->type == token_type::CHAR ||
9493
10217
  token->type == token_type::ESCAPED_CHAR ||
@@ -9497,17 +10221,17 @@ constructor_string_parser<regex_provider>::is_non_special_pattern_char(
9497
10221
  template <url_pattern_regex::regex_concept regex_provider>
9498
10222
  constexpr const token*
9499
10223
  constructor_string_parser<regex_provider>::get_safe_token(size_t index) const {
9500
- // If index is less than parsers token list's size, then return parsers
10224
+ // If index is less than parser's token list's size, then return parser's
9501
10225
  // token list[index].
9502
10226
  if (index < token_list.size()) [[likely]] {
9503
10227
  return &token_list[index];
9504
10228
  }
9505
10229
 
9506
- // Assert: parsers token list's size is greater than or equal to 1.
10230
+ // Assert: parser's token list's size is greater than or equal to 1.
9507
10231
  ADA_ASSERT_TRUE(!token_list.empty());
9508
10232
 
9509
- // Let token be parsers token list[last index].
9510
- // Assert: tokens type is "end".
10233
+ // Let token be parser's token list[last index].
10234
+ // Assert: token's type is "end".
9511
10235
  ADA_ASSERT_TRUE(token_list.back().type == token_type::END);
9512
10236
 
9513
10237
  // Return token.
@@ -9517,7 +10241,7 @@ constructor_string_parser<regex_provider>::get_safe_token(size_t index) const {
9517
10241
  template <url_pattern_regex::regex_concept regex_provider>
9518
10242
  constexpr bool constructor_string_parser<regex_provider>::is_group_open()
9519
10243
  const {
9520
- // If parsers token list[parsers token index]'s type is "open", then return
10244
+ // If parser's token list[parser's token index]'s type is "open", then return
9521
10245
  // true.
9522
10246
  return token_list[token_index].type == token_type::OPEN;
9523
10247
  }
@@ -9525,7 +10249,7 @@ constexpr bool constructor_string_parser<regex_provider>::is_group_open()
9525
10249
  template <url_pattern_regex::regex_concept regex_provider>
9526
10250
  constexpr bool constructor_string_parser<regex_provider>::is_group_close()
9527
10251
  const {
9528
- // If parsers token list[parsers token index]'s type is "close", then return
10252
+ // If parser's token list[parser's token index]'s type is "close", then return
9529
10253
  // true.
9530
10254
  return token_list[token_index].type == token_type::CLOSE;
9531
10255
  }
@@ -9534,12 +10258,12 @@ template <url_pattern_regex::regex_concept regex_provider>
9534
10258
  constexpr bool
9535
10259
  constructor_string_parser<regex_provider>::next_is_authority_slashes() const {
9536
10260
  // If the result of running is a non-special pattern char given parser,
9537
- // parsers token index + 1, and "/" is false, then return false.
10261
+ // parser's token index + 1, and "/" is false, then return false.
9538
10262
  if (!is_non_special_pattern_char(token_index + 1, '/')) {
9539
10263
  return false;
9540
10264
  }
9541
10265
  // If the result of running is a non-special pattern char given parser,
9542
- // parsers token index + 2, and "/" is false, then return false.
10266
+ // parser's token index + 2, and "/" is false, then return false.
9543
10267
  if (!is_non_special_pattern_char(token_index + 2, '/')) {
9544
10268
  return false;
9545
10269
  }
@@ -9550,15 +10274,15 @@ template <url_pattern_regex::regex_concept regex_provider>
9550
10274
  constexpr bool constructor_string_parser<regex_provider>::is_protocol_suffix()
9551
10275
  const {
9552
10276
  // Return the result of running is a non-special pattern char given parser,
9553
- // parsers token index, and ":".
10277
+ // parser's token index, and ":".
9554
10278
  return is_non_special_pattern_char(token_index, ':');
9555
10279
  }
9556
10280
 
9557
10281
  template <url_pattern_regex::regex_concept regex_provider>
9558
10282
  void constructor_string_parser<regex_provider>::change_state(State new_state,
9559
10283
  size_t skip) {
9560
- // If parsers state is not "init", not "authority", and not "done", then set
9561
- // parsers result[parsers state] to the result of running make a component
10284
+ // If parser's state is not "init", not "authority", and not "done", then set
10285
+ // parser's result[parser's state] to the result of running make a component
9562
10286
  // string given parser.
9563
10287
  if (state != State::INIT && state != State::AUTHORITY &&
9564
10288
  state != State::DONE) {
@@ -9602,11 +10326,11 @@ void constructor_string_parser<regex_provider>::change_state(State new_state,
9602
10326
  }
9603
10327
  }
9604
10328
 
9605
- // If parsers state is not "init" and new state is not "done", then:
10329
+ // If parser's state is not "init" and new state is not "done", then:
9606
10330
  if (state != State::INIT && new_state != State::DONE) {
9607
- // If parsers state is "protocol", "authority", "username", or "password";
9608
- // new state is "port", "pathname", "search", or "hash"; and parsers
9609
- // result["hostname"] does not exist, then set parsers result["hostname"]
10331
+ // If parser's state is "protocol", "authority", "username", or "password";
10332
+ // new state is "port", "pathname", "search", or "hash"; and parser's
10333
+ // result["hostname"] does not exist, then set parser's result["hostname"]
9610
10334
  // to the empty string.
9611
10335
  if ((state == State::PROTOCOL || state == State::AUTHORITY ||
9612
10336
  state == State::USERNAME || state == State::PASSWORD) &&
@@ -9616,8 +10340,8 @@ void constructor_string_parser<regex_provider>::change_state(State new_state,
9616
10340
  result.hostname = "";
9617
10341
  }
9618
10342
 
9619
- // If parsers state is "protocol", "authority", "username", "password",
9620
- // "hostname", or "port"; new state is "search" or "hash"; and parsers
10343
+ // If parser's state is "protocol", "authority", "username", "password",
10344
+ // "hostname", or "port"; new state is "search" or "hash"; and parser's
9621
10345
  // result["pathname"] does not exist, then:
9622
10346
  if ((state == State::PROTOCOL || state == State::AUTHORITY ||
9623
10347
  state == State::USERNAME || state == State::PASSWORD ||
@@ -9627,14 +10351,14 @@ void constructor_string_parser<regex_provider>::change_state(State new_state,
9627
10351
  if (protocol_matches_a_special_scheme_flag) {
9628
10352
  result.pathname = "/";
9629
10353
  } else {
9630
- // Otherwise, set parsers result["pathname"] to the empty string.
10354
+ // Otherwise, set parser's result["pathname"] to the empty string.
9631
10355
  result.pathname = "";
9632
10356
  }
9633
10357
  }
9634
10358
 
9635
- // If parsers state is "protocol", "authority", "username", "password",
9636
- // "hostname", "port", or "pathname"; new state is "hash"; and parsers
9637
- // result["search"] does not exist, then set parsers result["search"] to
10359
+ // If parser's state is "protocol", "authority", "username", "password",
10360
+ // "hostname", "port", or "pathname"; new state is "hash"; and parser's
10361
+ // result["search"] does not exist, then set parser's result["search"] to
9638
10362
  // the empty string.
9639
10363
  if ((state == State::PROTOCOL || state == State::AUTHORITY ||
9640
10364
  state == State::USERNAME || state == State::PASSWORD ||
@@ -9644,41 +10368,41 @@ void constructor_string_parser<regex_provider>::change_state(State new_state,
9644
10368
  result.search = "";
9645
10369
  }
9646
10370
 
9647
- // Set parsers state to new state.
10371
+ // Set parser's state to new state.
9648
10372
  state = new_state;
9649
- // Increment parsers token index by skip.
10373
+ // Increment parser's token index by skip.
9650
10374
  token_index += skip;
9651
- // Set parsers component start to parsers token index.
10375
+ // Set parser's component start to parser's token index.
9652
10376
  component_start = token_index;
9653
- // Set parsers token increment to 0.
10377
+ // Set parser's token increment to 0.
9654
10378
  token_increment = 0;
9655
10379
  }
9656
10380
 
9657
10381
  template <url_pattern_regex::regex_concept regex_provider>
9658
10382
  std::string constructor_string_parser<regex_provider>::make_component_string() {
9659
- // Assert: parsers token index is less than parsers token list's size.
10383
+ // Assert: parser's token index is less than parser's token list's size.
9660
10384
  ADA_ASSERT_TRUE(token_index < token_list.size());
9661
10385
 
9662
- // Let token be parsers token list[parsers token index].
9663
- // Let end index be tokens index.
10386
+ // Let token be parser's token list[parser's token index].
10387
+ // Let end index be token's index.
9664
10388
  const auto end_index = token_list[token_index].index;
9665
10389
  // Let component start token be the result of running get a safe token given
9666
- // parser and parsers component start.
10390
+ // parser and parser's component start.
9667
10391
  const auto component_start_token = get_safe_token(component_start);
9668
10392
  ADA_ASSERT_TRUE(component_start_token);
9669
- // Let component start input index be component start tokens index.
10393
+ // Let component start input index be component start token's index.
9670
10394
  const auto component_start_input_index = component_start_token->index;
9671
10395
  // Return the code point substring from component start input index to end
9672
- // index within parsers input.
9673
- return input.substr(component_start_input_index,
9674
- end_index - component_start_input_index);
10396
+ // index within parser's input.
10397
+ return std::string(input.substr(component_start_input_index,
10398
+ end_index - component_start_input_index));
9675
10399
  }
9676
10400
 
9677
10401
  template <url_pattern_regex::regex_concept regex_provider>
9678
10402
  constexpr bool
9679
10403
  constructor_string_parser<regex_provider>::is_an_identity_terminator() const {
9680
10404
  // Return the result of running is a non-special pattern char given parser,
9681
- // parsers token index, and "@".
10405
+ // parser's token index, and "@".
9682
10406
  return is_non_special_pattern_char(token_index, '@');
9683
10407
  }
9684
10408
 
@@ -9686,7 +10410,7 @@ template <url_pattern_regex::regex_concept regex_provider>
9686
10410
  constexpr bool constructor_string_parser<regex_provider>::is_pathname_start()
9687
10411
  const {
9688
10412
  // Return the result of running is a non-special pattern char given parser,
9689
- // parsers token index, and "/".
10413
+ // parser's token index, and "/".
9690
10414
  return is_non_special_pattern_char(token_index, '/');
9691
10415
  }
9692
10416
 
@@ -9694,7 +10418,7 @@ template <url_pattern_regex::regex_concept regex_provider>
9694
10418
  constexpr bool constructor_string_parser<regex_provider>::is_password_prefix()
9695
10419
  const {
9696
10420
  // Return the result of running is a non-special pattern char given parser,
9697
- // parsers token index, and ":".
10421
+ // parser's token index, and ":".
9698
10422
  return is_non_special_pattern_char(token_index, ':');
9699
10423
  }
9700
10424
 
@@ -9702,7 +10426,7 @@ template <url_pattern_regex::regex_concept regex_provider>
9702
10426
  constexpr bool constructor_string_parser<regex_provider>::is_an_ipv6_open()
9703
10427
  const {
9704
10428
  // Return the result of running is a non-special pattern char given parser,
9705
- // parsers token index, and "[".
10429
+ // parser's token index, and "[".
9706
10430
  return is_non_special_pattern_char(token_index, '[');
9707
10431
  }
9708
10432
 
@@ -9710,7 +10434,7 @@ template <url_pattern_regex::regex_concept regex_provider>
9710
10434
  constexpr bool constructor_string_parser<regex_provider>::is_an_ipv6_close()
9711
10435
  const {
9712
10436
  // Return the result of running is a non-special pattern char given parser,
9713
- // parsers token index, and "]".
10437
+ // parser's token index, and "]".
9714
10438
  return is_non_special_pattern_char(token_index, ']');
9715
10439
  }
9716
10440
 
@@ -9718,7 +10442,7 @@ template <url_pattern_regex::regex_concept regex_provider>
9718
10442
  constexpr bool constructor_string_parser<regex_provider>::is_port_prefix()
9719
10443
  const {
9720
10444
  // Return the result of running is a non-special pattern char given parser,
9721
- // parsers token index, and ":".
10445
+ // parser's token index, and ":".
9722
10446
  return is_non_special_pattern_char(token_index, ':');
9723
10447
  }
9724
10448
 
@@ -9772,7 +10496,7 @@ constexpr void Tokenizer::get_next_code_point() {
9772
10496
  constexpr void Tokenizer::seek_and_get_next_code_point(size_t new_index) {
9773
10497
  ada_log("Tokenizer::seek_and_get_next_code_point called with new_index=",
9774
10498
  new_index);
9775
- // Set tokenizers next index to index.
10499
+ // Set tokenizer's next index to index.
9776
10500
  next_index = new_index;
9777
10501
  // Run get the next code point given tokenizer.
9778
10502
  get_next_code_point();
@@ -9785,21 +10509,21 @@ inline void Tokenizer::add_token(token_type type, size_t next_position,
9785
10509
  ADA_ASSERT_TRUE(next_position >= value_position);
9786
10510
 
9787
10511
  // Let token be a new token.
9788
- // Set tokens type to type.
9789
- // Set tokens index to tokenizers index.
9790
- // Set tokens value to the code point substring from value position with
9791
- // length value length within tokenizers input.
9792
- // Append token to the back of tokenizers token list.
10512
+ // Set token's type to type.
10513
+ // Set token's index to tokenizer's index.
10514
+ // Set token's value to the code point substring from value position with
10515
+ // length value length within tokenizer's input.
10516
+ // Append token to the back of tokenizer's token list.
9793
10517
  token_list.emplace_back(type, index,
9794
10518
  input.substr(value_position, value_length));
9795
- // Set tokenizers index to next position.
10519
+ // Set tokenizer's index to next position.
9796
10520
  index = next_position;
9797
10521
  }
9798
10522
 
9799
10523
  inline void Tokenizer::add_token_with_default_length(token_type type,
9800
10524
  size_t next_position,
9801
10525
  size_t value_position) {
9802
- // Let computed length be next position value position.
10526
+ // Let computed length be next position - value position.
9803
10527
  auto computed_length = next_position - value_position;
9804
10528
  // Run add a token given tokenizer, type, next position, value position, and
9805
10529
  // computed length.
@@ -9809,21 +10533,21 @@ inline void Tokenizer::add_token_with_default_length(token_type type,
9809
10533
  inline void Tokenizer::add_token_with_defaults(token_type type) {
9810
10534
  ada_log("Tokenizer::add_token_with_defaults called with type=",
9811
10535
  to_string(type));
9812
- // Run add a token with default length given tokenizer, type, tokenizers next
9813
- // index, and tokenizers index.
10536
+ // Run add a token with default length given tokenizer, type, tokenizer's next
10537
+ // index, and tokenizer's index.
9814
10538
  add_token_with_default_length(type, next_index, index);
9815
10539
  }
9816
10540
 
9817
10541
  inline ada_warn_unused std::optional<errors>
9818
10542
  Tokenizer::process_tokenizing_error(size_t next_position,
9819
10543
  size_t value_position) {
9820
- // If tokenizers policy is "strict", then throw a TypeError.
10544
+ // If tokenizer's policy is "strict", then throw a TypeError.
9821
10545
  if (policy == token_policy::strict) {
9822
10546
  ada_log("process_tokenizing_error failed with next_position=",
9823
10547
  next_position, " value_position=", value_position);
9824
10548
  return errors::type_error;
9825
10549
  }
9826
- // Assert: tokenizers policy is "lenient".
10550
+ // Assert: tokenizer's policy is "lenient".
9827
10551
  ADA_ASSERT_TRUE(policy == token_policy::lenient);
9828
10552
  // Run add a token with default length given tokenizer, "invalid-char", next
9829
10553
  // position, and value position.
@@ -9864,13 +10588,13 @@ template <url_pattern_encoding_callback F>
9864
10588
  token* url_pattern_parser<F>::try_consume_token(token_type type) {
9865
10589
  ada_log("url_pattern_parser::try_consume_token called with type=",
9866
10590
  to_string(type));
9867
- // Assert: parsers index is less than parsers token list size.
10591
+ // Assert: parser's index is less than parser's token list size.
9868
10592
  ADA_ASSERT_TRUE(index < tokens.size());
9869
- // Let next token be parsers token list[parsers index].
10593
+ // Let next token be parser's token list[parser's index].
9870
10594
  auto& next_token = tokens[index];
9871
- // If next tokens type is not type return null.
10595
+ // If next token's type is not type return null.
9872
10596
  if (next_token.type != type) return nullptr;
9873
- // Increase parsers index by 1.
10597
+ // Increase parser's index by 1.
9874
10598
  index++;
9875
10599
  // Return next token.
9876
10600
  return &next_token;
@@ -9890,7 +10614,7 @@ std::string url_pattern_parser<F>::consume_text() {
9890
10614
  if (!token) token = try_consume_token(token_type::ESCAPED_CHAR);
9891
10615
  // If token is null, then break.
9892
10616
  if (!token) break;
9893
- // Append tokens value to the end of result.
10617
+ // Append token's value to the end of result.
9894
10618
  result.append(token->value);
9895
10619
  }
9896
10620
  // Return result.
@@ -9909,23 +10633,23 @@ bool url_pattern_parser<F>::consume_required_token(token_type type) {
9909
10633
  template <url_pattern_encoding_callback F>
9910
10634
  std::optional<errors>
9911
10635
  url_pattern_parser<F>::maybe_add_part_from_the_pending_fixed_value() {
9912
- // If parsers pending fixed value is the empty string, then return.
10636
+ // If parser's pending fixed value is the empty string, then return.
9913
10637
  if (pending_fixed_value.empty()) {
9914
10638
  ada_log("pending_fixed_value is empty");
9915
10639
  return std::nullopt;
9916
10640
  }
9917
- // Let encoded value be the result of running parsers encoding callback given
9918
- // parsers pending fixed value.
10641
+ // Let encoded value be the result of running parser's encoding callback given
10642
+ // parser's pending fixed value.
9919
10643
  auto encoded_value = encoding_callback(pending_fixed_value);
9920
10644
  if (!encoded_value) {
9921
10645
  ada_log("failed to encode pending_fixed_value: ", pending_fixed_value);
9922
10646
  return encoded_value.error();
9923
10647
  }
9924
- // Set parsers pending fixed value to the empty string.
10648
+ // Set parser's pending fixed value to the empty string.
9925
10649
  pending_fixed_value.clear();
9926
10650
  // Let part be a new part whose type is "fixed-text", value is encoded value,
9927
10651
  // and modifier is "none".
9928
- // Append part to parsers part list.
10652
+ // Append part to parser's part list.
9929
10653
  parts.emplace_back(url_pattern_part_type::FIXED_TEXT,
9930
10654
  std::move(*encoded_value),
9931
10655
  url_pattern_part_modifier::none);
@@ -9940,15 +10664,15 @@ std::optional<errors> url_pattern_parser<F>::add_part(
9940
10664
  auto modifier = url_pattern_part_modifier::none;
9941
10665
  // If modifier token is not null:
9942
10666
  if (modifier_token) {
9943
- // If modifier tokens value is "?" then set modifier to "optional".
10667
+ // If modifier token's value is "?" then set modifier to "optional".
9944
10668
  if (modifier_token->value == "?") {
9945
10669
  modifier = url_pattern_part_modifier::optional;
9946
10670
  } else if (modifier_token->value == "*") {
9947
- // Otherwise if modifier tokens value is "*" then set modifier to
10671
+ // Otherwise if modifier token's value is "*" then set modifier to
9948
10672
  // "zero-or-more".
9949
10673
  modifier = url_pattern_part_modifier::zero_or_more;
9950
10674
  } else if (modifier_token->value == "+") {
9951
- // Otherwise if modifier tokens value is "+" then set modifier to
10675
+ // Otherwise if modifier token's value is "+" then set modifier to
9952
10676
  // "one-or-more".
9953
10677
  modifier = url_pattern_part_modifier::one_or_more;
9954
10678
  }
@@ -9957,7 +10681,7 @@ std::optional<errors> url_pattern_parser<F>::add_part(
9957
10681
  // is "none":
9958
10682
  if (!name_token && !regexp_or_wildcard_token &&
9959
10683
  modifier == url_pattern_part_modifier::none) {
9960
- // Append prefix to the end of parsers pending fixed value.
10684
+ // Append prefix to the end of parser's pending fixed value.
9961
10685
  pending_fixed_value.append(prefix);
9962
10686
  return std::nullopt;
9963
10687
  }
@@ -9971,7 +10695,7 @@ std::optional<errors> url_pattern_parser<F>::add_part(
9971
10695
  ADA_ASSERT_TRUE(suffix.empty());
9972
10696
  // If prefix is the empty string, then return.
9973
10697
  if (prefix.empty()) return std::nullopt;
9974
- // Let encoded value be the result of running parsers encoding callback
10698
+ // Let encoded value be the result of running parser's encoding callback
9975
10699
  // given prefix.
9976
10700
  auto encoded_value = encoding_callback(prefix);
9977
10701
  if (!encoded_value) {
@@ -9979,28 +10703,28 @@ std::optional<errors> url_pattern_parser<F>::add_part(
9979
10703
  }
9980
10704
  // Let part be a new part whose type is "fixed-text", value is encoded
9981
10705
  // value, and modifier is modifier.
9982
- // Append part to parsers part list.
10706
+ // Append part to parser's part list.
9983
10707
  parts.emplace_back(url_pattern_part_type::FIXED_TEXT,
9984
10708
  std::move(*encoded_value), modifier);
9985
10709
  return std::nullopt;
9986
10710
  }
9987
10711
  // Let regexp value be the empty string.
9988
10712
  std::string regexp_value{};
9989
- // If regexp or wildcard token is null, then set regexp value to parsers
10713
+ // If regexp or wildcard token is null, then set regexp value to parser's
9990
10714
  // segment wildcard regexp.
9991
10715
  if (!regexp_or_wildcard_token) {
9992
10716
  regexp_value = segment_wildcard_regexp;
9993
10717
  } else if (regexp_or_wildcard_token->type == token_type::ASTERISK) {
9994
- // Otherwise if regexp or wildcard tokens type is "asterisk", then set
10718
+ // Otherwise if regexp or wildcard token's type is "asterisk", then set
9995
10719
  // regexp value to the full wildcard regexp value.
9996
10720
  regexp_value = ".*";
9997
10721
  } else {
9998
- // Otherwise set regexp value to regexp or wildcard tokens value.
10722
+ // Otherwise set regexp value to regexp or wildcard token's value.
9999
10723
  regexp_value = regexp_or_wildcard_token->value;
10000
10724
  }
10001
10725
  // Let type be "regexp".
10002
10726
  auto type = url_pattern_part_type::REGEXP;
10003
- // If regexp value is parsers segment wildcard regexp:
10727
+ // If regexp value is parser's segment wildcard regexp:
10004
10728
  if (regexp_value == segment_wildcard_regexp) {
10005
10729
  // Set type to "segment-wildcard".
10006
10730
  type = url_pattern_part_type::SEGMENT_WILDCARD;
@@ -10015,14 +10739,14 @@ std::optional<errors> url_pattern_parser<F>::add_part(
10015
10739
  }
10016
10740
  // Let name be the empty string.
10017
10741
  std::string name{};
10018
- // If name token is not null, then set name to name tokens value.
10742
+ // If name token is not null, then set name to name token's value.
10019
10743
  if (name_token) {
10020
10744
  name = name_token->value;
10021
10745
  } else if (regexp_or_wildcard_token != nullptr) {
10022
10746
  // Otherwise if regexp or wildcard token is not null:
10023
- // Set name to parsers next numeric name, serialized.
10747
+ // Set name to parser's next numeric name, serialized.
10024
10748
  name = std::to_string(next_numeric_name);
10025
- // Increment parsers next numeric name by 1.
10749
+ // Increment parser's next numeric name by 1.
10026
10750
  next_numeric_name++;
10027
10751
  }
10028
10752
  // If the result of running is a duplicate name given parser and name is
@@ -10031,18 +10755,18 @@ std::optional<errors> url_pattern_parser<F>::add_part(
10031
10755
  parts, [&name](const auto& part) { return part.name == name; })) {
10032
10756
  return errors::type_error;
10033
10757
  }
10034
- // Let encoded prefix be the result of running parsers encoding callback
10758
+ // Let encoded prefix be the result of running parser's encoding callback
10035
10759
  // given prefix.
10036
10760
  auto encoded_prefix = encoding_callback(prefix);
10037
10761
  if (!encoded_prefix) return encoded_prefix.error();
10038
- // Let encoded suffix be the result of running parsers encoding callback
10762
+ // Let encoded suffix be the result of running parser's encoding callback
10039
10763
  // given suffix.
10040
10764
  auto encoded_suffix = encoding_callback(suffix);
10041
10765
  if (!encoded_suffix) return encoded_suffix.error();
10042
10766
  // Let part be a new part whose type is type, value is regexp value,
10043
10767
  // modifier is modifier, name is name, prefix is encoded prefix, and suffix
10044
10768
  // is encoded suffix.
10045
- // Append part to parsers part list.
10769
+ // Append part to parser's part list.
10046
10770
  parts.emplace_back(type, std::move(regexp_value), modifier, std::move(name),
10047
10771
  std::move(*encoded_prefix), std::move(*encoded_suffix));
10048
10772
  return std::nullopt;
@@ -10058,7 +10782,7 @@ tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
10058
10782
  // segment wildcard regexp given options.
10059
10783
  auto parser = url_pattern_parser<F>(
10060
10784
  encoding_callback, generate_segment_wildcard_regexp(options));
10061
- // Set parsers token list to the result of running tokenize given input and
10785
+ // Set parser's token list to the result of running tokenize given input and
10062
10786
  // "strict".
10063
10787
  auto tokenize_result = tokenize(input, token_policy::strict);
10064
10788
  if (!tokenize_result) {
@@ -10067,7 +10791,7 @@ tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
10067
10791
  }
10068
10792
  parser.tokens = std::move(*tokenize_result);
10069
10793
 
10070
- // While parsers index is less than parsers token list's size:
10794
+ // While parser's index is less than parser's token list's size:
10071
10795
  while (parser.can_continue()) {
10072
10796
  // Let char token be the result of running try to consume a token given
10073
10797
  // parser and "char".
@@ -10083,11 +10807,11 @@ tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
10083
10807
  if (name_token || regexp_or_wildcard_token) {
10084
10808
  // Let prefix be the empty string.
10085
10809
  std::string prefix{};
10086
- // If char token is not null then set prefix to char tokens value.
10810
+ // If char token is not null then set prefix to char token's value.
10087
10811
  if (char_token) prefix = char_token->value;
10088
- // If prefix is not the empty string and not optionss prefix code point:
10812
+ // If prefix is not the empty string and not options's prefix code point:
10089
10813
  if (!prefix.empty() && prefix != options.get_prefix()) {
10090
- // Append prefix to the end of parsers pending fixed value.
10814
+ // Append prefix to the end of parser's pending fixed value.
10091
10815
  parser.pending_fixed_value.append(prefix);
10092
10816
  // Set prefix to the empty string.
10093
10817
  prefix.clear();
@@ -10120,7 +10844,7 @@ tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
10120
10844
  fixed_token = parser.try_consume_token(token_type::ESCAPED_CHAR);
10121
10845
  // If fixed token is not null:
10122
10846
  if (fixed_token) {
10123
- // Append fixed tokens value to parsers pending fixed value.
10847
+ // Append fixed token's value to parser's pending fixed value.
10124
10848
  parser.pending_fixed_value.append(fixed_token->value);
10125
10849
  // Continue.
10126
10850
  continue;
@@ -10170,20 +10894,38 @@ tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
10170
10894
  }
10171
10895
  }
10172
10896
  ada_log("parser.parts size is: ", parser.parts.size());
10173
- // Return parsers part list.
10897
+ // Return parser's part list.
10174
10898
  return parser.parts;
10175
10899
  }
10176
10900
 
10177
10901
  template <url_pattern_regex::regex_concept regex_provider>
10178
10902
  bool protocol_component_matches_special_scheme(
10179
10903
  url_pattern_component<regex_provider>& component) {
10180
- // let's avoid unnecessary copy here.
10181
- auto& regex = component.regexp;
10182
- return regex_provider::regex_match("http", regex) ||
10183
- regex_provider::regex_match("https", regex) ||
10184
- regex_provider::regex_match("ws", regex) ||
10185
- regex_provider::regex_match("wss", regex) ||
10186
- regex_provider::regex_match("ftp", regex);
10904
+ // Optimization: Use fast_test for simple patterns to avoid regex overhead
10905
+ switch (component.type) {
10906
+ case url_pattern_component_type::EMPTY:
10907
+ // Empty pattern can't match any special scheme
10908
+ return false;
10909
+ case url_pattern_component_type::EXACT_MATCH:
10910
+ // Direct string comparison for exact match patterns
10911
+ return component.exact_match_value == "http" ||
10912
+ component.exact_match_value == "https" ||
10913
+ component.exact_match_value == "ws" ||
10914
+ component.exact_match_value == "wss" ||
10915
+ component.exact_match_value == "ftp";
10916
+ case url_pattern_component_type::FULL_WILDCARD:
10917
+ // Full wildcard matches everything including special schemes
10918
+ return true;
10919
+ case url_pattern_component_type::REGEXP:
10920
+ // Fall back to regex matching for complex patterns
10921
+ auto& regex = component.regexp;
10922
+ return regex_provider::regex_match("http", regex) ||
10923
+ regex_provider::regex_match("https", regex) ||
10924
+ regex_provider::regex_match("ws", regex) ||
10925
+ regex_provider::regex_match("wss", regex) ||
10926
+ regex_provider::regex_match("ftp", regex);
10927
+ }
10928
+ ada::unreachable();
10187
10929
  }
10188
10930
 
10189
10931
  template <url_pattern_regex::regex_concept regex_provider>
@@ -10206,7 +10948,7 @@ inline std::optional<errors> constructor_string_parser<
10206
10948
  return protocol_component.error();
10207
10949
  }
10208
10950
  // If the result of running protocol component matches a special scheme given
10209
- // protocol component is true, then set parsers protocol matches a special
10951
+ // protocol component is true, then set parser's protocol matches a special
10210
10952
  // scheme flag to true.
10211
10953
  if (protocol_component_matches_special_scheme(*protocol_component)) {
10212
10954
  protocol_matches_a_special_scheme_flag = true;
@@ -10226,14 +10968,14 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
10226
10968
  }
10227
10969
  auto parser = constructor_string_parser(input, std::move(*token_list));
10228
10970
 
10229
- // While parsers token index is less than parsers token list size:
10971
+ // While parser's token index is less than parser's token list size:
10230
10972
  while (parser.token_index < parser.token_list.size()) {
10231
- // Set parsers token increment to 1.
10973
+ // Set parser's token increment to 1.
10232
10974
  parser.token_increment = 1;
10233
10975
 
10234
- // If parsers token list[parsers token index]'s type is "end" then:
10976
+ // If parser's token list[parser's token index]'s type is "end" then:
10235
10977
  if (parser.token_list[parser.token_index].type == token_type::END) {
10236
- // If parsers state is "init":
10978
+ // If parser's state is "init":
10237
10979
  if (parser.state == State::INIT) {
10238
10980
  // Run rewind given parser.
10239
10981
  parser.rewind();
@@ -10249,18 +10991,18 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
10249
10991
  // Run change state given parser, "pathname" and 0.
10250
10992
  parser.change_state(State::PATHNAME, 0);
10251
10993
  }
10252
- // Increment parsers token index by parsers token increment.
10994
+ // Increment parser's token index by parser's token increment.
10253
10995
  parser.token_index += parser.token_increment;
10254
10996
  // Continue.
10255
10997
  continue;
10256
10998
  }
10257
10999
 
10258
11000
  if (parser.state == State::AUTHORITY) {
10259
- // If parsers state is "authority":
11001
+ // If parser's state is "authority":
10260
11002
  // Run rewind and set state given parser, and "hostname".
10261
11003
  parser.rewind();
10262
11004
  parser.change_state(State::HOSTNAME, 0);
10263
- // Increment parsers token index by parsers token increment.
11005
+ // Increment parser's token index by parser's token increment.
10264
11006
  parser.token_index += parser.token_increment;
10265
11007
  // Continue.
10266
11008
  continue;
@@ -10274,26 +11016,26 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
10274
11016
 
10275
11017
  // If the result of running is a group open given parser is true:
10276
11018
  if (parser.is_group_open()) {
10277
- // Increment parsers group depth by 1.
11019
+ // Increment parser's group depth by 1.
10278
11020
  parser.group_depth += 1;
10279
- // Increment parsers token index by parsers token increment.
11021
+ // Increment parser's token index by parser's token increment.
10280
11022
  parser.token_index += parser.token_increment;
10281
11023
  }
10282
11024
 
10283
- // If parsers group depth is greater than 0:
11025
+ // If parser's group depth is greater than 0:
10284
11026
  if (parser.group_depth > 0) {
10285
11027
  // If the result of running is a group close given parser is true, then
10286
- // decrement parsers group depth by 1.
11028
+ // decrement parser's group depth by 1.
10287
11029
  if (parser.is_group_close()) {
10288
11030
  parser.group_depth -= 1;
10289
11031
  } else {
10290
- // Increment parsers token index by parsers token increment.
11032
+ // Increment parser's token index by parser's token increment.
10291
11033
  parser.token_index += parser.token_increment;
10292
11034
  continue;
10293
11035
  }
10294
11036
  }
10295
11037
 
10296
- // Switch on parsers state and run the associated steps:
11038
+ // Switch on parser's state and run the associated steps:
10297
11039
  switch (parser.state) {
10298
11040
  case State::INIT: {
10299
11041
  // If the result of running is a protocol suffix given parser is true:
@@ -10325,7 +11067,7 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
10325
11067
  // Set skip to 3.
10326
11068
  skip = 3;
10327
11069
  } else if (parser.protocol_matches_a_special_scheme_flag) {
10328
- // Otherwise if parsers protocol matches a special scheme flag is
11070
+ // Otherwise if parser's protocol matches a special scheme flag is
10329
11071
  // true, then set next state to "authority".
10330
11072
  next_state = State::AUTHORITY;
10331
11073
  }
@@ -10376,17 +11118,17 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
10376
11118
  }
10377
11119
  case State::HOSTNAME: {
10378
11120
  // If the result of running is an IPv6 open given parser is true, then
10379
- // increment parsers hostname IPv6 bracket depth by 1.
11121
+ // increment parser's hostname IPv6 bracket depth by 1.
10380
11122
  if (parser.is_an_ipv6_open()) {
10381
11123
  parser.hostname_ipv6_bracket_depth += 1;
10382
11124
  } else if (parser.is_an_ipv6_close()) {
10383
11125
  // Otherwise if the result of running is an IPv6 close given parser is
10384
- // true, then decrement parsers hostname IPv6 bracket depth by 1.
11126
+ // true, then decrement parser's hostname IPv6 bracket depth by 1.
10385
11127
  parser.hostname_ipv6_bracket_depth -= 1;
10386
11128
  } else if (parser.is_port_prefix() &&
10387
11129
  parser.hostname_ipv6_bracket_depth == 0) {
10388
11130
  // Otherwise if the result of running is a port prefix given parser is
10389
- // true and parsers hostname IPv6 bracket depth is zero, then run
11131
+ // true and parser's hostname IPv6 bracket depth is zero, then run
10390
11132
  // change state given parser, "port", and 1.
10391
11133
  parser.change_state(State::PORT, 1);
10392
11134
  } else if (parser.is_pathname_start()) {
@@ -10439,6 +11181,7 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
10439
11181
  if (parser.is_hash_prefix()) {
10440
11182
  parser.change_state(State::HASH, 1);
10441
11183
  }
11184
+ break;
10442
11185
  }
10443
11186
  case State::HASH: {
10444
11187
  // Do nothing
@@ -10450,22 +11193,22 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
10450
11193
  }
10451
11194
  }
10452
11195
 
10453
- // Increment parsers token index by parsers token increment.
11196
+ // Increment parser's token index by parser's token increment.
10454
11197
  parser.token_index += parser.token_increment;
10455
11198
  }
10456
11199
 
10457
- // If parsers result contains "hostname" and not "port", then set parsers
11200
+ // If parser's result contains "hostname" and not "port", then set parser's
10458
11201
  // result["port"] to the empty string.
10459
11202
  if (parser.result.hostname && !parser.result.port) {
10460
11203
  parser.result.port = "";
10461
11204
  }
10462
11205
 
10463
- // Return parsers result.
11206
+ // Return parser's result.
10464
11207
  return parser.result;
10465
11208
  }
10466
11209
 
10467
11210
  } // namespace ada::url_pattern_helpers
10468
-
11211
+ #endif // ADA_INCLUDE_URL_PATTERN
10469
11212
  #endif
10470
11213
  /* end file include/ada/url_pattern_helpers-inl.h */
10471
11214
 
@@ -10478,13 +11221,13 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
10478
11221
  #ifndef ADA_ADA_VERSION_H
10479
11222
  #define ADA_ADA_VERSION_H
10480
11223
 
10481
- #define ADA_VERSION "3.1.2"
11224
+ #define ADA_VERSION "3.4.2"
10482
11225
 
10483
11226
  namespace ada {
10484
11227
 
10485
11228
  enum {
10486
11229
  ADA_VERSION_MAJOR = 3,
10487
- ADA_VERSION_MINOR = 1,
11230
+ ADA_VERSION_MINOR = 4,
10488
11231
  ADA_VERSION_REVISION = 2,
10489
11232
  };
10490
11233
 
@@ -10500,19 +11243,22 @@ enum {
10500
11243
  #define ADA_IMPLEMENTATION_INL_H
10501
11244
 
10502
11245
 
11246
+
10503
11247
  #include <variant>
10504
11248
  #include <string_view>
10505
11249
 
10506
11250
  namespace ada {
10507
11251
 
11252
+ #if ADA_INCLUDE_URL_PATTERN
10508
11253
  template <url_pattern_regex::regex_concept regex_provider>
10509
11254
  ada_warn_unused tl::expected<url_pattern<regex_provider>, errors>
10510
- parse_url_pattern(std::variant<std::string_view, url_pattern_init> input,
11255
+ parse_url_pattern(std::variant<std::string_view, url_pattern_init>&& input,
10511
11256
  const std::string_view* base_url,
10512
11257
  const url_pattern_options* options) {
10513
11258
  return parser::parse_url_pattern_impl<regex_provider>(std::move(input),
10514
11259
  base_url, options);
10515
11260
  }
11261
+ #endif // ADA_INCLUDE_URL_PATTERN
10516
11262
 
10517
11263
  } // namespace ada
10518
11264