ada-url 1.25.0__cp311-cp311-musllinux_1_2_aarch64.whl → 1.29.0__cp311-cp311-musllinux_1_2_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ada_url/ada.h CHANGED
@@ -1,14 +1,36 @@
1
- /* auto-generated on 2025-07-16 22:15:14 -0400. Do not edit! */
1
+ /* auto-generated on 2026-01-30 13:29:04 -0500. Do not edit! */
2
2
  /* begin file include/ada.h */
3
3
  /**
4
4
  * @file ada.h
5
- * @brief Includes all definitions for Ada.
5
+ * @brief Main header for the Ada URL parser library.
6
+ *
7
+ * This is the primary entry point for the Ada URL parser library. Including
8
+ * this single header provides access to the complete Ada API, including:
9
+ *
10
+ * - URL parsing via `ada::parse()` function
11
+ * - Two URL representations: `ada::url` and `ada::url_aggregator`
12
+ * - URL search parameters via `ada::url_search_params`
13
+ * - URL pattern matching via `ada::url_pattern` (URLPattern API)
14
+ * - IDNA (Internationalized Domain Names) support
15
+ *
16
+ * @example
17
+ * ```cpp
18
+ *
19
+ * // Parse a URL
20
+ * auto url = ada::parse("https://example.com/path?query=1");
21
+ * if (url) {
22
+ * std::cout << url->get_hostname(); // "example.com"
23
+ * }
24
+ * ```
25
+ *
26
+ * @see https://url.spec.whatwg.org/ - WHATWG URL Standard
27
+ * @see https://github.com/ada-url/ada - Ada URL Parser GitHub Repository
6
28
  */
7
29
  #ifndef ADA_H
8
30
  #define ADA_H
9
31
 
10
32
  /* begin file include/ada/ada_idna.h */
11
- /* auto-generated on 2025-03-08 13:17:11 -0500. Do not edit! */
33
+ /* auto-generated on 2026-01-30 12:00:02 -0500. Do not edit! */
12
34
  /* begin file include/idna.h */
13
35
  #ifndef ADA_IDNA_H
14
36
  #define ADA_IDNA_H
@@ -188,7 +210,11 @@ bool valid_name_code_point(char32_t code_point, bool first);
188
210
  /* begin file include/ada/common_defs.h */
189
211
  /**
190
212
  * @file common_defs.h
191
- * @brief Common definitions for cross-platform compiler support.
213
+ * @brief Cross-platform compiler macros and common definitions.
214
+ *
215
+ * This header provides compiler-specific macros for optimization hints,
216
+ * platform detection, SIMD support detection, and development/debug utilities.
217
+ * It ensures consistent behavior across different compilers (GCC, Clang, MSVC).
192
218
  */
193
219
  #ifndef ADA_COMMON_DEFS_H
194
220
  #define ADA_COMMON_DEFS_H
@@ -421,6 +447,10 @@ namespace ada {
421
447
  } while (0)
422
448
  #endif
423
449
 
450
+ #if defined(__SSSE3__)
451
+ #define ADA_SSSE3 1  // set only when __SSSE3__ is defined for this translation unit
452
+ #endif  // defined(__SSSE3__)
453
+
424
454
  #if defined(__SSE2__) || defined(__x86_64__) || defined(__x86_64) || \
425
455
  (defined(_M_AMD64) || defined(_M_X64) || \
426
456
  (defined(_M_IX86_FP) && _M_IX86_FP == 2))
@@ -435,6 +465,11 @@ namespace ada {
435
465
  #define ADA_LSX 1
436
466
  #endif
437
467
 
468
+ #if defined(__riscv_v) && __riscv_v_intrinsic >= 11000
469
+ // Support RVV intrinsics v0.11 and above
470
+ #define ADA_RVV 1
471
+ #endif  // defined(__riscv_v) && __riscv_v_intrinsic >= 11000
472
+
438
473
  #ifndef __has_cpp_attribute
439
474
  #define ada_lifetime_bound
440
475
  #elif __has_cpp_attribute(msvc::lifetimebound)
@@ -947,7 +982,7 @@ constexpr uint8_t WWW_FORM_URLENCODED_PERCENT_ENCODE[32] = {
947
982
  // 50 51 52 53 54 55 56 57
948
983
  0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
949
984
  // 58 59 5A 5B 5C 5D 5E 5F
950
- 0x00 | 0x00 | 0x00 | 0x08 | 0x00 | 0x20 | 0x40 | 0x00,
985
+ 0x00 | 0x00 | 0x00 | 0x08 | 0x10 | 0x20 | 0x40 | 0x00,
951
986
  // 60 61 62 63 64 65 66 67
952
987
  0x01 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
953
988
  // 68 69 6A 6B 6C 6D 6E 6F
@@ -1007,6 +1042,140 @@ ada_really_inline constexpr bool bit_at(const uint8_t a[], const uint8_t i) {
1007
1042
 
1008
1043
  #include <bit>
1009
1044
  #include <string_view>
1045
+ /* begin file include/ada/checkers.h */
1046
+ /**
1047
+ * @file checkers.h
1048
+ * @brief Declarations for URL specific checkers used within Ada.
1049
+ */
1050
+ #ifndef ADA_CHECKERS_H
1051
+ #define ADA_CHECKERS_H
1052
+
1053
+
1054
+ #include <cstring>
1055
+ #include <string_view>
1056
+
1057
+ /**
1058
+ * These functions are not part of our public API and may
1059
+ * change at any time.
1060
+ * @private
1061
+ * @namespace ada::checkers
1062
+ * @brief Includes the definitions for validation functions
1063
+ */
1064
+ namespace ada::checkers {
1065
+
1066
+ /**
1067
+ * @private
1068
+ * Assuming that x is an ASCII letter, this function returns the lower case
1069
+ * equivalent.
1070
+ * @details More likely to be inlined by the compiler and constexpr.
1071
+ */
1072
+ constexpr char to_lower(char x) noexcept;
1073
+
1074
+ /**
1075
+ * @private
1076
+ * Returns true if the character is an ASCII letter. Equivalent to std::isalpha
1077
+ * but more likely to be inlined by the compiler.
1078
+ *
1079
+ * @attention std::isalpha is not constexpr generally.
1080
+ */
1081
+ constexpr bool is_alpha(char x) noexcept;
1082
+
1083
+ /**
1084
+ * @private
1085
+ * Check whether a string starts with 0x or 0X. The function is only
1086
+ * safe if input.size() >= 2.
1087
+ *
1088
+ * @see has_hex_prefix
1089
+ */
1090
+ constexpr bool has_hex_prefix_unsafe(std::string_view input);
1091
+ /**
1092
+ * @private
1093
+ * Check whether a string starts with 0x or 0X.
1094
+ */
1095
+ constexpr bool has_hex_prefix(std::string_view input);
1096
+
1097
+ /**
1098
+ * @private
1099
+ * Check whether x is an ASCII digit. More likely to be inlined than
1100
+ * std::isdigit.
1101
+ */
1102
+ constexpr bool is_digit(char x) noexcept;
1103
+
1104
+ /**
1105
+ * @private
1106
+ * @details A string starts with a Windows drive letter if all of the following
1107
+ * are true:
1108
+ *
1109
+ * - its length is greater than or equal to 2
1110
+ * - its first two code points are a Windows drive letter
1111
+ * - its length is 2 or its third code point is U+002F (/), U+005C (\), U+003F
1112
+ * (?), or U+0023 (#).
1113
+ *
1114
+ * https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
1115
+ */
1116
+ inline constexpr bool is_windows_drive_letter(std::string_view input) noexcept;
1117
+
1118
+ /**
1119
+ * @private
1120
+ * @details A normalized Windows drive letter is a Windows drive letter of which
1121
+ * the second code point is U+003A (:).
1122
+ */
1123
+ inline constexpr bool is_normalized_windows_drive_letter(
1124
+ std::string_view input) noexcept;
1125
+
1126
+ /**
1127
+ * @private
1128
+ * Returns true if an input is an ipv4 address. It is assumed that the string
1129
+ * does not contain uppercase ASCII characters (the input should have been
1130
+ * lower-cased before calling this function) and is not empty.
1131
+ */
1132
+ ada_really_inline constexpr bool is_ipv4(std::string_view view) noexcept;
1133
+
1134
+ /**
1135
+ * @private
1136
+ * Returns a bitset. If the first bit is set, then at least one character needs
1137
+ * percent encoding. If the second bit is set, a \\ is found. If the third bit
1138
+ * is set then we have a dot. If the fourth bit is set, then we have a percent
1139
+ * character.
1140
+ */
1141
+ ada_really_inline constexpr uint8_t path_signature(
1142
+ std::string_view input) noexcept;
1143
+
1144
+ /**
1145
+ * @private
1146
+ * Returns true if the length of the domain name and its labels are according to
1147
+ * the specifications. The length of the domain must not exceed 255 octets (253
1148
+ * characters not including the last 2 which are the empty label reserved at the
1149
+ * end). When the empty label is included (a dot at the end), the domain name
1150
+ * can have 254 characters. The length of a label must be at least 1 and at most
1151
+ * 63 characters.
1152
+ * @see section 3.1. of https://www.rfc-editor.org/rfc/rfc1034
1153
+ * @see https://www.unicode.org/reports/tr46/#ToASCII
1154
+ */
1155
+ ada_really_inline constexpr bool verify_dns_length(
1156
+ std::string_view input) noexcept;
1157
+
1158
+ /**
1159
+ * @private
1160
+ * Fast-path parser for pure decimal IPv4 addresses (e.g., "192.168.1.1").
1161
+ * Returns the packed 32-bit IPv4 address on success, or a value > 0xFFFFFFFF
1162
+ * to indicate failure (caller should fall back to general parser).
1163
+ * This is optimized for the common case where the input is a well-formed
1164
+ * decimal IPv4 address with exactly 4 octets.
1165
+ */
1166
+ ada_really_inline constexpr uint64_t try_parse_ipv4_fast(
1167
+ std::string_view input) noexcept;
1168
+
1169
+ /**
1170
+ * Sentinel value indicating try_parse_ipv4_fast() did not succeed.
1171
+ * Any value > 0xFFFFFFFF indicates the fast path should not be used.
1172
+ */
1173
+ constexpr uint64_t ipv4_fast_fail = uint64_t(1) << 32;
1174
+
1175
+ } // namespace ada::checkers
1176
+
1177
+ #endif // ADA_CHECKERS_H
1178
+ /* end file include/ada/checkers.h */
1010
1179
 
1011
1180
  namespace ada::checkers {
1012
1181
 
@@ -1049,6 +1218,64 @@ constexpr bool is_normalized_windows_drive_letter(
1049
1218
  return input.size() >= 2 && (is_alpha(input[0]) && (input[1] == ':'));
1050
1219
  }
1051
1220
 
1221
+ ada_really_inline constexpr uint64_t try_parse_ipv4_fast(
1222
+ std::string_view input) noexcept {  // packs four '.'-separated decimal octets (1-3 digits each)
1223
+ const char* p = input.data();
1224
+ const char* const pend = p + input.size();  // one past the last input character
1225
+
1226
+ uint32_t ipv4 = 0;
1227
+
1228
+ for (int i = 0; i < 4; ++i) {  // exactly four octets are required
1229
+ if (p == pend) {  // input ended before this octet started
1230
+ return ipv4_fast_fail;
1231
+ }
1232
+
1233
+ uint32_t val;
1234
+ char c = *p;
1235
+ if (c >= '0' && c <= '9') {  // first digit of the octet is mandatory
1236
+ val = c - '0';
1237
+ p++;
1238
+ } else {
1239
+ return ipv4_fast_fail;
1240
+ }
1241
+
1242
+ if (p < pend) {
1243
+ c = *p;
1244
+ if (c >= '0' && c <= '9') {  // optional second digit
1245
+ if (val == 0) return ipv4_fast_fail;  // leading '0' followed by a digit (e.g. "01"): fail so the caller falls back
1246
+ val = val * 10 + (c - '0');
1247
+ p++;
1248
+ if (p < pend) {
1249
+ c = *p;
1250
+ if (c >= '0' && c <= '9') {  // optional third digit
1251
+ val = val * 10 + (c - '0');
1252
+ p++;
1253
+ if (val > 255) return ipv4_fast_fail;  // only a three-digit octet can exceed 255
1254
+ }
1255
+ }
1256
+ }
1257
+ }
1258
+
1259
+ ipv4 = (ipv4 << 8) | val;  // shift earlier octets up one byte; the first octet ends up most significant
1260
+
1261
+ if (i < 3) {  // each of the first three octets must be followed by '.'
1262
+ if (p == pend || *p != '.') {
1263
+ return ipv4_fast_fail;
1264
+ }
1265
+ p++;
1266
+ }
1267
+ }
1268
+
1269
+ if (p != pend) {  // something follows the fourth octet
1270
+ if (p == pend - 1 && *p == '.') {  // exactly one trailing '.' is accepted
1271
+ return ipv4;
1272
+ }
1273
+ return ipv4_fast_fail;  // any other leftover input (including a fourth digit) fails the fast path
1274
+ }
1275
+
1276
+ return ipv4;  // widened to uint64_t; always <= 0xFFFFFFFF, so distinguishable from ipv4_fast_fail
1277
+ }
1278
+
1052
1279
  } // namespace ada::checkers
1053
1280
 
1054
1281
  #endif // ADA_CHECKERS_INL_H
@@ -1102,7 +1329,11 @@ constexpr ada_really_inline void log([[maybe_unused]] Args... args) {
1102
1329
  /* begin file include/ada/encoding_type.h */
1103
1330
  /**
1104
1331
  * @file encoding_type.h
1105
- * @brief Definition for supported encoding types.
1332
+ * @brief Character encoding type definitions.
1333
+ *
1334
+ * Defines the encoding types supported for URL processing.
1335
+ *
1336
+ * @see https://encoding.spec.whatwg.org/
1106
1337
  */
1107
1338
  #ifndef ADA_ENCODING_TYPE_H
1108
1339
  #define ADA_ENCODING_TYPE_H
@@ -1112,19 +1343,23 @@ constexpr ada_really_inline void log([[maybe_unused]] Args... args) {
1112
1343
  namespace ada {
1113
1344
 
1114
1345
  /**
1115
- * This specification defines three encodings with the same names as encoding
1116
- * schemes defined in the Unicode standard: UTF-8, UTF-16LE, and UTF-16BE.
1346
+ * @brief Character encoding types for URL processing.
1347
+ *
1348
+ * Specifies the character encoding used for percent-decoding and other
1349
+ * string operations. UTF-8 is the most commonly used encoding for URLs.
1117
1350
  *
1118
1351
  * @see https://encoding.spec.whatwg.org/#encodings
1119
1352
  */
1120
1353
  enum class encoding_type {
1121
- UTF8,
1122
- UTF_16LE,
1123
- UTF_16BE,
1354
+ UTF8, /**< UTF-8 encoding (default for URLs) */
1355
+ UTF_16LE, /**< UTF-16 Little Endian encoding */
1356
+ UTF_16BE, /**< UTF-16 Big Endian encoding */
1124
1357
  };
1125
1358
 
1126
1359
  /**
1127
- * Convert a encoding_type to string.
1360
+ * Converts an encoding_type to its string representation.
1361
+ * @param type The encoding type to convert.
1362
+ * @return A string view of the encoding name.
1128
1363
  */
1129
1364
  ada_warn_unused std::string_view to_string(encoding_type type);
1130
1365
 
@@ -1143,7 +1378,11 @@ ada_warn_unused std::string_view to_string(encoding_type type);
1143
1378
  /* begin file include/ada/url_base.h */
1144
1379
  /**
1145
1380
  * @file url_base.h
1146
- * @brief Declaration for the basic URL definitions
1381
+ * @brief Base class and common definitions for URL types.
1382
+ *
1383
+ * This file defines the `url_base` abstract base class from which both
1384
+ * `ada::url` and `ada::url_aggregator` inherit. It also defines common
1385
+ * enumerations like `url_host_type`.
1147
1386
  */
1148
1387
  #ifndef ADA_URL_BASE_H
1149
1388
  #define ADA_URL_BASE_H
@@ -1151,7 +1390,13 @@ ada_warn_unused std::string_view to_string(encoding_type type);
1151
1390
  /* begin file include/ada/scheme.h */
1152
1391
  /**
1153
1392
  * @file scheme.h
1154
- * @brief Declarations for the URL scheme.
1393
+ * @brief URL scheme type definitions and utilities.
1394
+ *
1395
+ * This header defines the URL scheme types (http, https, etc.) and provides
1396
+ * functions to identify special schemes and their default ports according
1397
+ * to the WHATWG URL Standard.
1398
+ *
1399
+ * @see https://url.spec.whatwg.org/#special-scheme
1155
1400
  */
1156
1401
  #ifndef ADA_SCHEME_H
1157
1402
  #define ADA_SCHEME_H
@@ -1161,62 +1406,65 @@ ada_warn_unused std::string_view to_string(encoding_type type);
1161
1406
 
1162
1407
  /**
1163
1408
  * @namespace ada::scheme
1164
- * @brief Includes the scheme declarations
1409
+ * @brief URL scheme utilities and constants.
1410
+ *
1411
+ * Provides functions for working with URL schemes, including identification
1412
+ * of special schemes and retrieval of default port numbers.
1165
1413
  */
1166
1414
  namespace ada::scheme {
1167
1415
 
1168
1416
  /**
1169
- * Type of the scheme as an enum.
1170
- * Using strings to represent a scheme type is not ideal because
1171
- * checking for types involves string comparisons. It is faster to use
1172
- * a simple integer.
1173
- * In C++11, we are allowed to specify the underlying type of the enum.
1174
- * We pick an 8-bit integer (which allows up to 256 types). Specifying the
1175
- * type of the enum may help integration with other systems if the type
1176
- * variable is exposed (since its value will not depend on the compiler).
1417
+ * @brief Enumeration of URL scheme types.
1418
+ *
1419
+ * Special schemes have specific parsing rules and default ports.
1420
+ * Using an enum allows efficient scheme comparisons without string operations.
1421
+ *
1422
+ * Default ports:
1423
+ * - HTTP: 80
1424
+ * - HTTPS: 443
1425
+ * - WS: 80
1426
+ * - WSS: 443
1427
+ * - FTP: 21
1428
+ * - FILE: (none)
1177
1429
  */
1178
1430
  enum type : uint8_t {
1179
- HTTP = 0,
1180
- NOT_SPECIAL = 1,
1181
- HTTPS = 2,
1182
- WS = 3,
1183
- FTP = 4,
1184
- WSS = 5,
1185
- FILE = 6
1431
+ HTTP = 0, /**< http:// scheme (port 80) */
1432
+ NOT_SPECIAL = 1, /**< Non-special scheme (no default port) */
1433
+ HTTPS = 2, /**< https:// scheme (port 443) */
1434
+ WS = 3, /**< ws:// WebSocket scheme (port 80) */
1435
+ FTP = 4, /**< ftp:// scheme (port 21) */
1436
+ WSS = 5, /**< wss:// secure WebSocket scheme (port 443) */
1437
+ FILE = 6 /**< file:// scheme (no default port) */
1186
1438
  };
1187
1439
 
1188
1440
  /**
1189
- * A special scheme is an ASCII string that is listed in the first column of the
1190
- * following table. The default port for a special scheme is listed in the
1191
- * second column on the same row. The default port for any other ASCII string is
1192
- * null.
1193
- *
1194
- * @see https://url.spec.whatwg.org/#url-miscellaneous
1195
- * @param scheme
1196
- * @return If scheme is a special scheme
1441
+ * Checks if a scheme string is a special scheme.
1442
+ * @param scheme The scheme string to check (e.g., "http", "https").
1443
+ * @return `true` if the scheme is special, `false` otherwise.
1444
+ * @see https://url.spec.whatwg.org/#special-scheme
1197
1445
  */
1198
1446
  ada_really_inline constexpr bool is_special(std::string_view scheme);
1199
1447
 
1200
1448
  /**
1201
- * A special scheme is an ASCII string that is listed in the first column of the
1202
- * following table. The default port for a special scheme is listed in the
1203
- * second column on the same row. The default port for any other ASCII string is
1204
- * null.
1205
- *
1206
- * @see https://url.spec.whatwg.org/#url-miscellaneous
1207
- * @param scheme
1208
- * @return The special port
1449
+ * Returns the default port for a special scheme string.
1450
+ * @param scheme The scheme string (e.g., "http", "https").
1451
+ * @return The default port number, or 0 if not a special scheme.
1452
+ * @see https://url.spec.whatwg.org/#special-scheme
1209
1453
  */
1210
1454
  constexpr uint16_t get_special_port(std::string_view scheme) noexcept;
1211
1455
 
1212
1456
  /**
1213
- * Returns the port number of a special scheme.
1457
+ * Returns the default port for a scheme type.
1458
+ * @param type The scheme type enum value.
1459
+ * @return The default port number, or 0 if not applicable.
1214
1460
  * @see https://url.spec.whatwg.org/#special-scheme
1215
1461
  */
1216
1462
  constexpr uint16_t get_special_port(ada::scheme::type type) noexcept;
1463
+
1217
1464
  /**
1218
- * Returns the scheme of an input, or NOT_SPECIAL if it's not a special scheme
1219
- * defined by the spec.
1465
+ * Converts a scheme string to its type enum.
1466
+ * @param scheme The scheme string to convert.
1467
+ * @return The corresponding scheme type, or NOT_SPECIAL if not recognized.
1220
1468
  */
1221
1469
  constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept;
1222
1470
 
@@ -1231,112 +1479,112 @@ constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept;
1231
1479
  namespace ada {
1232
1480
 
1233
1481
  /**
1234
- * Type of URL host as an enum.
1482
+ * @brief Enum representing the type of host in a URL.
1483
+ *
1484
+ * Used to distinguish between regular domain names, IPv4 addresses,
1485
+ * and IPv6 addresses for proper parsing and serialization.
1235
1486
  */
1236
1487
  enum url_host_type : uint8_t {
1237
- /**
1238
- * Represents common URLs such as "https://www.google.com"
1239
- */
1488
+ /** Regular domain name (e.g., "www.example.com") */
1240
1489
  DEFAULT = 0,
1241
- /**
1242
- * Represents ipv4 addresses such as "http://127.0.0.1"
1243
- */
1490
+ /** IPv4 address (e.g., "127.0.0.1") */
1244
1491
  IPV4 = 1,
1245
- /**
1246
- * Represents ipv6 addresses such as
1247
- * "http://[2001:db8:3333:4444:5555:6666:7777:8888]"
1248
- */
1492
+ /** IPv6 address (e.g., "[::1]" or "[2001:db8::1]") */
1249
1493
  IPV6 = 2,
1250
1494
  };
1251
1495
 
1252
1496
  /**
1253
- * @brief Base class of URL implementations
1497
+ * @brief Abstract base class for URL representations.
1498
+ *
1499
+ * The `url_base` class provides the common interface and state shared by
1500
+ * both `ada::url` and `ada::url_aggregator`. It contains basic URL attributes
1501
+ * like validity status and scheme type, but delegates component storage and
1502
+ * access to derived classes.
1254
1503
  *
1255
- * @details A url_base contains a few attributes: is_valid, has_opaque_path and
1256
- * type. All non-trivial implementation details are in derived classes such as
1257
- * ada::url and ada::url_aggregator.
1504
+ * @note This is an abstract class and cannot be instantiated directly.
1505
+ * Use `ada::url` or `ada::url_aggregator` instead.
1258
1506
  *
1259
- * It is an abstract class that cannot be instantiated directly.
1507
+ * @see url
1508
+ * @see url_aggregator
1260
1509
  */
1261
1510
  struct url_base {
1262
1511
  virtual ~url_base() = default;
1263
1512
 
1264
1513
  /**
1265
- * Used for returning the validity from the result of the URL parser.
1514
+ * Indicates whether the URL was successfully parsed.
1515
+ * Set to `false` if parsing failed (e.g., invalid URL syntax).
1266
1516
  */
1267
1517
  bool is_valid{true};
1268
1518
 
1269
1519
  /**
1270
- * A URL has an opaque path if its path is a string.
1520
+ * Indicates whether the URL has an opaque path (non-hierarchical).
1521
+ * Opaque paths occur in non-special URLs like `mailto:` or `javascript:`.
1271
1522
  */
1272
1523
  bool has_opaque_path{false};
1273
1524
 
1274
1525
  /**
1275
- * URL hosts type
1526
+ * The type of the URL's host (domain, IPv4, or IPv6).
1276
1527
  */
1277
1528
  url_host_type host_type = url_host_type::DEFAULT;
1278
1529
 
1279
1530
  /**
1280
1531
  * @private
1532
+ * Internal representation of the URL's scheme type.
1281
1533
  */
1282
1534
  ada::scheme::type type{ada::scheme::type::NOT_SPECIAL};
1283
1535
 
1284
1536
  /**
1285
- * A URL is special if its scheme is a special scheme. A URL is not special if
1286
- * its scheme is not a special scheme.
1537
+ * Checks if the URL has a special scheme (http, https, ws, wss, ftp, file).
1538
+ * Special schemes have specific parsing rules and default ports.
1539
+ * @return `true` if the scheme is special, `false` otherwise.
1287
1540
  */
1288
1541
  [[nodiscard]] ada_really_inline constexpr bool is_special() const noexcept;
1289
1542
 
1290
1543
  /**
1291
- * The origin getter steps are to return the serialization of this's URL's
1292
- * origin. [HTML]
1293
- * @return a newly allocated string.
1544
+ * Returns the URL's origin (scheme + host + port for special URLs).
1545
+ * @return A newly allocated string containing the serialized origin.
1294
1546
  * @see https://url.spec.whatwg.org/#concept-url-origin
1295
1547
  */
1296
- [[nodiscard]] virtual std::string get_origin() const noexcept = 0;
1548
+ [[nodiscard]] virtual std::string get_origin() const = 0;
1297
1549
 
1298
1550
  /**
1299
- * Returns true if this URL has a valid domain as per RFC 1034 and
1300
- * corresponding specifications. Among other things, it requires
1301
- * that the domain string has fewer than 255 octets.
1551
+ * Validates whether the hostname is a valid domain according to RFC 1034.
1552
+ * Checks that the domain and its labels have valid lengths.
1553
+ * @return `true` if the domain is valid, `false` otherwise.
1302
1554
  */
1303
1555
  [[nodiscard]] virtual bool has_valid_domain() const noexcept = 0;
1304
1556
 
1305
1557
  /**
1306
1558
  * @private
1307
- *
1308
- * Return the 'special port' if the URL is special and not 'file'.
1309
- * Returns 0 otherwise.
1559
+ * Returns the default port for special schemes (e.g., 443 for https).
1560
+ * Returns 0 for file:// URLs or non-special schemes.
1310
1561
  */
1311
1562
  [[nodiscard]] inline uint16_t get_special_port() const noexcept;
1312
1563
 
1313
1564
  /**
1314
1565
  * @private
1315
- *
1316
- * Get the default port if the url's scheme has one, returns 0 otherwise.
1566
+ * Returns the default port for the URL's scheme, or 0 if none.
1317
1567
  */
1318
1568
  [[nodiscard]] ada_really_inline uint16_t scheme_default_port() const noexcept;
1319
1569
 
1320
1570
  /**
1321
1571
  * @private
1322
- *
1323
- * Parse a port (16-bit decimal digit) from the provided input.
1324
- * We assume that the input does not contain spaces or tabs
1325
- * within the ASCII digits.
1326
- * It returns how many bytes were consumed when a number is successfully
1327
- * parsed.
1328
- * @return On failure, it returns zero.
1329
- * @see https://url.spec.whatwg.org/#host-parsing
1572
+ * Parses a port number from the input string.
1573
+ * @param view The string containing the port to parse.
1574
+ * @param check_trailing_content Whether to validate no trailing characters.
1575
+ * @return Number of bytes consumed on success, 0 on failure.
1330
1576
  */
1331
1577
  virtual size_t parse_port(std::string_view view,
1332
- bool check_trailing_content) noexcept = 0;
1578
+ bool check_trailing_content) = 0;
1333
1579
 
1334
- virtual ada_really_inline size_t parse_port(std::string_view view) noexcept {
1580
+ /** @private */
1581
+ virtual ada_really_inline size_t parse_port(std::string_view view) {
1335
1582
  return this->parse_port(view, false);
1336
1583
  }
1337
1584
 
1338
1585
  /**
1339
- * Returns a JSON string representation of this URL.
1586
+ * Returns a JSON string representation of this URL for debugging.
1587
+ * @return A JSON-formatted string with URL information.
1340
1588
  */
1341
1589
  [[nodiscard]] virtual std::string to_string() const = 0;
1342
1590
 
@@ -1405,8 +1653,7 @@ ada_really_inline std::optional<std::string_view> prune_hash(
1405
1653
  * @see https://url.spec.whatwg.org/#shorten-a-urls-path
1406
1654
  * @returns Returns true if path is shortened.
1407
1655
  */
1408
- ada_really_inline bool shorten_path(std::string& path,
1409
- ada::scheme::type type) noexcept;
1656
+ ada_really_inline bool shorten_path(std::string& path, ada::scheme::type type);
1410
1657
 
1411
1658
  /**
1412
1659
  * @private
@@ -1415,7 +1662,7 @@ ada_really_inline bool shorten_path(std::string& path,
1415
1662
  * @returns Returns true if path is shortened.
1416
1663
  */
1417
1664
  ada_really_inline bool shorten_path(std::string_view& path,
1418
- ada::scheme::type type) noexcept;
1665
+ ada::scheme::type type);
1419
1666
 
1420
1667
  /**
1421
1668
  * @private
@@ -1436,15 +1683,14 @@ ada_really_inline void parse_prepared_path(std::string_view input,
1436
1683
  * @private
1437
1684
  * Remove and mutate all ASCII tab or newline characters from an input.
1438
1685
  */
1439
- ada_really_inline void remove_ascii_tab_or_newline(std::string& input) noexcept;
1686
+ ada_really_inline void remove_ascii_tab_or_newline(std::string& input);
1440
1687
 
1441
1688
  /**
1442
1689
  * @private
1443
1690
  * Return the substring from input going from index pos to the end.
1444
- * This function cannot throw.
1445
1691
  */
1446
1692
  ada_really_inline constexpr std::string_view substring(std::string_view input,
1447
- size_t pos) noexcept;
1693
+ size_t pos);
1448
1694
 
1449
1695
  /**
1450
1696
  * @private
@@ -1459,7 +1705,7 @@ bool overlaps(std::string_view input1, const std::string& input2) noexcept;
1459
1705
  */
1460
1706
  ada_really_inline constexpr std::string_view substring(std::string_view input,
1461
1707
  size_t pos1,
1462
- size_t pos2) noexcept {
1708
+ size_t pos2) {
1463
1709
  #if ADA_DEVELOPMENT_CHECKS
1464
1710
  if (pos2 < pos1) {
1465
1711
  std::cerr << "Negative-length substring: [" << pos1 << " to " << pos2 << ")"
@@ -1498,8 +1744,7 @@ void trim_c0_whitespace(std::string_view& input) noexcept;
1498
1744
  * https://url.spec.whatwg.org/#potentially-strip-trailing-spaces-from-an-opaque-path
1499
1745
  */
1500
1746
  template <class url_type>
1501
- ada_really_inline void strip_trailing_spaces_from_opaque_path(
1502
- url_type& url) noexcept;
1747
+ ada_really_inline void strip_trailing_spaces_from_opaque_path(url_type& url);
1503
1748
 
1504
1749
  /**
1505
1750
  * @private
@@ -1589,7 +1834,13 @@ inline int fast_digit_count(uint32_t x) noexcept {
1589
1834
  /* begin file include/ada/parser.h */
1590
1835
  /**
1591
1836
  * @file parser.h
1592
- * @brief Definitions for the parser.
1837
+ * @brief Low-level URL parsing functions.
1838
+ *
1839
+ * This header provides the internal URL parsing implementation. Most users
1840
+ * should use `ada::parse()` from implementation.h instead of these functions
1841
+ * directly.
1842
+ *
1843
+ * @see implementation.h for the recommended public API
1593
1844
  */
1594
1845
  #ifndef ADA_PARSER_H
1595
1846
  #define ADA_PARSER_H
@@ -2333,6 +2584,7 @@ struct expected_operations_base : expected_storage_base<T, E> {
2333
2584
  }
2334
2585
 
2335
2586
  template <class Rhs>
2587
+ // NOLINTNEXTLINE(bugprone-exception-escape)
2336
2588
  void construct_with(Rhs &&rhs) noexcept {
2337
2589
  new (std::addressof(this->m_val)) T(std::forward<Rhs>(rhs).get());
2338
2590
  this->m_has_val = true;
@@ -4193,14 +4445,23 @@ class std_regex_provider final {
4193
4445
  /* begin file include/ada/errors.h */
4194
4446
  /**
4195
4447
  * @file errors.h
4196
- * @brief Definitions for the errors.
4448
+ * @brief Error type definitions for URL parsing.
4449
+ *
4450
+ * Defines the error codes that can be returned when URL parsing fails.
4197
4451
  */
4198
4452
  #ifndef ADA_ERRORS_H
4199
4453
  #define ADA_ERRORS_H
4200
4454
 
4201
4455
  #include <cstdint>
4202
4456
  namespace ada {
4203
- enum class errors : uint8_t { type_error };
4457
+ /**
4458
+ * @brief Error codes for URL parsing operations.
4459
+ *
4460
+ * Used with `tl::expected` to indicate why a URL parsing operation failed.
4461
+ */
4462
+ enum class errors : uint8_t {
4463
+ type_error /**< A type error occurred (e.g., invalid URL syntax). */
4464
+ };
4204
4465
  } // namespace ada
4205
4466
  #endif // ADA_ERRORS_H
4206
4467
  /* end file include/ada/errors.h */
@@ -4333,9 +4594,7 @@ struct url_pattern_init {
4333
4594
  #endif // ADA_URL_PATTERN_INIT_H
4334
4595
  /* end file include/ada/url_pattern_init.h */
4335
4596
 
4336
- /**
4337
- * @private
4338
- */
4597
+ /** @private Forward declarations */
4339
4598
  namespace ada {
4340
4599
  struct url_aggregator;
4341
4600
  struct url;
@@ -4349,14 +4608,24 @@ enum class errors : uint8_t;
4349
4608
 
4350
4609
  /**
4351
4610
  * @namespace ada::parser
4352
- * @brief Includes the definitions for supported parsers
4611
+ * @brief Internal URL parsing implementation.
4612
+ *
4613
+ * Contains the core URL parsing algorithm as specified by the WHATWG URL
4614
+ * Standard. These functions are used internally by `ada::parse()`.
4353
4615
  */
4354
4616
  namespace ada::parser {
4355
4617
  /**
4356
- * Parses a url. The parameter user_input is the input to be parsed:
4357
- * it should be a valid UTF-8 string. The parameter base_url is an optional
4358
- * parameter that can be used to resolve relative URLs. If the base_url is
4359
- * provided, the user_input is resolved against the base_url.
4618
+ * Parses a URL string into a URL object.
4619
+ *
4620
+ * @tparam result_type The type of URL object to create (url or url_aggregator).
4621
+ *
4622
+ * @param user_input The URL string to parse (must be valid UTF-8).
4623
+ * @param base_url Optional base URL for resolving relative URLs.
4624
+ *
4625
+ * @return The parsed URL object. Check `is_valid` to determine if parsing
4626
+ * succeeded.
4627
+ *
4628
+ * @see https://url.spec.whatwg.org/#concept-basic-url-parser
4360
4629
  */
4361
4630
  template <typename result_type = url_aggregator>
4362
4631
  result_type parse_url(std::string_view user_input,
@@ -4397,7 +4666,14 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
4397
4666
  /* begin file include/ada/url_pattern.h */
4398
4667
  /**
4399
4668
  * @file url_pattern.h
4400
- * @brief Declaration for the URLPattern implementation.
4669
+ * @brief URLPattern API implementation.
4670
+ *
4671
+ * This header provides the URLPattern API as specified by the WHATWG URL
4672
+ * Pattern Standard. URLPattern allows matching URLs against patterns with
4673
+ * wildcards and named groups, similar to how regular expressions match strings.
4674
+ *
4675
+ * @see https://urlpattern.spec.whatwg.org/
4676
+ * @see https://developer.mozilla.org/en-US/docs/Web/API/URL_Pattern_API
4401
4677
  */
4402
4678
  #ifndef ADA_URL_PATTERN_H
4403
4679
  #define ADA_URL_PATTERN_H
@@ -4405,8 +4681,13 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
4405
4681
  /* begin file include/ada/implementation.h */
4406
4682
  /**
4407
4683
  * @file implementation.h
4408
- * @brief Definitions for user facing functions for parsing URL and its
4409
- * components.
4684
+ * @brief User-facing functions for URL parsing and manipulation.
4685
+ *
4686
+ * This header provides the primary public API for parsing URLs in Ada.
4687
+ * It includes the main `ada::parse()` function which is the recommended
4688
+ * entry point for most users.
4689
+ *
4690
+ * @see https://url.spec.whatwg.org/#api
4410
4691
  */
4411
4692
  #ifndef ADA_IMPLEMENTATION_H
4412
4693
  #define ADA_IMPLEMENTATION_H
@@ -4418,7 +4699,13 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
4418
4699
  /* begin file include/ada/url.h */
4419
4700
  /**
4420
4701
  * @file url.h
4421
- * @brief Declaration for the URL
4702
+ * @brief Declaration for the `ada::url` class.
4703
+ *
4704
+ * This file contains the `ada::url` struct which represents a parsed URL
4705
+ * using separate `std::string` instances for each component. This
4706
+ * representation is more flexible but uses more memory than `url_aggregator`.
4707
+ *
4708
+ * @see url_aggregator.h for a more memory-efficient alternative
4422
4709
  */
4423
4710
  #ifndef ADA_URL_H
4424
4711
  #define ADA_URL_H
@@ -4429,127 +4716,14 @@ tl::expected<url_pattern<regex_provider>, errors> parse_url_pattern_impl(
4429
4716
  #include <string>
4430
4717
  #include <string_view>
4431
4718
 
4432
- /* begin file include/ada/checkers.h */
4433
- /**
4434
- * @file checkers.h
4435
- * @brief Declarations for URL specific checkers used within Ada.
4436
- */
4437
- #ifndef ADA_CHECKERS_H
4438
- #define ADA_CHECKERS_H
4439
-
4440
-
4441
- #include <cstring>
4442
- #include <string_view>
4443
-
4444
- /**
4445
- * These functions are not part of our public API and may
4446
- * change at any time.
4447
- * @private
4448
- * @namespace ada::checkers
4449
- * @brief Includes the definitions for validation functions
4450
- */
4451
- namespace ada::checkers {
4452
-
4453
- /**
4454
- * @private
4455
- * Assuming that x is an ASCII letter, this function returns the lower case
4456
- * equivalent.
4457
- * @details More likely to be inlined by the compiler and constexpr.
4458
- */
4459
- constexpr char to_lower(char x) noexcept;
4460
-
4461
- /**
4462
- * @private
4463
- * Returns true if the character is an ASCII letter. Equivalent to std::isalpha
4464
- * but more likely to be inlined by the compiler.
4465
- *
4466
- * @attention std::isalpha is not constexpr generally.
4467
- */
4468
- constexpr bool is_alpha(char x) noexcept;
4469
-
4470
- /**
4471
- * @private
4472
- * Check whether a string starts with 0x or 0X. The function is only
4473
- * safe if input.size() >=2.
4474
- *
4475
- * @see has_hex_prefix
4476
- */
4477
- constexpr bool has_hex_prefix_unsafe(std::string_view input);
4478
- /**
4479
- * @private
4480
- * Check whether a string starts with 0x or 0X.
4481
- */
4482
- constexpr bool has_hex_prefix(std::string_view input);
4483
-
4484
- /**
4485
- * @private
4486
- * Check whether x is an ASCII digit. More likely to be inlined than
4487
- * std::isdigit.
4488
- */
4489
- constexpr bool is_digit(char x) noexcept;
4490
-
4491
- /**
4492
- * @private
4493
- * @details A string starts with a Windows drive letter if all of the following
4494
- * are true:
4495
- *
4496
- * - its length is greater than or equal to 2
4497
- * - its first two code points are a Windows drive letter
4498
- * - its length is 2 or its third code point is U+002F (/), U+005C (\), U+003F
4499
- * (?), or U+0023 (#).
4500
- *
4501
- * https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
4502
- */
4503
- inline constexpr bool is_windows_drive_letter(std::string_view input) noexcept;
4504
-
4505
- /**
4506
- * @private
4507
- * @details A normalized Windows drive letter is a Windows drive letter of which
4508
- * the second code point is U+003A (:).
4509
- */
4510
- inline constexpr bool is_normalized_windows_drive_letter(
4511
- std::string_view input) noexcept;
4512
-
4513
- /**
4514
- * @private
4515
- * Returns true if an input is an ipv4 address. It is assumed that the string
4516
- * does not contain uppercase ASCII characters (the input should have been
4517
- * lowered cased before calling this function) and is not empty.
4518
- */
4519
- ada_really_inline constexpr bool is_ipv4(std::string_view view) noexcept;
4520
-
4521
- /**
4522
- * @private
4523
- * Returns a bitset. If the first bit is set, then at least one character needs
4524
- * percent encoding. If the second bit is set, a \\ is found. If the third bit
4525
- * is set then we have a dot. If the fourth bit is set, then we have a percent
4526
- * character.
4527
- */
4528
- ada_really_inline constexpr uint8_t path_signature(
4529
- std::string_view input) noexcept;
4530
-
4531
- /**
4532
- * @private
4533
- * Returns true if the length of the domain name and its labels are according to
4534
- * the specifications. The length of the domain must be 255 octets (253
4535
- * characters not including the last 2 which are the empty label reserved at the
4536
- * end). When the empty label is included (a dot at the end), the domain name
4537
- * can have 254 characters. The length of a label must be at least 1 and at most
4538
- * 63 characters.
4539
- * @see section 3.1. of https://www.rfc-editor.org/rfc/rfc1034
4540
- * @see https://www.unicode.org/reports/tr46/#ToASCII
4541
- */
4542
- ada_really_inline constexpr bool verify_dns_length(
4543
- std::string_view input) noexcept;
4544
-
4545
- } // namespace ada::checkers
4546
-
4547
- #endif // ADA_CHECKERS_H
4548
- /* end file include/ada/checkers.h */
4549
4719
  /* begin file include/ada/url_components.h */
4550
4720
  /**
4551
4721
  * @file url_components.h
4552
- * @brief Declaration for the URL Components
4722
+ * @brief URL component offset representation for url_aggregator.
4723
+ *
4724
+ * This file defines the `url_components` struct which stores byte offsets
4725
+ * into a URL string buffer. It is used internally by `url_aggregator` to
4726
+ * efficiently locate URL components without storing separate strings.
4553
4727
  */
4554
4728
  #ifndef ADA_URL_COMPONENTS_H
4555
4729
  #define ADA_URL_COMPONENTS_H
@@ -4557,14 +4731,32 @@ ada_really_inline constexpr bool verify_dns_length(
4557
4731
  namespace ada {
4558
4732
 
4559
4733
  /**
4560
- * @brief URL Component representations using offsets.
4734
+ * @brief Stores byte offsets for URL components within a buffer.
4735
+ *
4736
+ * The `url_components` struct uses 32-bit offsets to track the boundaries
4737
+ * of each URL component within a single string buffer. This enables efficient
4738
+ * component extraction without additional memory allocations.
4561
4739
  *
4562
- * @details We design the url_components struct so that it is as small
4563
- * and simple as possible. This version uses 32 bytes.
4740
+ * Component layout in a URL:
4741
+ * ```
4742
+ * https://user:pass@example.com:1234/foo/bar?baz#quux
4743
+ * | | | | ^^^^| | |
4744
+ * | | | | | | | `----- hash_start
4745
+ * | | | | | | `--------- search_start
4746
+ * | | | | | `----------------- pathname_start
4747
+ * | | | | `--------------------- port
4748
+ * | | | `----------------------- host_end
4749
+ * | | `---------------------------------- host_start
4750
+ * | `--------------------------------------- username_end
4751
+ * `--------------------------------------------- protocol_end
4752
+ * ```
4564
4753
  *
4565
- * This struct is used to extract components from a single 'href'.
4754
+ * @note The 32-bit offsets limit URLs to 4GB in length.
4755
+ * @note A value of `omitted` (UINT32_MAX) indicates the component is not
4756
+ * present.
4566
4757
  */
4567
4758
  struct url_components {
4759
+ /** Sentinel value indicating a component is not present. */
4568
4760
  constexpr static uint32_t omitted = uint32_t(-1);
4569
4761
 
4570
4762
  url_components() = default;
@@ -4574,47 +4766,43 @@ struct url_components {
4574
4766
  url_components &operator=(const url_components &u) = default;
4575
4767
  ~url_components() = default;
4576
4768
 
4577
- /*
4578
- * By using 32-bit integers, we implicitly assume that the URL string
4579
- * cannot exceed 4 GB.
4580
- *
4581
- * https://user:pass@example.com:1234/foo/bar?baz#quux
4582
- * | | | | ^^^^| | |
4583
- * | | | | | | | `----- hash_start
4584
- * | | | | | | `--------- search_start
4585
- * | | | | | `----------------- pathname_start
4586
- * | | | | `--------------------- port
4587
- * | | | `----------------------- host_end
4588
- * | | `---------------------------------- host_start
4589
- * | `--------------------------------------- username_end
4590
- * `--------------------------------------------- protocol_end
4591
- */
4769
+ /** Offset of the end of the protocol/scheme (position of ':'). */
4592
4770
  uint32_t protocol_end{0};
4771
+
4593
4772
  /**
4594
- * Username end is not `omitted` by default to make username and password
4595
- * getters less costly to implement.
4773
+ * Offset of the end of the username.
4774
+ * Initialized to 0 (not `omitted`) to simplify username/password getters.
4596
4775
  */
4597
4776
  uint32_t username_end{0};
4777
+
4778
+ /** Offset of the start of the host. */
4598
4779
  uint32_t host_start{0};
4780
+
4781
+ /** Offset of the end of the host. */
4599
4782
  uint32_t host_end{0};
4783
+
4784
+ /** Port number, or `omitted` if no port is specified. */
4600
4785
  uint32_t port{omitted};
4786
+
4787
+ /** Offset of the start of the pathname. */
4601
4788
  uint32_t pathname_start{0};
4789
+
4790
+ /** Offset of the '?' starting the query, or `omitted` if no query. */
4602
4791
  uint32_t search_start{omitted};
4792
+
4793
+ /** Offset of the '#' starting the fragment, or `omitted` if no fragment. */
4603
4794
  uint32_t hash_start{omitted};
4604
4795
 
4605
4796
  /**
4606
- * Check the following conditions:
4607
- * protocol_end < username_end < ... < hash_start,
4608
- * except when a value is omitted. It also computes
4609
- * a lower bound on the possible string length that may match these
4610
- * offsets.
4611
- * @return true if the offset values are
4612
- * consistent with a possible URL string
4797
+ * Validates that offsets are in ascending order and consistent.
4798
+ * Useful for debugging to detect internal corruption.
4799
+ * @return `true` if offsets are consistent, `false` otherwise.
4613
4800
  */
4614
4801
  [[nodiscard]] constexpr bool check_offset_consistency() const noexcept;
4615
4802
 
4616
4803
  /**
4617
- * Converts a url_components to JSON stringified version.
4804
+ * Returns a JSON string representation of the offsets for debugging.
4805
+ * @return A JSON-formatted string with all offset values.
4618
4806
  */
4619
4807
  [[nodiscard]] std::string to_string() const;
4620
4808
 
@@ -4637,15 +4825,26 @@ struct url_aggregator;
4637
4825
  // }
4638
4826
 
4639
4827
  /**
4640
- * @brief Generic URL struct reliant on std::string instantiation.
4828
+ * @brief Represents a parsed URL with individual string components.
4641
4829
  *
4642
- * @details To disambiguate from a valid URL string it can also be referred to
4643
- * as a URL record. A URL is a struct that represents a universal identifier.
4644
- * Unlike the url_aggregator, the ada::url represents the different components
4645
- * of a parsed URL as independent std::string instances. This makes the
4646
- * structure heavier and more reliant on memory allocations. When getting
4647
- * components from the parsed URL, a new std::string is typically constructed.
4830
+ * The `url` struct stores each URL component (scheme, username, password,
4831
+ * host, port, path, query, fragment) as a separate `std::string`. This
4832
+ * provides flexibility but incurs more memory allocations compared to
4833
+ * `url_aggregator`.
4648
4834
  *
4835
+ * **When to use `ada::url`:**
4836
+ * - When you need to frequently modify individual URL components
4837
+ * - When you want independent ownership of component strings
4838
+ *
4839
+ * **When to use `ada::url_aggregator` instead:**
4840
+ * - For read-mostly operations on parsed URLs
4841
+ * - When memory efficiency is important
4842
+ * - When you only need string_view access to components
4843
+ *
4844
+ * @note This type is returned when parsing with `ada::parse<ada::url>()`.
4845
+ * By default, `ada::parse()` returns `ada::url_aggregator`.
4846
+ *
4847
+ * @see url_aggregator For a more memory-efficient URL representation
4649
4848
  * @see https://url.spec.whatwg.org/#url-representation
4650
4849
  */
4651
4850
  struct url : url_base {
@@ -4704,177 +4903,217 @@ struct url : url_base {
4704
4903
  */
4705
4904
  std::optional<std::string> hash{};
4706
4905
 
4707
- /** @return true if it has a host but it is the empty string */
4906
+ /**
4907
+ * Checks if the URL has an empty hostname (host is set but empty string).
4908
+ * @return `true` if host exists but is empty, `false` otherwise.
4909
+ */
4708
4910
  [[nodiscard]] inline bool has_empty_hostname() const noexcept;
4709
- /** @return true if the URL has a (non default) port */
4911
+
4912
+ /**
4913
+ * Checks if the URL has a non-default port explicitly specified.
4914
+ * @return `true` if a port is present, `false` otherwise.
4915
+ */
4710
4916
  [[nodiscard]] inline bool has_port() const noexcept;
4711
- /** @return true if it has a host (including an empty host) */
4917
+
4918
+ /**
4919
+ * Checks if the URL has a hostname (including empty hostnames).
4920
+ * @return `true` if host is present, `false` otherwise.
4921
+ */
4712
4922
  [[nodiscard]] inline bool has_hostname() const noexcept;
4923
+
4924
+ /**
4925
+ * Validates whether the hostname is a valid domain according to RFC 1034.
4926
+ * Checks that the domain and its labels have valid lengths (max 255 octets
4927
+ * total, max 63 octets per label).
4928
+ * @return `true` if the domain is valid, `false` otherwise.
4929
+ */
4713
4930
  [[nodiscard]] bool has_valid_domain() const noexcept override;
4714
4931
 
4715
4932
  /**
4716
- * Returns a JSON string representation of this URL.
4933
+ * Returns a JSON string representation of this URL for debugging.
4934
+ * @return A JSON-formatted string with all URL components.
4717
4935
  */
4718
4936
  [[nodiscard]] std::string to_string() const override;
4719
4937
 
4720
4938
  /**
4939
+ * Returns the full serialized URL (the href).
4940
+ * @return The complete URL string (allocates a new string).
4721
4941
  * @see https://url.spec.whatwg.org/#dom-url-href
4722
- * @see https://url.spec.whatwg.org/#concept-url-serializer
4723
4942
  */
4724
- [[nodiscard]] ada_really_inline std::string get_href() const noexcept;
4943
+ [[nodiscard]] ada_really_inline std::string get_href() const;
4725
4944
 
4726
4945
  /**
4727
- * The origin getter steps are to return the serialization of this's URL's
4728
- * origin. [HTML]
4729
- * @return a newly allocated string.
4946
+ * Returns the URL's origin as a string (scheme + host + port for special
4947
+ * URLs).
4948
+ * @return A newly allocated string containing the serialized origin.
4730
4949
  * @see https://url.spec.whatwg.org/#concept-url-origin
4731
4950
  */
4732
- [[nodiscard]] std::string get_origin() const noexcept override;
4951
+ [[nodiscard]] std::string get_origin() const override;
4733
4952
 
4734
4953
  /**
4735
- * The protocol getter steps are to return this's URL's scheme, followed by
4736
- * U+003A (:).
4737
- * @return a newly allocated string.
4954
+ * Returns the URL's scheme followed by a colon (e.g., "https:").
4955
+ * @return A newly allocated string with the protocol.
4738
4956
  * @see https://url.spec.whatwg.org/#dom-url-protocol
4739
4957
  */
4740
- [[nodiscard]] std::string get_protocol() const noexcept;
4958
+ [[nodiscard]] std::string get_protocol() const;
4741
4959
 
4742
4960
  /**
4743
- * Return url's host, serialized, followed by U+003A (:) and url's port,
4744
- * serialized.
4745
- * When there is no host, this function returns the empty string.
4746
- * @return a newly allocated string.
4961
+ * Returns the URL's host and port (e.g., "example.com:8080").
4962
+ * If no port is set, returns just the host. Returns empty string if no host.
4963
+ * @return A newly allocated string with host:port.
4747
4964
  * @see https://url.spec.whatwg.org/#dom-url-host
4748
4965
  */
4749
- [[nodiscard]] std::string get_host() const noexcept;
4966
+ [[nodiscard]] std::string get_host() const;
4750
4967
 
4751
4968
  /**
4752
- * Return this's URL's host, serialized.
4753
- * When there is no host, this function returns the empty string.
4754
- * @return a newly allocated string.
4969
+ * Returns the URL's hostname (without port).
4970
+ * Returns empty string if no host is set.
4971
+ * @return A newly allocated string with the hostname.
4755
4972
  * @see https://url.spec.whatwg.org/#dom-url-hostname
4756
4973
  */
4757
- [[nodiscard]] std::string get_hostname() const noexcept;
4974
+ [[nodiscard]] std::string get_hostname() const;
4758
4975
 
4759
4976
  /**
4760
- * The pathname getter steps are to return the result of URL path serializing
4761
- * this's URL.
4762
- * @return a newly allocated string.
4977
+ * Returns the URL's path component.
4978
+ * @return A string_view pointing to the path.
4763
4979
  * @see https://url.spec.whatwg.org/#dom-url-pathname
4764
4980
  */
4765
4981
  [[nodiscard]] constexpr std::string_view get_pathname() const noexcept;
4766
4982
 
4767
4983
  /**
4768
- * Compute the pathname length in bytes without instantiating a view or a
4769
- * string.
4770
- * @return size of the pathname in bytes
4984
+ * Returns the byte length of the pathname without creating a string.
4985
+ * @return Size of the pathname in bytes.
4771
4986
  * @see https://url.spec.whatwg.org/#dom-url-pathname
4772
4987
  */
4773
4988
  [[nodiscard]] ada_really_inline size_t get_pathname_length() const noexcept;
4774
4989
 
4775
4990
  /**
4776
- * Return U+003F (?), followed by this's URL's query.
4777
- * @return a newly allocated string.
4991
+ * Returns the URL's query string prefixed with '?' (e.g., "?foo=bar").
4992
+ * Returns empty string if no query is set.
4993
+ * @return A newly allocated string with the search/query.
4778
4994
  * @see https://url.spec.whatwg.org/#dom-url-search
4779
4995
  */
4780
- [[nodiscard]] std::string get_search() const noexcept;
4996
+ [[nodiscard]] std::string get_search() const;
4781
4997
 
4782
4998
  /**
4783
- * The username getter steps are to return this's URL's username.
4784
- * @return a constant reference to the underlying string.
4999
+ * Returns the URL's username component.
5000
+ * @return A constant reference to the username string.
4785
5001
  * @see https://url.spec.whatwg.org/#dom-url-username
4786
5002
  */
4787
5003
  [[nodiscard]] const std::string &get_username() const noexcept;
4788
5004
 
4789
5005
  /**
4790
- * @return Returns true on successful operation.
5006
+ * Sets the URL's username, percent-encoding special characters.
5007
+ * @param input The new username value.
5008
+ * @return `true` on success, `false` if the URL cannot have credentials.
4791
5009
  * @see https://url.spec.whatwg.org/#dom-url-username
4792
5010
  */
4793
5011
  bool set_username(std::string_view input);
4794
5012
 
4795
5013
  /**
4796
- * @return Returns true on success.
5014
+ * Sets the URL's password, percent-encoding special characters.
5015
+ * @param input The new password value.
5016
+ * @return `true` on success, `false` if the URL cannot have credentials.
4797
5017
  * @see https://url.spec.whatwg.org/#dom-url-password
4798
5018
  */
4799
5019
  bool set_password(std::string_view input);
4800
5020
 
4801
5021
  /**
4802
- * @return Returns true on success.
5022
+ * Sets the URL's port from a string (e.g., "8080").
5023
+ * @param input The port string. Empty string removes the port.
5024
+ * @return `true` on success, `false` if the URL cannot have a port.
4803
5025
  * @see https://url.spec.whatwg.org/#dom-url-port
4804
5026
  */
4805
5027
  bool set_port(std::string_view input);
4806
5028
 
4807
5029
  /**
4808
- * This function always succeeds.
5030
+ * Sets the URL's fragment/hash (the part after '#').
5031
+ * @param input The new hash value (with or without leading '#').
4809
5032
  * @see https://url.spec.whatwg.org/#dom-url-hash
4810
5033
  */
4811
5034
  void set_hash(std::string_view input);
4812
5035
 
4813
5036
  /**
4814
- * This function always succeeds.
5037
+ * Sets the URL's query string (the part after '?').
5038
+ * @param input The new query value (with or without leading '?').
4815
5039
  * @see https://url.spec.whatwg.org/#dom-url-search
4816
5040
  */
4817
5041
  void set_search(std::string_view input);
4818
5042
 
4819
5043
  /**
4820
- * @return Returns true on success.
4821
- * @see https://url.spec.whatwg.org/#dom-url-search
5044
+ * Sets the URL's pathname.
5045
+ * @param input The new path value.
5046
+ * @return `true` on success, `false` if the URL has an opaque path.
5047
+ * @see https://url.spec.whatwg.org/#dom-url-pathname
4822
5048
  */
4823
5049
  bool set_pathname(std::string_view input);
4824
5050
 
4825
5051
  /**
4826
- * @return Returns true on success.
5052
+ * Sets the URL's host (hostname and optionally port).
5053
+ * @param input The new host value (e.g., "example.com:8080").
5054
+ * @return `true` on success, `false` if parsing fails.
4827
5055
  * @see https://url.spec.whatwg.org/#dom-url-host
4828
5056
  */
4829
5057
  bool set_host(std::string_view input);
4830
5058
 
4831
5059
  /**
4832
- * @return Returns true on success.
5060
+ * Sets the URL's hostname (without port).
5061
+ * @param input The new hostname value.
5062
+ * @return `true` on success, `false` if parsing fails.
4833
5063
  * @see https://url.spec.whatwg.org/#dom-url-hostname
4834
5064
  */
4835
5065
  bool set_hostname(std::string_view input);
4836
5066
 
4837
5067
  /**
4838
- * @return Returns true on success.
5068
+ * Sets the URL's protocol/scheme.
5069
+ * @param input The new protocol (with or without trailing ':').
5070
+ * @return `true` on success, `false` if the scheme is invalid.
4839
5071
  * @see https://url.spec.whatwg.org/#dom-url-protocol
4840
5072
  */
4841
5073
  bool set_protocol(std::string_view input);
4842
5074
 
4843
5075
  /**
5076
+ * Replaces the entire URL by parsing a new href string.
5077
+ * @param input The new URL string to parse.
5078
+ * @return `true` on success, `false` if parsing fails.
4844
5079
  * @see https://url.spec.whatwg.org/#dom-url-href
4845
5080
  */
4846
5081
  bool set_href(std::string_view input);
4847
5082
 
4848
5083
  /**
4849
- * The password getter steps are to return this's URL's password.
4850
- * @return a constant reference to the underlying string.
5084
+ * Returns the URL's password component.
5085
+ * @return A constant reference to the password string.
4851
5086
  * @see https://url.spec.whatwg.org/#dom-url-password
4852
5087
  */
4853
5088
  [[nodiscard]] const std::string &get_password() const noexcept;
4854
5089
 
4855
5090
  /**
4856
- * Return this's URL's port, serialized.
4857
- * @return a newly constructed string representing the port.
5091
+ * Returns the URL's port as a string (e.g., "8080").
5092
+ * Returns empty string if no port is set.
5093
+ * @return A newly allocated string with the port.
4858
5094
  * @see https://url.spec.whatwg.org/#dom-url-port
4859
5095
  */
4860
- [[nodiscard]] std::string get_port() const noexcept;
5096
+ [[nodiscard]] std::string get_port() const;
4861
5097
 
4862
5098
  /**
4863
- * Return U+0023 (#), followed by this's URL's fragment.
4864
- * @return a newly constructed string representing the hash.
5099
+ * Returns the URL's fragment prefixed with '#' (e.g., "#section").
5100
+ * Returns empty string if no fragment is set.
5101
+ * @return A newly allocated string with the hash.
4865
5102
  * @see https://url.spec.whatwg.org/#dom-url-hash
4866
5103
  */
4867
- [[nodiscard]] std::string get_hash() const noexcept;
5104
+ [[nodiscard]] std::string get_hash() const;
4868
5105
 
4869
5106
  /**
4870
- * A URL includes credentials if its username or password is not the empty
4871
- * string.
5107
+ * Checks if the URL has credentials (non-empty username or password).
5108
+ * @return `true` if username or password is non-empty, `false` otherwise.
4872
5109
  */
4873
5110
  [[nodiscard]] ada_really_inline bool has_credentials() const noexcept;
4874
5111
 
4875
5112
  /**
4876
- * Useful for implementing efficient serialization for the URL.
5113
+ * Returns the URL component offsets for efficient serialization.
4877
5114
  *
5115
+ * The components represent byte offsets into the serialized URL:
5116
+ * ```
4878
5117
  * https://user:pass@example.com:1234/foo/bar?baz#quux
4879
5118
  * | | | | ^^^^| | |
4880
5119
  * | | | | | | | `----- hash_start
@@ -4885,19 +5124,23 @@ struct url : url_base {
4885
5124
  * | | `---------------------------------- host_start
4886
5125
  * | `--------------------------------------- username_end
4887
5126
  * `--------------------------------------------- protocol_end
4888
- *
4889
- * Inspired by servo/url
4890
- *
4891
- * @return a newly constructed component.
4892
- *
4893
- * @see
4894
- * https://github.com/servo/rust-url/blob/b65a45515c10713f6d212e6726719a020203cc98/url/src/quirks.rs#L31
5127
+ * ```
5128
+ * @return A newly constructed url_components struct.
5129
+ * @see https://github.com/servo/rust-url
4895
5130
  */
4896
5131
  [[nodiscard]] ada_really_inline ada::url_components get_components()
4897
5132
  const noexcept;
4898
- /** @return true if the URL has a hash component */
5133
+
5134
+ /**
5135
+ * Checks if the URL has a fragment/hash component.
5136
+ * @return `true` if hash is present, `false` otherwise.
5137
+ */
4899
5138
  [[nodiscard]] constexpr bool has_hash() const noexcept override;
4900
- /** @return true if the URL has a search component */
5139
+
5140
+ /**
5141
+ * Checks if the URL has a query/search component.
5142
+ * @return `true` if query is present, `false` otherwise.
5143
+ */
4901
5144
  [[nodiscard]] constexpr bool has_search() const noexcept override;
4902
5145
 
4903
5146
  private:
@@ -4906,7 +5149,7 @@ struct url : url_base {
4906
5149
  friend ada::url_aggregator ada::parser::parse_url<ada::url_aggregator>(
4907
5150
  std::string_view, const ada::url_aggregator *);
4908
5151
  friend void ada::helpers::strip_trailing_spaces_from_opaque_path<ada::url>(
4909
- ada::url &url) noexcept;
5152
+ ada::url &url);
4910
5153
 
4911
5154
  friend ada::url ada::parser::parse_url_impl<ada::url, true>(std::string_view,
4912
5155
  const ada::url *);
@@ -5013,7 +5256,7 @@ struct url : url_base {
5013
5256
  * Take the scheme from another URL. The scheme string is moved from the
5014
5257
  * provided url.
5015
5258
  */
5016
- constexpr void copy_scheme(ada::url &&u) noexcept;
5259
+ constexpr void copy_scheme(ada::url &&u);
5017
5260
 
5018
5261
  /**
5019
5262
  * Take the scheme from another URL. The scheme string is copied from the
@@ -5031,17 +5274,70 @@ inline std::ostream &operator<<(std::ostream &out, const ada::url &u);
5031
5274
 
5032
5275
  namespace ada {
5033
5276
 
5277
+ /**
5278
+ * Result type for URL parsing operations.
5279
+ *
5280
+ * Uses `tl::expected` to represent either a successfully parsed URL or an
5281
+ * error. This allows for exception-free error handling.
5282
+ *
5283
+ * @tparam result_type The URL type to return (default: `ada::url_aggregator`)
5284
+ *
5285
+ * @example
5286
+ * ```cpp
5287
+ * ada::result<ada::url_aggregator> result = ada::parse("https://example.com");
5288
+ * if (result) {
5289
+ * // Success: use result.value() or *result
5290
+ * } else {
5291
+ * // Error: handle result.error()
5292
+ * }
5293
+ * ```
5294
+ */
5034
5295
  template <class result_type = ada::url_aggregator>
5035
5296
  using result = tl::expected<result_type, ada::errors>;
5036
5297
 
5037
5298
  /**
5038
- * The URL parser takes a scalar value string input, with an optional null or
5039
- * base URL base (default null). The parser assumes the input is a valid ASCII
5040
- * or UTF-8 string.
5299
+ * Parses a URL string according to the WHATWG URL Standard.
5300
+ *
5301
+ * This is the main entry point for URL parsing in Ada. The function takes
5302
+ * a string input and optionally a base URL for resolving relative URLs.
5303
+ *
5304
+ * @tparam result_type The URL type to return. Can be either `ada::url` or
5305
+ * `ada::url_aggregator` (default). The `url_aggregator` type is more
5306
+ * memory-efficient as it stores components as offsets into a single
5307
+ * buffer.
5041
5308
  *
5042
- * @param input the string input to analyze (must be valid ASCII or UTF-8)
5043
- * @param base_url the optional URL input to use as a base url.
5044
- * @return a parsed URL.
5309
+ * @param input The URL string to parse. Must be valid ASCII or UTF-8 encoded.
5310
+ * Leading and trailing whitespace is automatically trimmed.
5311
+ * @param base_url Optional pointer to a base URL for resolving relative URLs.
5312
+ * If nullptr (default), only absolute URLs can be parsed successfully.
5313
+ *
5314
+ * @return A `result<result_type>` containing either the parsed URL on success,
5315
+ * or an error code on failure. Use the boolean conversion or
5316
+ * `has_value()` to check for success.
5317
+ *
5318
+ * @note The parser is fully compliant with the WHATWG URL Standard.
5319
+ *
5320
+ * @example
5321
+ * ```cpp
5322
+ * // Parse an absolute URL
5323
+ * auto url = ada::parse("https://user:pass@example.com:8080/path?query#hash");
5324
+ * if (url) {
5325
+ * std::cout << url->get_hostname(); // "example.com"
5326
+ * std::cout << url->get_pathname(); // "/path"
5327
+ * }
5328
+ *
5329
+ * // Parse a relative URL with a base
5330
+ * auto base = ada::parse("https://example.com/dir/");
5331
+ * if (base) {
5332
+ * auto relative = ada::parse("../other/page", &*base);
5333
+ * if (relative) {
5334
+ * // Prints "https://example.com/other/page"
5335
+ * std::cout << relative->get_href();
5336
+ * }
5337
+ * }
5338
+ * ```
5339
+ *
5340
+ * @see https://url.spec.whatwg.org/#url-parsing
5045
5341
  */
5046
5342
  template <class result_type = ada::url_aggregator>
5047
5343
  ada_warn_unused ada::result<result_type> parse(
@@ -5053,23 +5349,56 @@ extern template ada::result<url_aggregator> parse<url_aggregator>(
5053
5349
  std::string_view input, const url_aggregator* base_url);
5054
5350
 
5055
5351
  /**
5056
- * Verifies whether the URL strings can be parsed. The function assumes
5057
- * that the inputs are valid ASCII or UTF-8 strings.
5352
+ * Checks whether a URL string can be successfully parsed.
5353
+ *
5354
+ * This is a fast validation function that checks if a URL string is valid
5355
+ * according to the WHATWG URL Standard without fully constructing a URL
5356
+ * object. Use this when you only need to validate URLs without needing
5357
+ * their parsed components.
5358
+ *
5359
+ * @param input The URL string to validate. Must be valid ASCII or UTF-8.
5360
+ * @param base_input Optional pointer to a base URL string for resolving
5361
+ * relative URLs. If nullptr (default), the input is validated as
5362
+ * an absolute URL.
5363
+ *
5364
+ * @return `true` if the URL can be parsed successfully, `false` otherwise.
5365
+ *
5366
+ * @example
5367
+ * ```cpp
5368
+ * // Check absolute URL
5369
+ * bool valid = ada::can_parse("https://example.com"); // true
5370
+ * bool invalid = ada::can_parse("not a url"); // false
5371
+ *
5372
+ * // Check relative URL with base
5373
+ * std::string_view base = "https://example.com/";
5374
+ * bool relative_valid = ada::can_parse("../path", &base); // true
5375
+ * ```
5376
+ *
5058
5377
  * @see https://url.spec.whatwg.org/#dom-url-canparse
5059
- * @return If URL can be parsed or not.
5060
5378
  */
5061
5379
  bool can_parse(std::string_view input,
5062
5380
  const std::string_view* base_input = nullptr);
5063
5381
 
5064
5382
  #if ADA_INCLUDE_URL_PATTERN
5065
5383
  /**
5066
- * Implementation of the URL pattern parsing algorithm.
5067
- * @see https://urlpattern.spec.whatwg.org
5384
+ * Parses a URL pattern according to the URLPattern specification.
5385
+ *
5386
+ * URL patterns provide a syntax for matching URLs against patterns, similar
5387
+ * to how regular expressions match strings. This is useful for routing and
5388
+ * URL-based dispatching.
5389
+ *
5390
+ * @tparam regex_provider The regex implementation to use for pattern matching.
5391
+ *
5392
+ * @param input Either a URL pattern string (valid UTF-8) or a URLPatternInit
5393
+ * struct specifying individual component patterns.
5394
+ * @param base_url Optional pointer to a base URL string (valid UTF-8) for
5395
+ * resolving relative patterns.
5396
+ * @param options Optional pointer to configuration options (e.g., ignore_case).
5068
5397
  *
5069
- * @param input valid UTF-8 string or URLPatternInit struct
5070
- * @param base_url an optional valid UTF-8 string
5071
- * @param options an optional url_pattern_options struct
5072
- * @return url_pattern instance
5398
+ * @return A `tl::expected` containing either the parsed url_pattern on success,
5399
+ * or an error code on failure.
5400
+ *
5401
+ * @see https://urlpattern.spec.whatwg.org
5073
5402
  */
5074
5403
  template <url_pattern_regex::regex_concept regex_provider>
5075
5404
  ada_warn_unused tl::expected<url_pattern<regex_provider>, errors>
@@ -5079,9 +5408,14 @@ parse_url_pattern(std::variant<std::string_view, url_pattern_init>&& input,
5079
5408
  #endif // ADA_INCLUDE_URL_PATTERN
5080
5409
 
5081
5410
  /**
5082
- * Computes a href string from a file path. The function assumes
5083
- * that the input is a valid ASCII or UTF-8 string.
5084
- * @return a href string (starts with file:://)
5411
+ * Converts a file system path to a file:// URL.
5412
+ *
5413
+ * Creates a properly formatted file URL from a local file system path.
5414
+ * Handles platform-specific path separators and percent-encoding.
5415
+ *
5416
+ * @param path The file system path to convert. Must be valid ASCII or UTF-8.
5417
+ *
5418
+ * @return A file:// URL string representing the given path.
5085
5419
  */
5086
5420
  std::string href_from_file(std::string_view path);
5087
5421
  } // namespace ada
@@ -5117,6 +5451,19 @@ enum class url_pattern_part_type : uint8_t {
5117
5451
  FULL_WILDCARD,
5118
5452
  };
5119
5453
 
5454
+ // Pattern type for fast-path matching optimization.
5455
+ // This allows skipping expensive regex evaluation for common simple patterns.
5456
+ enum class url_pattern_component_type : uint8_t {
5457
+ // Pattern is "^$" - only matches empty string
5458
+ EMPTY,
5459
+ // Pattern is "^<literal>$" - exact string match (no regex needed)
5460
+ EXACT_MATCH,
5461
+ // Pattern is "^(.*)$" - matches anything (full wildcard)
5462
+ FULL_WILDCARD,
5463
+ // Pattern requires actual regex evaluation
5464
+ REGEXP,
5465
+ };
5466
+
5120
5467
  enum class url_pattern_part_modifier : uint8_t {
5121
5468
  // The part does not have a modifier.
5122
5469
  none,
@@ -5236,11 +5583,15 @@ class url_pattern_component {
5236
5583
  url_pattern_component(std::string&& new_pattern,
5237
5584
  typename regex_provider::regex_type&& new_regexp,
5238
5585
  std::vector<std::string>&& new_group_name_list,
5239
- bool new_has_regexp_groups)
5586
+ bool new_has_regexp_groups,
5587
+ url_pattern_component_type new_type,
5588
+ std::string&& new_exact_match_value = {})
5240
5589
  : regexp(std::move(new_regexp)),
5241
5590
  pattern(std::move(new_pattern)),
5242
5591
  group_name_list(std::move(new_group_name_list)),
5243
- has_regexp_groups(new_has_regexp_groups) {}
5592
+ exact_match_value(std::move(new_exact_match_value)),
5593
+ has_regexp_groups(new_has_regexp_groups),
5594
+ type(new_type) {}
5244
5595
 
5245
5596
  // @see https://urlpattern.spec.whatwg.org/#compile-a-component
5246
5597
  template <url_pattern_encoding_callback F>
@@ -5253,6 +5604,16 @@ class url_pattern_component {
5253
5604
  std::string&& input,
5254
5605
  std::vector<std::optional<std::string>>&& exec_result);
5255
5606
 
5607
+ // Fast path test that returns true/false without constructing result groups.
5608
+ // Uses cached pattern type to skip regex evaluation for simple patterns.
5609
+ bool fast_test(std::string_view input) const noexcept;
5610
+
5611
+ // Fast path match that returns capture groups without regex for simple
5612
+ // patterns. Returns nullopt if pattern doesn't match, otherwise returns
5613
+ // capture groups.
5614
+ std::optional<std::vector<std::optional<std::string>>> fast_match(
5615
+ std::string_view input) const;
5616
+
5256
5617
  #if ADA_TESTING
5257
5618
  friend void PrintTo(const url_pattern_component& component,
5258
5619
  std::ostream* os) {
@@ -5268,7 +5629,11 @@ class url_pattern_component {
5268
5629
  typename regex_provider::regex_type regexp{};
5269
5630
  std::string pattern{};
5270
5631
  std::vector<std::string> group_name_list{};
5632
+ // For EXACT_MATCH type: the literal string to compare against
5633
+ std::string exact_match_value{};
5271
5634
  bool has_regexp_groups = false;
5635
+ // Cached pattern type for fast-path optimization
5636
+ url_pattern_component_type type = url_pattern_component_type::REGEXP;
5272
5637
  };
5273
5638
 
5274
5639
  // A URLPattern input can be either a string or a URLPatternInit object.
@@ -5300,14 +5665,28 @@ struct url_pattern_options {
5300
5665
  #endif // ADA_TESTING
5301
5666
  };
5302
5667
 
5303
- // URLPattern is a Web Platform standard API for matching URLs against a
5304
- // pattern syntax (think of it as a regular expression for URLs). It is
5305
- // defined in https://wicg.github.io/urlpattern.
5306
- // More information about the URL Pattern syntax can be found at
5307
- // https://developer.mozilla.org/en-US/docs/Web/API/URL_Pattern_API
5308
- //
5309
- // We require all strings to be valid UTF-8: it is the user's responsibility
5310
- // to ensure that the provided strings are valid UTF-8.
5668
+ /**
5669
+ * @brief URL pattern matching class implementing the URLPattern API.
5670
+ *
5671
+ * URLPattern provides a way to match URLs against patterns with wildcards
5672
+ * and named capture groups. It's useful for routing, URL-based dispatching,
5673
+ * and URL validation.
5674
+ *
5675
+ * Pattern syntax supports:
5676
+ * - Literal text matching
5677
+ * - Named groups: `:name` (matches up to the next separator)
5678
+ * - Wildcards: `*` (matches everything)
5679
+ * - Custom regex: `(pattern)`
5680
+ * - Optional segments: `:name?`
5681
+ * - Repeated segments: `:name+`, `:name*`
5682
+ *
5683
+ * @tparam regex_provider The regex implementation to use for pattern matching.
5684
+ * Must satisfy the url_pattern_regex::regex_concept.
5685
+ *
5686
+ * @note All string inputs must be valid UTF-8.
5687
+ *
5688
+ * @see https://urlpattern.spec.whatwg.org/
5689
+ */
5311
5690
  template <url_pattern_regex::regex_concept regex_provider>
5312
5691
  class url_pattern {
5313
5692
  public:
@@ -5360,6 +5739,13 @@ class url_pattern {
5360
5739
  // @see https://urlpattern.spec.whatwg.org/#url-pattern-has-regexp-groups
5361
5740
  [[nodiscard]] bool has_regexp_groups() const;
5362
5741
 
5742
+ // Helper to test all components at once. Returns true if all match.
5743
+ [[nodiscard]] bool test_components(
5744
+ std::string_view protocol, std::string_view username,
5745
+ std::string_view password, std::string_view hostname,
5746
+ std::string_view port, std::string_view pathname, std::string_view search,
5747
+ std::string_view hash) const;
5748
+
5363
5749
  #if ADA_TESTING
5364
5750
  friend void PrintTo(const url_pattern& c, std::ostream* os) {
5365
5751
  *os << "protocol_component: '" << c.get_protocol() << ", ";
@@ -5484,8 +5870,8 @@ enum class token_policy {
5484
5870
  // @see https://urlpattern.spec.whatwg.org/#tokens
5485
5871
  class token {
5486
5872
  public:
5487
- token(token_type _type, size_t _index, std::string&& _value)
5488
- : type(_type), index(_index), value(std::move(_value)) {}
5873
+ token(token_type _type, size_t _index, std::string_view _value)
5874
+ : type(_type), index(_index), value(_value) {}
5489
5875
 
5490
5876
  // A token has an associated type, a string, initially "invalid-char".
5491
5877
  token_type type = token_type::INVALID_CHAR;
@@ -5496,7 +5882,7 @@ class token {
5496
5882
 
5497
5883
  // A token has an associated value, a string, initially the empty string. It
5498
5884
  // contains the code points from the pattern string represented by the token.
5499
- std::string value{};
5885
+ std::string_view value{};
5500
5886
  };
5501
5887
 
5502
5888
  // @see https://urlpattern.spec.whatwg.org/#pattern-parser
@@ -5574,7 +5960,7 @@ class Tokenizer {
5574
5960
 
5575
5961
  private:
5576
5962
  // has an associated input, a pattern string, initially the empty string.
5577
- std::string input;
5963
+ std::string_view input;
5578
5964
  // has an associated policy, a tokenize policy, initially "strict".
5579
5965
  token_policy policy;
5580
5966
  // has an associated token list, a token list, initially an empty list.
@@ -5668,7 +6054,7 @@ struct constructor_string_parser {
5668
6054
  // @see https://urlpattern.spec.whatwg.org/#make-a-component-string
5669
6055
  std::string make_component_string();
5670
6056
  // has an associated input, a string, which must be set upon creation.
5671
- std::string input;
6057
+ std::string_view input;
5672
6058
  // has an associated token list, a token list, which must be set upon
5673
6059
  // creation.
5674
6060
  std::vector<token> token_list;
@@ -5775,7 +6161,7 @@ bool protocol_component_matches_special_scheme(
5775
6161
  ada::url_pattern_component<regex_provider>& input);
5776
6162
 
5777
6163
  // @see https://urlpattern.spec.whatwg.org/#convert-a-modifier-to-a-string
5778
- std::string convert_modifier_to_string(url_pattern_part_modifier modifier);
6164
+ std::string_view convert_modifier_to_string(url_pattern_part_modifier modifier);
5779
6165
 
5780
6166
  // @see https://urlpattern.spec.whatwg.org/#generate-a-segment-wildcard-regexp
5781
6167
  std::string generate_segment_wildcard_regexp(
@@ -6141,7 +6527,10 @@ constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept {
6141
6527
  /* begin file include/ada/serializers.h */
6142
6528
  /**
6143
6529
  * @file serializers.h
6144
- * @brief Definitions for the URL serializers.
6530
+ * @brief IP address serialization utilities.
6531
+ *
6532
+ * This header provides functions for converting IP addresses to their
6533
+ * string representations according to the WHATWG URL Standard.
6145
6534
  */
6146
6535
  #ifndef ADA_SERIALIZERS_H
6147
6536
  #define ADA_SERIALIZERS_H
@@ -6152,32 +6541,41 @@ constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept {
6152
6541
 
6153
6542
  /**
6154
6543
  * @namespace ada::serializers
6155
- * @brief Includes the definitions for URL serializers
6544
+ * @brief IP address serialization functions.
6545
+ *
6546
+ * Contains utilities for serializing IPv4 and IPv6 addresses to strings.
6156
6547
  */
6157
6548
  namespace ada::serializers {
6158
6549
 
6159
6550
  /**
6160
- * Finds and returns the longest sequence of 0 values in a ipv6 input.
6551
+ * Finds the longest consecutive sequence of zero pieces in an IPv6 address.
6552
+ * Used for :: compression in IPv6 serialization.
6553
+ *
6554
+ * @param address The eight 16-bit pieces of the IPv6 address.
6555
+ * @param[out] compress Index of the start of the longest zero sequence.
6556
+ * @param[out] compress_length Length of the longest zero sequence.
6161
6557
  */
6162
6558
  void find_longest_sequence_of_ipv6_pieces(
6163
6559
  const std::array<uint16_t, 8>& address, size_t& compress,
6164
6560
  size_t& compress_length) noexcept;
6165
6561
 
6166
6562
  /**
6167
- * Serializes an ipv6 address.
6168
- * @details An IPv6 address is a 128-bit unsigned integer that identifies a
6169
- * network address.
6563
+ * Serializes an IPv6 address to its string representation.
6564
+ *
6565
+ * @param address The eight 16-bit pieces of the IPv6 address.
6566
+ * @return The serialized IPv6 string (e.g., "2001:db8::1").
6170
6567
  * @see https://url.spec.whatwg.org/#concept-ipv6-serializer
6171
6568
  */
6172
- std::string ipv6(const std::array<uint16_t, 8>& address) noexcept;
6569
+ std::string ipv6(const std::array<uint16_t, 8>& address);
6173
6570
 
6174
6571
  /**
6175
- * Serializes an ipv4 address.
6176
- * @details An IPv4 address is a 32-bit unsigned integer that identifies a
6177
- * network address.
6572
+ * Serializes an IPv4 address to its dotted-decimal string representation.
6573
+ *
6574
+ * @param address The 32-bit IPv4 address as an integer.
6575
+ * @return The serialized IPv4 string (e.g., "192.168.1.1").
6178
6576
  * @see https://url.spec.whatwg.org/#concept-ipv4-serializer
6179
6577
  */
6180
- std::string ipv4(uint64_t address) noexcept;
6578
+ std::string ipv4(uint64_t address);
6181
6579
 
6182
6580
  } // namespace ada::serializers
6183
6581
 
@@ -6186,7 +6584,12 @@ std::string ipv4(uint64_t address) noexcept;
6186
6584
  /* begin file include/ada/state.h */
6187
6585
  /**
6188
6586
  * @file state.h
6189
- * @brief Definitions for the states of the URL state machine.
6587
+ * @brief URL parser state machine states.
6588
+ *
6589
+ * Defines the states used by the URL parsing state machine as specified
6590
+ * in the WHATWG URL Standard.
6591
+ *
6592
+ * @see https://url.spec.whatwg.org/#url-parsing
6190
6593
  */
6191
6594
  #ifndef ADA_STATE_H
6192
6595
  #define ADA_STATE_H
@@ -6197,6 +6600,11 @@ std::string ipv4(uint64_t address) noexcept;
6197
6600
  namespace ada {
6198
6601
 
6199
6602
  /**
6603
+ * @brief States in the URL parsing state machine.
6604
+ *
6605
+ * The URL parser processes input through a sequence of states, each handling
6606
+ * a specific part of the URL syntax.
6607
+ *
6200
6608
  * @see https://url.spec.whatwg.org/#url-parsing
6201
6609
  */
6202
6610
  enum class state {
@@ -6302,7 +6710,9 @@ enum class state {
6302
6710
  };
6303
6711
 
6304
6712
  /**
6305
- * Stringify a URL state machine state.
6713
+ * Converts a parser state to its string name for debugging.
6714
+ * @param s The state to convert.
6715
+ * @return A string representation of the state.
6306
6716
  */
6307
6717
  ada_warn_unused std::string to_string(ada::state s);
6308
6718
 
@@ -6641,6 +7051,7 @@ inline std::ostream &operator<<(std::ostream &out, const ada::url &u) {
6641
7051
  out.protocol_end = uint32_t(get_protocol().size());
6642
7052
 
6643
7053
  // Trailing index is always the next character of the current one.
7054
+ // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores)
6644
7055
  size_t running_index = out.protocol_end;
6645
7056
 
6646
7057
  if (host.has_value()) {
@@ -6760,7 +7171,7 @@ inline void url::set_scheme(std::string &&new_scheme) noexcept {
6760
7171
  }
6761
7172
  }
6762
7173
 
6763
- constexpr void url::copy_scheme(ada::url &&u) noexcept {
7174
+ constexpr void url::copy_scheme(ada::url &&u) {
6764
7175
  non_special_scheme = u.non_special_scheme;
6765
7176
  type = u.type;
6766
7177
  }
@@ -6770,7 +7181,7 @@ constexpr void url::copy_scheme(const ada::url &u) {
6770
7181
  type = u.type;
6771
7182
  }
6772
7183
 
6773
- [[nodiscard]] ada_really_inline std::string url::get_href() const noexcept {
7184
+ [[nodiscard]] ada_really_inline std::string url::get_href() const {
6774
7185
  std::string output = get_protocol();
6775
7186
 
6776
7187
  if (host.has_value()) {
@@ -6929,7 +7340,13 @@ namespace ada {
6929
7340
  /* begin file include/ada/url_aggregator.h */
6930
7341
  /**
6931
7342
  * @file url_aggregator.h
6932
- * @brief Declaration for the basic URL definitions
7343
+ * @brief Declaration for the `ada::url_aggregator` class.
7344
+ *
7345
+ * This file contains the `ada::url_aggregator` struct which represents a parsed
7346
+ * URL using a single buffer with component offsets. This is the default and
7347
+ * most memory-efficient URL representation in Ada.
7348
+ *
7349
+ * @see url.h for an alternative representation using separate strings
6933
7350
  */
6934
7351
  #ifndef ADA_URL_AGGREGATOR_H
6935
7352
  #define ADA_URL_AGGREGATOR_H
@@ -6945,12 +7362,23 @@ namespace ada {
6945
7362
  namespace parser {}
6946
7363
 
6947
7364
  /**
6948
- * @brief Lightweight URL struct.
7365
+ * @brief Memory-efficient URL representation using a single buffer.
7366
+ *
7367
+ * The `url_aggregator` stores the entire normalized URL in a single string
7368
+ * buffer and tracks component boundaries using offsets. This design minimizes
7369
+ * memory allocations and is ideal for read-mostly access patterns.
7370
+ *
7371
+ * Getter methods return `std::string_view` pointing into the internal buffer.
7372
+ * These views are lightweight (no allocation) but become invalid if the
7373
+ * url_aggregator is modified or destroyed.
6949
7374
  *
6950
- * @details The url_aggregator class aims to minimize temporary memory
6951
- * allocation while representing a parsed URL. Internally, it contains a single
6952
- * normalized URL (the href), and it makes available the components, mostly
6953
- * using std::string_view.
7375
+ * @warning Views returned by getters (e.g., `get_pathname()`) are invalidated
7376
+ * when any setter is called. Do not use a getter's result as input to a
7377
+ * setter on the same object without copying first.
7378
+ *
7379
+ * @note This is the default URL type returned by `ada::parse()`.
7380
+ *
7381
+ * @see url For an alternative using separate std::string instances
6954
7382
  */
6955
7383
  struct url_aggregator : url_base {
6956
7384
  url_aggregator() = default;
@@ -6960,6 +7388,25 @@ struct url_aggregator : url_base {
6960
7388
  url_aggregator &operator=(const url_aggregator &u) = default;
6961
7389
  ~url_aggregator() override = default;
6962
7390
 
7391
+ /**
7392
+ * The setter functions follow the steps defined in the URL Standard.
7393
+ *
7394
+ * The url_aggregator has a single buffer that contains the entire normalized
7395
+ * URL. The various components are represented as offsets into that buffer.
7396
+ * When you call get_pathname(), for example, you get a std::string_view that
7397
+ * points into that buffer. If the url_aggregator is modified, the buffer may
7398
+ * be reallocated, and the std::string_view you obtained earlier may become
7399
+ * invalid. In particular, this implies that you cannot modify the URL using
7400
+ * a setter function with a std::string_view that points into the
7401
+ * url_aggregator. E.g., the following is incorrect:
7402
+ * url->set_hostname(url->get_pathname()).
7403
+ * You must first copy the pathname to a separate string.
7404
+ * std::string pathname(url->get_pathname());
7405
+ * url->set_hostname(pathname);
7406
+ *
7407
+ * The caller is responsible for ensuring that the url_aggregator is not
7408
+ * modified while any std::string_view obtained from it is in use.
7409
+ */
6963
7410
  bool set_href(std::string_view input);
6964
7411
  bool set_host(std::string_view input);
6965
7412
  bool set_hostname(std::string_view input);
@@ -6971,115 +7418,130 @@ struct url_aggregator : url_base {
6971
7418
  void set_search(std::string_view input);
6972
7419
  void set_hash(std::string_view input);
6973
7420
 
7421
+ /**
7422
+ * Validates whether the hostname is a valid domain according to RFC 1034.
7423
+ * @return `true` if the domain is valid, `false` otherwise.
7424
+ */
6974
7425
  [[nodiscard]] bool has_valid_domain() const noexcept override;
7426
+
6975
7427
  /**
6976
- * The origin getter steps are to return the serialization of this's URL's
6977
- * origin. [HTML]
6978
- * @return a newly allocated string.
7428
+ * Returns the URL's origin (scheme + host + port for special URLs).
7429
+ * @return A newly allocated string containing the serialized origin.
6979
7430
  * @see https://url.spec.whatwg.org/#concept-url-origin
6980
7431
  */
6981
- [[nodiscard]] std::string get_origin() const noexcept override;
7432
+ [[nodiscard]] std::string get_origin() const override;
7433
+
6982
7434
  /**
6983
- * Return the normalized string.
6984
- * This function does not allocate memory.
6985
- * It is highly efficient.
6986
- * @return a constant reference to the underlying normalized URL.
7435
+ * Returns the full serialized URL (the href) as a string_view.
7436
+ * Does not allocate memory. The returned view becomes invalid if this
7437
+ * url_aggregator is modified or destroyed.
7438
+ * @return A string_view into the internal buffer.
6987
7439
  * @see https://url.spec.whatwg.org/#dom-url-href
6988
- * @see https://url.spec.whatwg.org/#concept-url-serializer
6989
7440
  */
6990
7441
  [[nodiscard]] constexpr std::string_view get_href() const noexcept
6991
7442
  ada_lifetime_bound;
7443
+
6992
7444
  /**
6993
- * The username getter steps are to return this's URL's username.
6994
- * This function does not allocate memory.
6995
- * @return a lightweight std::string_view.
7445
+ * Returns the URL's username component.
7446
+ * Does not allocate memory. The returned view becomes invalid if this
7447
+ * url_aggregator is modified or destroyed.
7448
+ * @return A string_view of the username.
6996
7449
  * @see https://url.spec.whatwg.org/#dom-url-username
6997
7450
  */
6998
- [[nodiscard]] std::string_view get_username() const noexcept
6999
- ada_lifetime_bound;
7451
+ [[nodiscard]] std::string_view get_username() const ada_lifetime_bound;
7452
+
7000
7453
  /**
7001
- * The password getter steps are to return this's URL's password.
7002
- * This function does not allocate memory.
7003
- * @return a lightweight std::string_view.
7454
+ * Returns the URL's password component.
7455
+ * Does not allocate memory. The returned view becomes invalid if this
7456
+ * url_aggregator is modified or destroyed.
7457
+ * @return A string_view of the password.
7004
7458
  * @see https://url.spec.whatwg.org/#dom-url-password
7005
7459
  */
7006
- [[nodiscard]] std::string_view get_password() const noexcept
7007
- ada_lifetime_bound;
7460
+ [[nodiscard]] std::string_view get_password() const ada_lifetime_bound;
7461
+
7008
7462
  /**
7009
- * Return this's URL's port, serialized.
7010
- * This function does not allocate memory.
7011
- * @return a lightweight std::string_view.
7463
+ * Returns the URL's port as a string (e.g., "8080").
7464
+ * Does not allocate memory. Returns empty view if no port is set.
7465
+ * The returned view becomes invalid if this url_aggregator is modified.
7466
+ * @return A string_view of the port.
7012
7467
  * @see https://url.spec.whatwg.org/#dom-url-port
7013
7468
  */
7014
- [[nodiscard]] std::string_view get_port() const noexcept ada_lifetime_bound;
7469
+ [[nodiscard]] std::string_view get_port() const ada_lifetime_bound;
7470
+
7015
7471
  /**
7016
- * Return U+0023 (#), followed by this's URL's fragment.
7017
- * This function does not allocate memory.
7018
- * @return a lightweight std::string_view..
7472
+ * Returns the URL's fragment prefixed with '#' (e.g., "#section").
7473
+ * Does not allocate memory. Returns empty view if no fragment is set.
7474
+ * The returned view becomes invalid if this url_aggregator is modified.
7475
+ * @return A string_view of the hash.
7019
7476
  * @see https://url.spec.whatwg.org/#dom-url-hash
7020
7477
  */
7021
- [[nodiscard]] std::string_view get_hash() const noexcept ada_lifetime_bound;
7478
+ [[nodiscard]] std::string_view get_hash() const ada_lifetime_bound;
7479
+
7022
7480
  /**
7023
- * Return url's host, serialized, followed by U+003A (:) and url's port,
7024
- * serialized.
7025
- * This function does not allocate memory.
7026
- * When there is no host, this function returns the empty view.
7027
- * @return a lightweight std::string_view.
7481
+ * Returns the URL's host and port (e.g., "example.com:8080").
7482
+ * Does not allocate memory. Returns empty view if no host is set.
7483
+ * The returned view becomes invalid if this url_aggregator is modified.
7484
+ * @return A string_view of host:port.
7028
7485
  * @see https://url.spec.whatwg.org/#dom-url-host
7029
7486
  */
7030
- [[nodiscard]] std::string_view get_host() const noexcept ada_lifetime_bound;
7487
+ [[nodiscard]] std::string_view get_host() const ada_lifetime_bound;
7488
+
7031
7489
  /**
7032
- * Return this's URL's host, serialized.
7033
- * This function does not allocate memory.
7034
- * When there is no host, this function returns the empty view.
7035
- * @return a lightweight std::string_view.
7490
+ * Returns the URL's hostname (without port).
7491
+ * Does not allocate memory. Returns empty view if no host is set.
7492
+ * The returned view becomes invalid if this url_aggregator is modified.
7493
+ * @return A string_view of the hostname.
7036
7494
  * @see https://url.spec.whatwg.org/#dom-url-hostname
7037
7495
  */
7038
- [[nodiscard]] std::string_view get_hostname() const noexcept
7039
- ada_lifetime_bound;
7496
+ [[nodiscard]] std::string_view get_hostname() const ada_lifetime_bound;
7497
+
7040
7498
  /**
7041
- * The pathname getter steps are to return the result of URL path serializing
7042
- * this's URL.
7043
- * This function does not allocate memory.
7044
- * @return a lightweight std::string_view.
7499
+ * Returns the URL's path component.
7500
+ * Does not allocate memory. The returned view becomes invalid if this
7501
+ * url_aggregator is modified or destroyed.
7502
+ * @return A string_view of the pathname.
7045
7503
  * @see https://url.spec.whatwg.org/#dom-url-pathname
7046
7504
  */
7047
- [[nodiscard]] constexpr std::string_view get_pathname() const noexcept
7505
+ [[nodiscard]] constexpr std::string_view get_pathname() const
7048
7506
  ada_lifetime_bound;
7507
+
7049
7508
  /**
7050
- * Compute the pathname length in bytes without instantiating a view or a
7051
- * string.
7052
- * @return size of the pathname in bytes
7509
+ * Returns the byte length of the pathname without creating a string.
7510
+ * @return Size of the pathname in bytes.
7053
7511
  * @see https://url.spec.whatwg.org/#dom-url-pathname
7054
7512
  */
7055
7513
  [[nodiscard]] ada_really_inline uint32_t get_pathname_length() const noexcept;
7514
+
7056
7515
  /**
7057
- * Return U+003F (?), followed by this's URL's query.
7058
- * This function does not allocate memory.
7059
- * @return a lightweight std::string_view.
7516
+ * Returns the URL's query string prefixed with '?' (e.g., "?foo=bar").
7517
+ * Does not allocate memory. Returns empty view if no query is set.
7518
+ * The returned view becomes invalid if this url_aggregator is modified.
7519
+ * @return A string_view of the search/query.
7060
7520
  * @see https://url.spec.whatwg.org/#dom-url-search
7061
7521
  */
7062
- [[nodiscard]] std::string_view get_search() const noexcept ada_lifetime_bound;
7522
+ [[nodiscard]] std::string_view get_search() const ada_lifetime_bound;
7523
+
7063
7524
  /**
7064
- * The protocol getter steps are to return this's URL's scheme, followed by
7065
- * U+003A (:).
7066
- * This function does not allocate memory.
7067
- * @return a lightweight std::string_view.
7525
+ * Returns the URL's scheme followed by a colon (e.g., "https:").
7526
+ * Does not allocate memory. The returned view becomes invalid if this
7527
+ * url_aggregator is modified or destroyed.
7528
+ * @return A string_view of the protocol.
7068
7529
  * @see https://url.spec.whatwg.org/#dom-url-protocol
7069
7530
  */
7070
- [[nodiscard]] std::string_view get_protocol() const noexcept
7071
- ada_lifetime_bound;
7531
+ [[nodiscard]] std::string_view get_protocol() const ada_lifetime_bound;
7072
7532
 
7073
7533
  /**
7074
- * A URL includes credentials if its username or password is not the empty
7075
- * string.
7534
+ * Checks if the URL has credentials (non-empty username or password).
7535
+ * @return `true` if username or password is non-empty, `false` otherwise.
7076
7536
  */
7077
7537
  [[nodiscard]] ada_really_inline constexpr bool has_credentials()
7078
7538
  const noexcept;
7079
7539
 
7080
7540
  /**
7081
- * Useful for implementing efficient serialization for the URL.
7541
+ * Returns the URL component offsets for efficient serialization.
7082
7542
  *
7543
+ * The components represent byte offsets into the serialized URL:
7544
+ * ```
7083
7545
  * https://user:pass@example.com:1234/foo/bar?baz#quux
7084
7546
  * | | | | ^^^^| | |
7085
7547
  * | | | | | | | `----- hash_start
@@ -7090,57 +7552,99 @@ struct url_aggregator : url_base {
7090
7552
  * | | `---------------------------------- host_start
7091
7553
  * | `--------------------------------------- username_end
7092
7554
  * `--------------------------------------------- protocol_end
7093
- *
7094
- * Inspired after servo/url
7095
- *
7096
- * @return a constant reference to the underlying component attribute.
7097
- *
7098
- * @see
7099
- * https://github.com/servo/rust-url/blob/b65a45515c10713f6d212e6726719a020203cc98/url/src/quirks.rs#L31
7555
+ * ```
7556
+ * @return A constant reference to the url_components struct.
7557
+ * @see https://github.com/servo/rust-url
7100
7558
  */
7101
7559
  [[nodiscard]] ada_really_inline const url_components &get_components()
7102
7560
  const noexcept;
7561
+
7103
7562
  /**
7104
- * Returns a string representation of this URL.
7563
+ * Returns a JSON string representation of this URL for debugging.
7564
+ * @return A JSON-formatted string with all URL components.
7105
7565
  */
7106
7566
  [[nodiscard]] std::string to_string() const override;
7567
+
7107
7568
  /**
7108
- * Returns a string diagram of this URL.
7569
+ * Returns a visual diagram showing component boundaries in the URL.
7570
+ * Useful for debugging and understanding URL structure.
7571
+ * @return A multi-line string diagram.
7109
7572
  */
7110
7573
  [[nodiscard]] std::string to_diagram() const;
7111
7574
 
7112
7575
  /**
7113
- * Verifies that the parsed URL could be valid. Useful for debugging purposes.
7114
- * @return true if the URL is valid, otherwise return true of the offsets are
7115
- * possible.
7576
+ * Validates internal consistency of component offsets (for debugging).
7577
+ * @return `true` if offsets are consistent, `false` if corrupted.
7116
7578
  */
7117
7579
  [[nodiscard]] constexpr bool validate() const noexcept;
7118
7580
 
7119
- /** @return true if it has an host but it is the empty string */
7581
+ /**
7582
+ * Checks if the URL has an empty hostname (host is set but empty string).
7583
+ * @return `true` if host exists but is empty, `false` otherwise.
7584
+ */
7120
7585
  [[nodiscard]] constexpr bool has_empty_hostname() const noexcept;
7121
- /** @return true if it has a host (included an empty host) */
7586
+
7587
+ /**
7588
+ * Checks if the URL has a hostname (including empty hostnames).
7589
+ * @return `true` if host is present, `false` otherwise.
7590
+ */
7122
7591
  [[nodiscard]] constexpr bool has_hostname() const noexcept;
7123
- /** @return true if the URL has a non-empty username */
7592
+
7593
+ /**
7594
+ * Checks if the URL has a non-empty username.
7595
+ * @return `true` if username is non-empty, `false` otherwise.
7596
+ */
7124
7597
  [[nodiscard]] constexpr bool has_non_empty_username() const noexcept;
7125
- /** @return true if the URL has a non-empty password */
7598
+
7599
+ /**
7600
+ * Checks if the URL has a non-empty password.
7601
+ * @return `true` if password is non-empty, `false` otherwise.
7602
+ */
7126
7603
  [[nodiscard]] constexpr bool has_non_empty_password() const noexcept;
7127
- /** @return true if the URL has a (non default) port */
7604
+
7605
+ /**
7606
+ * Checks if the URL has a non-default port explicitly specified.
7607
+ * @return `true` if a port is present, `false` otherwise.
7608
+ */
7128
7609
  [[nodiscard]] constexpr bool has_port() const noexcept;
7129
- /** @return true if the URL has a password */
7610
+
7611
+ /**
7612
+ * Checks if the URL has a password component (may be empty).
7613
+ * @return `true` if password is present, `false` otherwise.
7614
+ */
7130
7615
  [[nodiscard]] constexpr bool has_password() const noexcept;
7131
- /** @return true if the URL has a hash component */
7616
+
7617
+ /**
7618
+ * Checks if the URL has a fragment/hash component.
7619
+ * @return `true` if hash is present, `false` otherwise.
7620
+ */
7132
7621
  [[nodiscard]] constexpr bool has_hash() const noexcept override;
7133
- /** @return true if the URL has a search component */
7622
+
7623
+ /**
7624
+ * Checks if the URL has a query/search component.
7625
+ * @return `true` if query is present, `false` otherwise.
7626
+ */
7134
7627
  [[nodiscard]] constexpr bool has_search() const noexcept override;
7135
7628
 
7629
+ /**
7630
+ * Removes the port from the URL.
7631
+ */
7136
7632
  inline void clear_port();
7633
+
7634
+ /**
7635
+ * Removes the hash/fragment from the URL.
7636
+ */
7137
7637
  inline void clear_hash();
7638
+
7639
+ /**
7640
+ * Removes the query/search string from the URL.
7641
+ */
7138
7642
  inline void clear_search() override;
7139
7643
 
7140
7644
  private:
7141
7645
  // helper methods
7142
7646
  friend void helpers::strip_trailing_spaces_from_opaque_path<url_aggregator>(
7143
- url_aggregator &url) noexcept;
7647
+ url_aggregator &url);
7144
7648
  // parse_url methods
7145
7649
  friend url_aggregator parser::parse_url<url_aggregator>(
7146
7650
  std::string_view, const url_aggregator *);
@@ -7169,7 +7673,7 @@ struct url_aggregator : url_base {
7169
7673
  */
7170
7674
  [[nodiscard]] ada_really_inline bool is_at_path() const noexcept;
7171
7675
 
7172
- inline void add_authority_slashes_if_needed() noexcept;
7676
+ inline void add_authority_slashes_if_needed();
7173
7677
 
7174
7678
  /**
7175
7679
  * To optimize performance, you may indicate how much memory to allocate
@@ -7177,10 +7681,10 @@ struct url_aggregator : url_base {
7177
7681
  */
7178
7682
  constexpr void reserve(uint32_t capacity);
7179
7683
 
7180
- ada_really_inline size_t parse_port(
7181
- std::string_view view, bool check_trailing_content) noexcept override;
7684
+ ada_really_inline size_t parse_port(std::string_view view,
7685
+ bool check_trailing_content) override;
7182
7686
 
7183
- ada_really_inline size_t parse_port(std::string_view view) noexcept override {
7687
+ ada_really_inline size_t parse_port(std::string_view view) override {
7184
7688
  return this->parse_port(view, false);
7185
7689
  }
7186
7690
 
@@ -7245,16 +7749,16 @@ struct url_aggregator : url_base {
7245
7749
  std::string_view input);
7246
7750
  [[nodiscard]] constexpr bool has_authority() const noexcept;
7247
7751
  constexpr void set_protocol_as_file();
7248
- inline void set_scheme(std::string_view new_scheme) noexcept;
7752
+ inline void set_scheme(std::string_view new_scheme);
7249
7753
  /**
7250
7754
  * Fast function to set the scheme from a view with a colon in the
7251
7755
  * buffer, does not change type.
7252
7756
  */
7253
7757
  inline void set_scheme_from_view_with_colon(
7254
- std::string_view new_scheme_with_colon) noexcept;
7255
- inline void copy_scheme(const url_aggregator &u) noexcept;
7758
+ std::string_view new_scheme_with_colon);
7759
+ inline void copy_scheme(const url_aggregator &u);
7256
7760
 
7257
- inline void update_host_to_base_host(const std::string_view input) noexcept;
7761
+ inline void update_host_to_base_host(const std::string_view input);
7258
7762
 
7259
7763
  }; // url_aggregator
7260
7764
 
@@ -8046,7 +8550,7 @@ url_aggregator::get_components() const noexcept {
8046
8550
  components.protocol_end + 2) == "//";
8047
8551
  }
8048
8552
 
8049
- inline void ada::url_aggregator::add_authority_slashes_if_needed() noexcept {
8553
+ inline void ada::url_aggregator::add_authority_slashes_if_needed() {
8050
8554
  ada_log("url_aggregator::add_authority_slashes_if_needed");
8051
8555
  ADA_ASSERT_TRUE(validate());
8052
8556
  // Protocol setter will insert `http:` to the URL. It is up to hostname setter
@@ -8083,7 +8587,7 @@ constexpr bool url_aggregator::has_non_empty_username() const noexcept {
8083
8587
 
8084
8588
  constexpr bool url_aggregator::has_non_empty_password() const noexcept {
8085
8589
  ada_log("url_aggregator::has_non_empty_password");
8086
- return components.host_start - components.username_end > 0;
8590
+ return components.host_start > components.username_end;
8087
8591
  }
8088
8592
 
8089
8593
  constexpr bool url_aggregator::has_password() const noexcept {
@@ -8155,8 +8659,8 @@ constexpr bool url_aggregator::has_port() const noexcept {
8155
8659
  return buffer;
8156
8660
  }
8157
8661
 
8158
- ada_really_inline size_t url_aggregator::parse_port(
8159
- std::string_view view, bool check_trailing_content) noexcept {
8662
+ ada_really_inline size_t
8663
+ url_aggregator::parse_port(std::string_view view, bool check_trailing_content) {
8160
8664
  ada_log("url_aggregator::parse_port('", view, "') ", view.size());
8161
8665
  if (!view.empty() && view[0] == '-') {
8162
8666
  ada_log("parse_port: view[0] == '0' && view.size() > 1");
@@ -8394,8 +8898,8 @@ constexpr void url_aggregator::set_protocol_as_file() {
8394
8898
  return true;
8395
8899
  }
8396
8900
 
8397
- [[nodiscard]] constexpr std::string_view url_aggregator::get_pathname()
8398
- const noexcept ada_lifetime_bound {
8901
+ [[nodiscard]] constexpr std::string_view url_aggregator::get_pathname() const
8902
+ ada_lifetime_bound {
8399
8903
  ada_log("url_aggregator::get_pathname pathname_start = ",
8400
8904
  components.pathname_start, " buffer.size() = ", buffer.size(),
8401
8905
  " components.search_start = ", components.search_start,
@@ -8414,8 +8918,7 @@ inline std::ostream &operator<<(std::ostream &out,
8414
8918
  return out << u.to_string();
8415
8919
  }
8416
8920
 
8417
- void url_aggregator::update_host_to_base_host(
8418
- const std::string_view input) noexcept {
8921
+ void url_aggregator::update_host_to_base_host(const std::string_view input) {
8419
8922
  ada_log("url_aggregator::update_host_to_base_host ", input);
8420
8923
  ADA_ASSERT_TRUE(validate());
8421
8924
  ADA_ASSERT_TRUE(!helpers::overlaps(input, buffer));
@@ -8442,7 +8945,13 @@ void url_aggregator::update_host_to_base_host(
8442
8945
  /* begin file include/ada/url_search_params.h */
8443
8946
  /**
8444
8947
  * @file url_search_params.h
8445
- * @brief Declaration for the URL Search Params
8948
+ * @brief URL query string parameter manipulation.
8949
+ *
8950
+ * This file provides the `url_search_params` class for parsing, manipulating,
8951
+ * and serializing URL query strings. It implements the URLSearchParams API
8952
+ * from the WHATWG URL Standard.
8953
+ *
8954
+ * @see https://url.spec.whatwg.org/#interface-urlsearchparams
8446
8955
  */
8447
8956
  #ifndef ADA_URL_SEARCH_PARAMS_H
8448
8957
  #define ADA_URL_SEARCH_PARAMS_H
@@ -8454,37 +8963,51 @@ void url_aggregator::update_host_to_base_host(
8454
8963
 
8455
8964
  namespace ada {
8456
8965
 
8966
+ /**
8967
+ * @brief Iterator types for url_search_params iteration.
8968
+ */
8457
8969
  enum class url_search_params_iter_type {
8458
- KEYS,
8459
- VALUES,
8460
- ENTRIES,
8970
+ KEYS, /**< Iterate over parameter keys only */
8971
+ VALUES, /**< Iterate over parameter values only */
8972
+ ENTRIES, /**< Iterate over key-value pairs */
8461
8973
  };
8462
8974
 
8463
8975
  template <typename T, url_search_params_iter_type Type>
8464
8976
  struct url_search_params_iter;
8465
8977
 
8978
+ /** Type alias for a key-value pair of string views. */
8466
8979
  typedef std::pair<std::string_view, std::string_view> key_value_view_pair;
8467
8980
 
8981
+ /** Iterator over search parameter keys. */
8468
8982
  using url_search_params_keys_iter =
8469
8983
  url_search_params_iter<std::string_view, url_search_params_iter_type::KEYS>;
8984
+ /** Iterator over search parameter values. */
8470
8985
  using url_search_params_values_iter =
8471
8986
  url_search_params_iter<std::string_view,
8472
8987
  url_search_params_iter_type::VALUES>;
8988
+ /** Iterator over search parameter key-value pairs. */
8473
8989
  using url_search_params_entries_iter =
8474
8990
  url_search_params_iter<key_value_view_pair,
8475
8991
  url_search_params_iter_type::ENTRIES>;
8476
8992
 
8477
8993
  /**
8478
- * We require all strings to be valid UTF-8. It is the user's responsibility to
8479
- * ensure that the provided strings are valid UTF-8.
8994
+ * @brief Class for parsing and manipulating URL query strings.
8995
+ *
8996
+ * The `url_search_params` class provides methods to parse, modify, and
8997
+ * serialize URL query parameters (the part after '?' in a URL). It handles
8998
+ * percent-encoding and decoding automatically.
8999
+ *
9000
+ * All string inputs must be valid UTF-8. The caller is responsible for
9001
+ * ensuring UTF-8 validity.
9002
+ *
8480
9003
  * @see https://url.spec.whatwg.org/#interface-urlsearchparams
8481
9004
  */
8482
9005
  struct url_search_params {
8483
9006
  url_search_params() = default;
8484
9007
 
8485
9008
  /**
8486
- * @see
8487
- * https://github.com/web-platform-tests/wpt/blob/master/url/urlsearchparams-constructor.any.js
9009
+ * Constructs url_search_params by parsing a query string.
9010
+ * @param input A query string (with or without leading '?'). Must be UTF-8.
8488
9011
  */
8489
9012
  explicit url_search_params(const std::string_view input) {
8490
9013
  initialize(input);
@@ -8496,75 +9019,106 @@ struct url_search_params {
8496
9019
  url_search_params &operator=(const url_search_params &u) = default;
8497
9020
  ~url_search_params() = default;
8498
9021
 
9022
+ /**
9023
+ * Returns the number of key-value pairs.
9024
+ * @return The total count of parameters.
9025
+ */
8499
9026
  [[nodiscard]] inline size_t size() const noexcept;
8500
9027
 
8501
9028
  /**
8502
- * Both key and value must be valid UTF-8.
9029
+ * Appends a new key-value pair to the parameter list.
9030
+ * @param key The parameter name (must be valid UTF-8).
9031
+ * @param value The parameter value (must be valid UTF-8).
8503
9032
  * @see https://url.spec.whatwg.org/#dom-urlsearchparams-append
8504
9033
  */
8505
9034
  inline void append(std::string_view key, std::string_view value);
8506
9035
 
8507
9036
  /**
9037
+ * Removes all pairs with the given key.
9038
+ * @param key The parameter name to remove.
8508
9039
  * @see https://url.spec.whatwg.org/#dom-urlsearchparams-delete
8509
9040
  */
8510
9041
  inline void remove(std::string_view key);
9042
+
9043
+ /**
9044
+ * Removes all pairs with the given key and value.
9045
+ * @param key The parameter name.
9046
+ * @param value The parameter value to match.
9047
+ */
8511
9048
  inline void remove(std::string_view key, std::string_view value);
8512
9049
 
8513
9050
  /**
9051
+ * Returns the value of the first pair with the given key.
9052
+ * @param key The parameter name to search for.
9053
+ * @return The value if found, or std::nullopt if not present.
8514
9054
  * @see https://url.spec.whatwg.org/#dom-urlsearchparams-get
8515
9055
  */
8516
9056
  inline std::optional<std::string_view> get(std::string_view key);
8517
9057
 
8518
9058
  /**
9059
+ * Returns all values for pairs with the given key.
9060
+ * @param key The parameter name to search for.
9061
+ * @return A vector of all matching values (may be empty).
8519
9062
  * @see https://url.spec.whatwg.org/#dom-urlsearchparams-getall
8520
9063
  */
8521
9064
  inline std::vector<std::string> get_all(std::string_view key);
8522
9065
 
8523
9066
  /**
9067
+ * Checks if any pair has the given key.
9068
+ * @param key The parameter name to search for.
9069
+ * @return `true` if at least one pair has this key.
8524
9070
  * @see https://url.spec.whatwg.org/#dom-urlsearchparams-has
8525
9071
  */
8526
9072
  inline bool has(std::string_view key) noexcept;
9073
+
9074
+ /**
9075
+ * Checks if any pair matches the given key and value.
9076
+ * @param key The parameter name to search for.
9077
+ * @param value The parameter value to match.
9078
+ * @return `true` if a matching pair exists.
9079
+ */
8527
9080
  inline bool has(std::string_view key, std::string_view value) noexcept;
8528
9081
 
8529
9082
  /**
8530
- * Both key and value must be valid UTF-8.
9083
+ * Sets a parameter value, replacing any existing pairs with the same key.
9084
+ * @param key The parameter name (must be valid UTF-8).
9085
+ * @param value The parameter value (must be valid UTF-8).
8531
9086
  * @see https://url.spec.whatwg.org/#dom-urlsearchparams-set
8532
9087
  */
8533
9088
  inline void set(std::string_view key, std::string_view value);
8534
9089
 
8535
9090
  /**
9091
+ * Sorts all key-value pairs by their keys using code unit comparison.
8536
9092
  * @see https://url.spec.whatwg.org/#dom-urlsearchparams-sort
8537
9093
  */
8538
9094
  inline void sort();
8539
9095
 
8540
9096
  /**
9097
+ * Serializes the parameters to a query string (without leading '?').
9098
+ * @return The percent-encoded query string.
8541
9099
  * @see https://url.spec.whatwg.org/#urlsearchparams-stringification-behavior
8542
9100
  */
8543
9101
  inline std::string to_string() const;
8544
9102
 
8545
9103
  /**
8546
- * Returns a simple JS-style iterator over all of the keys in this
8547
- * url_search_params. The keys in the iterator are not unique. The valid
8548
- * lifespan of the iterator is tied to the url_search_params. The iterator
8549
- * must be freed when you're done with it.
8550
- * @see https://url.spec.whatwg.org/#interface-urlsearchparams
9104
+ * Returns an iterator over all parameter keys.
9105
+ * Keys may repeat if there are duplicate parameters.
9106
+ * @return An iterator yielding string_view keys.
9107
+ * @note The iterator is invalidated if this object is modified.
8551
9108
  */
8552
9109
  inline url_search_params_keys_iter get_keys();
8553
9110
 
8554
9111
  /**
8555
- * Returns a simple JS-style iterator over all of the values in this
8556
- * url_search_params. The valid lifespan of the iterator is tied to the
8557
- * url_search_params. The iterator must be freed when you're done with it.
8558
- * @see https://url.spec.whatwg.org/#interface-urlsearchparams
9112
+ * Returns an iterator over all parameter values.
9113
+ * @return An iterator yielding string_view values.
9114
+ * @note The iterator is invalidated if this object is modified.
8559
9115
  */
8560
9116
  inline url_search_params_values_iter get_values();
8561
9117
 
8562
9118
  /**
8563
- * Returns a simple JS-style iterator over all of the entries in this
8564
- * url_search_params. The entries are pairs of keys and corresponding values.
8565
- * The valid lifespan of the iterator is tied to the url_search_params. The
8566
- * iterator must be freed when you're done with it.
8567
- * @see https://url.spec.whatwg.org/#interface-urlsearchparams
9119
+ * Returns an iterator over all key-value pairs.
9120
+ * @return An iterator yielding key-value pair views.
9121
+ * @note The iterator is invalidated if this object is modified.
8568
9122
  */
8569
9123
  inline url_search_params_entries_iter get_entries();
8570
9124
 
@@ -8601,8 +9155,13 @@ struct url_search_params {
8601
9155
  }; // url_search_params
8602
9156
 
8603
9157
  /**
8604
- * Implements a non-conventional iterator pattern that is closer in style to
8605
- * JavaScript's definition of an iterator.
9158
+ * @brief JavaScript-style iterator for url_search_params.
9159
+ *
9160
+ * Provides a `next()` method that returns successive values until exhausted.
9161
+ * This matches the iterator pattern used in the Web Platform.
9162
+ *
9163
+ * @tparam T The type of value returned by the iterator.
9164
+ * @tparam Type The type of iteration (KEYS, VALUES, or ENTRIES).
8606
9165
  *
8607
9166
  * @see https://webidl.spec.whatwg.org/#idl-iterable
8608
9167
  */
@@ -8617,10 +9176,15 @@ struct url_search_params_iter {
8617
9176
  ~url_search_params_iter() = default;
8618
9177
 
8619
9178
  /**
8620
- * Return the next item in the iterator or std::nullopt if done.
9179
+ * Returns the next value in the iteration sequence.
9180
+ * @return The next value, or std::nullopt if iteration is complete.
8621
9181
  */
8622
9182
  inline std::optional<T> next();
8623
9183
 
9184
+ /**
9185
+ * Checks if more values are available.
9186
+ * @return `true` if `next()` will return a value, `false` if exhausted.
9187
+ */
8624
9188
  inline bool has_next() const;
8625
9189
 
8626
9190
  private:
@@ -8973,10 +9537,8 @@ url_pattern_component<regex_provider>::create_component_match_result(
8973
9537
  // says we should start from 1. This case is handled by the
8974
9538
  // std_regex_provider.
8975
9539
  for (size_t index = 0; index < exec_result.size(); index++) {
8976
- result.groups.insert({
8977
- group_name_list[index],
8978
- std::move(exec_result[index]),
8979
- });
9540
+ result.groups.emplace(group_name_list[index],
9541
+ std::move(exec_result[index]));
8980
9542
  }
8981
9543
  return result;
8982
9544
  }
@@ -9082,43 +9644,113 @@ url_pattern_component<regex_provider>::compile(
9082
9644
  return tl::unexpected(part_list.error());
9083
9645
  }
9084
9646
 
9085
- // Let (regular expression string, name list) be the result of running
9086
- // generate a regular expression and name list given part list and options.
9647
+ // Detect pattern type early to potentially skip expensive regex compilation
9648
+ const auto has_regexp = [](const auto& part) { return part.is_regexp(); };
9649
+ const bool has_regexp_groups = std::ranges::any_of(*part_list, has_regexp);
9650
+
9651
+ url_pattern_component_type component_type =
9652
+ url_pattern_component_type::REGEXP;
9653
+ std::string exact_match_value{};
9654
+
9655
+ if (part_list->empty()) {
9656
+ component_type = url_pattern_component_type::EMPTY;
9657
+ } else if (part_list->size() == 1) {
9658
+ const auto& part = (*part_list)[0];
9659
+ if (part.type == url_pattern_part_type::FIXED_TEXT &&
9660
+ part.modifier == url_pattern_part_modifier::none &&
9661
+ !options.ignore_case) {
9662
+ component_type = url_pattern_component_type::EXACT_MATCH;
9663
+ exact_match_value = part.value;
9664
+ } else if (part.type == url_pattern_part_type::FULL_WILDCARD &&
9665
+ part.modifier == url_pattern_part_modifier::none &&
9666
+ part.prefix.empty() && part.suffix.empty()) {
9667
+ component_type = url_pattern_component_type::FULL_WILDCARD;
9668
+ }
9669
+ }
9670
+
9671
+ // For simple patterns, skip regex generation and compilation entirely
9672
+ if (component_type != url_pattern_component_type::REGEXP) {
9673
+ auto pattern_string =
9674
+ url_pattern_helpers::generate_pattern_string(*part_list, options);
9675
+ // For FULL_WILDCARD, the single group name is taken directly from the part,
9676
+ // since generate_regular_expression_and_name_list is skipped on this path
9677
+ std::vector<std::string> name_list;
9678
+ if (component_type == url_pattern_component_type::FULL_WILDCARD &&
9679
+ !part_list->empty()) {
9680
+ name_list.push_back((*part_list)[0].name);
9681
+ }
9682
+ return url_pattern_component<regex_provider>(
9683
+ std::move(pattern_string), typename regex_provider::regex_type{},
9684
+ std::move(name_list), has_regexp_groups, component_type,
9685
+ std::move(exact_match_value));
9686
+ }
9687
+
9688
+ // Generate regex for complex patterns
9087
9689
  auto [regular_expression_string, name_list] =
9088
9690
  url_pattern_helpers::generate_regular_expression_and_name_list(*part_list,
9089
9691
  options);
9090
-
9091
- ada_log("regular expression string: ", regular_expression_string);
9092
-
9093
- // Let pattern string be the result of running generate a pattern
9094
- // string given part list and options.
9095
9692
  auto pattern_string =
9096
9693
  url_pattern_helpers::generate_pattern_string(*part_list, options);
9097
9694
 
9098
- // Let regular expression be RegExpCreate(regular expression string,
9099
- // flags). If this throws an exception, catch it, and throw a
9100
- // TypeError.
9101
9695
  std::optional<typename regex_provider::regex_type> regular_expression =
9102
9696
  regex_provider::create_instance(regular_expression_string,
9103
9697
  options.ignore_case);
9104
-
9105
9698
  if (!regular_expression) {
9106
9699
  return tl::unexpected(errors::type_error);
9107
9700
  }
9108
9701
 
9109
- // For each part of part list:
9110
- // - If part's type is "regexp", then set has regexp groups to true.
9111
- const auto has_regexp = [](const auto& part) { return part.is_regexp(); };
9112
- const bool has_regexp_groups = std::ranges::any_of(*part_list, has_regexp);
9113
-
9114
- ada_log("has regexp groups: ", has_regexp_groups);
9115
-
9116
- // Return a new component whose pattern string is pattern string, regular
9117
- // expression is regular expression, group name list is name list, and has
9118
- // regexp groups is has regexp groups.
9119
9702
  return url_pattern_component<regex_provider>(
9120
9703
  std::move(pattern_string), std::move(*regular_expression),
9121
- std::move(name_list), has_regexp_groups);
9704
+ std::move(name_list), has_regexp_groups, component_type,
9705
+ std::move(exact_match_value));
9706
+ }
9707
+
9708
+ template <url_pattern_regex::regex_concept regex_provider>
9709
+ bool url_pattern_component<regex_provider>::fast_test(
9710
+ std::string_view input) const noexcept {
9711
+ // Fast path for simple patterns - avoid regex evaluation
9712
+ // Each simple pattern type returns early; only REGEXP falls through to the regex
9713
+ if (type == url_pattern_component_type::FULL_WILDCARD) {
9714
+ return true;
9715
+ }
9716
+ if (type == url_pattern_component_type::EXACT_MATCH) {
9717
+ return input == exact_match_value;
9718
+ }
9719
+ if (type == url_pattern_component_type::EMPTY) {
9720
+ return input.empty();
9721
+ }
9722
+ // type == REGEXP
9723
+ return regex_provider::regex_match(input, regexp);
9724
+ }
9725
+
9726
+ template <url_pattern_regex::regex_concept regex_provider>
9727
+ std::optional<std::vector<std::optional<std::string>>>
9728
+ url_pattern_component<regex_provider>::fast_match(
9729
+ std::string_view input) const {
9730
+ // Handle each type directly without redundant checks
9731
+ if (type == url_pattern_component_type::FULL_WILDCARD) {
9732
+ // FULL_WILDCARD always matches - capture the input (even if empty)
9733
+ // If there's no group name, return empty groups
9734
+ if (group_name_list.empty()) {
9735
+ return std::vector<std::optional<std::string>>{};
9736
+ }
9737
+ // Capture the matched input (including empty strings)
9738
+ return std::vector<std::optional<std::string>>{std::string(input)};
9739
+ }
9740
+ if (type == url_pattern_component_type::EXACT_MATCH) {
9741
+ if (input == exact_match_value) {
9742
+ return std::vector<std::optional<std::string>>{};
9743
+ }
9744
+ return std::nullopt;
9745
+ }
9746
+ if (type == url_pattern_component_type::EMPTY) {
9747
+ if (input.empty()) {
9748
+ return std::vector<std::optional<std::string>>{};
9749
+ }
9750
+ return std::nullopt;
9751
+ }
9752
+ // type == REGEXP - use regex
9753
+ return regex_provider::regex_search(input, regexp);
9122
9754
  }
9123
9755
 
9124
9756
  template <url_pattern_regex::regex_concept regex_provider>
@@ -9129,18 +9761,88 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::exec(
9129
9761
  return match(input, base_url);
9130
9762
  }
9131
9763
 
9764
+ template <url_pattern_regex::regex_concept regex_provider>
9765
+ bool url_pattern<regex_provider>::test_components(
9766
+ std::string_view protocol, std::string_view username,
9767
+ std::string_view password, std::string_view hostname, std::string_view port,
9768
+ std::string_view pathname, std::string_view search,
9769
+ std::string_view hash) const {
9770
+ return protocol_component.fast_test(protocol) &&
9771
+ username_component.fast_test(username) &&
9772
+ password_component.fast_test(password) &&
9773
+ hostname_component.fast_test(hostname) &&
9774
+ port_component.fast_test(port) &&
9775
+ pathname_component.fast_test(pathname) &&
9776
+ search_component.fast_test(search) && hash_component.fast_test(hash);
9777
+ }
9778
+
9132
9779
  template <url_pattern_regex::regex_concept regex_provider>
9133
9780
  result<bool> url_pattern<regex_provider>::test(
9134
- const url_pattern_input& input, const std::string_view* base_url) {
9135
- // TODO: Optimization opportunity. Rather than returning `url_pattern_result`
9136
- // Implement a fast path just like `can_parse()` in ada_url.
9137
- // Let result be the result of match given this's associated URL pattern,
9138
- // input, and baseURL if given.
9139
- // If result is null, return false.
9140
- if (auto result = match(input, base_url); result.has_value()) {
9141
- return result->has_value();
9781
+ const url_pattern_input& input, const std::string_view* base_url_string) {
9782
+ // If input is a URLPatternInit
9783
+ if (std::holds_alternative<url_pattern_init>(input)) {
9784
+ if (base_url_string) {
9785
+ return tl::unexpected(errors::type_error);
9786
+ }
9787
+
9788
+ std::string protocol{}, username{}, password{}, hostname{};
9789
+ std::string port{}, pathname{}, search{}, hash{};
9790
+
9791
+ auto apply_result = url_pattern_init::process(
9792
+ std::get<url_pattern_init>(input), url_pattern_init::process_type::url,
9793
+ protocol, username, password, hostname, port, pathname, search, hash);
9794
+
9795
+ if (!apply_result) {
9796
+ return false;
9797
+ }
9798
+
9799
+ std::string_view search_view = *apply_result->search;
9800
+ if (search_view.starts_with("?")) {
9801
+ search_view.remove_prefix(1);
9802
+ }
9803
+
9804
+ return test_components(*apply_result->protocol, *apply_result->username,
9805
+ *apply_result->password, *apply_result->hostname,
9806
+ *apply_result->port, *apply_result->pathname,
9807
+ search_view, *apply_result->hash);
9808
+ }
9809
+
9810
+ // URL string input path
9811
+ result<url_aggregator> base_url;
9812
+ if (base_url_string) {
9813
+ base_url = ada::parse<url_aggregator>(*base_url_string, nullptr);
9814
+ if (!base_url) {
9815
+ return false;
9816
+ }
9817
+ }
9818
+
9819
+ auto url =
9820
+ ada::parse<url_aggregator>(std::get<std::string_view>(input),
9821
+ base_url.has_value() ? &*base_url : nullptr);
9822
+ if (!url) {
9823
+ return false;
9824
+ }
9825
+
9826
+ // Extract components as string_view
9827
+ auto protocol_view = url->get_protocol();
9828
+ if (protocol_view.ends_with(":")) {
9829
+ protocol_view.remove_suffix(1);
9830
+ }
9831
+
9832
+ auto search_view = url->get_search();
9833
+ if (search_view.starts_with("?")) {
9834
+ search_view.remove_prefix(1);
9835
+ }
9836
+
9837
+ auto hash_view = url->get_hash();
9838
+ if (hash_view.starts_with("#")) {
9839
+ hash_view.remove_prefix(1);
9142
9840
  }
9143
- return tl::unexpected(errors::type_error);
9841
+
9842
+ return test_components(protocol_view, url->get_username(),
9843
+ url->get_password(), url->get_hostname(),
9844
+ url->get_port(), url->get_pathname(), search_view,
9845
+ hash_view);
9144
9846
  }
9145
9847
 
9146
9848
  template <url_pattern_regex::regex_concept regex_provider>
@@ -9289,74 +9991,61 @@ result<std::optional<url_pattern_result>> url_pattern<regex_provider>::match(
9289
9991
  }
9290
9992
  }
9291
9993
 
9994
+ // Use fast_match which skips regex for simple patterns (EMPTY, EXACT_MATCH,
9995
+ // FULL_WILDCARD) and only falls back to regex for complex REGEXP patterns.
9996
+
9292
9997
  // Let protocolExecResult be RegExpBuiltinExec(urlPattern's protocol
9293
9998
  // component's regular expression, protocol).
9294
- auto protocol_exec_result =
9295
- regex_provider::regex_search(protocol, protocol_component.regexp);
9296
-
9999
+ auto protocol_exec_result = protocol_component.fast_match(protocol);
9297
10000
  if (!protocol_exec_result) {
9298
10001
  return std::nullopt;
9299
10002
  }
9300
10003
 
9301
10004
  // Let usernameExecResult be RegExpBuiltinExec(urlPattern's username
9302
10005
  // component's regular expression, username).
9303
- auto username_exec_result =
9304
- regex_provider::regex_search(username, username_component.regexp);
9305
-
10006
+ auto username_exec_result = username_component.fast_match(username);
9306
10007
  if (!username_exec_result) {
9307
10008
  return std::nullopt;
9308
10009
  }
9309
10010
 
9310
10011
  // Let passwordExecResult be RegExpBuiltinExec(urlPattern's password
9311
10012
  // component's regular expression, password).
9312
- auto password_exec_result =
9313
- regex_provider::regex_search(password, password_component.regexp);
9314
-
10013
+ auto password_exec_result = password_component.fast_match(password);
9315
10014
  if (!password_exec_result) {
9316
10015
  return std::nullopt;
9317
10016
  }
9318
10017
 
9319
10018
  // Let hostnameExecResult be RegExpBuiltinExec(urlPattern's hostname
9320
10019
  // component's regular expression, hostname).
9321
- auto hostname_exec_result =
9322
- regex_provider::regex_search(hostname, hostname_component.regexp);
9323
-
10020
+ auto hostname_exec_result = hostname_component.fast_match(hostname);
9324
10021
  if (!hostname_exec_result) {
9325
10022
  return std::nullopt;
9326
10023
  }
9327
10024
 
9328
10025
  // Let portExecResult be RegExpBuiltinExec(urlPattern's port component's
9329
10026
  // regular expression, port).
9330
- auto port_exec_result =
9331
- regex_provider::regex_search(port, port_component.regexp);
9332
-
10027
+ auto port_exec_result = port_component.fast_match(port);
9333
10028
  if (!port_exec_result) {
9334
10029
  return std::nullopt;
9335
10030
  }
9336
10031
 
9337
10032
  // Let pathnameExecResult be RegExpBuiltinExec(urlPattern's pathname
9338
10033
  // component's regular expression, pathname).
9339
- auto pathname_exec_result =
9340
- regex_provider::regex_search(pathname, pathname_component.regexp);
9341
-
10034
+ auto pathname_exec_result = pathname_component.fast_match(pathname);
9342
10035
  if (!pathname_exec_result) {
9343
10036
  return std::nullopt;
9344
10037
  }
9345
10038
 
9346
10039
  // Let searchExecResult be RegExpBuiltinExec(urlPattern's search component's
9347
10040
  // regular expression, search).
9348
- auto search_exec_result =
9349
- regex_provider::regex_search(search, search_component.regexp);
9350
-
10041
+ auto search_exec_result = search_component.fast_match(search);
9351
10042
  if (!search_exec_result) {
9352
10043
  return std::nullopt;
9353
10044
  }
9354
10045
 
9355
10046
  // Let hashExecResult be RegExpBuiltinExec(urlPattern's hash component's
9356
10047
  // regular expression, hash).
9357
- auto hash_exec_result =
9358
- regex_provider::regex_search(hash, hash_component.regexp);
9359
-
10048
+ auto hash_exec_result = hash_component.fast_match(hash);
9360
10049
  if (!hash_exec_result) {
9361
10050
  return std::nullopt;
9362
10051
  }
@@ -9705,8 +10394,8 @@ std::string constructor_string_parser<regex_provider>::make_component_string() {
9705
10394
  const auto component_start_input_index = component_start_token->index;
9706
10395
  // Return the code point substring from component start input index to end
9707
10396
  // index within parser's input.
9708
- return input.substr(component_start_input_index,
9709
- end_index - component_start_input_index);
10397
+ return std::string(input.substr(component_start_input_index,
10398
+ end_index - component_start_input_index));
9710
10399
  }
9711
10400
 
9712
10401
  template <url_pattern_regex::regex_concept regex_provider>
@@ -10212,13 +10901,31 @@ tl::expected<std::vector<url_pattern_part>, errors> parse_pattern_string(
10212
10901
  template <url_pattern_regex::regex_concept regex_provider>
10213
10902
  bool protocol_component_matches_special_scheme(
10214
10903
  url_pattern_component<regex_provider>& component) {
10215
- // let's avoid unnecessary copy here.
10216
- auto& regex = component.regexp;
10217
- return regex_provider::regex_match("http", regex) ||
10218
- regex_provider::regex_match("https", regex) ||
10219
- regex_provider::regex_match("ws", regex) ||
10220
- regex_provider::regex_match("wss", regex) ||
10221
- regex_provider::regex_match("ftp", regex);
10904
+ // Optimization: Use fast_test for simple patterns to avoid regex overhead
10905
+ switch (component.type) {
10906
+ case url_pattern_component_type::EMPTY:
10907
+ // Empty pattern can't match any special scheme
10908
+ return false;
10909
+ case url_pattern_component_type::EXACT_MATCH:
10910
+ // Direct string comparison for exact match patterns
10911
+ return component.exact_match_value == "http" ||
10912
+ component.exact_match_value == "https" ||
10913
+ component.exact_match_value == "ws" ||
10914
+ component.exact_match_value == "wss" ||
10915
+ component.exact_match_value == "ftp";
10916
+ case url_pattern_component_type::FULL_WILDCARD:
10917
+ // Full wildcard matches everything including special schemes
10918
+ return true;
10919
+ case url_pattern_component_type::REGEXP:
10920
+ // Fall back to regex matching for complex patterns
10921
+ auto& regex = component.regexp;
10922
+ return regex_provider::regex_match("http", regex) ||
10923
+ regex_provider::regex_match("https", regex) ||
10924
+ regex_provider::regex_match("ws", regex) ||
10925
+ regex_provider::regex_match("wss", regex) ||
10926
+ regex_provider::regex_match("ftp", regex);
10927
+ }
10928
+ ada::unreachable();
10222
10929
  }
10223
10930
 
10224
10931
  template <url_pattern_regex::regex_concept regex_provider>
@@ -10514,14 +11221,14 @@ constructor_string_parser<regex_provider>::parse(std::string_view input) {
10514
11221
  #ifndef ADA_ADA_VERSION_H
10515
11222
  #define ADA_ADA_VERSION_H
10516
11223
 
10517
- #define ADA_VERSION "3.2.6"
11224
+ #define ADA_VERSION "3.4.2"
10518
11225
 
10519
11226
  namespace ada {
10520
11227
 
10521
11228
  enum {
10522
11229
  ADA_VERSION_MAJOR = 3,
10523
- ADA_VERSION_MINOR = 2,
10524
- ADA_VERSION_REVISION = 6,
11230
+ ADA_VERSION_MINOR = 4,
11231
+ ADA_VERSION_REVISION = 2,
10525
11232
  };
10526
11233
 
10527
11234
  } // namespace ada