uri_parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. data/.gitignore +6 -0
  2. data/.rvmrc +1 -0
  3. data/Gemfile +6 -0
  4. data/Rakefile +13 -0
  5. data/ext/uri_parser/basictypes.h +89 -0
  6. data/ext/uri_parser/extconf.h +6 -0
  7. data/ext/uri_parser/extconf.rb +50 -0
  8. data/ext/uri_parser/logging.h +5 -0
  9. data/ext/uri_parser/scoped_ptr.h +322 -0
  10. data/ext/uri_parser/string16.cc +95 -0
  11. data/ext/uri_parser/string16.h +194 -0
  12. data/ext/uri_parser/uri_parser.cc +87 -0
  13. data/ext/uri_parser/url_canon.h +872 -0
  14. data/ext/uri_parser/url_canon_etc.cc +392 -0
  15. data/ext/uri_parser/url_canon_fileurl.cc +215 -0
  16. data/ext/uri_parser/url_canon_host.cc +401 -0
  17. data/ext/uri_parser/url_canon_icu.cc +207 -0
  18. data/ext/uri_parser/url_canon_icu.h +63 -0
  19. data/ext/uri_parser/url_canon_internal.cc +427 -0
  20. data/ext/uri_parser/url_canon_internal.h +453 -0
  21. data/ext/uri_parser/url_canon_internal_file.h +157 -0
  22. data/ext/uri_parser/url_canon_ip.cc +737 -0
  23. data/ext/uri_parser/url_canon_ip.h +101 -0
  24. data/ext/uri_parser/url_canon_mailtourl.cc +137 -0
  25. data/ext/uri_parser/url_canon_path.cc +380 -0
  26. data/ext/uri_parser/url_canon_pathurl.cc +128 -0
  27. data/ext/uri_parser/url_canon_query.cc +189 -0
  28. data/ext/uri_parser/url_canon_relative.cc +572 -0
  29. data/ext/uri_parser/url_canon_stdstring.h +134 -0
  30. data/ext/uri_parser/url_canon_stdurl.cc +211 -0
  31. data/ext/uri_parser/url_common.h +48 -0
  32. data/ext/uri_parser/url_file.h +108 -0
  33. data/ext/uri_parser/url_parse.cc +760 -0
  34. data/ext/uri_parser/url_parse.h +336 -0
  35. data/ext/uri_parser/url_parse_file.cc +243 -0
  36. data/ext/uri_parser/url_parse_internal.h +112 -0
  37. data/ext/uri_parser/url_util.cc +553 -0
  38. data/ext/uri_parser/url_util.h +222 -0
  39. data/lib/uri_parser.rb +28 -0
  40. data/lib/uri_parser/version.rb +3 -0
  41. data/spec/spec_helper.rb +16 -0
  42. data/spec/uri_parser_spec.rb +54 -0
  43. data/uri_parser.gemspec +26 -0
  44. metadata +117 -0
@@ -0,0 +1,760 @@
1
+ /* Based on nsURLParsers.cc from Mozilla
2
+ * -------------------------------------
3
+ * The contents of this file are subject to the Mozilla Public License Version
4
+ * 1.1 (the "License"); you may not use this file except in compliance with
5
+ * the License. You may obtain a copy of the License at
6
+ * http://www.mozilla.org/MPL/
7
+ *
8
+ * Software distributed under the License is distributed on an "AS IS" basis,
9
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
10
+ * for the specific language governing rights and limitations under the
11
+ * License.
12
+ *
13
+ * The Original Code is mozilla.org code.
14
+ *
15
+ * The Initial Developer of the Original Code is
16
+ * Netscape Communications Corporation.
17
+ * Portions created by the Initial Developer are Copyright (C) 1998
18
+ * the Initial Developer. All Rights Reserved.
19
+ *
20
+ * Contributor(s):
21
+ * Darin Fisher (original author)
22
+ *
23
+ * Alternatively, the contents of this file may be used under the terms of
24
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
25
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
26
+ * in which case the provisions of the GPL or the LGPL are applicable instead
27
+ * of those above. If you wish to allow use of your version of this file only
28
+ * under the terms of either the GPL or the LGPL, and not to allow others to
29
+ * use your version of this file under the terms of the MPL, indicate your
30
+ * decision by deleting the provisions above and replace them with the notice
31
+ * and other provisions required by the GPL or the LGPL. If you do not delete
32
+ * the provisions above, a recipient may use your version of this file under
33
+ * the terms of any one of the MPL, the GPL or the LGPL.
34
+ *
35
+ * ***** END LICENSE BLOCK ***** */
36
+
37
+ #include "url_parse.h"
38
+
39
+ #include <stdlib.h>
40
+
41
+ #include "logging.h"
42
+ #include "url_parse_internal.h"
43
+
44
+ namespace url_parse {
45
+
46
+ namespace {
47
+
48
+ // Returns true if the given character is a valid digit to use in a port.
49
+ inline bool IsPortDigit(char16 ch) {
50
+ return ch >= '0' && ch <= '9';
51
+ }
52
+
53
// Scans |spec| forward from |start_offset| and returns the index of the first
// character for which IsAuthorityTerminator() is true. When no such character
// occurs before |spec_len|, the result is |spec_len|.
template<typename CHAR>
int FindNextAuthorityTerminator(const CHAR* spec,
                                int start_offset,
                                int spec_len) {
  int pos = start_offset;
  while (pos < spec_len && !IsAuthorityTerminator(spec[pos]))
    ++pos;
  // |pos| can only exceed |spec_len| when the caller started past the end;
  // report "not found" as |spec_len| in that case as well.
  return pos < spec_len ? pos : spec_len;
}
66
+
67
+ template<typename CHAR>
68
+ void ParseUserInfo(const CHAR* spec,
69
+ const Component& user,
70
+ Component* username,
71
+ Component* password) {
72
+ // Find the first colon in the user section, which separates the username and
73
+ // password.
74
+ int colon_offset = 0;
75
+ while (colon_offset < user.len && spec[user.begin + colon_offset] != ':')
76
+ colon_offset++;
77
+
78
+ if (colon_offset < user.len) {
79
+ // Found separator: <username>:<password>
80
+ *username = Component(user.begin, colon_offset);
81
+ *password = MakeRange(user.begin + colon_offset + 1,
82
+ user.begin + user.len);
83
+ } else {
84
+ // No separator, treat everything as the username
85
+ *username = user;
86
+ *password = Component();
87
+ }
88
+ }
89
+
90
+ template<typename CHAR>
91
+ void ParseServerInfo(const CHAR* spec,
92
+ const Component& serverinfo,
93
+ Component* hostname,
94
+ Component* port_num) {
95
+ if (serverinfo.len == 0) {
96
+ // No server info, host name is empty.
97
+ hostname->reset();
98
+ port_num->reset();
99
+ return;
100
+ }
101
+
102
+ // If the host starts with a left-bracket, assume the entire host is an
103
+ // IPv6 literal. Otherwise, assume none of the host is an IPv6 literal.
104
+ // This assumption will be overridden if we find a right-bracket.
105
+ //
106
+ // Our IPv6 address canonicalization code requires both brackets to exist,
107
+ // but the ability to locate an incomplete address can still be useful.
108
+ int ipv6_terminator = spec[serverinfo.begin] == '[' ? serverinfo.end() : -1;
109
+ int colon = -1;
110
+
111
+ // Find the last right-bracket, and the last colon.
112
+ for (int i = serverinfo.begin; i < serverinfo.end(); i++) {
113
+ switch (spec[i]) {
114
+ case ']':
115
+ ipv6_terminator = i;
116
+ break;
117
+ case ':':
118
+ colon = i;
119
+ break;
120
+ }
121
+ }
122
+
123
+ if (colon > ipv6_terminator) {
124
+ // Found a port number: <hostname>:<port>
125
+ *hostname = MakeRange(serverinfo.begin, colon);
126
+ if (hostname->len == 0)
127
+ hostname->reset();
128
+ *port_num = MakeRange(colon + 1, serverinfo.end());
129
+ } else {
130
+ // No port: <hostname>
131
+ *hostname = serverinfo;
132
+ port_num->reset();
133
+ }
134
+ }
135
+
136
+ // Given an already-identified auth section, breaks it into its consituent
137
+ // parts. The port number will be parsed and the resulting integer will be
138
+ // filled into the given *port variable, or -1 if there is no port number or it
139
+ // is invalid.
140
+ template<typename CHAR>
141
+ void DoParseAuthority(const CHAR* spec,
142
+ const Component& auth,
143
+ Component* username,
144
+ Component* password,
145
+ Component* hostname,
146
+ Component* port_num) {
147
+ DCHECK(auth.is_valid()) << "We should always get an authority";
148
+ if (auth.len == 0) {
149
+ username->reset();
150
+ password->reset();
151
+ hostname->reset();
152
+ port_num->reset();
153
+ return;
154
+ }
155
+
156
+ // Search backwards for @, which is the separator between the user info and
157
+ // the server info.
158
+ int i = auth.begin + auth.len - 1;
159
+ while (i > auth.begin && spec[i] != '@')
160
+ i--;
161
+
162
+ if (spec[i] == '@') {
163
+ // Found user info: <user-info>@<server-info>
164
+ ParseUserInfo(spec, Component(auth.begin, i - auth.begin),
165
+ username, password);
166
+ ParseServerInfo(spec, MakeRange(i + 1, auth.begin + auth.len),
167
+ hostname, port_num);
168
+ } else {
169
+ // No user info, everything is server info.
170
+ username->reset();
171
+ password->reset();
172
+ ParseServerInfo(spec, auth, hostname, port_num);
173
+ }
174
+ }
175
+
176
+ template<typename CHAR>
177
+ void ParsePath(const CHAR* spec,
178
+ const Component& path,
179
+ Component* filepath,
180
+ Component* query,
181
+ Component* ref) {
182
+ // path = [/]<segment1>/<segment2>/<...>/<segmentN>;<param>?<query>#<ref>
183
+
184
+ // Special case when there is no path.
185
+ if (path.len == -1) {
186
+ filepath->reset();
187
+ query->reset();
188
+ ref->reset();
189
+ return;
190
+ }
191
+ DCHECK(path.len > 0) << "We should never have 0 length paths";
192
+
193
+ // Search for first occurrence of either ? or #.
194
+ int path_end = path.begin + path.len;
195
+
196
+ int query_separator = -1; // Index of the '?'
197
+ int ref_separator = -1; // Index of the '#'
198
+ for (int i = path.begin; i < path_end; i++) {
199
+ switch (spec[i]) {
200
+ case '?':
201
+ // Only match the query string if it precedes the reference fragment
202
+ // and when we haven't found one already.
203
+ if (ref_separator < 0 && query_separator < 0)
204
+ query_separator = i;
205
+ break;
206
+ case '#':
207
+ // Record the first # sign only.
208
+ if (ref_separator < 0)
209
+ ref_separator = i;
210
+ break;
211
+ }
212
+ }
213
+
214
+ // Markers pointing to the character after each of these corresponding
215
+ // components. The code below words from the end back to the beginning,
216
+ // and will update these indices as it finds components that exist.
217
+ int file_end, query_end;
218
+
219
+ // Ref fragment: from the # to the end of the path.
220
+ if (ref_separator >= 0) {
221
+ file_end = query_end = ref_separator;
222
+ *ref = MakeRange(ref_separator + 1, path_end);
223
+ } else {
224
+ file_end = query_end = path_end;
225
+ ref->reset();
226
+ }
227
+
228
+ // Query fragment: everything from the ? to the next boundary (either the end
229
+ // of the path or the ref fragment).
230
+ if (query_separator >= 0) {
231
+ file_end = query_separator;
232
+ *query = MakeRange(query_separator + 1, query_end);
233
+ } else {
234
+ query->reset();
235
+ }
236
+
237
+ // File path: treat an empty file path as no file path.
238
+ if (file_end != path.begin)
239
+ *filepath = MakeRange(path.begin, file_end);
240
+ else
241
+ filepath->reset();
242
+ }
243
+
244
+ template<typename CHAR>
245
+ bool DoExtractScheme(const CHAR* url,
246
+ int url_len,
247
+ Component* scheme) {
248
+ // Skip leading whitespace and control characters.
249
+ int begin = 0;
250
+ while (begin < url_len && ShouldTrimFromURL(url[begin]))
251
+ begin++;
252
+ if (begin == url_len)
253
+ return false; // Input is empty or all whitespace.
254
+
255
+ // Find the first colon character.
256
+ for (int i = begin; i < url_len; i++) {
257
+ if (url[i] == ':') {
258
+ *scheme = MakeRange(begin, i);
259
+ return true;
260
+ }
261
+ }
262
+ return false; // No colon found: no scheme
263
+ }
264
+
265
+ // Fills in all members of the Parsed structure except for the scheme.
266
+ //
267
+ // |spec| is the full spec being parsed, of length |spec_len|.
268
+ // |after_scheme| is the character immediately following the scheme (after the
269
+ // colon) where we'll begin parsing.
270
+ //
271
+ // Compatability data points. I list "host", "path" extracted:
272
+ // Input IE6 Firefox Us
273
+ // ----- -------------- -------------- --------------
274
+ // http://foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/"
275
+ // http:foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/"
276
+ // http:/foo.com/ fail(*) "foo.com", "/" "foo.com", "/"
277
+ // http:\foo.com/ fail(*) "\foo.com", "/"(fail) "foo.com", "/"
278
+ // http:////foo.com/ "foo.com", "/" "foo.com", "/" "foo.com", "/"
279
+ //
280
+ // (*) Interestingly, although IE fails to load these URLs, its history
281
+ // canonicalizer handles them, meaning if you've been to the corresponding
282
+ // "http://foo.com/" link, it will be colored.
283
+ template <typename CHAR>
284
+ void DoParseAfterScheme(const CHAR* spec,
285
+ int spec_len,
286
+ int after_scheme,
287
+ Parsed* parsed) {
288
+ int num_slashes = CountConsecutiveSlashes(spec, after_scheme, spec_len);
289
+ int after_slashes = after_scheme + num_slashes;
290
+
291
+ // First split into two main parts, the authority (username, password, host,
292
+ // and port) and the full path (path, query, and reference).
293
+ Component authority;
294
+ Component full_path;
295
+
296
+ // Found "//<some data>", looks like an authority section. Treat everything
297
+ // from there to the next slash (or end of spec) to be the authority. Note
298
+ // that we ignore the number of slashes and treat it as the authority.
299
+ int end_auth = FindNextAuthorityTerminator(spec, after_slashes, spec_len);
300
+ authority = Component(after_slashes, end_auth - after_slashes);
301
+
302
+ if (end_auth == spec_len) // No beginning of path found.
303
+ full_path = Component();
304
+ else // Everything starting from the slash to the end is the path.
305
+ full_path = Component(end_auth, spec_len - end_auth);
306
+
307
+ // Now parse those two sub-parts.
308
+ DoParseAuthority(spec, authority, &parsed->username, &parsed->password,
309
+ &parsed->host, &parsed->port);
310
+ ParsePath(spec, full_path, &parsed->path, &parsed->query, &parsed->ref);
311
+ }
312
+
313
+ // The main parsing function for standard URLs. Standard URLs have a scheme,
314
+ // host, path, etc.
315
+ template<typename CHAR>
316
+ void DoParseStandardURL(const CHAR* spec, int spec_len, Parsed* parsed) {
317
+ DCHECK(spec_len >= 0);
318
+
319
+ // Strip leading & trailing spaces and control characters.
320
+ int begin = 0;
321
+ TrimURL(spec, &begin, &spec_len);
322
+
323
+ int after_scheme;
324
+ if (DoExtractScheme(spec, spec_len, &parsed->scheme)) {
325
+ after_scheme = parsed->scheme.end() + 1; // Skip past the colon.
326
+ } else {
327
+ // Say there's no scheme when there is no colon. We could also say that
328
+ // everything is the scheme. Both would produce an invalid URL, but this way
329
+ // seems less wrong in more cases.
330
+ parsed->scheme.reset();
331
+ after_scheme = begin;
332
+ }
333
+ DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
334
+ }
335
+
336
+ // Initializes a path URL which is merely a scheme followed by a path. Examples
337
+ // include "about:foo" and "javascript:alert('bar');"
338
+ template<typename CHAR>
339
+ void DoParsePathURL(const CHAR* spec, int spec_len, Parsed* parsed) {
340
+ // Get the non-path and non-scheme parts of the URL out of the way, we never
341
+ // use them.
342
+ parsed->username.reset();
343
+ parsed->password.reset();
344
+ parsed->host.reset();
345
+ parsed->port.reset();
346
+ parsed->query.reset();
347
+ parsed->ref.reset();
348
+
349
+ // Strip leading & trailing spaces and control characters.
350
+ int begin = 0;
351
+ TrimURL(spec, &begin, &spec_len);
352
+
353
+ // Handle empty specs or ones that contain only whitespace or control chars.
354
+ if (begin == spec_len) {
355
+ parsed->scheme.reset();
356
+ parsed->path.reset();
357
+ return;
358
+ }
359
+
360
+ // Extract the scheme, with the path being everything following. We also
361
+ // handle the case where there is no scheme.
362
+ if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
363
+ // Offset the results since we gave ExtractScheme a substring.
364
+ parsed->scheme.begin += begin;
365
+
366
+ // For compatability with the standard URL parser, we treat no path as
367
+ // -1, rather than having a length of 0 (we normally wouldn't care so
368
+ // much for these non-standard URLs).
369
+ if (parsed->scheme.end() == spec_len - 1)
370
+ parsed->path.reset();
371
+ else
372
+ parsed->path = MakeRange(parsed->scheme.end() + 1, spec_len);
373
+ } else {
374
+ // No scheme found, just path.
375
+ parsed->scheme.reset();
376
+ parsed->path = MakeRange(begin, spec_len);
377
+ }
378
+ }
379
+
380
+ template<typename CHAR>
381
+ void DoParseMailtoURL(const CHAR* spec, int spec_len, Parsed* parsed) {
382
+ DCHECK(spec_len >= 0);
383
+
384
+ // Get the non-path and non-scheme parts of the URL out of the way, we never
385
+ // use them.
386
+ parsed->username.reset();
387
+ parsed->password.reset();
388
+ parsed->host.reset();
389
+ parsed->port.reset();
390
+ parsed->ref.reset();
391
+ parsed->query.reset(); // May use this; reset for convenience.
392
+
393
+ // Strip leading & trailing spaces and control characters.
394
+ int begin = 0;
395
+ TrimURL(spec, &begin, &spec_len);
396
+
397
+ // Handle empty specs or ones that contain only whitespace or control chars.
398
+ if (begin == spec_len) {
399
+ parsed->scheme.reset();
400
+ parsed->path.reset();
401
+ return;
402
+ }
403
+
404
+ int path_begin = -1;
405
+ int path_end = -1;
406
+
407
+ // Extract the scheme, with the path being everything following. We also
408
+ // handle the case where there is no scheme.
409
+ if (ExtractScheme(&spec[begin], spec_len - begin, &parsed->scheme)) {
410
+ // Offset the results since we gave ExtractScheme a substring.
411
+ parsed->scheme.begin += begin;
412
+
413
+ if (parsed->scheme.end() != spec_len - 1) {
414
+ path_begin = parsed->scheme.end() + 1;
415
+ path_end = spec_len;
416
+ }
417
+ } else {
418
+ // No scheme found, just path.
419
+ parsed->scheme.reset();
420
+ path_begin = begin;
421
+ path_end = spec_len;
422
+ }
423
+
424
+ // Split [path_begin, path_end) into a path + query.
425
+ for (int i = path_begin; i < path_end; ++i) {
426
+ if (spec[i] == '?') {
427
+ parsed->query = MakeRange(i + 1, path_end);
428
+ path_end = i;
429
+ break;
430
+ }
431
+ }
432
+
433
+ // For compatability with the standard URL parser, treat no path as
434
+ // -1, rather than having a length of 0
435
+ if (path_begin == path_end) {
436
+ parsed->path.reset();
437
+ } else {
438
+ parsed->path = MakeRange(path_begin, path_end);
439
+ }
440
+ }
441
+
442
+ // Converts a port number in a string to an integer. We'd like to just call
443
+ // sscanf but our input is not NULL-terminated, which sscanf requires. Instead,
444
+ // we copy the digits to a small stack buffer (since we know the maximum number
445
+ // of digits in a valid port number) that we can NULL terminate.
446
+ template<typename CHAR>
447
+ int DoParsePort(const CHAR* spec, const Component& component) {
448
+ // Easy success case when there is no port.
449
+ const int kMaxDigits = 5;
450
+ if (!component.is_nonempty())
451
+ return PORT_UNSPECIFIED;
452
+
453
+ // Skip over any leading 0s.
454
+ Component digits_comp(component.end(), 0);
455
+ for (int i = 0; i < component.len; i++) {
456
+ if (spec[component.begin + i] != '0') {
457
+ digits_comp = MakeRange(component.begin + i, component.end());
458
+ break;
459
+ }
460
+ }
461
+ if (digits_comp.len == 0)
462
+ return 0; // All digits were 0.
463
+
464
+ // Verify we don't have too many digits (we'll be copying to our buffer so
465
+ // we need to double-check).
466
+ if (digits_comp.len > kMaxDigits)
467
+ return PORT_INVALID;
468
+
469
+ // Copy valid digits to the buffer.
470
+ char digits[kMaxDigits + 1]; // +1 for null terminator
471
+ for (int i = 0; i < digits_comp.len; i++) {
472
+ CHAR ch = spec[digits_comp.begin + i];
473
+ if (!IsPortDigit(ch)) {
474
+ // Invalid port digit, fail.
475
+ return PORT_INVALID;
476
+ }
477
+ digits[i] = static_cast<char>(ch);
478
+ }
479
+
480
+ // Null-terminate the string and convert to integer. Since we guarantee
481
+ // only digits, atoi's lack of error handling is OK.
482
+ digits[digits_comp.len] = 0;
483
+ int port = atoi(digits);
484
+ if (port > 65535)
485
+ return PORT_INVALID; // Out of range.
486
+ return port;
487
+ }
488
+
489
+ template<typename CHAR>
490
+ void DoExtractFileName(const CHAR* spec,
491
+ const Component& path,
492
+ Component* file_name) {
493
+ // Handle empty paths: they have no file names.
494
+ if (!path.is_nonempty()) {
495
+ file_name->reset();
496
+ return;
497
+ }
498
+
499
+ // Search backwards for a parameter, which is a normally unused field in a
500
+ // URL delimited by a semicolon. We parse the parameter as part of the
501
+ // path, but here, we don't want to count it. The last semicolon is the
502
+ // parameter. The path should start with a slash, so we don't need to check
503
+ // the first one.
504
+ int file_end = path.end();
505
+ for (int i = path.end() - 1; i > path.begin; i--) {
506
+ if (spec[i] == ';') {
507
+ file_end = i;
508
+ break;
509
+ }
510
+ }
511
+
512
+ // Now search backwards from the filename end to the previous slash
513
+ // to find the beginning of the filename.
514
+ for (int i = file_end - 1; i >= path.begin; i--) {
515
+ if (IsURLSlash(spec[i])) {
516
+ // File name is everything following this character to the end
517
+ *file_name = MakeRange(i + 1, file_end);
518
+ return;
519
+ }
520
+ }
521
+
522
+ // No slash found, this means the input was degenerate (generally paths
523
+ // will start with a slash). Let's call everything the file name.
524
+ *file_name = MakeRange(path.begin, file_end);
525
+ return;
526
+ }
527
+
528
+ template<typename CHAR>
529
+ bool DoExtractQueryKeyValue(const CHAR* spec,
530
+ Component* query,
531
+ Component* key,
532
+ Component* value) {
533
+ if (!query->is_nonempty())
534
+ return false;
535
+
536
+ int start = query->begin;
537
+ int cur = start;
538
+ int end = query->end();
539
+
540
+ // We assume the beginning of the input is the beginning of the "key" and we
541
+ // skip to the end of it.
542
+ key->begin = cur;
543
+ while (cur < end && spec[cur] != '&' && spec[cur] != '=')
544
+ cur++;
545
+ key->len = cur - key->begin;
546
+
547
+ // Skip the separator after the key (if any).
548
+ if (cur < end && spec[cur] == '=')
549
+ cur++;
550
+
551
+ // Find the value part.
552
+ value->begin = cur;
553
+ while (cur < end && spec[cur] != '&')
554
+ cur++;
555
+ value->len = cur - value->begin;
556
+
557
+ // Finally skip the next separator if any
558
+ if (cur < end && spec[cur] == '&')
559
+ cur++;
560
+
561
+ // Save the new query
562
+ *query = url_parse::MakeRange(cur, end);
563
+ return true;
564
+ }
565
+
566
+ } // namespace
567
+
568
+ Parsed::Parsed() {
569
+ }
570
+
571
+ int Parsed::Length() const {
572
+ if (ref.is_valid())
573
+ return ref.end();
574
+ return CountCharactersBefore(REF, false);
575
+ }
576
+
577
+ int Parsed::CountCharactersBefore(ComponentType type,
578
+ bool include_delimiter) const {
579
+ if (type == SCHEME)
580
+ return scheme.begin;
581
+
582
+ // There will be some characters after the scheme like "://" and we don't
583
+ // know how many. Search forwards for the next thing until we find one.
584
+ int cur = 0;
585
+ if (scheme.is_valid())
586
+ cur = scheme.end() + 1; // Advance over the ':' at the end of the scheme.
587
+
588
+ if (username.is_valid()) {
589
+ if (type <= USERNAME)
590
+ return username.begin;
591
+ cur = username.end() + 1; // Advance over the '@' or ':' at the end.
592
+ }
593
+
594
+ if (password.is_valid()) {
595
+ if (type <= PASSWORD)
596
+ return password.begin;
597
+ cur = password.end() + 1; // Advance over the '@' at the end.
598
+ }
599
+
600
+ if (host.is_valid()) {
601
+ if (type <= HOST)
602
+ return host.begin;
603
+ cur = host.end();
604
+ }
605
+
606
+ if (port.is_valid()) {
607
+ if (type < PORT || (type == PORT && include_delimiter))
608
+ return port.begin - 1; // Back over delimiter.
609
+ if (type == PORT)
610
+ return port.begin; // Don't want delimiter counted.
611
+ cur = port.end();
612
+ }
613
+
614
+ if (path.is_valid()) {
615
+ if (type <= PATH)
616
+ return path.begin;
617
+ cur = path.end();
618
+ }
619
+
620
+ if (query.is_valid()) {
621
+ if (type < QUERY || (type == QUERY && include_delimiter))
622
+ return query.begin - 1; // Back over delimiter.
623
+ if (type == QUERY)
624
+ return query.begin; // Don't want delimiter counted.
625
+ cur = query.end();
626
+ }
627
+
628
+ if (ref.is_valid()) {
629
+ if (type == REF && !include_delimiter)
630
+ return ref.begin; // Back over delimiter.
631
+
632
+ // When there is a ref and we get here, the component we wanted was before
633
+ // this and not found, so we always know the beginning of the ref is right.
634
+ return ref.begin - 1; // Don't want delimiter counted.
635
+ }
636
+
637
+ return cur;
638
+ }
639
+
640
+ bool ExtractScheme(const char* url, int url_len, Component* scheme) {
641
+ return DoExtractScheme(url, url_len, scheme);
642
+ }
643
+
644
+ bool ExtractScheme(const char16* url, int url_len, Component* scheme) {
645
+ return DoExtractScheme(url, url_len, scheme);
646
+ }
647
+
648
+ // This handles everything that may be an authority terminator, including
649
+ // backslash. For special backslash handling see DoParseAfterScheme.
650
+ bool IsAuthorityTerminator(char16 ch) {
651
+ return IsURLSlash(ch) || ch == '?' || ch == '#';
652
+ }
653
+
654
+ void ExtractFileName(const char* url,
655
+ const Component& path,
656
+ Component* file_name) {
657
+ DoExtractFileName(url, path, file_name);
658
+ }
659
+
660
+ void ExtractFileName(const char16* url,
661
+ const Component& path,
662
+ Component* file_name) {
663
+ DoExtractFileName(url, path, file_name);
664
+ }
665
+
666
+ bool ExtractQueryKeyValue(const char* url,
667
+ Component* query,
668
+ Component* key,
669
+ Component* value) {
670
+ return DoExtractQueryKeyValue(url, query, key, value);
671
+ }
672
+
673
+ bool ExtractQueryKeyValue(const char16* url,
674
+ Component* query,
675
+ Component* key,
676
+ Component* value) {
677
+ return DoExtractQueryKeyValue(url, query, key, value);
678
+ }
679
+
680
+ void ParseAuthority(const char* spec,
681
+ const Component& auth,
682
+ Component* username,
683
+ Component* password,
684
+ Component* hostname,
685
+ Component* port_num) {
686
+ DoParseAuthority(spec, auth, username, password, hostname, port_num);
687
+ }
688
+
689
+ void ParseAuthority(const char16* spec,
690
+ const Component& auth,
691
+ Component* username,
692
+ Component* password,
693
+ Component* hostname,
694
+ Component* port_num) {
695
+ DoParseAuthority(spec, auth, username, password, hostname, port_num);
696
+ }
697
+
698
+ int ParsePort(const char* url, const Component& port) {
699
+ return DoParsePort(url, port);
700
+ }
701
+
702
+ int ParsePort(const char16* url, const Component& port) {
703
+ return DoParsePort(url, port);
704
+ }
705
+
706
+ void ParseStandardURL(const char* url, int url_len, Parsed* parsed) {
707
+ DoParseStandardURL(url, url_len, parsed);
708
+ }
709
+
710
+ void ParseStandardURL(const char16* url, int url_len, Parsed* parsed) {
711
+ DoParseStandardURL(url, url_len, parsed);
712
+ }
713
+
714
+ void ParsePathURL(const char* url, int url_len, Parsed* parsed) {
715
+ DoParsePathURL(url, url_len, parsed);
716
+ }
717
+
718
+ void ParsePathURL(const char16* url, int url_len, Parsed* parsed) {
719
+ DoParsePathURL(url, url_len, parsed);
720
+ }
721
+
722
+ void ParseMailtoURL(const char* url, int url_len, Parsed* parsed) {
723
+ DoParseMailtoURL(url, url_len, parsed);
724
+ }
725
+
726
+ void ParseMailtoURL(const char16* url, int url_len, Parsed* parsed) {
727
+ DoParseMailtoURL(url, url_len, parsed);
728
+ }
729
+
730
+ void ParsePathInternal(const char* spec,
731
+ const Component& path,
732
+ Component* filepath,
733
+ Component* query,
734
+ Component* ref) {
735
+ ParsePath(spec, path, filepath, query, ref);
736
+ }
737
+
738
+ void ParsePathInternal(const char16* spec,
739
+ const Component& path,
740
+ Component* filepath,
741
+ Component* query,
742
+ Component* ref) {
743
+ ParsePath(spec, path, filepath, query, ref);
744
+ }
745
+
746
+ void ParseAfterScheme(const char* spec,
747
+ int spec_len,
748
+ int after_scheme,
749
+ Parsed* parsed) {
750
+ DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
751
+ }
752
+
753
+ void ParseAfterScheme(const char16* spec,
754
+ int spec_len,
755
+ int after_scheme,
756
+ Parsed* parsed) {
757
+ DoParseAfterScheme(spec, spec_len, after_scheme, parsed);
758
+ }
759
+
760
+ } // namespace url_parse