uri_parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. data/.gitignore +6 -0
  2. data/.rvmrc +1 -0
  3. data/Gemfile +6 -0
  4. data/Rakefile +13 -0
  5. data/ext/uri_parser/basictypes.h +89 -0
  6. data/ext/uri_parser/extconf.h +6 -0
  7. data/ext/uri_parser/extconf.rb +50 -0
  8. data/ext/uri_parser/logging.h +5 -0
  9. data/ext/uri_parser/scoped_ptr.h +322 -0
  10. data/ext/uri_parser/string16.cc +95 -0
  11. data/ext/uri_parser/string16.h +194 -0
  12. data/ext/uri_parser/uri_parser.cc +87 -0
  13. data/ext/uri_parser/url_canon.h +872 -0
  14. data/ext/uri_parser/url_canon_etc.cc +392 -0
  15. data/ext/uri_parser/url_canon_fileurl.cc +215 -0
  16. data/ext/uri_parser/url_canon_host.cc +401 -0
  17. data/ext/uri_parser/url_canon_icu.cc +207 -0
  18. data/ext/uri_parser/url_canon_icu.h +63 -0
  19. data/ext/uri_parser/url_canon_internal.cc +427 -0
  20. data/ext/uri_parser/url_canon_internal.h +453 -0
  21. data/ext/uri_parser/url_canon_internal_file.h +157 -0
  22. data/ext/uri_parser/url_canon_ip.cc +737 -0
  23. data/ext/uri_parser/url_canon_ip.h +101 -0
  24. data/ext/uri_parser/url_canon_mailtourl.cc +137 -0
  25. data/ext/uri_parser/url_canon_path.cc +380 -0
  26. data/ext/uri_parser/url_canon_pathurl.cc +128 -0
  27. data/ext/uri_parser/url_canon_query.cc +189 -0
  28. data/ext/uri_parser/url_canon_relative.cc +572 -0
  29. data/ext/uri_parser/url_canon_stdstring.h +134 -0
  30. data/ext/uri_parser/url_canon_stdurl.cc +211 -0
  31. data/ext/uri_parser/url_common.h +48 -0
  32. data/ext/uri_parser/url_file.h +108 -0
  33. data/ext/uri_parser/url_parse.cc +760 -0
  34. data/ext/uri_parser/url_parse.h +336 -0
  35. data/ext/uri_parser/url_parse_file.cc +243 -0
  36. data/ext/uri_parser/url_parse_internal.h +112 -0
  37. data/ext/uri_parser/url_util.cc +553 -0
  38. data/ext/uri_parser/url_util.h +222 -0
  39. data/lib/uri_parser.rb +28 -0
  40. data/lib/uri_parser/version.rb +3 -0
  41. data/spec/spec_helper.rb +16 -0
  42. data/spec/uri_parser_spec.rb +54 -0
  43. data/uri_parser.gemspec +26 -0
  44. metadata +117 -0
@@ -0,0 +1,453 @@
1
+ // Copyright 2007, Google Inc.
2
+ // All rights reserved.
3
+ //
4
+ // Redistribution and use in source and binary forms, with or without
5
+ // modification, are permitted provided that the following conditions are
6
+ // met:
7
+ //
8
+ // * Redistributions of source code must retain the above copyright
9
+ // notice, this list of conditions and the following disclaimer.
10
+ // * Redistributions in binary form must reproduce the above
11
+ // copyright notice, this list of conditions and the following disclaimer
12
+ // in the documentation and/or other materials provided with the
13
+ // distribution.
14
+ // * Neither the name of Google Inc. nor the names of its
15
+ // contributors may be used to endorse or promote products derived from
16
+ // this software without specific prior written permission.
17
+ //
18
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+
30
+ // This file is intended to be included in another C++ file where the character
31
+ // types are defined. This allows us to write mostly generic code, but not have
32
+ // template bloat because everything is inlined when anybody calls any of our
33
+ // functions.
34
+
35
+ #ifndef GOOGLEURL_SRC_URL_CANON_INTERNAL_H__
36
+ #define GOOGLEURL_SRC_URL_CANON_INTERNAL_H__
37
+
38
+ #include <stdlib.h>
39
+
40
+ #include "logging.h"
41
+ #include "url_canon.h"
42
+
43
+ namespace url_canon {
44
+
45
+ // Character type handling -----------------------------------------------------
46
+
47
// Bit flags describing character classes. The entry for each 8-bit character
// in kSharedCharTypeTable is a combination of these bits.
enum SharedCharTypes {
  // Set for characters that may appear in a query without escaping.
  // Characters lacking this flag are escaped, see url_canon_query.cc
  CHAR_QUERY = 1 << 0,

  // Set for characters valid in the username/password field.
  CHAR_USERINFO = 1 << 1,

  // Set for characters valid in an IPv4 address (digits plus dot and 'x'
  // for hex).
  CHAR_IPV4 = 1 << 2,

  // Set for ASCII hex digits (as used in %-escapes).
  CHAR_HEX = 1 << 3,

  // Set for ASCII decimal digits.
  CHAR_DEC = 1 << 4,

  // Set for ASCII octal digits.
  CHAR_OCT = 1 << 5,
};
69
+
70
+ // This table contains the flags in SharedCharTypes for each 8-bit character.
71
+ // Some canonicalization functions have their own specialized lookup table.
72
+ // For those with simple requirements, we have collected the flags in one
73
+ // place so there are fewer lookup tables to load into the CPU cache.
74
+ //
75
+ // Using an unsigned char type has a small but measurable performance benefit
76
+ // over using a 32-bit number.
77
+ extern const unsigned char kSharedCharTypeTable[0x100];
78
+
79
+ // More readable wrappers around the character type lookup table.
80
+ inline bool IsCharOfType(unsigned char c, SharedCharTypes type) {
81
+ return !!(kSharedCharTypeTable[c] & type);
82
+ }
83
+ inline bool IsQueryChar(unsigned char c) {
84
+ return IsCharOfType(c, CHAR_QUERY);
85
+ }
86
+ inline bool IsIPv4Char(unsigned char c) {
87
+ return IsCharOfType(c, CHAR_IPV4);
88
+ }
89
+ inline bool IsHexChar(unsigned char c) {
90
+ return IsCharOfType(c, CHAR_HEX);
91
+ }
92
+
93
+ // Appends the given string to the output, escaping characters that do not
94
+ // match the given |type| in SharedCharTypes.
95
+ void AppendStringOfType(const char* source, int length,
96
+ SharedCharTypes type,
97
+ CanonOutput* output);
98
+ void AppendStringOfType(const char16* source, int length,
99
+ SharedCharTypes type,
100
+ CanonOutput* output);
101
+
102
+ // Maps the hex numerical values 0x0 to 0xf to the corresponding ASCII digit
103
+ // that will be used to represent it.
104
+ extern const char kHexCharLookup[0x10];
105
+
106
+ // This lookup table allows fast conversion between ASCII hex letters and their
107
+ // corresponding numerical value. The 8-bit range is divided up into 8
108
+ // regions of 0x20 characters each. Each of the three character types (numbers,
109
+ // uppercase, lowercase) falls into different regions of this range. The table
110
+ // contains the amount to subtract from characters in that range to get at
111
+ // the corresponding numerical value.
112
+ //
113
+ // See HexCharToValue for the lookup.
114
+ extern const char kCharToHexLookup[8];
115
+
116
+ // Assumes the input is a valid hex digit! Call IsHexChar before using this.
117
+ inline unsigned char HexCharToValue(unsigned char c) {
118
+ return c - kCharToHexLookup[c / 0x20];
119
+ }
120
+
121
// Reports whether the character at |spec[offset]| is a dot or a dot
// equivalent, and how many characters it occupies:
//   1 for a literal '.',
//   3 for an escaped dot ("%2e" or "%2E") that fits before |end|,
//   0 when the character is not a dot.
// |spec[offset]| itself is read unconditionally, so |offset| must be a valid
// index.
template<typename CHAR>
inline int IsDot(const CHAR* spec, int offset, int end) {
  const CHAR lead = spec[offset];
  if (lead == '.')
    return 1;

  // Anything else can only be a dot if it is a complete "%2e" escape.
  if (lead != '%' || offset + 3 > end)
    return 0;
  if (spec[offset + 1] != '2')
    return 0;

  const CHAR low_digit = spec[offset + 2];
  return (low_digit == 'e' || low_digit == 'E') ? 3 : 0;
}
136
+
137
+ // Returns the canonicalized version of the input character according to scheme
138
+ // rules. This is implemented alongside the scheme canonicalizer, and is
139
+ // required for relative URL resolving to test for scheme equality.
140
+ //
141
+ // Returns 0 if the input character is not a valid scheme character.
142
+ char CanonicalSchemeChar(char16 ch);
143
+
144
+ // Write a single character, escaped, to the output. This always escapes: it
145
+ // does no checking that thee character requires escaping.
146
+ // Escaping makes sense only 8 bit chars, so code works in all cases of
147
+ // input parameters (8/16bit).
148
+ template<typename UINCHAR, typename OUTCHAR>
149
+ inline void AppendEscapedChar(UINCHAR ch,
150
+ CanonOutputT<OUTCHAR>* output) {
151
+ output->push_back('%');
152
+ output->push_back(kHexCharLookup[ch >> 4]);
153
+ output->push_back(kHexCharLookup[ch & 0xf]);
154
+ }
155
+
156
+ // The character we'll substitute for undecodable or invalid characters.
157
+ extern const char16 kUnicodeReplacementCharacter;
158
+
159
+ // UTF-8 functions ------------------------------------------------------------
160
+
161
+ // Reads one character in UTF-8 starting at |*begin| in |str| and places
162
+ // the decoded value into |*code_point_out|. If the character is valid, we will
163
+ // return true. If invalid, we'll return false and put the
164
+ // kUnicodeReplacementCharacter into |*code_point_out|.
165
+ //
166
+ // |*begin| will be updated to point to the last character consumed so it
167
+ // can be incremented in a loop and will be ready for the next character.
168
+ // (for a single-byte ASCII character, it will not be changed).
169
+ //
170
+ // Implementation is in url_canon_icu.cc.
171
+ bool ReadUTFChar(const char* str, int* begin, int length,
172
+ unsigned* code_point_out);
173
+
174
+ // Generic To-UTF-8 converter. This will call the given append method for each
175
+ // character that should be appended, with the given output method. Wrappers
176
+ // are provided below for escaped and non-escaped versions of this.
177
+ //
178
+ // The char_value must have already been checked that it's a valid Unicode
179
+ // character.
180
+ template<class Output, void Appender(unsigned char, Output*)>
181
+ inline void DoAppendUTF8(unsigned char_value, Output* output) {
182
+ if (char_value <= 0x7f) {
183
+ Appender(static_cast<unsigned char>(char_value), output);
184
+ } else if (char_value <= 0x7ff) {
185
+ // 110xxxxx 10xxxxxx
186
+ Appender(static_cast<unsigned char>(0xC0 | (char_value >> 6)),
187
+ output);
188
+ Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
189
+ output);
190
+ } else if (char_value <= 0xffff) {
191
+ // 1110xxxx 10xxxxxx 10xxxxxx
192
+ Appender(static_cast<unsigned char>(0xe0 | (char_value >> 12)),
193
+ output);
194
+ Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),
195
+ output);
196
+ Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
197
+ output);
198
+ } else if (char_value <= 0x10FFFF) { // Max unicode code point.
199
+ // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
200
+ Appender(static_cast<unsigned char>(0xf0 | (char_value >> 18)),
201
+ output);
202
+ Appender(static_cast<unsigned char>(0x80 | ((char_value >> 12) & 0x3f)),
203
+ output);
204
+ Appender(static_cast<unsigned char>(0x80 | ((char_value >> 6) & 0x3f)),
205
+ output);
206
+ Appender(static_cast<unsigned char>(0x80 | (char_value & 0x3f)),
207
+ output);
208
+ } else {
209
+ // Invalid UTF-8 character (>20 bits).
210
+ NOTREACHED();
211
+ }
212
+ }
213
+
214
+ // Helper used by AppendUTF8Value below. We use an unsigned parameter so there
215
+ // are no funny sign problems with the input, but then have to convert it to
216
+ // a regular char for appending.
217
+ inline void AppendCharToOutput(unsigned char ch, CanonOutput* output) {
218
+ output->push_back(static_cast<char>(ch));
219
+ }
220
+
221
+ // Writes the given character to the output as UTF-8. This does NO checking
222
+ // of the validity of the unicode characters; the caller should ensure that
223
+ // the value it is appending is valid to append.
224
+ inline void AppendUTF8Value(unsigned char_value, CanonOutput* output) {
225
+ DoAppendUTF8<CanonOutput, AppendCharToOutput>(char_value, output);
226
+ }
227
+
228
+ // Writes the given character to the output as UTF-8, escaping ALL
229
+ // characters (even when they are ASCII). This does NO checking of the
230
+ // validity of the unicode characters; the caller should ensure that the value
231
+ // it is appending is valid to append.
232
+ inline void AppendUTF8EscapedValue(unsigned char_value, CanonOutput* output) {
233
+ DoAppendUTF8<CanonOutput, AppendEscapedChar>(char_value, output);
234
+ }
235
+
236
+ // UTF-16 functions -----------------------------------------------------------
237
+
238
+ // Reads one character in UTF-16 starting at |*begin| in |str| and places
239
+ // the decoded value into |*code_point|. If the character is valid, we will
240
+ // return true. If invalid, we'll return false and put the
241
+ // kUnicodeReplacementCharacter into |*code_point|.
242
+ //
243
+ // |*begin| will be updated to point to the last character consumed so it
244
+ // can be incremented in a loop and will be ready for the next character.
245
+ // (for a single-16-bit-word character, it will not be changed).
246
+ //
247
+ // Implementation is in url_canon_icu.cc.
248
+ bool ReadUTFChar(const char16* str, int* begin, int length,
249
+ unsigned* code_point);
250
+
251
+ // Equivalent to U16_APPEND_UNSAFE in ICU but uses our output method.
252
+ inline void AppendUTF16Value(unsigned code_point,
253
+ CanonOutputT<char16>* output) {
254
+ if (code_point > 0xffff) {
255
+ output->push_back(static_cast<char16>((code_point >> 10) + 0xd7c0));
256
+ output->push_back(static_cast<char16>((code_point & 0x3ff) | 0xdc00));
257
+ } else {
258
+ output->push_back(static_cast<char16>(code_point));
259
+ }
260
+ }
261
+
262
+ // Escaping functions ---------------------------------------------------------
263
+
264
+ // Writes the given character to the output as UTF-8, escaped. Call this
265
+ // function only when the input is wide. Returns true on success. Failure
266
+ // means there was some problem with the encoding, we'll still try to
267
+ // update the |*begin| pointer and add a placeholder character to the
268
+ // output so processing can continue.
269
+ //
270
+ // We will append the character starting at ch[begin] with the buffer ch
271
+ // being |length|. |*begin| will be updated to point to the last character
272
+ // consumed (we may consume more than one for UTF-16) so that if called in
273
+ // a loop, incrementing the pointer will move to the next character.
274
+ //
275
+ // Every single output character will be escaped. This means that if you
276
+ // give it an ASCII character as input, it will be escaped. Some code uses
277
+ // this when it knows that a character is invalid according to its rules
278
+ // for validity. If you don't want escaping for ASCII characters, you will
279
+ // have to filter them out prior to calling this function.
280
+ //
281
+ // Assumes that ch[begin] is within range in the array, but does not assume
282
+ // that any following characters are.
283
+ inline bool AppendUTF8EscapedChar(const char16* str, int* begin, int length,
284
+ CanonOutput* output) {
285
+ // UTF-16 input. Readchar16 will handle invalid characters for us and give
286
+ // us the kUnicodeReplacementCharacter, so we don't have to do special
287
+ // checking after failure, just pass through the failure to the caller.
288
+ unsigned char_value;
289
+ bool success = ReadUTFChar(str, begin, length, &char_value);
290
+ AppendUTF8EscapedValue(char_value, output);
291
+ return success;
292
+ }
293
+
294
+ // Handles UTF-8 input. See the wide version above for usage.
295
+ inline bool AppendUTF8EscapedChar(const char* str, int* begin, int length,
296
+ CanonOutput* output) {
297
+ // ReadUTF8Char will handle invalid characters for us and give us the
298
+ // kUnicodeReplacementCharacter, so we don't have to do special checking
299
+ // after failure, just pass through the failure to the caller.
300
+ unsigned ch;
301
+ bool success = ReadUTFChar(str, begin, length, &ch);
302
+ AppendUTF8EscapedValue(ch, output);
303
+ return success;
304
+ }
305
+
306
+ // DecodeEscaped (defined below, after the Is8BitChar helpers): given a '%' character at |*begin| in the string |spec|, this will decode
307
+ // the escaped value and put it into |*unescaped_value| on success (returns
308
+ // true). On failure, this will return false, and will not write into
309
+ // |*unescaped_value|.
310
+ //
311
+ // |*begin| will be updated to point to the last character of the escape
312
+ // sequence so that when called with the index of a for loop, the next time
313
+ // through it will point to the next character to be considered. On failure,
314
+ // |*begin| will be unchanged.
315
// Narrow-character overload: a char always fits in 8 bits, so this is
// unconditionally true. It exists as a separate overload to avoid a warning.
inline bool Is8BitChar(char c) {
  static_cast<void>(c);  // Value is irrelevant for the narrow case.
  return true;
}
318
+ inline bool Is8BitChar(char16 c) {
319
+ return c <= 255;
320
+ }
321
+
322
+ template<typename CHAR>
323
+ inline bool DecodeEscaped(const CHAR* spec, int* begin, int end,
324
+ unsigned char* unescaped_value) {
325
+ if (*begin + 3 > end ||
326
+ !Is8BitChar(spec[*begin + 1]) || !Is8BitChar(spec[*begin + 2])) {
327
+ // Invalid escape sequence because there's not enough room, or the
328
+ // digits are not ASCII.
329
+ return false;
330
+ }
331
+
332
+ unsigned char first = static_cast<unsigned char>(spec[*begin + 1]);
333
+ unsigned char second = static_cast<unsigned char>(spec[*begin + 2]);
334
+ if (!IsHexChar(first) || !IsHexChar(second)) {
335
+ // Invalid hex digits, fail.
336
+ return false;
337
+ }
338
+
339
+ // Valid escape sequence.
340
+ *unescaped_value = (HexCharToValue(first) << 4) + HexCharToValue(second);
341
+ *begin += 2;
342
+ return true;
343
+ }
344
+
345
+ // Appends the given substring to the output, escaping "some" characters that
346
+ // it feels may not be safe. It assumes the input values are all contained in
347
+ // 8-bit although it allows any type.
348
+ //
349
+ // This is used in error cases to append invalid output so that it looks
350
+ // approximately correct. Non-error cases should not call this function since
351
+ // the escaping rules are not guaranteed!
352
+ void AppendInvalidNarrowString(const char* spec, int begin, int end,
353
+ CanonOutput* output);
354
+ void AppendInvalidNarrowString(const char16* spec, int begin, int end,
355
+ CanonOutput* output);
356
+
357
+ // Misc canonicalization helpers ----------------------------------------------
358
+
359
+ // Converts between UTF-8 and UTF-16, returning true on successful conversion.
360
+ // The output will be appended to the given canonicalizer output (so make sure
361
+ // it's empty if you want to replace).
362
+ //
363
+ // On invalid input, this will still write as much output as possible,
364
+ // replacing the invalid characters with the "invalid character". It will
365
+ // return false in the failure case, and the caller should not continue as
366
+ // normal.
367
+ bool ConvertUTF16ToUTF8(const char16* input, int input_len,
368
+ CanonOutput* output);
369
+ bool ConvertUTF8ToUTF16(const char* input, int input_len,
370
+ CanonOutputT<char16>* output);
371
+
372
+ // Converts from UTF-16 to 8-bit using the character set converter. If the
373
+ // converter is NULL, this will use UTF-8.
374
+ void ConvertUTF16ToQueryEncoding(const char16* input,
375
+ const url_parse::Component& query,
376
+ CharsetConverter* converter,
377
+ CanonOutput* output);
378
+
379
+ // Applies the replacements to the given component source. The component source
380
+ // should be pre-initialized to the "old" base. That is, all pointers will
381
+ // point to the spec of the old URL, and all of the Parsed components will
382
+ // be indices into that string.
383
+ //
384
+ // The pointers and components in the |source| for all non-NULL strings in the
385
+ // |repl| (replacements) will be updated to reference those strings.
386
+ // Canonicalizing with the new |source| and |parsed| can then combine URL
387
+ // components from many different strings.
388
+ void SetupOverrideComponents(const char* base,
389
+ const Replacements<char>& repl,
390
+ URLComponentSource<char>* source,
391
+ url_parse::Parsed* parsed);
392
+
393
+ // Like the above 8-bit version, except that it additionally converts the
394
+ // UTF-16 input to UTF-8 before doing the overrides.
395
+ //
396
+ // The given utf8_buffer is used to store the converted components. They will
397
+ // be appended one after another, with the parsed structure identifying the
398
+ // appropriate substrings. This buffer is a parameter because the source has
399
+ // no storage, so the buffer must have the same lifetime as the source
400
+ // parameter owned by the caller.
401
+ //
402
+ // THE CALLER MUST NOT ADD TO THE |utf8_buffer| AFTER THIS CALL. Members of
403
+ // |source| will point into this buffer, which could be invalidated if
404
+ // additional data is added and the CanonOutput resizes its buffer.
405
+ //
406
+ // Returns true on success. False means that the input was not valid UTF-16,
407
+ // although we will have still done the override with "invalid characters" in
408
+ // place of errors.
409
+ bool SetupUTF16OverrideComponents(const char* base,
410
+ const Replacements<char16>& repl,
411
+ CanonOutput* utf8_buffer,
412
+ URLComponentSource<char>* source,
413
+ url_parse::Parsed* parsed);
414
+
415
+ // Implemented in url_canon_path.cc, these are required by the relative URL
416
+ // resolver as well, so we declare them here.
417
+ bool CanonicalizePartialPath(const char* spec,
418
+ const url_parse::Component& path,
419
+ int path_begin_in_output,
420
+ CanonOutput* output);
421
+ bool CanonicalizePartialPath(const char16* spec,
422
+ const url_parse::Component& path,
423
+ int path_begin_in_output,
424
+ CanonOutput* output);
425
+
426
+ #ifndef WIN32
427
+
428
+ // Implementations of Windows' int-to-string conversions
429
+ int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix);
430
+ int _itow_s(int value, char16* buffer, size_t size_in_chars, int radix);
431
+
432
// Secure template overloads for these functions
//
// Array-reference overload of _itoa_s: the buffer size is deduced from the
// array type and forwarded to the four-argument version.
template<size_t N>
inline int _itoa_s(int value, char (&buffer)[N], int radix) {
  const size_t capacity = N;
  return _itoa_s(value, buffer, capacity, radix);
}
437
+
438
+ template<size_t N>
439
+ inline int _itow_s(int value, char16 (&buffer)[N], int radix) {
440
+ return _itow_s(value, buffer, N, radix);
441
+ }
442
+
443
// Stand-in for Windows' _strtoui64 on other platforms. _strtoui64 and
// strtoull behave the same, so this forwards all three arguments unchanged
// and returns strtoull's result.
inline unsigned long long _strtoui64(const char* nptr,
                                     char** endptr, int base) {
  const unsigned long long parsed = strtoull(nptr, endptr, base);
  return parsed;
}
448
+
449
+ #endif // WIN32
450
+
451
+ } // namespace url_canon
452
+
453
+ #endif // GOOGLEURL_SRC_URL_CANON_INTERNAL_H__