uri_parser 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/.gitignore +6 -0
  2. data/.rvmrc +1 -0
  3. data/Gemfile +6 -0
  4. data/Rakefile +13 -0
  5. data/ext/uri_parser/basictypes.h +89 -0
  6. data/ext/uri_parser/extconf.h +6 -0
  7. data/ext/uri_parser/extconf.rb +50 -0
  8. data/ext/uri_parser/logging.h +5 -0
  9. data/ext/uri_parser/scoped_ptr.h +322 -0
  10. data/ext/uri_parser/string16.cc +95 -0
  11. data/ext/uri_parser/string16.h +194 -0
  12. data/ext/uri_parser/uri_parser.cc +87 -0
  13. data/ext/uri_parser/url_canon.h +872 -0
  14. data/ext/uri_parser/url_canon_etc.cc +392 -0
  15. data/ext/uri_parser/url_canon_fileurl.cc +215 -0
  16. data/ext/uri_parser/url_canon_host.cc +401 -0
  17. data/ext/uri_parser/url_canon_icu.cc +207 -0
  18. data/ext/uri_parser/url_canon_icu.h +63 -0
  19. data/ext/uri_parser/url_canon_internal.cc +427 -0
  20. data/ext/uri_parser/url_canon_internal.h +453 -0
  21. data/ext/uri_parser/url_canon_internal_file.h +157 -0
  22. data/ext/uri_parser/url_canon_ip.cc +737 -0
  23. data/ext/uri_parser/url_canon_ip.h +101 -0
  24. data/ext/uri_parser/url_canon_mailtourl.cc +137 -0
  25. data/ext/uri_parser/url_canon_path.cc +380 -0
  26. data/ext/uri_parser/url_canon_pathurl.cc +128 -0
  27. data/ext/uri_parser/url_canon_query.cc +189 -0
  28. data/ext/uri_parser/url_canon_relative.cc +572 -0
  29. data/ext/uri_parser/url_canon_stdstring.h +134 -0
  30. data/ext/uri_parser/url_canon_stdurl.cc +211 -0
  31. data/ext/uri_parser/url_common.h +48 -0
  32. data/ext/uri_parser/url_file.h +108 -0
  33. data/ext/uri_parser/url_parse.cc +760 -0
  34. data/ext/uri_parser/url_parse.h +336 -0
  35. data/ext/uri_parser/url_parse_file.cc +243 -0
  36. data/ext/uri_parser/url_parse_internal.h +112 -0
  37. data/ext/uri_parser/url_util.cc +553 -0
  38. data/ext/uri_parser/url_util.h +222 -0
  39. data/lib/uri_parser.rb +28 -0
  40. data/lib/uri_parser/version.rb +3 -0
  41. data/spec/spec_helper.rb +16 -0
  42. data/spec/uri_parser_spec.rb +54 -0
  43. data/uri_parser.gemspec +26 -0
  44. metadata +117 -0
@@ -0,0 +1,63 @@
1
+ // Copyright 2007, Google Inc.
2
+ // All rights reserved.
3
+ //
4
+ // Redistribution and use in source and binary forms, with or without
5
+ // modification, are permitted provided that the following conditions are
6
+ // met:
7
+ //
8
+ // * Redistributions of source code must retain the above copyright
9
+ // notice, this list of conditions and the following disclaimer.
10
+ // * Redistributions in binary form must reproduce the above
11
+ // copyright notice, this list of conditions and the following disclaimer
12
+ // in the documentation and/or other materials provided with the
13
+ // distribution.
14
+ // * Neither the name of Google Inc. nor the names of its
15
+ // contributors may be used to endorse or promote products derived from
16
+ // this software without specific prior written permission.
17
+ //
18
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+
30
+ // ICU integration functions.
31
+
32
+ #ifndef GOOGLEURL_SRC_URL_CANON_ICU_H__
33
+ #define GOOGLEURL_SRC_URL_CANON_ICU_H__
34
+
35
+ #include "url_canon.h"
36
+
37
+ typedef struct UConverter UConverter;
38
+
39
+ namespace url_canon {
40
+
41
+ // An implementation of CharsetConverter that implementations can use to
42
+ // interface the canonicalizer with ICU's conversion routines.
43
+ class ICUCharsetConverter : public CharsetConverter {
44
+ public:
45
+ // Constructs a converter using an already-existing ICU character set
46
+ // converter. This converter is NOT owned by this object; the lifetime must
47
+ // be managed by the creator such that it is alive as long as this is.
48
+ GURL_API ICUCharsetConverter(UConverter* converter);
49
+
50
+ GURL_API virtual ~ICUCharsetConverter() {}
51
+
52
+ GURL_API virtual void ConvertFromUTF16(const char16* input,
53
+ int input_len,
54
+ CanonOutput* output);
55
+
56
+ private:
57
+ // The ICU converter, not owned by this class.
58
+ UConverter* converter_;
59
+ };
60
+
61
+ } // namespace url_canon
62
+
63
+ #endif // GOOGLEURL_SRC_URL_CANON_ICU_H__
@@ -0,0 +1,427 @@
1
+ // Copyright 2007, Google Inc.
2
+ // All rights reserved.
3
+ //
4
+ // Redistribution and use in source and binary forms, with or without
5
+ // modification, are permitted provided that the following conditions are
6
+ // met:
7
+ //
8
+ // * Redistributions of source code must retain the above copyright
9
+ // notice, this list of conditions and the following disclaimer.
10
+ // * Redistributions in binary form must reproduce the above
11
+ // copyright notice, this list of conditions and the following disclaimer
12
+ // in the documentation and/or other materials provided with the
13
+ // distribution.
14
+ // * Neither the name of Google Inc. nor the names of its
15
+ // contributors may be used to endorse or promote products derived from
16
+ // this software without specific prior written permission.
17
+ //
18
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+
30
+ #include <cstdio>
31
+ #include <errno.h>
32
+ #include <stdlib.h>
33
+ #include <string>
34
+
35
+ #include "url_canon_internal.h"
36
+
37
+ namespace url_canon {
38
+
39
+ namespace {
40
+
41
+ template<typename CHAR, typename UCHAR>
42
+ void DoAppendStringOfType(const CHAR* source, int length,
43
+ SharedCharTypes type,
44
+ CanonOutput* output) {
45
+ for (int i = 0; i < length; i++) {
46
+ if (static_cast<UCHAR>(source[i]) >= 0x80) {
47
+ // ReadChar will fill the code point with kUnicodeReplacementCharacter
48
+ // when the input is invalid, which is what we want.
49
+ unsigned code_point;
50
+ ReadUTFChar(source, &i, length, &code_point);
51
+ AppendUTF8EscapedValue(code_point, output);
52
+ } else {
53
+ // Just append the 7-bit character, possibly escaping it.
54
+ unsigned char uch = static_cast<unsigned char>(source[i]);
55
+ if (!IsCharOfType(uch, type))
56
+ AppendEscapedChar(uch, output);
57
+ else
58
+ output->push_back(uch);
59
+ }
60
+ }
61
+ }
62
+
63
+ // This function assumes the input values are all contained in 8-bit,
64
+ // although it allows any type. Returns true if input is valid, false if not.
65
+ template<typename CHAR, typename UCHAR>
66
+ void DoAppendInvalidNarrowString(const CHAR* spec, int begin, int end,
67
+ CanonOutput* output) {
68
+ for (int i = begin; i < end; i++) {
69
+ UCHAR uch = static_cast<UCHAR>(spec[i]);
70
+ if (uch >= 0x80) {
71
+ // Handle UTF-8/16 encodings. This call will correctly handle the error
72
+ // case by appending the invalid character.
73
+ AppendUTF8EscapedChar(spec, &i, end, output);
74
+ } else if (uch <= ' ' || uch == 0x7f) {
75
+ // This function is for error handling, so we escape all control
76
+ // characters and spaces, but not anything else since we lack
77
+ // context to do something more specific.
78
+ AppendEscapedChar(static_cast<unsigned char>(uch), output);
79
+ } else {
80
+ output->push_back(static_cast<char>(uch));
81
+ }
82
+ }
83
+ }
84
+
85
+ // Overrides one component, see the url_canon::Replacements structure for
86
+ // what the various combionations of source pointer and component mean.
87
+ void DoOverrideComponent(const char* override_source,
88
+ const url_parse::Component& override_component,
89
+ const char** dest,
90
+ url_parse::Component* dest_component) {
91
+ if (override_source) {
92
+ *dest = override_source;
93
+ *dest_component = override_component;
94
+ }
95
+ }
96
+
97
+ // Similar to DoOverrideComponent except that it takes a UTF-16 input and does
98
+ // not actually set the output character pointer.
99
+ //
100
+ // The input is converted to UTF-8 at the end of the given buffer as a temporary
101
+ // holding place. The component indentifying the portion of the buffer used in
102
+ // the |utf8_buffer| will be specified in |*dest_component|.
103
+ //
104
+ // This will not actually set any |dest| pointer like DoOverrideComponent
105
+ // does because all of the pointers will point into the |utf8_buffer|, which
106
+ // may get resized while we're overriding a subsequent component. Instead, the
107
+ // caller should use the beginning of the |utf8_buffer| as the string pointer
108
+ // for all components once all overrides have been prepared.
109
+ bool PrepareUTF16OverrideComponent(
110
+ const char16* override_source,
111
+ const url_parse::Component& override_component,
112
+ CanonOutput* utf8_buffer,
113
+ url_parse::Component* dest_component) {
114
+ bool success = true;
115
+ if (override_source) {
116
+ if (!override_component.is_valid()) {
117
+ // Non-"valid" component (means delete), so we need to preserve that.
118
+ *dest_component = url_parse::Component();
119
+ } else {
120
+ // Convert to UTF-8.
121
+ dest_component->begin = utf8_buffer->length();
122
+ success = ConvertUTF16ToUTF8(&override_source[override_component.begin],
123
+ override_component.len, utf8_buffer);
124
+ dest_component->len = utf8_buffer->length() - dest_component->begin;
125
+ }
126
+ }
127
+ return success;
128
+ }
129
+
130
+ } // namespace
131
+
132
+ // See the header file for this array's declaration.
133
+ const unsigned char kSharedCharTypeTable[0x100] = {
134
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0f
135
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1f
136
+ 0, // 0x20 ' ' (escape spaces in queries)
137
+ CHAR_QUERY | CHAR_USERINFO, // 0x21 !
138
+ 0, // 0x22 "
139
+ 0, // 0x23 # (invalid in query since it marks the ref)
140
+ CHAR_QUERY | CHAR_USERINFO, // 0x24 $
141
+ CHAR_QUERY | CHAR_USERINFO, // 0x25 %
142
+ CHAR_QUERY | CHAR_USERINFO, // 0x26 &
143
+ CHAR_QUERY | CHAR_USERINFO, // 0x27 '
144
+ CHAR_QUERY | CHAR_USERINFO, // 0x28 (
145
+ CHAR_QUERY | CHAR_USERINFO, // 0x29 )
146
+ CHAR_QUERY | CHAR_USERINFO, // 0x2a *
147
+ CHAR_QUERY | CHAR_USERINFO, // 0x2b +
148
+ CHAR_QUERY | CHAR_USERINFO, // 0x2c ,
149
+ CHAR_QUERY | CHAR_USERINFO, // 0x2d -
150
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4, // 0x2e .
151
+ CHAR_QUERY, // 0x2f /
152
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x30 0
153
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x31 1
154
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x32 2
155
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x33 3
156
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x34 4
157
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x35 5
158
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x36 6
159
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x37 7
160
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC, // 0x38 8
161
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC, // 0x39 9
162
+ CHAR_QUERY, // 0x3a :
163
+ CHAR_QUERY, // 0x3b ;
164
+ 0, // 0x3c < (Try to prevent certain types of XSS.)
165
+ CHAR_QUERY, // 0x3d =
166
+ 0, // 0x3e > (Try to prevent certain types of XSS.)
167
+ CHAR_QUERY, // 0x3f ?
168
+ CHAR_QUERY, // 0x40 @
169
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x41 A
170
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x42 B
171
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x43 C
172
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x44 D
173
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x45 E
174
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x46 F
175
+ CHAR_QUERY | CHAR_USERINFO, // 0x47 G
176
+ CHAR_QUERY | CHAR_USERINFO, // 0x48 H
177
+ CHAR_QUERY | CHAR_USERINFO, // 0x49 I
178
+ CHAR_QUERY | CHAR_USERINFO, // 0x4a J
179
+ CHAR_QUERY | CHAR_USERINFO, // 0x4b K
180
+ CHAR_QUERY | CHAR_USERINFO, // 0x4c L
181
+ CHAR_QUERY | CHAR_USERINFO, // 0x4d M
182
+ CHAR_QUERY | CHAR_USERINFO, // 0x4e N
183
+ CHAR_QUERY | CHAR_USERINFO, // 0x4f O
184
+ CHAR_QUERY | CHAR_USERINFO, // 0x50 P
185
+ CHAR_QUERY | CHAR_USERINFO, // 0x51 Q
186
+ CHAR_QUERY | CHAR_USERINFO, // 0x52 R
187
+ CHAR_QUERY | CHAR_USERINFO, // 0x53 S
188
+ CHAR_QUERY | CHAR_USERINFO, // 0x54 T
189
+ CHAR_QUERY | CHAR_USERINFO, // 0x55 U
190
+ CHAR_QUERY | CHAR_USERINFO, // 0x56 V
191
+ CHAR_QUERY | CHAR_USERINFO, // 0x57 W
192
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4, // 0x58 X
193
+ CHAR_QUERY | CHAR_USERINFO, // 0x59 Y
194
+ CHAR_QUERY | CHAR_USERINFO, // 0x5a Z
195
+ CHAR_QUERY, // 0x5b [
196
+ CHAR_QUERY, // 0x5c '\'
197
+ CHAR_QUERY, // 0x5d ]
198
+ CHAR_QUERY, // 0x5e ^
199
+ CHAR_QUERY | CHAR_USERINFO, // 0x5f _
200
+ CHAR_QUERY, // 0x60 `
201
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x61 a
202
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x62 b
203
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x63 c
204
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x64 d
205
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x65 e
206
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x66 f
207
+ CHAR_QUERY | CHAR_USERINFO, // 0x67 g
208
+ CHAR_QUERY | CHAR_USERINFO, // 0x68 h
209
+ CHAR_QUERY | CHAR_USERINFO, // 0x69 i
210
+ CHAR_QUERY | CHAR_USERINFO, // 0x6a j
211
+ CHAR_QUERY | CHAR_USERINFO, // 0x6b k
212
+ CHAR_QUERY | CHAR_USERINFO, // 0x6c l
213
+ CHAR_QUERY | CHAR_USERINFO, // 0x6d m
214
+ CHAR_QUERY | CHAR_USERINFO, // 0x6e n
215
+ CHAR_QUERY | CHAR_USERINFO, // 0x6f o
216
+ CHAR_QUERY | CHAR_USERINFO, // 0x70 p
217
+ CHAR_QUERY | CHAR_USERINFO, // 0x71 q
218
+ CHAR_QUERY | CHAR_USERINFO, // 0x72 r
219
+ CHAR_QUERY | CHAR_USERINFO, // 0x73 s
220
+ CHAR_QUERY | CHAR_USERINFO, // 0x74 t
221
+ CHAR_QUERY | CHAR_USERINFO, // 0x75 u
222
+ CHAR_QUERY | CHAR_USERINFO, // 0x76 v
223
+ CHAR_QUERY | CHAR_USERINFO, // 0x77 w
224
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4, // 0x78 x
225
+ CHAR_QUERY | CHAR_USERINFO, // 0x79 y
226
+ CHAR_QUERY | CHAR_USERINFO, // 0x7a z
227
+ CHAR_QUERY, // 0x7b {
228
+ CHAR_QUERY, // 0x7c |
229
+ CHAR_QUERY, // 0x7d }
230
+ CHAR_QUERY | CHAR_USERINFO, // 0x7e ~
231
+ 0, // 0x7f
232
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8f
233
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9f
234
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xa0 - 0xaf
235
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xb0 - 0xbf
236
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xc0 - 0xcf
237
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xd0 - 0xdf
238
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xe0 - 0xef
239
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0 - 0xff
240
+ };
241
+
242
// Maps a nibble value (0 - 15) to its upper-case hexadecimal digit.
const char kHexCharLookup[0x10] = {
    '0', '1', '2', '3',
    '4', '5', '6', '7',
    '8', '9', 'A', 'B',
    'C', 'D', 'E', 'F',
};
246
+
247
// One entry per 32-character band of the 8-bit range. For the bands that
// contain hex digits, the entry is the amount to subtract from a digit in
// that band to obtain its numeric value; all other bands hold 0.
const char kCharToHexLookup[8] = {
    0,         // 0x00 - 0x1f: no hex digits
    '0',       // 0x20 - 0x3f: '0' - '9' live at 0x30 - 0x39
    'A' - 10,  // 0x40 - 0x5f: 'A' - 'F' live at 0x41 - 0x46
    'a' - 10,  // 0x60 - 0x7f: 'a' - 'f' live at 0x61 - 0x66
    0,         // 0x80 - 0x9f: no hex digits
    0,         // 0xa0 - 0xbf: no hex digits
    0,         // 0xc0 - 0xdf: no hex digits
    0,         // 0xe0 - 0xff: no hex digits
};
257
+
258
+ const char16 kUnicodeReplacementCharacter = 0xfffd;
259
+
260
+ void AppendStringOfType(const char* source, int length,
261
+ SharedCharTypes type,
262
+ CanonOutput* output) {
263
+ DoAppendStringOfType<char, unsigned char>(source, length, type, output);
264
+ }
265
+
266
+ void AppendStringOfType(const char16* source, int length,
267
+ SharedCharTypes type,
268
+ CanonOutput* output) {
269
+ DoAppendStringOfType<char16, char16>(source, length, type, output);
270
+ }
271
+
272
+ void AppendInvalidNarrowString(const char* spec, int begin, int end,
273
+ CanonOutput* output) {
274
+ DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output);
275
+ }
276
+
277
+ void AppendInvalidNarrowString(const char16* spec, int begin, int end,
278
+ CanonOutput* output) {
279
+ DoAppendInvalidNarrowString<char16, char16>(spec, begin, end, output);
280
+ }
281
+
282
+ bool ConvertUTF16ToUTF8(const char16* input, int input_len,
283
+ CanonOutput* output) {
284
+ bool success = true;
285
+ for (int i = 0; i < input_len; i++) {
286
+ unsigned code_point;
287
+ success &= ReadUTFChar(input, &i, input_len, &code_point);
288
+ AppendUTF8Value(code_point, output);
289
+ }
290
+ return success;
291
+ }
292
+
293
+ bool ConvertUTF8ToUTF16(const char* input, int input_len,
294
+ CanonOutputT<char16>* output) {
295
+ bool success = true;
296
+ for (int i = 0; i < input_len; i++) {
297
+ unsigned code_point;
298
+ success &= ReadUTFChar(input, &i, input_len, &code_point);
299
+ AppendUTF16Value(code_point, output);
300
+ }
301
+ return success;
302
+ }
303
+
304
+ void SetupOverrideComponents(const char* base,
305
+ const Replacements<char>& repl,
306
+ URLComponentSource<char>* source,
307
+ url_parse::Parsed* parsed) {
308
+ // Get the source and parsed structures of the things we are replacing.
309
+ const URLComponentSource<char>& repl_source = repl.sources();
310
+ const url_parse::Parsed& repl_parsed = repl.components();
311
+
312
+ DoOverrideComponent(repl_source.scheme, repl_parsed.scheme,
313
+ &source->scheme, &parsed->scheme);
314
+ DoOverrideComponent(repl_source.username, repl_parsed.username,
315
+ &source->username, &parsed->username);
316
+ DoOverrideComponent(repl_source.password, repl_parsed.password,
317
+ &source->password, &parsed->password);
318
+
319
+ // Our host should be empty if not present, so override the default setup.
320
+ DoOverrideComponent(repl_source.host, repl_parsed.host,
321
+ &source->host, &parsed->host);
322
+ if (parsed->host.len == -1)
323
+ parsed->host.len = 0;
324
+
325
+ DoOverrideComponent(repl_source.port, repl_parsed.port,
326
+ &source->port, &parsed->port);
327
+ DoOverrideComponent(repl_source.path, repl_parsed.path,
328
+ &source->path, &parsed->path);
329
+ DoOverrideComponent(repl_source.query, repl_parsed.query,
330
+ &source->query, &parsed->query);
331
+ DoOverrideComponent(repl_source.ref, repl_parsed.ref,
332
+ &source->ref, &parsed->ref);
333
+ }
334
+
335
+ bool SetupUTF16OverrideComponents(const char* base,
336
+ const Replacements<char16>& repl,
337
+ CanonOutput* utf8_buffer,
338
+ URLComponentSource<char>* source,
339
+ url_parse::Parsed* parsed) {
340
+ bool success = true;
341
+
342
+ // Get the source and parsed structures of the things we are replacing.
343
+ const URLComponentSource<char16>& repl_source = repl.sources();
344
+ const url_parse::Parsed& repl_parsed = repl.components();
345
+
346
+ success &= PrepareUTF16OverrideComponent(
347
+ repl_source.scheme, repl_parsed.scheme,
348
+ utf8_buffer, &parsed->scheme);
349
+ success &= PrepareUTF16OverrideComponent(
350
+ repl_source.username, repl_parsed.username,
351
+ utf8_buffer, &parsed->username);
352
+ success &= PrepareUTF16OverrideComponent(
353
+ repl_source.password, repl_parsed.password,
354
+ utf8_buffer, &parsed->password);
355
+ success &= PrepareUTF16OverrideComponent(
356
+ repl_source.host, repl_parsed.host,
357
+ utf8_buffer, &parsed->host);
358
+ success &= PrepareUTF16OverrideComponent(
359
+ repl_source.port, repl_parsed.port,
360
+ utf8_buffer, &parsed->port);
361
+ success &= PrepareUTF16OverrideComponent(
362
+ repl_source.path, repl_parsed.path,
363
+ utf8_buffer, &parsed->path);
364
+ success &= PrepareUTF16OverrideComponent(
365
+ repl_source.query, repl_parsed.query,
366
+ utf8_buffer, &parsed->query);
367
+ success &= PrepareUTF16OverrideComponent(
368
+ repl_source.ref, repl_parsed.ref,
369
+ utf8_buffer, &parsed->ref);
370
+
371
+ // PrepareUTF16OverrideComponent will not have set the data pointer since the
372
+ // buffer could be resized, invalidating the pointers. We set the data
373
+ // pointers for affected components now that the buffer is finalized.
374
+ if (repl_source.scheme) source->scheme = utf8_buffer->data();
375
+ if (repl_source.username) source->username = utf8_buffer->data();
376
+ if (repl_source.password) source->password = utf8_buffer->data();
377
+ if (repl_source.host) source->host = utf8_buffer->data();
378
+ if (repl_source.port) source->port = utf8_buffer->data();
379
+ if (repl_source.path) source->path = utf8_buffer->data();
380
+ if (repl_source.query) source->query = utf8_buffer->data();
381
+ if (repl_source.ref) source->ref = utf8_buffer->data();
382
+
383
+ return success;
384
+ }
385
+
386
+ #ifndef WIN32
387
+
388
// Minimal stand-in for the Microsoft CRT function of the same name, used on
// platforms that lack it.
//
// Writes |value| as a NUL-terminated string into |buffer|, which has room
// for |size_in_chars| characters including the terminator.
//
//   radix 10: signed decimal.
//   radix 16: lower-case hexadecimal of the value's unsigned bit pattern.
//   any other radix: rejected.
//
// Returns 0 on success. Returns EINVAL when |buffer| is null, when
// |size_in_chars| is 0, when the radix is unsupported, or when the text
// (plus terminator) does not fit.
int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix) {
  // snprintf must not be handed a null destination with a non-zero size, and
  // a zero-sized buffer can never hold the terminator.
  if (!buffer || size_in_chars == 0)
    return EINVAL;

  int written;
  if (radix == 10) {
    written = snprintf(buffer, size_in_chars, "%d", value);
  } else if (radix == 16) {
    // "%x" expects an unsigned int; convert explicitly so negative values
    // are formatted from a well-defined unsigned representation.
    written = snprintf(buffer, size_in_chars, "%x",
                       static_cast<unsigned int>(value));
  } else {
    return EINVAL;
  }

  // A negative result is an output error; a result >= the buffer size means
  // the output was truncated.
  if (written < 0 || static_cast<size_t>(written) >= size_in_chars)
    return EINVAL;
  return 0;
}
404
+
405
+ int _itow_s(int value, char16* buffer, size_t size_in_chars, int radix) {
406
+ if (radix != 10)
407
+ return EINVAL;
408
+
409
+ // No more than 12 characters will be required for a 32-bit integer.
410
+ // Add an extra byte for the terminating null.
411
+ char temp[13];
412
+ int written = snprintf(temp, sizeof(temp), "%d", value);
413
+ if (static_cast<size_t>(written) >= size_in_chars) {
414
+ // Output was truncated, or written was negative.
415
+ return EINVAL;
416
+ }
417
+
418
+ for (int i = 0; i < written; ++i) {
419
+ buffer[i] = static_cast<char16>(temp[i]);
420
+ }
421
+ buffer[written] = '\0';
422
+ return 0;
423
+ }
424
+
425
+ #endif // !WIN32
426
+
427
+ } // namespace url_canon