uri_parser 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. data/.gitignore +6 -0
  2. data/.rvmrc +1 -0
  3. data/Gemfile +6 -0
  4. data/Rakefile +13 -0
  5. data/ext/uri_parser/basictypes.h +89 -0
  6. data/ext/uri_parser/extconf.h +6 -0
  7. data/ext/uri_parser/extconf.rb +50 -0
  8. data/ext/uri_parser/logging.h +5 -0
  9. data/ext/uri_parser/scoped_ptr.h +322 -0
  10. data/ext/uri_parser/string16.cc +95 -0
  11. data/ext/uri_parser/string16.h +194 -0
  12. data/ext/uri_parser/uri_parser.cc +87 -0
  13. data/ext/uri_parser/url_canon.h +872 -0
  14. data/ext/uri_parser/url_canon_etc.cc +392 -0
  15. data/ext/uri_parser/url_canon_fileurl.cc +215 -0
  16. data/ext/uri_parser/url_canon_host.cc +401 -0
  17. data/ext/uri_parser/url_canon_icu.cc +207 -0
  18. data/ext/uri_parser/url_canon_icu.h +63 -0
  19. data/ext/uri_parser/url_canon_internal.cc +427 -0
  20. data/ext/uri_parser/url_canon_internal.h +453 -0
  21. data/ext/uri_parser/url_canon_internal_file.h +157 -0
  22. data/ext/uri_parser/url_canon_ip.cc +737 -0
  23. data/ext/uri_parser/url_canon_ip.h +101 -0
  24. data/ext/uri_parser/url_canon_mailtourl.cc +137 -0
  25. data/ext/uri_parser/url_canon_path.cc +380 -0
  26. data/ext/uri_parser/url_canon_pathurl.cc +128 -0
  27. data/ext/uri_parser/url_canon_query.cc +189 -0
  28. data/ext/uri_parser/url_canon_relative.cc +572 -0
  29. data/ext/uri_parser/url_canon_stdstring.h +134 -0
  30. data/ext/uri_parser/url_canon_stdurl.cc +211 -0
  31. data/ext/uri_parser/url_common.h +48 -0
  32. data/ext/uri_parser/url_file.h +108 -0
  33. data/ext/uri_parser/url_parse.cc +760 -0
  34. data/ext/uri_parser/url_parse.h +336 -0
  35. data/ext/uri_parser/url_parse_file.cc +243 -0
  36. data/ext/uri_parser/url_parse_internal.h +112 -0
  37. data/ext/uri_parser/url_util.cc +553 -0
  38. data/ext/uri_parser/url_util.h +222 -0
  39. data/lib/uri_parser.rb +28 -0
  40. data/lib/uri_parser/version.rb +3 -0
  41. data/spec/spec_helper.rb +16 -0
  42. data/spec/uri_parser_spec.rb +54 -0
  43. data/uri_parser.gemspec +26 -0
  44. metadata +117 -0
@@ -0,0 +1,63 @@
1
+ // Copyright 2007, Google Inc.
2
+ // All rights reserved.
3
+ //
4
+ // Redistribution and use in source and binary forms, with or without
5
+ // modification, are permitted provided that the following conditions are
6
+ // met:
7
+ //
8
+ // * Redistributions of source code must retain the above copyright
9
+ // notice, this list of conditions and the following disclaimer.
10
+ // * Redistributions in binary form must reproduce the above
11
+ // copyright notice, this list of conditions and the following disclaimer
12
+ // in the documentation and/or other materials provided with the
13
+ // distribution.
14
+ // * Neither the name of Google Inc. nor the names of its
15
+ // contributors may be used to endorse or promote products derived from
16
+ // this software without specific prior written permission.
17
+ //
18
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+
30
+ // ICU integration functions.
31
+
32
+ #ifndef GOOGLEURL_SRC_URL_CANON_ICU_H__
33
+ #define GOOGLEURL_SRC_URL_CANON_ICU_H__
34
+
35
+ #include "url_canon.h"
36
+
37
+ typedef struct UConverter UConverter;
38
+
39
+ namespace url_canon {
40
+
41
+ // An implementation of CharsetConverter that implementations can use to
42
+ // interface the canonicalizer with ICU's conversion routines.
43
+ class ICUCharsetConverter : public CharsetConverter {
44
+ public:
45
+ // Constructs a converter using an already-existing ICU character set
46
+ // converter. This converter is NOT owned by this object; the lifetime must
47
+ // be managed by the creator such that it is alive as long as this is.
48
+ GURL_API ICUCharsetConverter(UConverter* converter);
49
+
50
+ GURL_API virtual ~ICUCharsetConverter() {}
51
+
52
+ GURL_API virtual void ConvertFromUTF16(const char16* input,
53
+ int input_len,
54
+ CanonOutput* output);
55
+
56
+ private:
57
+ // The ICU converter, not owned by this class.
58
+ UConverter* converter_;
59
+ };
60
+
61
+ } // namespace url_canon
62
+
63
+ #endif // GOOGLEURL_SRC_URL_CANON_ICU_H__
@@ -0,0 +1,427 @@
1
+ // Copyright 2007, Google Inc.
2
+ // All rights reserved.
3
+ //
4
+ // Redistribution and use in source and binary forms, with or without
5
+ // modification, are permitted provided that the following conditions are
6
+ // met:
7
+ //
8
+ // * Redistributions of source code must retain the above copyright
9
+ // notice, this list of conditions and the following disclaimer.
10
+ // * Redistributions in binary form must reproduce the above
11
+ // copyright notice, this list of conditions and the following disclaimer
12
+ // in the documentation and/or other materials provided with the
13
+ // distribution.
14
+ // * Neither the name of Google Inc. nor the names of its
15
+ // contributors may be used to endorse or promote products derived from
16
+ // this software without specific prior written permission.
17
+ //
18
+ // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22
+ // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23
+ // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24
+ // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25
+ // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26
+ // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27
+ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+
30
+ #include <cstdio>
31
+ #include <errno.h>
32
+ #include <stdlib.h>
33
+ #include <string>
34
+
35
+ #include "url_canon_internal.h"
36
+
37
+ namespace url_canon {
38
+
39
+ namespace {
40
+
41
+ template<typename CHAR, typename UCHAR>
42
+ void DoAppendStringOfType(const CHAR* source, int length,
43
+ SharedCharTypes type,
44
+ CanonOutput* output) {
45
+ for (int i = 0; i < length; i++) {
46
+ if (static_cast<UCHAR>(source[i]) >= 0x80) {
47
+ // ReadChar will fill the code point with kUnicodeReplacementCharacter
48
+ // when the input is invalid, which is what we want.
49
+ unsigned code_point;
50
+ ReadUTFChar(source, &i, length, &code_point);
51
+ AppendUTF8EscapedValue(code_point, output);
52
+ } else {
53
+ // Just append the 7-bit character, possibly escaping it.
54
+ unsigned char uch = static_cast<unsigned char>(source[i]);
55
+ if (!IsCharOfType(uch, type))
56
+ AppendEscapedChar(uch, output);
57
+ else
58
+ output->push_back(uch);
59
+ }
60
+ }
61
+ }
62
+
63
+ // This function assumes the input values are all contained in 8-bit,
64
+ // although it allows any type. Returns true if input is valid, false if not.
65
+ template<typename CHAR, typename UCHAR>
66
+ void DoAppendInvalidNarrowString(const CHAR* spec, int begin, int end,
67
+ CanonOutput* output) {
68
+ for (int i = begin; i < end; i++) {
69
+ UCHAR uch = static_cast<UCHAR>(spec[i]);
70
+ if (uch >= 0x80) {
71
+ // Handle UTF-8/16 encodings. This call will correctly handle the error
72
+ // case by appending the invalid character.
73
+ AppendUTF8EscapedChar(spec, &i, end, output);
74
+ } else if (uch <= ' ' || uch == 0x7f) {
75
+ // This function is for error handling, so we escape all control
76
+ // characters and spaces, but not anything else since we lack
77
+ // context to do something more specific.
78
+ AppendEscapedChar(static_cast<unsigned char>(uch), output);
79
+ } else {
80
+ output->push_back(static_cast<char>(uch));
81
+ }
82
+ }
83
+ }
84
+
85
+ // Overrides one component, see the url_canon::Replacements structure for
86
+ // what the various combionations of source pointer and component mean.
87
+ void DoOverrideComponent(const char* override_source,
88
+ const url_parse::Component& override_component,
89
+ const char** dest,
90
+ url_parse::Component* dest_component) {
91
+ if (override_source) {
92
+ *dest = override_source;
93
+ *dest_component = override_component;
94
+ }
95
+ }
96
+
97
+ // Similar to DoOverrideComponent except that it takes a UTF-16 input and does
98
+ // not actually set the output character pointer.
99
+ //
100
+ // The input is converted to UTF-8 at the end of the given buffer as a temporary
101
+ // holding place. The component indentifying the portion of the buffer used in
102
+ // the |utf8_buffer| will be specified in |*dest_component|.
103
+ //
104
+ // This will not actually set any |dest| pointer like DoOverrideComponent
105
+ // does because all of the pointers will point into the |utf8_buffer|, which
106
+ // may get resized while we're overriding a subsequent component. Instead, the
107
+ // caller should use the beginning of the |utf8_buffer| as the string pointer
108
+ // for all components once all overrides have been prepared.
109
+ bool PrepareUTF16OverrideComponent(
110
+ const char16* override_source,
111
+ const url_parse::Component& override_component,
112
+ CanonOutput* utf8_buffer,
113
+ url_parse::Component* dest_component) {
114
+ bool success = true;
115
+ if (override_source) {
116
+ if (!override_component.is_valid()) {
117
+ // Non-"valid" component (means delete), so we need to preserve that.
118
+ *dest_component = url_parse::Component();
119
+ } else {
120
+ // Convert to UTF-8.
121
+ dest_component->begin = utf8_buffer->length();
122
+ success = ConvertUTF16ToUTF8(&override_source[override_component.begin],
123
+ override_component.len, utf8_buffer);
124
+ dest_component->len = utf8_buffer->length() - dest_component->begin;
125
+ }
126
+ }
127
+ return success;
128
+ }
129
+
130
+ } // namespace
131
+
132
+ // See the header file for this array's declaration.
133
+ const unsigned char kSharedCharTypeTable[0x100] = {
134
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0f
135
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1f
136
+ 0, // 0x20 ' ' (escape spaces in queries)
137
+ CHAR_QUERY | CHAR_USERINFO, // 0x21 !
138
+ 0, // 0x22 "
139
+ 0, // 0x23 # (invalid in query since it marks the ref)
140
+ CHAR_QUERY | CHAR_USERINFO, // 0x24 $
141
+ CHAR_QUERY | CHAR_USERINFO, // 0x25 %
142
+ CHAR_QUERY | CHAR_USERINFO, // 0x26 &
143
+ CHAR_QUERY | CHAR_USERINFO, // 0x27 '
144
+ CHAR_QUERY | CHAR_USERINFO, // 0x28 (
145
+ CHAR_QUERY | CHAR_USERINFO, // 0x29 )
146
+ CHAR_QUERY | CHAR_USERINFO, // 0x2a *
147
+ CHAR_QUERY | CHAR_USERINFO, // 0x2b +
148
+ CHAR_QUERY | CHAR_USERINFO, // 0x2c ,
149
+ CHAR_QUERY | CHAR_USERINFO, // 0x2d -
150
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4, // 0x2e .
151
+ CHAR_QUERY, // 0x2f /
152
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x30 0
153
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x31 1
154
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x32 2
155
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x33 3
156
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x34 4
157
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x35 5
158
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x36 6
159
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x37 7
160
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC, // 0x38 8
161
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC, // 0x39 9
162
+ CHAR_QUERY, // 0x3a :
163
+ CHAR_QUERY, // 0x3b ;
164
+ 0, // 0x3c < (Try to prevent certain types of XSS.)
165
+ CHAR_QUERY, // 0x3d =
166
+ 0, // 0x3e > (Try to prevent certain types of XSS.)
167
+ CHAR_QUERY, // 0x3f ?
168
+ CHAR_QUERY, // 0x40 @
169
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x41 A
170
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x42 B
171
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x43 C
172
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x44 D
173
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x45 E
174
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x46 F
175
+ CHAR_QUERY | CHAR_USERINFO, // 0x47 G
176
+ CHAR_QUERY | CHAR_USERINFO, // 0x48 H
177
+ CHAR_QUERY | CHAR_USERINFO, // 0x49 I
178
+ CHAR_QUERY | CHAR_USERINFO, // 0x4a J
179
+ CHAR_QUERY | CHAR_USERINFO, // 0x4b K
180
+ CHAR_QUERY | CHAR_USERINFO, // 0x4c L
181
+ CHAR_QUERY | CHAR_USERINFO, // 0x4d M
182
+ CHAR_QUERY | CHAR_USERINFO, // 0x4e N
183
+ CHAR_QUERY | CHAR_USERINFO, // 0x4f O
184
+ CHAR_QUERY | CHAR_USERINFO, // 0x50 P
185
+ CHAR_QUERY | CHAR_USERINFO, // 0x51 Q
186
+ CHAR_QUERY | CHAR_USERINFO, // 0x52 R
187
+ CHAR_QUERY | CHAR_USERINFO, // 0x53 S
188
+ CHAR_QUERY | CHAR_USERINFO, // 0x54 T
189
+ CHAR_QUERY | CHAR_USERINFO, // 0x55 U
190
+ CHAR_QUERY | CHAR_USERINFO, // 0x56 V
191
+ CHAR_QUERY | CHAR_USERINFO, // 0x57 W
192
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4, // 0x58 X
193
+ CHAR_QUERY | CHAR_USERINFO, // 0x59 Y
194
+ CHAR_QUERY | CHAR_USERINFO, // 0x5a Z
195
+ CHAR_QUERY, // 0x5b [
196
+ CHAR_QUERY, // 0x5c '\'
197
+ CHAR_QUERY, // 0x5d ]
198
+ CHAR_QUERY, // 0x5e ^
199
+ CHAR_QUERY | CHAR_USERINFO, // 0x5f _
200
+ CHAR_QUERY, // 0x60 `
201
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x61 a
202
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x62 b
203
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x63 c
204
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x64 d
205
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x65 e
206
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x66 f
207
+ CHAR_QUERY | CHAR_USERINFO, // 0x67 g
208
+ CHAR_QUERY | CHAR_USERINFO, // 0x68 h
209
+ CHAR_QUERY | CHAR_USERINFO, // 0x69 i
210
+ CHAR_QUERY | CHAR_USERINFO, // 0x6a j
211
+ CHAR_QUERY | CHAR_USERINFO, // 0x6b k
212
+ CHAR_QUERY | CHAR_USERINFO, // 0x6c l
213
+ CHAR_QUERY | CHAR_USERINFO, // 0x6d m
214
+ CHAR_QUERY | CHAR_USERINFO, // 0x6e n
215
+ CHAR_QUERY | CHAR_USERINFO, // 0x6f o
216
+ CHAR_QUERY | CHAR_USERINFO, // 0x70 p
217
+ CHAR_QUERY | CHAR_USERINFO, // 0x71 q
218
+ CHAR_QUERY | CHAR_USERINFO, // 0x72 r
219
+ CHAR_QUERY | CHAR_USERINFO, // 0x73 s
220
+ CHAR_QUERY | CHAR_USERINFO, // 0x74 t
221
+ CHAR_QUERY | CHAR_USERINFO, // 0x75 u
222
+ CHAR_QUERY | CHAR_USERINFO, // 0x76 v
223
+ CHAR_QUERY | CHAR_USERINFO, // 0x77 w
224
+ CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4, // 0x78 x
225
+ CHAR_QUERY | CHAR_USERINFO, // 0x79 y
226
+ CHAR_QUERY | CHAR_USERINFO, // 0x7a z
227
+ CHAR_QUERY, // 0x7b {
228
+ CHAR_QUERY, // 0x7c |
229
+ CHAR_QUERY, // 0x7d }
230
+ CHAR_QUERY | CHAR_USERINFO, // 0x7e ~
231
+ 0, // 0x7f
232
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8f
233
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9f
234
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xa0 - 0xaf
235
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xb0 - 0xbf
236
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xc0 - 0xcf
237
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xd0 - 0xdf
238
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xe0 - 0xef
239
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0 - 0xff
240
+ };
241
+
242
// Maps a value in [0, 15] to its uppercase hexadecimal digit.
const char kHexCharLookup[0x10] = {
  '0', '1', '2', '3',
  '4', '5', '6', '7',
  '8', '9', 'A', 'B',
  'C', 'D', 'E', 'F',
};
246
+
247
// One entry per 32-character band of the 8-bit range. For the bands that
// contain hex digits, the entry is the amount to subtract from a digit in
// that band to obtain its numeric value.
const char kCharToHexLookup[8] = {
  0,         // 0x00 - 0x1f
  '0',       // 0x20 - 0x3f: digits 0 - 9 are 0x30 - 0x39
  'A' - 10,  // 0x40 - 0x5f: letters A - F are 0x41 - 0x46
  'a' - 10,  // 0x60 - 0x7f: letters a - f are 0x61 - 0x66
  0,         // 0x80 - 0x9f
  0,         // 0xa0 - 0xbf
  0,         // 0xc0 - 0xdf
  0,         // 0xe0 - 0xff
};
257
+
258
+ const char16 kUnicodeReplacementCharacter = 0xfffd;
259
+
260
+ void AppendStringOfType(const char* source, int length,
261
+ SharedCharTypes type,
262
+ CanonOutput* output) {
263
+ DoAppendStringOfType<char, unsigned char>(source, length, type, output);
264
+ }
265
+
266
+ void AppendStringOfType(const char16* source, int length,
267
+ SharedCharTypes type,
268
+ CanonOutput* output) {
269
+ DoAppendStringOfType<char16, char16>(source, length, type, output);
270
+ }
271
+
272
+ void AppendInvalidNarrowString(const char* spec, int begin, int end,
273
+ CanonOutput* output) {
274
+ DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output);
275
+ }
276
+
277
+ void AppendInvalidNarrowString(const char16* spec, int begin, int end,
278
+ CanonOutput* output) {
279
+ DoAppendInvalidNarrowString<char16, char16>(spec, begin, end, output);
280
+ }
281
+
282
+ bool ConvertUTF16ToUTF8(const char16* input, int input_len,
283
+ CanonOutput* output) {
284
+ bool success = true;
285
+ for (int i = 0; i < input_len; i++) {
286
+ unsigned code_point;
287
+ success &= ReadUTFChar(input, &i, input_len, &code_point);
288
+ AppendUTF8Value(code_point, output);
289
+ }
290
+ return success;
291
+ }
292
+
293
+ bool ConvertUTF8ToUTF16(const char* input, int input_len,
294
+ CanonOutputT<char16>* output) {
295
+ bool success = true;
296
+ for (int i = 0; i < input_len; i++) {
297
+ unsigned code_point;
298
+ success &= ReadUTFChar(input, &i, input_len, &code_point);
299
+ AppendUTF16Value(code_point, output);
300
+ }
301
+ return success;
302
+ }
303
+
304
+ void SetupOverrideComponents(const char* base,
305
+ const Replacements<char>& repl,
306
+ URLComponentSource<char>* source,
307
+ url_parse::Parsed* parsed) {
308
+ // Get the source and parsed structures of the things we are replacing.
309
+ const URLComponentSource<char>& repl_source = repl.sources();
310
+ const url_parse::Parsed& repl_parsed = repl.components();
311
+
312
+ DoOverrideComponent(repl_source.scheme, repl_parsed.scheme,
313
+ &source->scheme, &parsed->scheme);
314
+ DoOverrideComponent(repl_source.username, repl_parsed.username,
315
+ &source->username, &parsed->username);
316
+ DoOverrideComponent(repl_source.password, repl_parsed.password,
317
+ &source->password, &parsed->password);
318
+
319
+ // Our host should be empty if not present, so override the default setup.
320
+ DoOverrideComponent(repl_source.host, repl_parsed.host,
321
+ &source->host, &parsed->host);
322
+ if (parsed->host.len == -1)
323
+ parsed->host.len = 0;
324
+
325
+ DoOverrideComponent(repl_source.port, repl_parsed.port,
326
+ &source->port, &parsed->port);
327
+ DoOverrideComponent(repl_source.path, repl_parsed.path,
328
+ &source->path, &parsed->path);
329
+ DoOverrideComponent(repl_source.query, repl_parsed.query,
330
+ &source->query, &parsed->query);
331
+ DoOverrideComponent(repl_source.ref, repl_parsed.ref,
332
+ &source->ref, &parsed->ref);
333
+ }
334
+
335
+ bool SetupUTF16OverrideComponents(const char* base,
336
+ const Replacements<char16>& repl,
337
+ CanonOutput* utf8_buffer,
338
+ URLComponentSource<char>* source,
339
+ url_parse::Parsed* parsed) {
340
+ bool success = true;
341
+
342
+ // Get the source and parsed structures of the things we are replacing.
343
+ const URLComponentSource<char16>& repl_source = repl.sources();
344
+ const url_parse::Parsed& repl_parsed = repl.components();
345
+
346
+ success &= PrepareUTF16OverrideComponent(
347
+ repl_source.scheme, repl_parsed.scheme,
348
+ utf8_buffer, &parsed->scheme);
349
+ success &= PrepareUTF16OverrideComponent(
350
+ repl_source.username, repl_parsed.username,
351
+ utf8_buffer, &parsed->username);
352
+ success &= PrepareUTF16OverrideComponent(
353
+ repl_source.password, repl_parsed.password,
354
+ utf8_buffer, &parsed->password);
355
+ success &= PrepareUTF16OverrideComponent(
356
+ repl_source.host, repl_parsed.host,
357
+ utf8_buffer, &parsed->host);
358
+ success &= PrepareUTF16OverrideComponent(
359
+ repl_source.port, repl_parsed.port,
360
+ utf8_buffer, &parsed->port);
361
+ success &= PrepareUTF16OverrideComponent(
362
+ repl_source.path, repl_parsed.path,
363
+ utf8_buffer, &parsed->path);
364
+ success &= PrepareUTF16OverrideComponent(
365
+ repl_source.query, repl_parsed.query,
366
+ utf8_buffer, &parsed->query);
367
+ success &= PrepareUTF16OverrideComponent(
368
+ repl_source.ref, repl_parsed.ref,
369
+ utf8_buffer, &parsed->ref);
370
+
371
+ // PrepareUTF16OverrideComponent will not have set the data pointer since the
372
+ // buffer could be resized, invalidating the pointers. We set the data
373
+ // pointers for affected components now that the buffer is finalized.
374
+ if (repl_source.scheme) source->scheme = utf8_buffer->data();
375
+ if (repl_source.username) source->username = utf8_buffer->data();
376
+ if (repl_source.password) source->password = utf8_buffer->data();
377
+ if (repl_source.host) source->host = utf8_buffer->data();
378
+ if (repl_source.port) source->port = utf8_buffer->data();
379
+ if (repl_source.path) source->path = utf8_buffer->data();
380
+ if (repl_source.query) source->query = utf8_buffer->data();
381
+ if (repl_source.ref) source->ref = utf8_buffer->data();
382
+
383
+ return success;
384
+ }
385
+
386
+ #ifndef WIN32
387
+
388
// Non-Windows stand-in for the CRT's _itoa_s. Formats |value| into |buffer|
// (capacity |size_in_chars|, including the terminating null) in the given
// |radix|. Only radix 10 and 16 are supported; hex output is lowercase.
//
// Returns 0 on success. Returns EINVAL if |radix| is unsupported, |buffer|
// is null, |size_in_chars| is zero, or the text does not fit in the buffer.
int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix) {
  // snprintf must not be handed a null destination with a nonzero size.
  if (!buffer || size_in_chars == 0)
    return EINVAL;

  int written;
  if (radix == 10) {
    written = snprintf(buffer, size_in_chars, "%d", value);
  } else if (radix == 16) {
    // %x expects an unsigned argument, so convert explicitly; negative
    // values print as their unsigned bit pattern.
    written = snprintf(buffer, size_in_chars, "%x",
                       static_cast<unsigned>(value));
  } else {
    return EINVAL;
  }

  // A negative result is an output error; a result that reaches the buffer
  // size means the text was truncated.
  if (written < 0 || static_cast<size_t>(written) >= size_in_chars)
    return EINVAL;
  return 0;
}
404
+
405
+ int _itow_s(int value, char16* buffer, size_t size_in_chars, int radix) {
406
+ if (radix != 10)
407
+ return EINVAL;
408
+
409
+ // No more than 12 characters will be required for a 32-bit integer.
410
+ // Add an extra byte for the terminating null.
411
+ char temp[13];
412
+ int written = snprintf(temp, sizeof(temp), "%d", value);
413
+ if (static_cast<size_t>(written) >= size_in_chars) {
414
+ // Output was truncated, or written was negative.
415
+ return EINVAL;
416
+ }
417
+
418
+ for (int i = 0; i < written; ++i) {
419
+ buffer[i] = static_cast<char16>(temp[i]);
420
+ }
421
+ buffer[written] = '\0';
422
+ return 0;
423
+ }
424
+
425
+ #endif // !WIN32
426
+
427
+ } // namespace url_canon