uri_parser 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +6 -0
- data/.rvmrc +1 -0
- data/Gemfile +6 -0
- data/Rakefile +13 -0
- data/ext/uri_parser/basictypes.h +89 -0
- data/ext/uri_parser/extconf.h +6 -0
- data/ext/uri_parser/extconf.rb +50 -0
- data/ext/uri_parser/logging.h +5 -0
- data/ext/uri_parser/scoped_ptr.h +322 -0
- data/ext/uri_parser/string16.cc +95 -0
- data/ext/uri_parser/string16.h +194 -0
- data/ext/uri_parser/uri_parser.cc +87 -0
- data/ext/uri_parser/url_canon.h +872 -0
- data/ext/uri_parser/url_canon_etc.cc +392 -0
- data/ext/uri_parser/url_canon_fileurl.cc +215 -0
- data/ext/uri_parser/url_canon_host.cc +401 -0
- data/ext/uri_parser/url_canon_icu.cc +207 -0
- data/ext/uri_parser/url_canon_icu.h +63 -0
- data/ext/uri_parser/url_canon_internal.cc +427 -0
- data/ext/uri_parser/url_canon_internal.h +453 -0
- data/ext/uri_parser/url_canon_internal_file.h +157 -0
- data/ext/uri_parser/url_canon_ip.cc +737 -0
- data/ext/uri_parser/url_canon_ip.h +101 -0
- data/ext/uri_parser/url_canon_mailtourl.cc +137 -0
- data/ext/uri_parser/url_canon_path.cc +380 -0
- data/ext/uri_parser/url_canon_pathurl.cc +128 -0
- data/ext/uri_parser/url_canon_query.cc +189 -0
- data/ext/uri_parser/url_canon_relative.cc +572 -0
- data/ext/uri_parser/url_canon_stdstring.h +134 -0
- data/ext/uri_parser/url_canon_stdurl.cc +211 -0
- data/ext/uri_parser/url_common.h +48 -0
- data/ext/uri_parser/url_file.h +108 -0
- data/ext/uri_parser/url_parse.cc +760 -0
- data/ext/uri_parser/url_parse.h +336 -0
- data/ext/uri_parser/url_parse_file.cc +243 -0
- data/ext/uri_parser/url_parse_internal.h +112 -0
- data/ext/uri_parser/url_util.cc +553 -0
- data/ext/uri_parser/url_util.h +222 -0
- data/lib/uri_parser.rb +28 -0
- data/lib/uri_parser/version.rb +3 -0
- data/spec/spec_helper.rb +16 -0
- data/spec/uri_parser_spec.rb +54 -0
- data/uri_parser.gemspec +26 -0
- metadata +117 -0
@@ -0,0 +1,63 @@
|
|
1
|
+
// Copyright 2007, Google Inc.
|
2
|
+
// All rights reserved.
|
3
|
+
//
|
4
|
+
// Redistribution and use in source and binary forms, with or without
|
5
|
+
// modification, are permitted provided that the following conditions are
|
6
|
+
// met:
|
7
|
+
//
|
8
|
+
// * Redistributions of source code must retain the above copyright
|
9
|
+
// notice, this list of conditions and the following disclaimer.
|
10
|
+
// * Redistributions in binary form must reproduce the above
|
11
|
+
// copyright notice, this list of conditions and the following disclaimer
|
12
|
+
// in the documentation and/or other materials provided with the
|
13
|
+
// distribution.
|
14
|
+
// * Neither the name of Google Inc. nor the names of its
|
15
|
+
// contributors may be used to endorse or promote products derived from
|
16
|
+
// this software without specific prior written permission.
|
17
|
+
//
|
18
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
19
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
20
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
21
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
22
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
23
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
24
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
25
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
26
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
27
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
|
30
|
+
// ICU integration functions.
|
31
|
+
|
32
|
+
#ifndef GOOGLEURL_SRC_URL_CANON_ICU_H__
|
33
|
+
#define GOOGLEURL_SRC_URL_CANON_ICU_H__
|
34
|
+
|
35
|
+
#include "url_canon.h"
|
36
|
+
|
37
|
+
typedef struct UConverter UConverter;
|
38
|
+
|
39
|
+
namespace url_canon {
|
40
|
+
|
41
|
+
// An implementation of CharsetConverter that implementations can use to
|
42
|
+
// interface the canonicalizer with ICU's conversion routines.
|
43
|
+
class ICUCharsetConverter : public CharsetConverter {
|
44
|
+
public:
|
45
|
+
// Constructs a converter using an already-existing ICU character set
|
46
|
+
// converter. This converter is NOT owned by this object; the lifetime must
|
47
|
+
// be managed by the creator such that it is alive as long as this is.
|
48
|
+
GURL_API ICUCharsetConverter(UConverter* converter);
|
49
|
+
|
50
|
+
GURL_API virtual ~ICUCharsetConverter() {}
|
51
|
+
|
52
|
+
GURL_API virtual void ConvertFromUTF16(const char16* input,
|
53
|
+
int input_len,
|
54
|
+
CanonOutput* output);
|
55
|
+
|
56
|
+
private:
|
57
|
+
// The ICU converter, not owned by this class.
|
58
|
+
UConverter* converter_;
|
59
|
+
};
|
60
|
+
|
61
|
+
} // namespace url_canon
|
62
|
+
|
63
|
+
#endif // GOOGLEURL_SRC_URL_CANON_ICU_H__
|
@@ -0,0 +1,427 @@
|
|
1
|
+
// Copyright 2007, Google Inc.
|
2
|
+
// All rights reserved.
|
3
|
+
//
|
4
|
+
// Redistribution and use in source and binary forms, with or without
|
5
|
+
// modification, are permitted provided that the following conditions are
|
6
|
+
// met:
|
7
|
+
//
|
8
|
+
// * Redistributions of source code must retain the above copyright
|
9
|
+
// notice, this list of conditions and the following disclaimer.
|
10
|
+
// * Redistributions in binary form must reproduce the above
|
11
|
+
// copyright notice, this list of conditions and the following disclaimer
|
12
|
+
// in the documentation and/or other materials provided with the
|
13
|
+
// distribution.
|
14
|
+
// * Neither the name of Google Inc. nor the names of its
|
15
|
+
// contributors may be used to endorse or promote products derived from
|
16
|
+
// this software without specific prior written permission.
|
17
|
+
//
|
18
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
19
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
20
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
21
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
22
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
23
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
24
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
25
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
26
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
27
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
|
30
|
+
#include <cstdio>
|
31
|
+
#include <errno.h>
|
32
|
+
#include <stdlib.h>
|
33
|
+
#include <string>
|
34
|
+
|
35
|
+
#include "url_canon_internal.h"
|
36
|
+
|
37
|
+
namespace url_canon {
|
38
|
+
|
39
|
+
namespace {
|
40
|
+
|
41
|
+
template<typename CHAR, typename UCHAR>
|
42
|
+
void DoAppendStringOfType(const CHAR* source, int length,
|
43
|
+
SharedCharTypes type,
|
44
|
+
CanonOutput* output) {
|
45
|
+
for (int i = 0; i < length; i++) {
|
46
|
+
if (static_cast<UCHAR>(source[i]) >= 0x80) {
|
47
|
+
// ReadChar will fill the code point with kUnicodeReplacementCharacter
|
48
|
+
// when the input is invalid, which is what we want.
|
49
|
+
unsigned code_point;
|
50
|
+
ReadUTFChar(source, &i, length, &code_point);
|
51
|
+
AppendUTF8EscapedValue(code_point, output);
|
52
|
+
} else {
|
53
|
+
// Just append the 7-bit character, possibly escaping it.
|
54
|
+
unsigned char uch = static_cast<unsigned char>(source[i]);
|
55
|
+
if (!IsCharOfType(uch, type))
|
56
|
+
AppendEscapedChar(uch, output);
|
57
|
+
else
|
58
|
+
output->push_back(uch);
|
59
|
+
}
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
// This function assumes the input values are all contained in 8-bit,
|
64
|
+
// although it allows any type. Returns true if input is valid, false if not.
|
65
|
+
template<typename CHAR, typename UCHAR>
|
66
|
+
void DoAppendInvalidNarrowString(const CHAR* spec, int begin, int end,
|
67
|
+
CanonOutput* output) {
|
68
|
+
for (int i = begin; i < end; i++) {
|
69
|
+
UCHAR uch = static_cast<UCHAR>(spec[i]);
|
70
|
+
if (uch >= 0x80) {
|
71
|
+
// Handle UTF-8/16 encodings. This call will correctly handle the error
|
72
|
+
// case by appending the invalid character.
|
73
|
+
AppendUTF8EscapedChar(spec, &i, end, output);
|
74
|
+
} else if (uch <= ' ' || uch == 0x7f) {
|
75
|
+
// This function is for error handling, so we escape all control
|
76
|
+
// characters and spaces, but not anything else since we lack
|
77
|
+
// context to do something more specific.
|
78
|
+
AppendEscapedChar(static_cast<unsigned char>(uch), output);
|
79
|
+
} else {
|
80
|
+
output->push_back(static_cast<char>(uch));
|
81
|
+
}
|
82
|
+
}
|
83
|
+
}
|
84
|
+
|
85
|
+
// Overrides one component, see the url_canon::Replacements structure for
|
86
|
+
// what the various combionations of source pointer and component mean.
|
87
|
+
void DoOverrideComponent(const char* override_source,
|
88
|
+
const url_parse::Component& override_component,
|
89
|
+
const char** dest,
|
90
|
+
url_parse::Component* dest_component) {
|
91
|
+
if (override_source) {
|
92
|
+
*dest = override_source;
|
93
|
+
*dest_component = override_component;
|
94
|
+
}
|
95
|
+
}
|
96
|
+
|
97
|
+
// Similar to DoOverrideComponent except that it takes a UTF-16 input and does
|
98
|
+
// not actually set the output character pointer.
|
99
|
+
//
|
100
|
+
// The input is converted to UTF-8 at the end of the given buffer as a temporary
|
101
|
+
// holding place. The component indentifying the portion of the buffer used in
|
102
|
+
// the |utf8_buffer| will be specified in |*dest_component|.
|
103
|
+
//
|
104
|
+
// This will not actually set any |dest| pointer like DoOverrideComponent
|
105
|
+
// does because all of the pointers will point into the |utf8_buffer|, which
|
106
|
+
// may get resized while we're overriding a subsequent component. Instead, the
|
107
|
+
// caller should use the beginning of the |utf8_buffer| as the string pointer
|
108
|
+
// for all components once all overrides have been prepared.
|
109
|
+
bool PrepareUTF16OverrideComponent(
|
110
|
+
const char16* override_source,
|
111
|
+
const url_parse::Component& override_component,
|
112
|
+
CanonOutput* utf8_buffer,
|
113
|
+
url_parse::Component* dest_component) {
|
114
|
+
bool success = true;
|
115
|
+
if (override_source) {
|
116
|
+
if (!override_component.is_valid()) {
|
117
|
+
// Non-"valid" component (means delete), so we need to preserve that.
|
118
|
+
*dest_component = url_parse::Component();
|
119
|
+
} else {
|
120
|
+
// Convert to UTF-8.
|
121
|
+
dest_component->begin = utf8_buffer->length();
|
122
|
+
success = ConvertUTF16ToUTF8(&override_source[override_component.begin],
|
123
|
+
override_component.len, utf8_buffer);
|
124
|
+
dest_component->len = utf8_buffer->length() - dest_component->begin;
|
125
|
+
}
|
126
|
+
}
|
127
|
+
return success;
|
128
|
+
}
|
129
|
+
|
130
|
+
} // namespace
|
131
|
+
|
132
|
+
// See the header file for this array's declaration.
|
133
|
+
const unsigned char kSharedCharTypeTable[0x100] = {
|
134
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0f
|
135
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1f
|
136
|
+
0, // 0x20 ' ' (escape spaces in queries)
|
137
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x21 !
|
138
|
+
0, // 0x22 "
|
139
|
+
0, // 0x23 # (invalid in query since it marks the ref)
|
140
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x24 $
|
141
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x25 %
|
142
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x26 &
|
143
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x27 '
|
144
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x28 (
|
145
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x29 )
|
146
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x2a *
|
147
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x2b +
|
148
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x2c ,
|
149
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x2d -
|
150
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4, // 0x2e .
|
151
|
+
CHAR_QUERY, // 0x2f /
|
152
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x30 0
|
153
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x31 1
|
154
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x32 2
|
155
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x33 3
|
156
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x34 4
|
157
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x35 5
|
158
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x36 6
|
159
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT, // 0x37 7
|
160
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC, // 0x38 8
|
161
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC, // 0x39 9
|
162
|
+
CHAR_QUERY, // 0x3a :
|
163
|
+
CHAR_QUERY, // 0x3b ;
|
164
|
+
0, // 0x3c < (Try to prevent certain types of XSS.)
|
165
|
+
CHAR_QUERY, // 0x3d =
|
166
|
+
0, // 0x3e > (Try to prevent certain types of XSS.)
|
167
|
+
CHAR_QUERY, // 0x3f ?
|
168
|
+
CHAR_QUERY, // 0x40 @
|
169
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x41 A
|
170
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x42 B
|
171
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x43 C
|
172
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x44 D
|
173
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x45 E
|
174
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x46 F
|
175
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x47 G
|
176
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x48 H
|
177
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x49 I
|
178
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x4a J
|
179
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x4b K
|
180
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x4c L
|
181
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x4d M
|
182
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x4e N
|
183
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x4f O
|
184
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x50 P
|
185
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x51 Q
|
186
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x52 R
|
187
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x53 S
|
188
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x54 T
|
189
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x55 U
|
190
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x56 V
|
191
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x57 W
|
192
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4, // 0x58 X
|
193
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x59 Y
|
194
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x5a Z
|
195
|
+
CHAR_QUERY, // 0x5b [
|
196
|
+
CHAR_QUERY, // 0x5c '\'
|
197
|
+
CHAR_QUERY, // 0x5d ]
|
198
|
+
CHAR_QUERY, // 0x5e ^
|
199
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x5f _
|
200
|
+
CHAR_QUERY, // 0x60 `
|
201
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x61 a
|
202
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x62 b
|
203
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x63 c
|
204
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x64 d
|
205
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x65 e
|
206
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX, // 0x66 f
|
207
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x67 g
|
208
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x68 h
|
209
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x69 i
|
210
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x6a j
|
211
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x6b k
|
212
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x6c l
|
213
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x6d m
|
214
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x6e n
|
215
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x6f o
|
216
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x70 p
|
217
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x71 q
|
218
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x72 r
|
219
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x73 s
|
220
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x74 t
|
221
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x75 u
|
222
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x76 v
|
223
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x77 w
|
224
|
+
CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4, // 0x78 x
|
225
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x79 y
|
226
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x7a z
|
227
|
+
CHAR_QUERY, // 0x7b {
|
228
|
+
CHAR_QUERY, // 0x7c |
|
229
|
+
CHAR_QUERY, // 0x7d }
|
230
|
+
CHAR_QUERY | CHAR_USERINFO, // 0x7e ~
|
231
|
+
0, // 0x7f
|
232
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80 - 0x8f
|
233
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90 - 0x9f
|
234
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xa0 - 0xaf
|
235
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xb0 - 0xbf
|
236
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xc0 - 0xcf
|
237
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xd0 - 0xdf
|
238
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xe0 - 0xef
|
239
|
+
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0 - 0xff
|
240
|
+
};
|
241
|
+
|
242
|
+
const char kHexCharLookup[0x10] = {
|
243
|
+
'0', '1', '2', '3', '4', '5', '6', '7',
|
244
|
+
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
|
245
|
+
};
|
246
|
+
|
247
|
+
const char kCharToHexLookup[8] = {
|
248
|
+
0, // 0x00 - 0x1f
|
249
|
+
'0', // 0x20 - 0x3f: digits 0 - 9 are 0x30 - 0x39
|
250
|
+
'A' - 10, // 0x40 - 0x5f: letters A - F are 0x41 - 0x46
|
251
|
+
'a' - 10, // 0x60 - 0x7f: letters a - f are 0x61 - 0x66
|
252
|
+
0, // 0x80 - 0x9F
|
253
|
+
0, // 0xA0 - 0xBF
|
254
|
+
0, // 0xC0 - 0xDF
|
255
|
+
0, // 0xE0 - 0xFF
|
256
|
+
};
|
257
|
+
|
258
|
+
const char16 kUnicodeReplacementCharacter = 0xfffd;
|
259
|
+
|
260
|
+
void AppendStringOfType(const char* source, int length,
|
261
|
+
SharedCharTypes type,
|
262
|
+
CanonOutput* output) {
|
263
|
+
DoAppendStringOfType<char, unsigned char>(source, length, type, output);
|
264
|
+
}
|
265
|
+
|
266
|
+
void AppendStringOfType(const char16* source, int length,
|
267
|
+
SharedCharTypes type,
|
268
|
+
CanonOutput* output) {
|
269
|
+
DoAppendStringOfType<char16, char16>(source, length, type, output);
|
270
|
+
}
|
271
|
+
|
272
|
+
void AppendInvalidNarrowString(const char* spec, int begin, int end,
|
273
|
+
CanonOutput* output) {
|
274
|
+
DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output);
|
275
|
+
}
|
276
|
+
|
277
|
+
void AppendInvalidNarrowString(const char16* spec, int begin, int end,
|
278
|
+
CanonOutput* output) {
|
279
|
+
DoAppendInvalidNarrowString<char16, char16>(spec, begin, end, output);
|
280
|
+
}
|
281
|
+
|
282
|
+
bool ConvertUTF16ToUTF8(const char16* input, int input_len,
|
283
|
+
CanonOutput* output) {
|
284
|
+
bool success = true;
|
285
|
+
for (int i = 0; i < input_len; i++) {
|
286
|
+
unsigned code_point;
|
287
|
+
success &= ReadUTFChar(input, &i, input_len, &code_point);
|
288
|
+
AppendUTF8Value(code_point, output);
|
289
|
+
}
|
290
|
+
return success;
|
291
|
+
}
|
292
|
+
|
293
|
+
bool ConvertUTF8ToUTF16(const char* input, int input_len,
|
294
|
+
CanonOutputT<char16>* output) {
|
295
|
+
bool success = true;
|
296
|
+
for (int i = 0; i < input_len; i++) {
|
297
|
+
unsigned code_point;
|
298
|
+
success &= ReadUTFChar(input, &i, input_len, &code_point);
|
299
|
+
AppendUTF16Value(code_point, output);
|
300
|
+
}
|
301
|
+
return success;
|
302
|
+
}
|
303
|
+
|
304
|
+
void SetupOverrideComponents(const char* base,
|
305
|
+
const Replacements<char>& repl,
|
306
|
+
URLComponentSource<char>* source,
|
307
|
+
url_parse::Parsed* parsed) {
|
308
|
+
// Get the source and parsed structures of the things we are replacing.
|
309
|
+
const URLComponentSource<char>& repl_source = repl.sources();
|
310
|
+
const url_parse::Parsed& repl_parsed = repl.components();
|
311
|
+
|
312
|
+
DoOverrideComponent(repl_source.scheme, repl_parsed.scheme,
|
313
|
+
&source->scheme, &parsed->scheme);
|
314
|
+
DoOverrideComponent(repl_source.username, repl_parsed.username,
|
315
|
+
&source->username, &parsed->username);
|
316
|
+
DoOverrideComponent(repl_source.password, repl_parsed.password,
|
317
|
+
&source->password, &parsed->password);
|
318
|
+
|
319
|
+
// Our host should be empty if not present, so override the default setup.
|
320
|
+
DoOverrideComponent(repl_source.host, repl_parsed.host,
|
321
|
+
&source->host, &parsed->host);
|
322
|
+
if (parsed->host.len == -1)
|
323
|
+
parsed->host.len = 0;
|
324
|
+
|
325
|
+
DoOverrideComponent(repl_source.port, repl_parsed.port,
|
326
|
+
&source->port, &parsed->port);
|
327
|
+
DoOverrideComponent(repl_source.path, repl_parsed.path,
|
328
|
+
&source->path, &parsed->path);
|
329
|
+
DoOverrideComponent(repl_source.query, repl_parsed.query,
|
330
|
+
&source->query, &parsed->query);
|
331
|
+
DoOverrideComponent(repl_source.ref, repl_parsed.ref,
|
332
|
+
&source->ref, &parsed->ref);
|
333
|
+
}
|
334
|
+
|
335
|
+
bool SetupUTF16OverrideComponents(const char* base,
|
336
|
+
const Replacements<char16>& repl,
|
337
|
+
CanonOutput* utf8_buffer,
|
338
|
+
URLComponentSource<char>* source,
|
339
|
+
url_parse::Parsed* parsed) {
|
340
|
+
bool success = true;
|
341
|
+
|
342
|
+
// Get the source and parsed structures of the things we are replacing.
|
343
|
+
const URLComponentSource<char16>& repl_source = repl.sources();
|
344
|
+
const url_parse::Parsed& repl_parsed = repl.components();
|
345
|
+
|
346
|
+
success &= PrepareUTF16OverrideComponent(
|
347
|
+
repl_source.scheme, repl_parsed.scheme,
|
348
|
+
utf8_buffer, &parsed->scheme);
|
349
|
+
success &= PrepareUTF16OverrideComponent(
|
350
|
+
repl_source.username, repl_parsed.username,
|
351
|
+
utf8_buffer, &parsed->username);
|
352
|
+
success &= PrepareUTF16OverrideComponent(
|
353
|
+
repl_source.password, repl_parsed.password,
|
354
|
+
utf8_buffer, &parsed->password);
|
355
|
+
success &= PrepareUTF16OverrideComponent(
|
356
|
+
repl_source.host, repl_parsed.host,
|
357
|
+
utf8_buffer, &parsed->host);
|
358
|
+
success &= PrepareUTF16OverrideComponent(
|
359
|
+
repl_source.port, repl_parsed.port,
|
360
|
+
utf8_buffer, &parsed->port);
|
361
|
+
success &= PrepareUTF16OverrideComponent(
|
362
|
+
repl_source.path, repl_parsed.path,
|
363
|
+
utf8_buffer, &parsed->path);
|
364
|
+
success &= PrepareUTF16OverrideComponent(
|
365
|
+
repl_source.query, repl_parsed.query,
|
366
|
+
utf8_buffer, &parsed->query);
|
367
|
+
success &= PrepareUTF16OverrideComponent(
|
368
|
+
repl_source.ref, repl_parsed.ref,
|
369
|
+
utf8_buffer, &parsed->ref);
|
370
|
+
|
371
|
+
// PrepareUTF16OverrideComponent will not have set the data pointer since the
|
372
|
+
// buffer could be resized, invalidating the pointers. We set the data
|
373
|
+
// pointers for affected components now that the buffer is finalized.
|
374
|
+
if (repl_source.scheme) source->scheme = utf8_buffer->data();
|
375
|
+
if (repl_source.username) source->username = utf8_buffer->data();
|
376
|
+
if (repl_source.password) source->password = utf8_buffer->data();
|
377
|
+
if (repl_source.host) source->host = utf8_buffer->data();
|
378
|
+
if (repl_source.port) source->port = utf8_buffer->data();
|
379
|
+
if (repl_source.path) source->path = utf8_buffer->data();
|
380
|
+
if (repl_source.query) source->query = utf8_buffer->data();
|
381
|
+
if (repl_source.ref) source->ref = utf8_buffer->data();
|
382
|
+
|
383
|
+
return success;
|
384
|
+
}
|
385
|
+
|
386
|
+
#ifndef WIN32
|
387
|
+
|
388
|
+
// Portable stand-in for the Windows CRT _itoa_s(), built on snprintf().
//
// Writes |value| to |buffer| as a NUL-terminated string in base |radix|.
// Only radix 10 ("%d") and radix 16 ("%x", lower-case, two's-complement for
// negative values) are supported.
//
// Returns 0 on success. Returns EINVAL when |buffer| is NULL, when
// |size_in_chars| is 0, when |radix| is unsupported, or when the text plus
// its terminator does not fit in |size_in_chars| characters.
int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix) {
  // snprintf() must not be given a NULL destination with a non-zero size,
  // and a zero-sized destination can never hold the terminator.
  if (buffer == NULL || size_in_chars == 0)
    return EINVAL;

  int written;
  if (radix == 10) {
    written = snprintf(buffer, size_in_chars, "%d", value);
  } else if (radix == 16) {
    // "%x" expects an unsigned argument; convert explicitly.
    written = snprintf(buffer, size_in_chars, "%x",
                       static_cast<unsigned int>(value));
  } else {
    return EINVAL;
  }

  // A negative result is an encoding error; a result >= the buffer size
  // means the output was truncated.
  if (written < 0 || static_cast<size_t>(written) >= size_in_chars)
    return EINVAL;
  return 0;
}
|
404
|
+
|
405
|
+
int _itow_s(int value, char16* buffer, size_t size_in_chars, int radix) {
|
406
|
+
if (radix != 10)
|
407
|
+
return EINVAL;
|
408
|
+
|
409
|
+
// No more than 12 characters will be required for a 32-bit integer.
|
410
|
+
// Add an extra byte for the terminating null.
|
411
|
+
char temp[13];
|
412
|
+
int written = snprintf(temp, sizeof(temp), "%d", value);
|
413
|
+
if (static_cast<size_t>(written) >= size_in_chars) {
|
414
|
+
// Output was truncated, or written was negative.
|
415
|
+
return EINVAL;
|
416
|
+
}
|
417
|
+
|
418
|
+
for (int i = 0; i < written; ++i) {
|
419
|
+
buffer[i] = static_cast<char16>(temp[i]);
|
420
|
+
}
|
421
|
+
buffer[written] = '\0';
|
422
|
+
return 0;
|
423
|
+
}
|
424
|
+
|
425
|
+
#endif // !WIN32
|
426
|
+
|
427
|
+
} // namespace url_canon
|