uri_parser 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +6 -0
- data/.rvmrc +1 -0
- data/Gemfile +6 -0
- data/Rakefile +13 -0
- data/ext/uri_parser/basictypes.h +89 -0
- data/ext/uri_parser/extconf.h +6 -0
- data/ext/uri_parser/extconf.rb +50 -0
- data/ext/uri_parser/logging.h +5 -0
- data/ext/uri_parser/scoped_ptr.h +322 -0
- data/ext/uri_parser/string16.cc +95 -0
- data/ext/uri_parser/string16.h +194 -0
- data/ext/uri_parser/uri_parser.cc +87 -0
- data/ext/uri_parser/url_canon.h +872 -0
- data/ext/uri_parser/url_canon_etc.cc +392 -0
- data/ext/uri_parser/url_canon_fileurl.cc +215 -0
- data/ext/uri_parser/url_canon_host.cc +401 -0
- data/ext/uri_parser/url_canon_icu.cc +207 -0
- data/ext/uri_parser/url_canon_icu.h +63 -0
- data/ext/uri_parser/url_canon_internal.cc +427 -0
- data/ext/uri_parser/url_canon_internal.h +453 -0
- data/ext/uri_parser/url_canon_internal_file.h +157 -0
- data/ext/uri_parser/url_canon_ip.cc +737 -0
- data/ext/uri_parser/url_canon_ip.h +101 -0
- data/ext/uri_parser/url_canon_mailtourl.cc +137 -0
- data/ext/uri_parser/url_canon_path.cc +380 -0
- data/ext/uri_parser/url_canon_pathurl.cc +128 -0
- data/ext/uri_parser/url_canon_query.cc +189 -0
- data/ext/uri_parser/url_canon_relative.cc +572 -0
- data/ext/uri_parser/url_canon_stdstring.h +134 -0
- data/ext/uri_parser/url_canon_stdurl.cc +211 -0
- data/ext/uri_parser/url_common.h +48 -0
- data/ext/uri_parser/url_file.h +108 -0
- data/ext/uri_parser/url_parse.cc +760 -0
- data/ext/uri_parser/url_parse.h +336 -0
- data/ext/uri_parser/url_parse_file.cc +243 -0
- data/ext/uri_parser/url_parse_internal.h +112 -0
- data/ext/uri_parser/url_util.cc +553 -0
- data/ext/uri_parser/url_util.h +222 -0
- data/lib/uri_parser.rb +28 -0
- data/lib/uri_parser/version.rb +3 -0
- data/spec/spec_helper.rb +16 -0
- data/spec/uri_parser_spec.rb +54 -0
- data/uri_parser.gemspec +26 -0
- metadata +117 -0
@@ -0,0 +1,63 @@
|
|
1
|
+
// Copyright 2007, Google Inc.
|
2
|
+
// All rights reserved.
|
3
|
+
//
|
4
|
+
// Redistribution and use in source and binary forms, with or without
|
5
|
+
// modification, are permitted provided that the following conditions are
|
6
|
+
// met:
|
7
|
+
//
|
8
|
+
// * Redistributions of source code must retain the above copyright
|
9
|
+
// notice, this list of conditions and the following disclaimer.
|
10
|
+
// * Redistributions in binary form must reproduce the above
|
11
|
+
// copyright notice, this list of conditions and the following disclaimer
|
12
|
+
// in the documentation and/or other materials provided with the
|
13
|
+
// distribution.
|
14
|
+
// * Neither the name of Google Inc. nor the names of its
|
15
|
+
// contributors may be used to endorse or promote products derived from
|
16
|
+
// this software without specific prior written permission.
|
17
|
+
//
|
18
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
19
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
20
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
21
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
22
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
23
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
24
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
25
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
26
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
27
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
|
30
|
+
// ICU integration functions.
|
31
|
+
|
32
|
+
#ifndef GOOGLEURL_SRC_URL_CANON_ICU_H__
|
33
|
+
#define GOOGLEURL_SRC_URL_CANON_ICU_H__
|
34
|
+
|
35
|
+
#include "url_canon.h"
|
36
|
+
|
37
|
+
typedef struct UConverter UConverter;
|
38
|
+
|
39
|
+
namespace url_canon {
|
40
|
+
|
41
|
+
// An implementation of CharsetConverter that implementations can use to
|
42
|
+
// interface the canonicalizer with ICU's conversion routines.
|
43
|
+
class ICUCharsetConverter : public CharsetConverter {
|
44
|
+
public:
|
45
|
+
// Constructs a converter using an already-existing ICU character set
|
46
|
+
// converter. This converter is NOT owned by this object; the lifetime must
|
47
|
+
// be managed by the creator such that it is alive as long as this is.
|
48
|
+
GURL_API ICUCharsetConverter(UConverter* converter);
|
49
|
+
|
50
|
+
GURL_API virtual ~ICUCharsetConverter() {}
|
51
|
+
|
52
|
+
GURL_API virtual void ConvertFromUTF16(const char16* input,
|
53
|
+
int input_len,
|
54
|
+
CanonOutput* output);
|
55
|
+
|
56
|
+
private:
|
57
|
+
// The ICU converter, not owned by this class.
|
58
|
+
UConverter* converter_;
|
59
|
+
};
|
60
|
+
|
61
|
+
} // namespace url_canon
|
62
|
+
|
63
|
+
#endif // GOOGLEURL_SRC_URL_CANON_ICU_H__
|
@@ -0,0 +1,427 @@
|
|
1
|
+
// Copyright 2007, Google Inc.
|
2
|
+
// All rights reserved.
|
3
|
+
//
|
4
|
+
// Redistribution and use in source and binary forms, with or without
|
5
|
+
// modification, are permitted provided that the following conditions are
|
6
|
+
// met:
|
7
|
+
//
|
8
|
+
// * Redistributions of source code must retain the above copyright
|
9
|
+
// notice, this list of conditions and the following disclaimer.
|
10
|
+
// * Redistributions in binary form must reproduce the above
|
11
|
+
// copyright notice, this list of conditions and the following disclaimer
|
12
|
+
// in the documentation and/or other materials provided with the
|
13
|
+
// distribution.
|
14
|
+
// * Neither the name of Google Inc. nor the names of its
|
15
|
+
// contributors may be used to endorse or promote products derived from
|
16
|
+
// this software without specific prior written permission.
|
17
|
+
//
|
18
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
19
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
20
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
21
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
22
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
23
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
24
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
25
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
26
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
27
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
|
30
|
+
#include <cstdio>
|
31
|
+
#include <errno.h>
|
32
|
+
#include <stdlib.h>
|
33
|
+
#include <string>
|
34
|
+
|
35
|
+
#include "url_canon_internal.h"
|
36
|
+
|
37
|
+
namespace url_canon {
|
38
|
+
|
39
|
+
namespace {
|
40
|
+
|
41
|
+
template<typename CHAR, typename UCHAR>
|
42
|
+
void DoAppendStringOfType(const CHAR* source, int length,
|
43
|
+
SharedCharTypes type,
|
44
|
+
CanonOutput* output) {
|
45
|
+
for (int i = 0; i < length; i++) {
|
46
|
+
if (static_cast<UCHAR>(source[i]) >= 0x80) {
|
47
|
+
// ReadChar will fill the code point with kUnicodeReplacementCharacter
|
48
|
+
// when the input is invalid, which is what we want.
|
49
|
+
unsigned code_point;
|
50
|
+
ReadUTFChar(source, &i, length, &code_point);
|
51
|
+
AppendUTF8EscapedValue(code_point, output);
|
52
|
+
} else {
|
53
|
+
// Just append the 7-bit character, possibly escaping it.
|
54
|
+
unsigned char uch = static_cast<unsigned char>(source[i]);
|
55
|
+
if (!IsCharOfType(uch, type))
|
56
|
+
AppendEscapedChar(uch, output);
|
57
|
+
else
|
58
|
+
output->push_back(uch);
|
59
|
+
}
|
60
|
+
}
|
61
|
+
}
|
62
|
+
|
63
|
+
// This function assumes the input values are all contained in 8-bit,
|
64
|
+
// although it allows any type. Returns true if input is valid, false if not.
|
65
|
+
template<typename CHAR, typename UCHAR>
|
66
|
+
void DoAppendInvalidNarrowString(const CHAR* spec, int begin, int end,
|
67
|
+
CanonOutput* output) {
|
68
|
+
for (int i = begin; i < end; i++) {
|
69
|
+
UCHAR uch = static_cast<UCHAR>(spec[i]);
|
70
|
+
if (uch >= 0x80) {
|
71
|
+
// Handle UTF-8/16 encodings. This call will correctly handle the error
|
72
|
+
// case by appending the invalid character.
|
73
|
+
AppendUTF8EscapedChar(spec, &i, end, output);
|
74
|
+
} else if (uch <= ' ' || uch == 0x7f) {
|
75
|
+
// This function is for error handling, so we escape all control
|
76
|
+
// characters and spaces, but not anything else since we lack
|
77
|
+
// context to do something more specific.
|
78
|
+
AppendEscapedChar(static_cast<unsigned char>(uch), output);
|
79
|
+
} else {
|
80
|
+
output->push_back(static_cast<char>(uch));
|
81
|
+
}
|
82
|
+
}
|
83
|
+
}
|
84
|
+
|
85
|
+
// Overrides one component, see the url_canon::Replacements structure for
|
86
|
+
// what the various combionations of source pointer and component mean.
|
87
|
+
void DoOverrideComponent(const char* override_source,
|
88
|
+
const url_parse::Component& override_component,
|
89
|
+
const char** dest,
|
90
|
+
url_parse::Component* dest_component) {
|
91
|
+
if (override_source) {
|
92
|
+
*dest = override_source;
|
93
|
+
*dest_component = override_component;
|
94
|
+
}
|
95
|
+
}
|
96
|
+
|
97
|
+
// Similar to DoOverrideComponent except that it takes a UTF-16 input and does
|
98
|
+
// not actually set the output character pointer.
|
99
|
+
//
|
100
|
+
// The input is converted to UTF-8 at the end of the given buffer as a temporary
|
101
|
+
// holding place. The component indentifying the portion of the buffer used in
|
102
|
+
// the |utf8_buffer| will be specified in |*dest_component|.
|
103
|
+
//
|
104
|
+
// This will not actually set any |dest| pointer like DoOverrideComponent
|
105
|
+
// does because all of the pointers will point into the |utf8_buffer|, which
|
106
|
+
// may get resized while we're overriding a subsequent component. Instead, the
|
107
|
+
// caller should use the beginning of the |utf8_buffer| as the string pointer
|
108
|
+
// for all components once all overrides have been prepared.
|
109
|
+
bool PrepareUTF16OverrideComponent(
|
110
|
+
const char16* override_source,
|
111
|
+
const url_parse::Component& override_component,
|
112
|
+
CanonOutput* utf8_buffer,
|
113
|
+
url_parse::Component* dest_component) {
|
114
|
+
bool success = true;
|
115
|
+
if (override_source) {
|
116
|
+
if (!override_component.is_valid()) {
|
117
|
+
// Non-"valid" component (means delete), so we need to preserve that.
|
118
|
+
*dest_component = url_parse::Component();
|
119
|
+
} else {
|
120
|
+
// Convert to UTF-8.
|
121
|
+
dest_component->begin = utf8_buffer->length();
|
122
|
+
success = ConvertUTF16ToUTF8(&override_source[override_component.begin],
|
123
|
+
override_component.len, utf8_buffer);
|
124
|
+
dest_component->len = utf8_buffer->length() - dest_component->begin;
|
125
|
+
}
|
126
|
+
}
|
127
|
+
return success;
|
128
|
+
}
|
129
|
+
|
130
|
+
} // namespace
|
131
|
+
|
132
|
+
// See the header file for this array's declaration.
|
133
|
+
// One entry per 8-bit character value; each entry is the OR of the CHAR_*
// flags that apply to that character (0 means no flags).
const unsigned char kSharedCharTypeTable[0x100] = {
  // ---- 0x00 - 0x1f: no flags ----
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

  // ---- 0x20 - 0x2f: space and punctuation ----
  0,                                        // 0x20 ' ' (spaces are escaped in queries)
  CHAR_QUERY | CHAR_USERINFO,               // 0x21 !
  0,                                        // 0x22 "
  0,                                        // 0x23 # (marks the ref, so invalid in a query)
  CHAR_QUERY | CHAR_USERINFO,               // 0x24 $
  CHAR_QUERY | CHAR_USERINFO,               // 0x25 %
  CHAR_QUERY | CHAR_USERINFO,               // 0x26 &
  CHAR_QUERY | CHAR_USERINFO,               // 0x27 '
  CHAR_QUERY | CHAR_USERINFO,               // 0x28 (
  CHAR_QUERY | CHAR_USERINFO,               // 0x29 )
  CHAR_QUERY | CHAR_USERINFO,               // 0x2a *
  CHAR_QUERY | CHAR_USERINFO,               // 0x2b +
  CHAR_QUERY | CHAR_USERINFO,               // 0x2c ,
  CHAR_QUERY | CHAR_USERINFO,               // 0x2d -
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4,   // 0x2e .
  CHAR_QUERY,                               // 0x2f /

  // ---- 0x30 - 0x39: digits (8 and 9 are not octal) ----
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT,  // 0x30 0
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT,  // 0x31 1
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT,  // 0x32 2
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT,  // 0x33 3
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT,  // 0x34 4
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT,  // 0x35 5
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT,  // 0x36 6
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC | CHAR_OCT,  // 0x37 7
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC,             // 0x38 8
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX | CHAR_DEC,             // 0x39 9

  // ---- 0x3a - 0x40: punctuation ----
  CHAR_QUERY,                               // 0x3a :
  CHAR_QUERY,                               // 0x3b ;
  0,                                        // 0x3c < (try to prevent certain types of XSS)
  CHAR_QUERY,                               // 0x3d =
  0,                                        // 0x3e > (try to prevent certain types of XSS)
  CHAR_QUERY,                               // 0x3f ?
  CHAR_QUERY,                               // 0x40 @

  // ---- 0x41 - 0x5a: upper-case letters ----
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x41 A
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x42 B
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x43 C
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x44 D
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x45 E
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x46 F
  CHAR_QUERY | CHAR_USERINFO,               // 0x47 G
  CHAR_QUERY | CHAR_USERINFO,               // 0x48 H
  CHAR_QUERY | CHAR_USERINFO,               // 0x49 I
  CHAR_QUERY | CHAR_USERINFO,               // 0x4a J
  CHAR_QUERY | CHAR_USERINFO,               // 0x4b K
  CHAR_QUERY | CHAR_USERINFO,               // 0x4c L
  CHAR_QUERY | CHAR_USERINFO,               // 0x4d M
  CHAR_QUERY | CHAR_USERINFO,               // 0x4e N
  CHAR_QUERY | CHAR_USERINFO,               // 0x4f O
  CHAR_QUERY | CHAR_USERINFO,               // 0x50 P
  CHAR_QUERY | CHAR_USERINFO,               // 0x51 Q
  CHAR_QUERY | CHAR_USERINFO,               // 0x52 R
  CHAR_QUERY | CHAR_USERINFO,               // 0x53 S
  CHAR_QUERY | CHAR_USERINFO,               // 0x54 T
  CHAR_QUERY | CHAR_USERINFO,               // 0x55 U
  CHAR_QUERY | CHAR_USERINFO,               // 0x56 V
  CHAR_QUERY | CHAR_USERINFO,               // 0x57 W
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4,   // 0x58 X
  CHAR_QUERY | CHAR_USERINFO,               // 0x59 Y
  CHAR_QUERY | CHAR_USERINFO,               // 0x5a Z

  // ---- 0x5b - 0x60: punctuation ----
  CHAR_QUERY,                               // 0x5b [
  CHAR_QUERY,                               // 0x5c '\'
  CHAR_QUERY,                               // 0x5d ]
  CHAR_QUERY,                               // 0x5e ^
  CHAR_QUERY | CHAR_USERINFO,               // 0x5f _
  CHAR_QUERY,                               // 0x60 `

  // ---- 0x61 - 0x7a: lower-case letters ----
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x61 a
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x62 b
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x63 c
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x64 d
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x65 e
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4 | CHAR_HEX,  // 0x66 f
  CHAR_QUERY | CHAR_USERINFO,               // 0x67 g
  CHAR_QUERY | CHAR_USERINFO,               // 0x68 h
  CHAR_QUERY | CHAR_USERINFO,               // 0x69 i
  CHAR_QUERY | CHAR_USERINFO,               // 0x6a j
  CHAR_QUERY | CHAR_USERINFO,               // 0x6b k
  CHAR_QUERY | CHAR_USERINFO,               // 0x6c l
  CHAR_QUERY | CHAR_USERINFO,               // 0x6d m
  CHAR_QUERY | CHAR_USERINFO,               // 0x6e n
  CHAR_QUERY | CHAR_USERINFO,               // 0x6f o
  CHAR_QUERY | CHAR_USERINFO,               // 0x70 p
  CHAR_QUERY | CHAR_USERINFO,               // 0x71 q
  CHAR_QUERY | CHAR_USERINFO,               // 0x72 r
  CHAR_QUERY | CHAR_USERINFO,               // 0x73 s
  CHAR_QUERY | CHAR_USERINFO,               // 0x74 t
  CHAR_QUERY | CHAR_USERINFO,               // 0x75 u
  CHAR_QUERY | CHAR_USERINFO,               // 0x76 v
  CHAR_QUERY | CHAR_USERINFO,               // 0x77 w
  CHAR_QUERY | CHAR_USERINFO | CHAR_IPV4,   // 0x78 x
  CHAR_QUERY | CHAR_USERINFO,               // 0x79 y
  CHAR_QUERY | CHAR_USERINFO,               // 0x7a z

  // ---- 0x7b - 0x7f: punctuation and DEL ----
  CHAR_QUERY,                               // 0x7b {
  CHAR_QUERY,                               // 0x7c |
  CHAR_QUERY,                               // 0x7d }
  CHAR_QUERY | CHAR_USERINFO,               // 0x7e ~
  0,                                        // 0x7f

  // ---- 0x80 - 0xff: no flags ----
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x80 - 0x8f
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0x90 - 0x9f
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xa0 - 0xaf
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xb0 - 0xbf
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xc0 - 0xcf
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xd0 - 0xdf
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xe0 - 0xef
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xf0 - 0xff
};
|
241
|
+
|
242
|
+
// Maps a value 0-15 to its upper-case hexadecimal digit character.
const char kHexCharLookup[0x10] = {
  '0', '1', '2', '3',
  '4', '5', '6', '7',
  '8', '9', 'A', 'B',
  'C', 'D', 'E', 'F',
};
|
246
|
+
|
247
|
+
// One entry per 0x20-wide block of character codes. For the blocks that hold
// hex digits the entry is the amount separating the character code from the
// digit's numeric value (presumably applied as ch - table[ch / 0x20] --
// confirm against the users of this table). All other blocks hold 0.
const char kCharToHexLookup[8] = {
  0,         // 0x00 - 0x1f
  '0',       // 0x20 - 0x3f: '0'..'9' live at 0x30 - 0x39
  'A' - 10,  // 0x40 - 0x5f: 'A'..'F' live at 0x41 - 0x46
  'a' - 10,  // 0x60 - 0x7f: 'a'..'f' live at 0x61 - 0x66
  0, 0, 0, 0,  // 0x80 - 0xff: four blocks with no hex digits
};
|
257
|
+
|
258
|
+
// Replacement character value (0xfffd). Per the comments in this file, the
// UTF reading helper stores it as the code point when the input is invalid.
const char16 kUnicodeReplacementCharacter = 0xfffd;
|
259
|
+
|
260
|
+
void AppendStringOfType(const char* source, int length,
|
261
|
+
SharedCharTypes type,
|
262
|
+
CanonOutput* output) {
|
263
|
+
DoAppendStringOfType<char, unsigned char>(source, length, type, output);
|
264
|
+
}
|
265
|
+
|
266
|
+
void AppendStringOfType(const char16* source, int length,
|
267
|
+
SharedCharTypes type,
|
268
|
+
CanonOutput* output) {
|
269
|
+
DoAppendStringOfType<char16, char16>(source, length, type, output);
|
270
|
+
}
|
271
|
+
|
272
|
+
void AppendInvalidNarrowString(const char* spec, int begin, int end,
|
273
|
+
CanonOutput* output) {
|
274
|
+
DoAppendInvalidNarrowString<char, unsigned char>(spec, begin, end, output);
|
275
|
+
}
|
276
|
+
|
277
|
+
void AppendInvalidNarrowString(const char16* spec, int begin, int end,
|
278
|
+
CanonOutput* output) {
|
279
|
+
DoAppendInvalidNarrowString<char16, char16>(spec, begin, end, output);
|
280
|
+
}
|
281
|
+
|
282
|
+
bool ConvertUTF16ToUTF8(const char16* input, int input_len,
|
283
|
+
CanonOutput* output) {
|
284
|
+
bool success = true;
|
285
|
+
for (int i = 0; i < input_len; i++) {
|
286
|
+
unsigned code_point;
|
287
|
+
success &= ReadUTFChar(input, &i, input_len, &code_point);
|
288
|
+
AppendUTF8Value(code_point, output);
|
289
|
+
}
|
290
|
+
return success;
|
291
|
+
}
|
292
|
+
|
293
|
+
bool ConvertUTF8ToUTF16(const char* input, int input_len,
|
294
|
+
CanonOutputT<char16>* output) {
|
295
|
+
bool success = true;
|
296
|
+
for (int i = 0; i < input_len; i++) {
|
297
|
+
unsigned code_point;
|
298
|
+
success &= ReadUTFChar(input, &i, input_len, &code_point);
|
299
|
+
AppendUTF16Value(code_point, output);
|
300
|
+
}
|
301
|
+
return success;
|
302
|
+
}
|
303
|
+
|
304
|
+
void SetupOverrideComponents(const char* base,
|
305
|
+
const Replacements<char>& repl,
|
306
|
+
URLComponentSource<char>* source,
|
307
|
+
url_parse::Parsed* parsed) {
|
308
|
+
// Get the source and parsed structures of the things we are replacing.
|
309
|
+
const URLComponentSource<char>& repl_source = repl.sources();
|
310
|
+
const url_parse::Parsed& repl_parsed = repl.components();
|
311
|
+
|
312
|
+
DoOverrideComponent(repl_source.scheme, repl_parsed.scheme,
|
313
|
+
&source->scheme, &parsed->scheme);
|
314
|
+
DoOverrideComponent(repl_source.username, repl_parsed.username,
|
315
|
+
&source->username, &parsed->username);
|
316
|
+
DoOverrideComponent(repl_source.password, repl_parsed.password,
|
317
|
+
&source->password, &parsed->password);
|
318
|
+
|
319
|
+
// Our host should be empty if not present, so override the default setup.
|
320
|
+
DoOverrideComponent(repl_source.host, repl_parsed.host,
|
321
|
+
&source->host, &parsed->host);
|
322
|
+
if (parsed->host.len == -1)
|
323
|
+
parsed->host.len = 0;
|
324
|
+
|
325
|
+
DoOverrideComponent(repl_source.port, repl_parsed.port,
|
326
|
+
&source->port, &parsed->port);
|
327
|
+
DoOverrideComponent(repl_source.path, repl_parsed.path,
|
328
|
+
&source->path, &parsed->path);
|
329
|
+
DoOverrideComponent(repl_source.query, repl_parsed.query,
|
330
|
+
&source->query, &parsed->query);
|
331
|
+
DoOverrideComponent(repl_source.ref, repl_parsed.ref,
|
332
|
+
&source->ref, &parsed->ref);
|
333
|
+
}
|
334
|
+
|
335
|
+
bool SetupUTF16OverrideComponents(const char* base,
|
336
|
+
const Replacements<char16>& repl,
|
337
|
+
CanonOutput* utf8_buffer,
|
338
|
+
URLComponentSource<char>* source,
|
339
|
+
url_parse::Parsed* parsed) {
|
340
|
+
bool success = true;
|
341
|
+
|
342
|
+
// Get the source and parsed structures of the things we are replacing.
|
343
|
+
const URLComponentSource<char16>& repl_source = repl.sources();
|
344
|
+
const url_parse::Parsed& repl_parsed = repl.components();
|
345
|
+
|
346
|
+
success &= PrepareUTF16OverrideComponent(
|
347
|
+
repl_source.scheme, repl_parsed.scheme,
|
348
|
+
utf8_buffer, &parsed->scheme);
|
349
|
+
success &= PrepareUTF16OverrideComponent(
|
350
|
+
repl_source.username, repl_parsed.username,
|
351
|
+
utf8_buffer, &parsed->username);
|
352
|
+
success &= PrepareUTF16OverrideComponent(
|
353
|
+
repl_source.password, repl_parsed.password,
|
354
|
+
utf8_buffer, &parsed->password);
|
355
|
+
success &= PrepareUTF16OverrideComponent(
|
356
|
+
repl_source.host, repl_parsed.host,
|
357
|
+
utf8_buffer, &parsed->host);
|
358
|
+
success &= PrepareUTF16OverrideComponent(
|
359
|
+
repl_source.port, repl_parsed.port,
|
360
|
+
utf8_buffer, &parsed->port);
|
361
|
+
success &= PrepareUTF16OverrideComponent(
|
362
|
+
repl_source.path, repl_parsed.path,
|
363
|
+
utf8_buffer, &parsed->path);
|
364
|
+
success &= PrepareUTF16OverrideComponent(
|
365
|
+
repl_source.query, repl_parsed.query,
|
366
|
+
utf8_buffer, &parsed->query);
|
367
|
+
success &= PrepareUTF16OverrideComponent(
|
368
|
+
repl_source.ref, repl_parsed.ref,
|
369
|
+
utf8_buffer, &parsed->ref);
|
370
|
+
|
371
|
+
// PrepareUTF16OverrideComponent will not have set the data pointer since the
|
372
|
+
// buffer could be resized, invalidating the pointers. We set the data
|
373
|
+
// pointers for affected components now that the buffer is finalized.
|
374
|
+
if (repl_source.scheme) source->scheme = utf8_buffer->data();
|
375
|
+
if (repl_source.username) source->username = utf8_buffer->data();
|
376
|
+
if (repl_source.password) source->password = utf8_buffer->data();
|
377
|
+
if (repl_source.host) source->host = utf8_buffer->data();
|
378
|
+
if (repl_source.port) source->port = utf8_buffer->data();
|
379
|
+
if (repl_source.path) source->path = utf8_buffer->data();
|
380
|
+
if (repl_source.query) source->query = utf8_buffer->data();
|
381
|
+
if (repl_source.ref) source->ref = utf8_buffer->data();
|
382
|
+
|
383
|
+
return success;
|
384
|
+
}
|
385
|
+
|
386
|
+
#ifndef WIN32
|
387
|
+
|
388
|
+
// Non-Windows stand-in for the CRT's _itoa_s: formats |value| into |buffer|
// (capacity |size_in_chars|, including the terminating null) in base |radix|.
// Only radix 10 and 16 are supported; radix 16 produces lower-case digits of
// the value reinterpreted as unsigned.
//
// Returns 0 on success, or EINVAL when |buffer| is null, |size_in_chars| is
// zero, the radix is unsupported, or the text (plus terminator) does not fit.
int _itoa_s(int value, char* buffer, size_t size_in_chars, int radix) {
  // Reject a missing or empty destination up front instead of handing a null
  // pointer to snprintf.
  if (!buffer || size_in_chars == 0)
    return EINVAL;

  int written;
  if (radix == 10) {
    written = snprintf(buffer, size_in_chars, "%d", value);
  } else if (radix == 16) {
    // %x expects an unsigned int argument, so convert explicitly.
    written = snprintf(buffer, size_in_chars, "%x",
                       static_cast<unsigned int>(value));
  } else {
    return EINVAL;
  }

  if (written < 0 || static_cast<size_t>(written) >= size_in_chars) {
    // snprintf failed or the output was truncated.
    return EINVAL;
  }
  return 0;
}
|
404
|
+
|
405
|
+
int _itow_s(int value, char16* buffer, size_t size_in_chars, int radix) {
|
406
|
+
if (radix != 10)
|
407
|
+
return EINVAL;
|
408
|
+
|
409
|
+
// No more than 12 characters will be required for a 32-bit integer.
|
410
|
+
// Add an extra byte for the terminating null.
|
411
|
+
char temp[13];
|
412
|
+
int written = snprintf(temp, sizeof(temp), "%d", value);
|
413
|
+
if (static_cast<size_t>(written) >= size_in_chars) {
|
414
|
+
// Output was truncated, or written was negative.
|
415
|
+
return EINVAL;
|
416
|
+
}
|
417
|
+
|
418
|
+
for (int i = 0; i < written; ++i) {
|
419
|
+
buffer[i] = static_cast<char16>(temp[i]);
|
420
|
+
}
|
421
|
+
buffer[written] = '\0';
|
422
|
+
return 0;
|
423
|
+
}
|
424
|
+
|
425
|
+
#endif // !WIN32
|
426
|
+
|
427
|
+
} // namespace url_canon
|