idn 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,184 @@
1
+ /*
2
+ * Copyright (c) 2005 Erik Abele. All rights reserved.
3
+ * Portions Copyright (c) 2005 Yuki Mitsui. All rights reserved.
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * Please see the file called LICENSE for further details.
8
+ *
9
+ * You may also obtain a copy of the License at
10
+ *
11
+ * * http://www.apache.org/licenses/LICENSE-2.0
12
+ *
13
+ * Unless required by applicable law or agreed to in writing, software
14
+ * distributed under the License is distributed on an "AS IS" BASIS,
15
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ * See the License for the specific language governing permissions and
17
+ * limitations under the License.
18
+ *
19
+ * This software is OSI Certified Open Source Software.
20
+ * OSI Certified is a certification mark of the Open Source Initiative.
21
+ */
22
+
23
+ #include <stdlib.h>
24
+ #include <ruby.h>
25
+ #include <stringprep.h>
26
+ #include "idn.h"
27
+
28
+ /*
29
+ * Document-class: IDN::Stringprep
30
+ * The Stringprep module of LibIDN Ruby Bindings.
31
+ *
32
+ * === Example usage
33
+ *
34
+ * require 'idn'
35
+ * include IDN
36
+ *
37
+ * str = Stringprep.with_profile('FOO', 'Nameprep')
38
+ */
39
+
40
+ VALUE mStringprep;
41
+
42
+ /*
43
+ * Document-class: IDN::Stringprep::StringprepError
44
+ * The base class for all exceptions raised by the IDN::Stringprep module.
45
+ */
46
+
47
+ VALUE eStringprepError;
48
+
49
+ /*
50
+ * Internal helper function:
51
+ * stringprep_internal
52
+ *
53
+ * Prepares the given string in UTF-8 format according to the given
54
+ * stringprep profile name. See the various public wrapper functions
55
+ * below for details.
56
+ *
57
+ * Raises IDN::Stringprep::StringprepError on failure.
58
+ */
59
+
60
+ static VALUE stringprep_internal(VALUE str, const char *profile)
61
+ {
62
+ int rc;
63
+ char *buf;
64
+ VALUE retv;
65
+
66
+ str = rb_check_convert_type(str, T_STRING, "String", "to_s");
67
+ rc = stringprep_profile(RSTRING(str)->ptr, &buf, profile, 0);
68
+
69
+ if (rc != STRINGPREP_OK) {
70
+ xfree(buf);
71
+ rb_raise(eStringprepError, "%s (%d)", stringprep_strerror(rc), rc);
72
+ return Qnil;
73
+ }
74
+
75
+ retv = rb_str_new2(buf);
76
+ xfree(buf);
77
+ return retv;
78
+ }
79
+
80
+ /*
81
+ * call-seq:
82
+ * IDN::Stringprep.nameprep(string) => string
83
+ *
84
+ * Prepares a string in UTF-8 format according to the 'Nameprep'
85
+ * profile.
86
+ *
87
+ * Raises IDN::Stringprep::StringprepError on failure.
88
+ */
89
+
90
+ static VALUE nameprep(VALUE self, VALUE str)
91
+ {
92
+ return stringprep_internal(str, "Nameprep");
93
+ }
94
+
95
+ /*
96
+ * call-seq:
97
+ * IDN::Stringprep.nodeprep(string) => string
98
+ *
99
+ * Prepares a string in UTF-8 format according to the 'Nodeprep'
100
+ * profile.
101
+ *
102
+ * Raises IDN::Stringprep::StringprepError on failure.
103
+ */
104
+
105
+ static VALUE nodeprep(VALUE self, VALUE str)
106
+ {
107
+ return stringprep_internal(str, "Nodeprep");
108
+ }
109
+
110
+ /*
111
+ * call-seq:
112
+ * IDN::Stringprep.resourceprep(string) => string
113
+ *
114
+ * Prepares a string in UTF-8 format according to the 'Resourceprep'
115
+ * profile.
116
+ *
117
+ * Raises IDN::Stringprep::StringprepError on failure.
118
+ */
119
+
120
+ static VALUE resourceprep(VALUE self, VALUE str)
121
+ {
122
+ return stringprep_internal(str, "Resourceprep");
123
+ }
124
+
125
+ /*
126
+ * call-seq:
127
+ * IDN::Stringprep.with_profile(string, profile) => string
128
+ *
129
+ * Prepares a string in UTF-8 format according to the given stringprep
130
+ * profile name which must be one of the internally supported stringprep
131
+ * profiles (for details see IANA's Profile Names in RFC3454).
132
+ *
133
+ * Raises IDN::Stringprep::StringprepError on failure.
134
+ */
135
+
136
+ static VALUE with_profile(VALUE self, VALUE str, VALUE profile)
137
+ {
138
+ profile = rb_check_convert_type(profile, T_STRING, "String", "to_s");
139
+ return stringprep_internal(str, RSTRING(profile)->ptr);
140
+ }
141
+
142
+ /*
143
+ * call-seq:
144
+ * IDN::Stringprep.nfkc_normalize(string) => string
145
+ *
146
+ * Converts a string in UTF-8 format into canonical form, standardizing
147
+ * such issues as whether a character with an accent is represented as a
148
+ * base character and combining accent or as a single precomposed character.
149
+ */
150
+
151
+ static VALUE nfkc_normalize(VALUE self, VALUE str)
152
+ {
153
+ char *buf;
154
+ VALUE retv;
155
+
156
+ str = rb_check_convert_type(str, T_STRING, "String", "to_s");
157
+ buf = stringprep_utf8_nfkc_normalize(RSTRING(str)->ptr, RSTRING(str)->len);
158
+
159
+ retv = rb_str_new2(buf);
160
+ xfree(buf);
161
+ return retv;
162
+ }
163
+
164
+ /*
165
+ * Module Initialization.
166
+ */
167
+
168
+ void init_stringprep(void)
169
+ {
170
+ #ifdef mIDN_RDOC_HACK
171
+ mIDN = rb_define_module("IDN");
172
+ eIDNError = rb_define_class_under(mIDN, "IDNError", rb_eStandardError);
173
+ #endif
174
+
175
+ mStringprep = rb_define_module_under(mIDN, "Stringprep");
176
+ eStringprepError = rb_define_class_under(mStringprep, "StringprepError",
177
+ eIDNError);
178
+
179
+ rb_define_singleton_method(mStringprep, "nameprep", nameprep, 1);
180
+ rb_define_singleton_method(mStringprep, "nodeprep", nodeprep, 1);
181
+ rb_define_singleton_method(mStringprep, "resourceprep", resourceprep, 1);
182
+ rb_define_singleton_method(mStringprep, "with_profile", with_profile, 2);
183
+ rb_define_singleton_method(mStringprep, "nfkc_normalize", nfkc_normalize, 1);
184
+ }
@@ -0,0 +1,275 @@
1
+ # Unit tests for IDN::Idna.
2
+ #
3
+ # Copyright (c) 2005 Erik Abele. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # Please see the file called LICENSE for further details.
8
+ #
9
+ # You may also obtain a copy of the License at
10
+ #
11
+ # * http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ # See the License for the specific language governing permissions and
17
+ # limitations under the License.
18
+ #
19
+ # This software is OSI Certified Open Source Software.
20
+ # OSI Certified is a certification mark of the Open Source Initiative.
21
+
22
+ require 'test/unit'
23
+ require 'idn'
24
+
25
class Test_Idna < Test::Unit::TestCase
  include IDN

  # Every table below maps a test case label to a two-element array:
  #   [ label text as a UTF-8 string, expected ACE (ASCII) form ]

  # JOSEFSSON test vectors, taken from DRAFT-JOSEFSSON-IDN-TEST-VECTORS-00:
  # http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
  #
  # Modifications:
  # - 5.20 is left out because it is identical with 5.8 (case H below)

  TESTCASES_JOSEFSSON = {
    'A' => [
      [ 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
        0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
        0x061F ].pack('U*'),
      Idna::ACE_PREFIX + 'egbpdaj6bu4bxfgehfvwxn'
    ],

    'B' => [
      [ 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D,
        0x6587 ].pack('U*'),
      Idna::ACE_PREFIX + 'ihqwcrb4cv8a8dqg056pqjye'
    ],

    'C' => [
      [ 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D,
        0x6587 ].pack('U*'),
      Idna::ACE_PREFIX + 'ihqwctvzc91f659drss3x8bo0yb'
    ],

    'D' => [
      [ 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
        0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
        0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079 ].pack('U*'),
      Idna::ACE_PREFIX + 'Proprostnemluvesky-uyb24dma41a'
    ],

    'E' => [
      [ 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
        0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
        0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA ].pack('U*'),
      Idna::ACE_PREFIX + '4dbcagdahymbxekheh6e0a7fei0b'
    ],

    'F' => [
      [ 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
        0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
        0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
        0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902 ].pack('U*'),
      Idna::ACE_PREFIX + 'i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd'
    ],

    'G' => [
      [ 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
        0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
        0x306E, 0x304B ].pack('U*'),
      Idna::ACE_PREFIX + 'n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa'
    ],

    'H' => [
      [ 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
        0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
        0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
        0x0441, 0x0441, 0x043A, 0x0438 ].pack('U*'),
      Idna::ACE_PREFIX + 'b1abfaaepdrnnbgefbadotcwatmq2g4l'
    ],

    'I' => [
      [ 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
        0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
        0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
        0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
        0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F,
        0x006C ].pack('U*'),
      Idna::ACE_PREFIX + 'PorqunopuedensimplementehablarenEspaol-fmd56a'
    ],

    'J' => [
      [ 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
        0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
        0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
        0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074 ].pack('U*'),
      Idna::ACE_PREFIX + 'TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g'
    ],

    'K' => [
      [ 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148,
        0x751F ].pack('U*'),
      Idna::ACE_PREFIX + '3B-ww4c5e180e575a65lsy2b'
    ],

    'L' => [
      [ 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
        0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
        0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059,
        0x0053 ].pack('U*'),
      Idna::ACE_PREFIX + '-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n'
    ],

    'M' => [
      [ 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
        0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
        0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
        0x6240 ].pack('U*'),
      Idna::ACE_PREFIX + 'Hello-Another-Way--fc4qua05auwb3674vfr0b'
    ],

    'N' => [
      [ 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B,
        0x0032 ].pack('U*'),
      Idna::ACE_PREFIX + '2-u9tlzr9756bt3uc0v'
    ],

    'O' => [
      [ 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
        0x3059, 0x308B, 0x0035, 0x79D2, 0x524D ].pack('U*'),
      Idna::ACE_PREFIX + 'MajiKoi5-783gue6qz075azm5e'
    ],

    'P' => [
      [ 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3,
        0x30D0 ].pack('U*'),
      Idna::ACE_PREFIX + 'de-jg4avhby1noc0d'
    ],

    'Q' => [
      [ 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067 ].pack('U*'),
      Idna::ACE_PREFIX + 'd9juau41awczczp'
    ],

    'R' => [
      [ 0x03B5, 0x03BB, 0x03BB, 0x03B7, 0x03BD, 0x03B9, 0x03BA,
        0x03AC ].pack('U*'),
      Idna::ACE_PREFIX + 'hxargifdar'
    ],

    'S' => [
      [ 0x0062, 0x006F, 0x006E, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
        0x0127, 0x0061 ].pack('U*'),
      Idna::ACE_PREFIX + 'bonusaa-5bb1da'
    ]
  }

  # UNASSIGNED test vectors: unassigned code points U+0221 and U+0236.

  TESTCASES_UNASSIGNED = {
    'A' => [
      [ 0x0221 ].pack('U*'),
      Idna::ACE_PREFIX + '6la'
    ],

    'B' => [
      [ 0x0236 ].pack('U*'),
      Idna::ACE_PREFIX + 'sma'
    ]
  }

  # STD3 test vectors: labels not conforming to the STD3 ASCII rules (see
  # RFC1122 and RFC1123 for details).

  TESTCASES_STD3 = {
    'A' => [
      [ 0x0115, 0x0073, 0x0074, 0x0065, 0x002D ].pack('U*'),
      Idna::ACE_PREFIX + 'ste--kva'
    ],

    'B' => [
      [ 0x006F, 0x003A, 0x006C, 0x006B, 0x01EB, 0x0065 ].pack('U*'),
      Idna::ACE_PREFIX + 'o:lke-m1b'
    ]
  }

  # No per-test fixtures are needed.
  def setup
  end

  def teardown
  end

  # toASCII must produce the lowercased ACE form of every JOSEFSSON vector.
  def test_toASCII_JOSEFSSON
    TESTCASES_JOSEFSSON.each_pair do |label, (unicode, ace)|
      actual = Idna.toASCII(unicode)
      assert_equal(ace.downcase, actual, "TestCase #{label} failed")
    end
  end

  # With ALLOW_UNASSIGNED, unassigned code points must not raise.
  def test_toASCII_UNASSIGNED_ALLOWED
    TESTCASES_UNASSIGNED.each_pair do |label, (unicode, _ace)|
      assert_nothing_raised("TestCase #{label} failed") {
        Idna.toASCII(unicode, IDN::Idna::ALLOW_UNASSIGNED)
      }
    end
  end

  # Without flags, unassigned code points must raise IdnaError.
  def test_toASCII_UNASSIGNED_NOT_ALLOWED
    TESTCASES_UNASSIGNED.each_pair do |label, (unicode, _ace)|
      assert_raise(Idna::IdnaError, "TestCase #{label} failed") {
        Idna.toASCII(unicode)
      }
    end
  end

  # With USE_STD3_ASCII_RULES, non-conforming labels must raise IdnaError.
  def test_toASCII_STD3_USED
    TESTCASES_STD3.each_pair do |label, (unicode, _ace)|
      assert_raise(Idna::IdnaError, "TestCase #{label} failed") {
        Idna.toASCII(unicode, IDN::Idna::USE_STD3_ASCII_RULES)
      }
    end
  end

  # Without flags, the STD3 vectors must not raise.
  def test_toASCII_STD3_NOT_USED
    TESTCASES_STD3.each_pair do |label, (unicode, _ace)|
      assert_nothing_raised("TestCase #{label} failed") {
        Idna.toASCII(unicode)
      }
    end
  end

  # toUnicode must recover the original text of every JOSEFSSON vector.
  def test_toUnicode_JOSEFSSON
    TESTCASES_JOSEFSSON.each_pair do |label, (unicode, ace)|
      actual = Idna.toUnicode(ace)
      assert_equal(unicode, actual, "TestCase #{label} failed")
    end
  end

  # With ALLOW_UNASSIGNED, the ACE form decodes to the unassigned code point.
  def test_toUnicode_UNASSIGNED_ALLOWED
    TESTCASES_UNASSIGNED.each_pair do |label, (unicode, ace)|
      actual = Idna.toUnicode(ace, IDN::Idna::ALLOW_UNASSIGNED)
      assert_equal(unicode, actual, "TestCase #{label} failed")
    end
  end

  # Without flags, the ACE form is expected to come back unchanged.
  def test_toUnicode_UNASSIGNED_NOT_ALLOWED
    TESTCASES_UNASSIGNED.each_pair do |label, (_unicode, ace)|
      actual = Idna.toUnicode(ace)
      assert_equal(ace, actual, "TestCase #{label} failed")
    end
  end

  # With USE_STD3_ASCII_RULES, the ACE form is expected to come back unchanged.
  def test_toUnicode_STD3_USED
    TESTCASES_STD3.each_pair do |label, (_unicode, ace)|
      actual = Idna.toUnicode(ace, IDN::Idna::USE_STD3_ASCII_RULES)
      assert_equal(ace, actual, "TestCase #{label} failed")
    end
  end

  # Without flags, the STD3 vectors decode to their original text.
  def test_toUnicode_STD3_NOT_USED
    TESTCASES_STD3.each_pair do |label, (unicode, ace)|
      actual = Idna.toUnicode(ace)
      assert_equal(unicode, actual, "TestCase #{label} failed")
    end
  end
end