idn-ruby 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,183 @@
1
+ /*
2
+ * Copyright (c) 2005-2006 Erik Abele. All rights reserved.
3
+ * Portions Copyright (c) 2005 Yuki Mitsui. All rights reserved.
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * Please see the file called LICENSE for further details.
8
+ *
9
+ * You may also obtain a copy of the License at
10
+ *
11
+ * * http://www.apache.org/licenses/LICENSE-2.0
12
+ *
13
+ * Unless required by applicable law or agreed to in writing, software
14
+ * distributed under the License is distributed on an "AS IS" BASIS,
15
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ * See the License for the specific language governing permissions and
17
+ * limitations under the License.
18
+ *
19
+ * This software is OSI Certified Open Source Software.
20
+ * OSI Certified is a certification mark of the Open Source Initiative.
21
+ */
22
+
23
+ #include <stdlib.h>
24
+ #include <ruby.h>
25
+ #include <stringprep.h>
26
+ #include "idn.h"
27
+
28
+ /*
29
+ * Document-class: IDN::Stringprep
30
+ * The Stringprep module of LibIDN Ruby Bindings.
31
+ *
32
+ * === Example usage
33
+ *
34
+ * require 'idn'
35
+ * include IDN
36
+ *
37
+ * str = Stringprep.with_profile('FOO', 'Nameprep')
38
+ */
39
+
40
+ VALUE mStringprep;
41
+
42
+ /*
43
+ * Document-class: IDN::Stringprep::StringprepError
44
+ * The base class for all exceptions raised by the IDN::Stringprep module.
45
+ */
46
+
47
+ VALUE eStringprepError;
48
+
49
+ /*
50
+ * Internal helper function:
51
+ * stringprep_internal
52
+ *
53
+ * Prepares the given string in UTF-8 format according to the given
54
+ * stringprep profile name. See the various public wrapper functions
55
+ * below for details.
56
+ *
57
+ * Raises IDN::Stringprep::StringprepError on failure.
58
+ */
59
+
60
+ static VALUE stringprep_internal(VALUE str, const char *profile)
61
+ {
62
+ int rc;
63
+ char *buf;
64
+ VALUE retv;
65
+
66
+ str = rb_check_convert_type(str, T_STRING, "String", "to_s");
67
+ rc = stringprep_profile(RSTRING_PTR(str), &buf, profile, 0);
68
+
69
+ if (rc != STRINGPREP_OK) {
70
+ rb_raise(eStringprepError, "%s (%d)", stringprep_strerror(rc), rc);
71
+ return Qnil;
72
+ }
73
+
74
+ retv = rb_str_new2(buf);
75
+ xfree(buf);
76
+ return retv;
77
+ }
78
+
79
+ /*
80
+ * call-seq:
81
+ * IDN::Stringprep.nameprep(string) => string
82
+ *
83
+ * Prepares a string in UTF-8 format according to the 'Nameprep'
84
+ * profile.
85
+ *
86
+ * Raises IDN::Stringprep::StringprepError on failure.
87
+ */
88
+
89
+ static VALUE nameprep(VALUE self, VALUE str)
90
+ {
91
+ return stringprep_internal(str, "Nameprep");
92
+ }
93
+
94
+ /*
95
+ * call-seq:
96
+ * IDN::Stringprep.nodeprep(string) => string
97
+ *
98
+ * Prepares a string in UTF-8 format according to the 'Nodeprep'
99
+ * profile.
100
+ *
101
+ * Raises IDN::Stringprep::StringprepError on failure.
102
+ */
103
+
104
+ static VALUE nodeprep(VALUE self, VALUE str)
105
+ {
106
+ return stringprep_internal(str, "Nodeprep");
107
+ }
108
+
109
+ /*
110
+ * call-seq:
111
+ * IDN::Stringprep.resourceprep(string) => string
112
+ *
113
+ * Prepares a string in UTF-8 format according to the 'Resourceprep'
114
+ * profile.
115
+ *
116
+ * Raises IDN::Stringprep::StringprepError on failure.
117
+ */
118
+
119
+ static VALUE resourceprep(VALUE self, VALUE str)
120
+ {
121
+ return stringprep_internal(str, "Resourceprep");
122
+ }
123
+
124
+ /*
125
+ * call-seq:
126
+ * IDN::Stringprep.with_profile(string, profile) => string
127
+ *
128
+ * Prepares a string in UTF-8 format according to the given stringprep
129
+ * profile name which must be one of the internally supported stringprep
130
+ * profiles (for details see IANA's Profile Names in RFC3454).
131
+ *
132
+ * Raises IDN::Stringprep::StringprepError on failure.
133
+ */
134
+
135
+ static VALUE with_profile(VALUE self, VALUE str, VALUE profile)
136
+ {
137
+ profile = rb_check_convert_type(profile, T_STRING, "String", "to_s");
138
+ return stringprep_internal(str, RSTRING_PTR(profile));
139
+ }
140
+
141
+ /*
142
+ * call-seq:
143
+ * IDN::Stringprep.nfkc_normalize(string) => string
144
+ *
145
+ * Converts a string in UTF-8 format into canonical form, standardizing
146
+ * such issues as whether a character with an accent is represented as a
147
+ * base character and combining accent or as a single precomposed character.
148
+ */
149
+
150
+ static VALUE nfkc_normalize(VALUE self, VALUE str)
151
+ {
152
+ char *buf;
153
+ VALUE retv;
154
+
155
+ str = rb_check_convert_type(str, T_STRING, "String", "to_s");
156
+ buf = stringprep_utf8_nfkc_normalize(RSTRING_PTR(str), RSTRING_LEN(str));
157
+
158
+ retv = rb_str_new2(buf);
159
+ xfree(buf);
160
+ return retv;
161
+ }
162
+
163
+ /*
164
+ * Module Initialization.
165
+ */
166
+
167
+ void init_stringprep(void)
168
+ {
169
+ #ifdef mIDN_RDOC_HACK
170
+ mIDN = rb_define_module("IDN");
171
+ eIDNError = rb_define_class_under(mIDN, "IDNError", rb_eStandardError);
172
+ #endif
173
+
174
+ mStringprep = rb_define_module_under(mIDN, "Stringprep");
175
+ eStringprepError = rb_define_class_under(mStringprep, "StringprepError",
176
+ eIDNError);
177
+
178
+ rb_define_singleton_method(mStringprep, "nameprep", nameprep, 1);
179
+ rb_define_singleton_method(mStringprep, "nodeprep", nodeprep, 1);
180
+ rb_define_singleton_method(mStringprep, "resourceprep", resourceprep, 1);
181
+ rb_define_singleton_method(mStringprep, "with_profile", with_profile, 2);
182
+ rb_define_singleton_method(mStringprep, "nfkc_normalize", nfkc_normalize, 1);
183
+ }
@@ -0,0 +1,273 @@
1
+ # Unit tests for IDN::Idna.
2
+ #
3
+ # Copyright (c) 2005-2006 Erik Abele. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # Please see the file called LICENSE for further details.
8
+ #
9
+ # You may also obtain a copy of the License at
10
+ #
11
+ # * http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ # See the License for the specific language governing permissions and
17
+ # limitations under the License.
18
+ #
19
+ # This software is OSI Certified Open Source Software.
20
+ # OSI Certified is a certification mark of the Open Source Initiative.
21
+
22
+ require 'test/unit'
23
+ require 'idn'
24
+
25
# Exercises Idna.toASCII and Idna.toUnicode against fixed test vectors.
#
# Every vector table maps a case label to a two-element array:
# [ UTF-8 encoded Unicode string, expected ACE (ASCII) string ].
class Test_Idna < Test::Unit::TestCase
  include IDN

  # JOSEFSSON test vectors, taken from DRAFT-JOSEFSSON-IDN-TEST-VECTORS-00:
  # http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
  #
  # Modifications:
  # - omission of 5.20 since it is identical with 5.8 (case H below)

  TESTCASES_JOSEFSSON = {
    'A' => [
      [
        0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
        0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
        0x061F
      ].pack('U*'),
      Idna::ACE_PREFIX + 'egbpdaj6bu4bxfgehfvwxn'
    ],

    'B' => [
      [
        0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D,
        0x6587
      ].pack('U*'),
      Idna::ACE_PREFIX + 'ihqwcrb4cv8a8dqg056pqjye'
    ],

    'C' => [
      [
        0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D,
        0x6587
      ].pack('U*'),
      Idna::ACE_PREFIX + 'ihqwctvzc91f659drss3x8bo0yb'
    ],

    'D' => [
      [
        0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
        0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
        0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079
      ].pack('U*'),
      Idna::ACE_PREFIX + 'Proprostnemluvesky-uyb24dma41a'
    ],

    'E' => [
      [
        0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
        0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
        0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA
      ].pack('U*'),
      Idna::ACE_PREFIX + '4dbcagdahymbxekheh6e0a7fei0b'
    ],

    'F' => [
      [
        0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
        0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
        0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
        0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902
      ].pack('U*'),
      Idna::ACE_PREFIX + 'i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd'
    ],

    'G' => [
      [
        0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
        0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
        0x306E, 0x304B
      ].pack('U*'),
      Idna::ACE_PREFIX + 'n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa'
    ],

    'H' => [
      [
        0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
        0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
        0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
        0x0441, 0x0441, 0x043A, 0x0438
      ].pack('U*'),
      Idna::ACE_PREFIX + 'b1abfaaepdrnnbgefbadotcwatmq2g4l'
    ],

    'I' => [
      [
        0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
        0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
        0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
        0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
        0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F,
        0x006C
      ].pack('U*'),
      Idna::ACE_PREFIX + 'PorqunopuedensimplementehablarenEspaol-fmd56a'
    ],

    'J' => [
      [
        0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
        0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
        0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
        0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074
      ].pack('U*'),
      Idna::ACE_PREFIX + 'TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g'
    ],

    'K' => [
      [
        0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148,
        0x751F
      ].pack('U*'),
      Idna::ACE_PREFIX + '3B-ww4c5e180e575a65lsy2b'
    ],

    'L' => [
      [
        0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
        0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
        0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059,
        0x0053
      ].pack('U*'),
      Idna::ACE_PREFIX + '-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n'
    ],

    'M' => [
      [
        0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
        0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
        0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
        0x6240
      ].pack('U*'),
      Idna::ACE_PREFIX + 'Hello-Another-Way--fc4qua05auwb3674vfr0b'
    ],

    'N' => [
      [
        0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B,
        0x0032
      ].pack('U*'),
      Idna::ACE_PREFIX + '2-u9tlzr9756bt3uc0v'
    ],

    'O' => [
      [
        0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
        0x3059, 0x308B, 0x0035, 0x79D2, 0x524D
      ].pack('U*'),
      Idna::ACE_PREFIX + 'MajiKoi5-783gue6qz075azm5e'
    ],

    'P' => [
      [
        0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3,
        0x30D0
      ].pack('U*'),
      Idna::ACE_PREFIX + 'de-jg4avhby1noc0d'
    ],

    'Q' => [
      [
        0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067
      ].pack('U*'),
      Idna::ACE_PREFIX + 'd9juau41awczczp'
    ],

    'R' => [
      [
        0x03B5, 0x03BB, 0x03BB, 0x03B7, 0x03BD, 0x03B9, 0x03BA,
        0x03AC
      ].pack('U*'),
      Idna::ACE_PREFIX + 'hxargifdar'
    ],

    'S' => [
      [
        0x0062, 0x006F, 0x006E, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
        0x0127, 0x0061
      ].pack('U*'),
      Idna::ACE_PREFIX + 'bonusaa-5bb1da'
    ]
  }

  # UNASSIGNED test vectors: unassigned code points U+0221 and U+0236.

  TESTCASES_UNASSIGNED = {
    'A' => [
      [ 0x0221 ].pack('U*'),
      Idna::ACE_PREFIX + '6la'
    ],

    'B' => [
      [ 0x0236 ].pack('U*'),
      Idna::ACE_PREFIX + 'sma'
    ]
  }

  # STD3 test vectors: labels not conforming to the STD3 ASCII rules (see
  # RFC1122 and RFC1123 for details).

  TESTCASES_STD3 = {
    'A' => [
      [ 0x0115, 0x0073, 0x0074, 0x0065, 0x002D ].pack('U*'),
      Idna::ACE_PREFIX + 'ste--kva'
    ],

    'B' => [
      [ 0x006F, 0x003A, 0x006C, 0x006B, 0x01EB, 0x0065 ].pack('U*'),
      Idna::ACE_PREFIX + 'o:lke-m1b'
    ]
  }

  # No fixtures are needed; the hooks are kept as explicit no-ops.
  def setup; end

  def teardown; end

  # --- toASCII -----------------------------------------------------------

  # The expected ACE form is lower-cased before comparing with the result.
  def test_toASCII_JOSEFSSON
    TESTCASES_JOSEFSSON.each_pair do |label, (unicode, ace)|
      actual = Idna.toASCII(unicode)
      assert_equal(ace.downcase, actual, failure_message(label))
    end
  end

  # With ALLOW_UNASSIGNED the unassigned code points are encoded normally.
  def test_toASCII_UNASSIGNED_ALLOWED
    TESTCASES_UNASSIGNED.each_pair do |label, (unicode, ace)|
      actual = Idna.toASCII(unicode, Idna::ALLOW_UNASSIGNED)
      assert_equal(ace, actual, failure_message(label))
    end
  end

  # Without the flag, unassigned code points must raise IdnaError.
  def test_toASCII_UNASSIGNED_NOT_ALLOWED
    TESTCASES_UNASSIGNED.each_pair do |label, (unicode, _ace)|
      assert_raise(Idna::IdnaError, failure_message(label)) do
        Idna.toASCII(unicode)
      end
    end
  end

  # With USE_STD3_ASCII_RULES the non-conforming labels must raise IdnaError.
  def test_toASCII_STD3_USED
    TESTCASES_STD3.each_pair do |label, (unicode, _ace)|
      assert_raise(Idna::IdnaError, failure_message(label)) do
        Idna.toASCII(unicode, Idna::USE_STD3_ASCII_RULES)
      end
    end
  end

  # Without the flag, the non-conforming labels are encoded normally.
  def test_toASCII_STD3_NOT_USED
    TESTCASES_STD3.each_pair do |label, (unicode, ace)|
      actual = Idna.toASCII(unicode)
      assert_equal(ace, actual, failure_message(label))
    end
  end

  # --- toUnicode ---------------------------------------------------------

  def test_toUnicode_JOSEFSSON
    TESTCASES_JOSEFSSON.each_pair do |label, (unicode, ace)|
      actual = Idna.toUnicode(ace)
      assert_equal(unicode, actual, failure_message(label))
    end
  end

  def test_toUnicode_UNASSIGNED_ALLOWED
    TESTCASES_UNASSIGNED.each_pair do |label, (unicode, ace)|
      actual = Idna.toUnicode(ace, Idna::ALLOW_UNASSIGNED)
      assert_equal(unicode, actual, failure_message(label))
    end
  end

  # Without the flag, the ACE input is expected to come back unchanged.
  def test_toUnicode_UNASSIGNED_NOT_ALLOWED
    TESTCASES_UNASSIGNED.each_pair do |label, (_unicode, ace)|
      actual = Idna.toUnicode(ace)
      assert_equal(ace, actual, failure_message(label))
    end
  end

  # With USE_STD3_ASCII_RULES the ACE input is expected to come back
  # unchanged.
  def test_toUnicode_STD3_USED
    TESTCASES_STD3.each_pair do |label, (_unicode, ace)|
      actual = Idna.toUnicode(ace, Idna::USE_STD3_ASCII_RULES)
      assert_equal(ace, actual, failure_message(label))
    end
  end

  def test_toUnicode_STD3_NOT_USED
    TESTCASES_STD3.each_pair do |label, (unicode, ace)|
      actual = Idna.toUnicode(ace)
      assert_equal(unicode, actual, failure_message(label))
    end
  end

  private

  # Builds the assertion message shared by all vector-driven tests.
  def failure_message(label)
    "TestCase #{label} failed"
  end
end