idn-ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,183 @@
1
+ /*
2
+ * Copyright (c) 2005-2006 Erik Abele. All rights reserved.
3
+ * Portions Copyright (c) 2005 Yuki Mitsui. All rights reserved.
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * Please see the file called LICENSE for further details.
8
+ *
9
+ * You may also obtain a copy of the License at
10
+ *
11
+ * * http://www.apache.org/licenses/LICENSE-2.0
12
+ *
13
+ * Unless required by applicable law or agreed to in writing, software
14
+ * distributed under the License is distributed on an "AS IS" BASIS,
15
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ * See the License for the specific language governing permissions and
17
+ * limitations under the License.
18
+ *
19
+ * This software is OSI Certified Open Source Software.
20
+ * OSI Certified is a certification mark of the Open Source Initiative.
21
+ */
22
+
23
+ #include <stdlib.h>
24
+ #include <ruby.h>
25
+ #include <stringprep.h>
26
+ #include "idn.h"
27
+
28
+ /*
29
+ * Document-class: IDN::Stringprep
30
+ * The Stringprep module of LibIDN Ruby Bindings.
31
+ *
32
+ * === Example usage
33
+ *
34
+ * require 'idn'
35
+ * include IDN
36
+ *
37
+ * str = Stringprep.with_profile('FOO', 'Nameprep')
38
+ */
39
+
40
/* Handle to the IDN::Stringprep module; assigned in init_stringprep(). */
VALUE mStringprep;
41
+
42
+ /*
43
+ * Document-class: IDN::Stringprep::StringprepError
44
+ * The base class for all exceptions raised by the IDN::Stringprep module.
45
+ */
46
+
47
/*
 * Handle to the IDN::Stringprep::StringprepError exception class;
 * assigned in init_stringprep().
 */
VALUE eStringprepError;
48
+
49
+ /*
50
+ * Internal helper function:
51
+ * stringprep_internal
52
+ *
53
+ * Prepares the given string in UTF-8 format according to the given
54
+ * stringprep profile name. See the various public wrapper functions
55
+ * below for details.
56
+ *
57
+ * Raises IDN::Stringprep::StringprepError on failure.
58
+ */
59
+
60
+ static VALUE stringprep_internal(VALUE str, const char *profile)
61
+ {
62
+ int rc;
63
+ char *buf;
64
+ VALUE retv;
65
+
66
+ str = rb_check_convert_type(str, T_STRING, "String", "to_s");
67
+ rc = stringprep_profile(RSTRING_PTR(str), &buf, profile, 0);
68
+
69
+ if (rc != STRINGPREP_OK) {
70
+ rb_raise(eStringprepError, "%s (%d)", stringprep_strerror(rc), rc);
71
+ return Qnil;
72
+ }
73
+
74
+ retv = rb_str_new2(buf);
75
+ xfree(buf);
76
+ return retv;
77
+ }
78
+
79
+ /*
80
+ * call-seq:
81
+ * IDN::Stringprep.nameprep(string) => string
82
+ *
83
+ * Prepares a string in UTF-8 format according to the 'Nameprep'
84
+ * profile.
85
+ *
86
+ * Raises IDN::Stringprep::StringprepError on failure.
87
+ */
88
+
89
+ static VALUE nameprep(VALUE self, VALUE str)
90
+ {
91
+ return stringprep_internal(str, "Nameprep");
92
+ }
93
+
94
+ /*
95
+ * call-seq:
96
+ * IDN::Stringprep.nodeprep(string) => string
97
+ *
98
+ * Prepares a string in UTF-8 format according to the 'Nodeprep'
99
+ * profile.
100
+ *
101
+ * Raises IDN::Stringprep::StringprepError on failure.
102
+ */
103
+
104
+ static VALUE nodeprep(VALUE self, VALUE str)
105
+ {
106
+ return stringprep_internal(str, "Nodeprep");
107
+ }
108
+
109
+ /*
110
+ * call-seq:
111
+ * IDN::Stringprep.resourceprep(string) => string
112
+ *
113
+ * Prepares a string in UTF-8 format according to the 'Resourceprep'
114
+ * profile.
115
+ *
116
+ * Raises IDN::Stringprep::StringprepError on failure.
117
+ */
118
+
119
+ static VALUE resourceprep(VALUE self, VALUE str)
120
+ {
121
+ return stringprep_internal(str, "Resourceprep");
122
+ }
123
+
124
+ /*
125
+ * call-seq:
126
+ * IDN::Stringprep.with_profile(string, profile) => string
127
+ *
128
+ * Prepares a string in UTF-8 format according to the given stringprep
129
+ * profile name which must be one of the internally supported stringprep
130
+ * profiles (for details see IANA's Profile Names in RFC3454).
131
+ *
132
+ * Raises IDN::Stringprep::StringprepError on failure.
133
+ */
134
+
135
+ static VALUE with_profile(VALUE self, VALUE str, VALUE profile)
136
+ {
137
+ profile = rb_check_convert_type(profile, T_STRING, "String", "to_s");
138
+ return stringprep_internal(str, RSTRING_PTR(profile));
139
+ }
140
+
141
+ /*
142
+ * call-seq:
143
+ * IDN::Stringprep.nfkc_normalize(string) => string
144
+ *
145
+ * Converts a string in UTF-8 format into canonical form, standardizing
146
+ * such issues as whether a character with an accent is represented as a
147
+ * base character and combining accent or as a single precomposed character.
148
+ */
149
+
150
+ static VALUE nfkc_normalize(VALUE self, VALUE str)
151
+ {
152
+ char *buf;
153
+ VALUE retv;
154
+
155
+ str = rb_check_convert_type(str, T_STRING, "String", "to_s");
156
+ buf = stringprep_utf8_nfkc_normalize(RSTRING_PTR(str), RSTRING_LEN(str));
157
+
158
+ retv = rb_str_new2(buf);
159
+ xfree(buf);
160
+ return retv;
161
+ }
162
+
163
/*
 * Module Initialization.
 *
 * Creates the IDN::Stringprep module and its StringprepError exception
 * class (a subclass of eIDNError), stores both in the file-level handles
 * mStringprep and eStringprepError, and registers the module-level
 * functions nameprep, nodeprep, resourceprep, with_profile and
 * nfkc_normalize.
 */

void init_stringprep(void)
{
  /*
   * Compiled only when mIDN_RDOC_HACK is defined.
   * NOTE(review): presumably present so RDoc's C parser can resolve mIDN
   * and eIDNError from this file alone -- confirm against the build setup.
   */
#ifdef mIDN_RDOC_HACK
  mIDN = rb_define_module("IDN");
  eIDNError = rb_define_class_under(mIDN, "IDNError", rb_eStandardError);
#endif

  /* mIDN and eIDNError are not defined in this file (assumed to come from idn.h). */
  mStringprep = rb_define_module_under(mIDN, "Stringprep");
  eStringprepError = rb_define_class_under(mStringprep, "StringprepError",
                                           eIDNError);

  /* The trailing integer is the number of Ruby arguments each function takes. */
  rb_define_singleton_method(mStringprep, "nameprep", nameprep, 1);
  rb_define_singleton_method(mStringprep, "nodeprep", nodeprep, 1);
  rb_define_singleton_method(mStringprep, "resourceprep", resourceprep, 1);
  rb_define_singleton_method(mStringprep, "with_profile", with_profile, 2);
  rb_define_singleton_method(mStringprep, "nfkc_normalize", nfkc_normalize, 1);
}
@@ -0,0 +1,273 @@
1
+ # Unit tests for IDN::Idna.
2
+ #
3
+ # Copyright (c) 2005-2006 Erik Abele. All rights reserved.
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # Please see the file called LICENSE for further details.
8
+ #
9
+ # You may also obtain a copy of the License at
10
+ #
11
+ # * http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ # See the License for the specific language governing permissions and
17
+ # limitations under the License.
18
+ #
19
+ # This software is OSI Certified Open Source Software.
20
+ # OSI Certified is a certification mark of the Open Source Initiative.
21
+
22
+ require 'test/unit'
23
+ require 'idn'
24
+
25
# Exercises IDN::Idna.toASCII and IDN::Idna.toUnicode against fixed test
# vectors. Every vector is a pair: [ UTF-8 label, expected ACE label ].
class Test_Idna < Test::Unit::TestCase
  include IDN

  # JOSEFSSON test vectors, taken from DRAFT-JOSEFSSON-IDN-TEST-VECTORS-00:
  # http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
  #
  # Modifications:
  # - omission of 5.20 since it is identical with 5.8 (case H below)

  TESTCASES_JOSEFSSON = {
    'A' => [
      [ 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
        0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
        0x061F ].pack('U*'),
      Idna::ACE_PREFIX + 'egbpdaj6bu4bxfgehfvwxn'
    ],

    'B' => [
      [ 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D,
        0x6587 ].pack('U*'),
      Idna::ACE_PREFIX + 'ihqwcrb4cv8a8dqg056pqjye'
    ],

    'C' => [
      [ 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D,
        0x6587 ].pack('U*'),
      Idna::ACE_PREFIX + 'ihqwctvzc91f659drss3x8bo0yb'
    ],

    'D' => [
      [ 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
        0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
        0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079 ].pack('U*'),
      Idna::ACE_PREFIX + 'Proprostnemluvesky-uyb24dma41a'
    ],

    'E' => [
      [ 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
        0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
        0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA ].pack('U*'),
      Idna::ACE_PREFIX + '4dbcagdahymbxekheh6e0a7fei0b'
    ],

    'F' => [
      [ 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
        0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
        0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
        0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902 ].pack('U*'),
      Idna::ACE_PREFIX + 'i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd'
    ],

    'G' => [
      [ 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
        0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
        0x306E, 0x304B ].pack('U*'),
      Idna::ACE_PREFIX + 'n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa'
    ],

    'H' => [
      [ 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
        0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
        0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
        0x0441, 0x0441, 0x043A, 0x0438 ].pack('U*'),
      Idna::ACE_PREFIX + 'b1abfaaepdrnnbgefbadotcwatmq2g4l'
    ],

    'I' => [
      [ 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
        0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
        0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
        0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
        0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F,
        0x006C ].pack('U*'),
      Idna::ACE_PREFIX + 'PorqunopuedensimplementehablarenEspaol-fmd56a'
    ],

    'J' => [
      [ 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
        0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
        0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
        0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074 ].pack('U*'),
      Idna::ACE_PREFIX + 'TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g'
    ],

    'K' => [
      [ 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148,
        0x751F ].pack('U*'),
      Idna::ACE_PREFIX + '3B-ww4c5e180e575a65lsy2b'
    ],

    'L' => [
      [ 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
        0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
        0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059,
        0x0053 ].pack('U*'),
      Idna::ACE_PREFIX + '-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n'
    ],

    'M' => [
      [ 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
        0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
        0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
        0x6240 ].pack('U*'),
      Idna::ACE_PREFIX + 'Hello-Another-Way--fc4qua05auwb3674vfr0b'
    ],

    'N' => [
      [ 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B,
        0x0032 ].pack('U*'),
      Idna::ACE_PREFIX + '2-u9tlzr9756bt3uc0v'
    ],

    'O' => [
      [ 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
        0x3059, 0x308B, 0x0035, 0x79D2, 0x524D ].pack('U*'),
      Idna::ACE_PREFIX + 'MajiKoi5-783gue6qz075azm5e'
    ],

    'P' => [
      [ 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3,
        0x30D0 ].pack('U*'),
      Idna::ACE_PREFIX + 'de-jg4avhby1noc0d'
    ],

    'Q' => [
      [ 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067 ].pack('U*'),
      Idna::ACE_PREFIX + 'd9juau41awczczp'
    ],

    'R' => [
      [ 0x03B5, 0x03BB, 0x03BB, 0x03B7, 0x03BD, 0x03B9, 0x03BA,
        0x03AC ].pack('U*'),
      Idna::ACE_PREFIX + 'hxargifdar'
    ],

    'S' => [
      [ 0x0062, 0x006F, 0x006E, 0x0121, 0x0075, 0x0073, 0x0061, 0x0127,
        0x0127, 0x0061 ].pack('U*'),
      Idna::ACE_PREFIX + 'bonusaa-5bb1da'
    ]
  }

  # UNASSIGNED test vectors: unassigned code points U+0221 and U+0236.

  TESTCASES_UNASSIGNED = {
    'A' => [
      [ 0x0221 ].pack('U*'),
      Idna::ACE_PREFIX + '6la'
    ],

    'B' => [
      [ 0x0236 ].pack('U*'),
      Idna::ACE_PREFIX + 'sma'
    ]
  }

  # STD3 test vectors: labels not conforming to the STD3 ASCII rules (see
  # RFC1122 and RFC1123 for details).

  TESTCASES_STD3 = {
    'A' => [
      [ 0x0115, 0x0073, 0x0074, 0x0065, 0x002D ].pack('U*'),
      Idna::ACE_PREFIX + 'ste--kva'
    ],

    'B' => [
      [ 0x006F, 0x003A, 0x006C, 0x006B, 0x01EB, 0x0065 ].pack('U*'),
      Idna::ACE_PREFIX + 'o:lke-m1b'
    ]
  }

  # No per-test fixtures are needed; both hooks are intentionally empty.
  def setup; end

  def teardown; end

  # toASCII on each JOSEFSSON label must equal the lower-cased ACE form.
  def test_toASCII_JOSEFSSON
    TESTCASES_JOSEFSSON.each_pair do |label, (unicode, ace)|
      encoded = Idna.toASCII(unicode)
      assert_equal(ace.downcase, encoded, "TestCase #{label} failed")
    end
  end

  # With ALLOW_UNASSIGNED, unassigned code points encode to their ACE form.
  def test_toASCII_UNASSIGNED_ALLOWED
    TESTCASES_UNASSIGNED.each_pair do |label, (unicode, ace)|
      encoded = Idna.toASCII(unicode, IDN::Idna::ALLOW_UNASSIGNED)
      assert_equal(ace, encoded, "TestCase #{label} failed")
    end
  end

  # Without the flag, unassigned code points must raise IdnaError.
  def test_toASCII_UNASSIGNED_NOT_ALLOWED
    TESTCASES_UNASSIGNED.each_pair do |label, (unicode, _ace)|
      assert_raise(Idna::IdnaError, "TestCase #{label} failed") {
        Idna.toASCII(unicode)
      }
    end
  end

  # With USE_STD3_ASCII_RULES, non-conforming labels must raise IdnaError.
  def test_toASCII_STD3_USED
    TESTCASES_STD3.each_pair do |label, (unicode, _ace)|
      assert_raise(Idna::IdnaError, "TestCase #{label} failed") {
        Idna.toASCII(unicode, IDN::Idna::USE_STD3_ASCII_RULES)
      }
    end
  end

  # Without the STD3 flag, the same labels encode to their ACE form.
  def test_toASCII_STD3_NOT_USED
    TESTCASES_STD3.each_pair do |label, (unicode, ace)|
      encoded = Idna.toASCII(unicode)
      assert_equal(ace, encoded, "TestCase #{label} failed")
    end
  end

  # toUnicode on each JOSEFSSON ACE label must give back the UTF-8 label.
  def test_toUnicode_JOSEFSSON
    TESTCASES_JOSEFSSON.each_pair do |label, (unicode, ace)|
      decoded = Idna.toUnicode(ace)
      assert_equal(unicode, decoded, "TestCase #{label} failed")
    end
  end

  # With ALLOW_UNASSIGNED, the ACE form decodes to the unassigned code point.
  def test_toUnicode_UNASSIGNED_ALLOWED
    TESTCASES_UNASSIGNED.each_pair do |label, (unicode, ace)|
      decoded = Idna.toUnicode(ace, IDN::Idna::ALLOW_UNASSIGNED)
      assert_equal(unicode, decoded, "TestCase #{label} failed")
    end
  end

  # Without the flag, toUnicode is expected to hand back the ACE input as is.
  def test_toUnicode_UNASSIGNED_NOT_ALLOWED
    TESTCASES_UNASSIGNED.each_pair do |label, (_unicode, ace)|
      decoded = Idna.toUnicode(ace)
      assert_equal(ace, decoded, "TestCase #{label} failed")
    end
  end

  # With USE_STD3_ASCII_RULES, toUnicode is expected to hand back the ACE
  # input as is.
  def test_toUnicode_STD3_USED
    TESTCASES_STD3.each_pair do |label, (_unicode, ace)|
      decoded = Idna.toUnicode(ace, IDN::Idna::USE_STD3_ASCII_RULES)
      assert_equal(ace, decoded, "TestCase #{label} failed")
    end
  end

  # Without the STD3 flag, the ACE form decodes to the UTF-8 label.
  def test_toUnicode_STD3_NOT_USED
    TESTCASES_STD3.each_pair do |label, (unicode, ace)|
      decoded = Idna.toUnicode(ace)
      assert_equal(unicode, decoded, "TestCase #{label} failed")
    end
  end
end