punycode4r 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/lib/punycode.rb +565 -0
  2. data/test/test_punycode.rb +311 -0
  3. metadata +47 -0
@@ -0,0 +1,565 @@
1
+ #!/usr/bin/ruby -Ku
2
+ #
3
+ # This is pure Ruby implementing Punycode (RFC 3492).
4
+ # (original ANSI C code (C89) implementing Punycode is in RFC 3492)
5
+ #
6
+ # copyright (c) 2005 Kazuhiro NISHIYAMA
7
+ # You can redistribute it and/or modify it under the same terms as Ruby.
8
+ #
9
+ =begin
10
+ = punycode4r
11
+ == usage
12
+ === simple usage
13
+ require 'punycode'
14
+ utf8_string = "\346\226\207\345\255\227\345\210\227"
15
+ punycode_string = Punycode.encode(utf8_string)
16
+ p punycode_string #=> "1br58tspi"
17
+ p(Punycode.decode(punycode_string) == utf8_string) #=> true
18
+
19
+ == IDN (Internationalized Domain Name)
20
+ When you use punycode in IDN,
21
+ you must perform NAMEPREP (RFC 3491) before Punycode.encode,
22
+ and add ACE Prefix (defined in RFC 3490) after Punycode.encode.
23
+
24
+ This library supports punycode only.
25
+ NAMEPREP requires other libraries.
26
+
27
+ =end
28
+
29
# Pure-Ruby port of the Punycode reference implementation from RFC 3492.
#
# Punycode.encode and Punycode.decode are the string-level entry points;
# punycode_encode and punycode_decode are the array-level routines that
# mirror the C functions of the RFC.
module Punycode
  # Ruby counterparts of the punycode_status values of the reference code.
  # Failures are raised as exceptions; PunycodeSuccess is the value returned
  # by punycode_encode / punycode_decode when the conversion succeeds.
  module Status
    # Common ancestor of every failure raised by this library.
    class Error < StandardError; end
    # Returned (never raised) on success.
    class PunycodeSuccess; end
    # Input is invalid.
    class PunycodeBadInput < Error; end
    # Output would exceed the space provided.
    class PunycodeBigOutput < Error; end
    # Input needs wider integers to process.
    class PunycodeOverflow < Error; end
  end
  include Status

  # *** Bootstring parameters for Punycode ***

  BASE = 36
  TMIN = 1
  TMAX = 26
  SKEW = 38
  DAMP = 700
  INITIAL_BIAS = 72
  INITIAL_N = 0x80
  DELIMITER = 0x2D

  # *** Platform-specific constants ***

  # Upper bound used by the overflow guards.  It stands in for the maximum
  # value of the punycode_uint type of the reference code; Ruby integers do
  # not wrap, so it only serves to reject absurdly large values.
  MAXINT = 1 << 64

  # Size limits applied by Punycode.decode and Punycode.encode.
  UNICODE_MAX_LENGTH = 256
  ACE_MAX_LENGTH = 256

  # Maps each ASCII code (0..127) to a printable character; control
  # characters and DEL map to "\n".
  PRINT_ASCII =
    "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" \
    "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" \
    " !\"\#$%&'()*+,-./" \
    "0123456789:;<=>?" \
    "@ABCDEFGHIJKLMNO" \
    "PQRSTUVWXYZ[\\]^_" \
    "`abcdefghijklmno" \
    "pqrstuvwxyz{|}~\n"

  module_function

  # Tests whether cp is a basic (ASCII) code point.
  def basic(cp)
    cp < 0x80
  end

  # Tests whether cp is the delimiter ("-").
  def delim(cp)
    cp == DELIMITER
  end

  # Returns the numeric value (0..BASE-1) of the basic code point cp when it
  # is used as a digit, or BASE if cp does not represent a digit.
  #
  # The C original relies on unsigned wrap-around (cp - 48 < 10); Ruby
  # integers are signed, so the ranges are tested explicitly.  Otherwise
  # every code point below "0" would be accepted as a digit.
  def decode_digit(cp)
    case cp
    when 48..57  then cp - 22 # "0".."9" => 26..35
    when 65..90  then cp - 65 # "A".."Z" =>  0..25
    when 97..122 then cp - 97 # "a".."z" =>  0..25
    else BASE
    end
  end

  # Returns the basic code point whose digit value is d (0..BASE-1).
  # 0..25 map to a..z (A..Z when flag is truthy), 26..35 map to 0..9.
  # The result is undefined if flag is truthy and d has no uppercase form.
  def encode_digit(d, flag)
    d + 22 + 75 * ((d < 26) ? 1 : 0) - ((flag ? 1 : 0) << 5)
  end

  # Tests whether the basic code point bcp is flagged, i.e. an uppercase
  # ASCII letter.
  def flagged(bcp)
    (65..90).include?(bcp)
  end

  # Forces the basic code point bcp to uppercase when flag is truthy and to
  # lowercase otherwise.  Caseless code points are returned unchanged.
  def encode_basic(bcp, flag)
    bcp -= 32 if (97..122).include?(bcp)          # fold a..z to A..Z first
    bcp += 32 if !flag && (65..90).include?(bcp)  # then lower it again if unflagged
    bcp
  end

  # Threshold t for digit position k under the current bias:
  # k - bias clamped to TMIN..TMAX.
  def threshold(k, bias)
    [[k - bias, TMIN].max, TMAX].min
  end

  # *** Bias adaptation function ***

  # Computes the next bias from the delta just coded, the number of code
  # points handled so far (numpoints) and whether this was the first delta.
  def adapt(delta, numpoints, firsttime)
    delta = firsttime ? delta / DAMP : delta >> 1
    delta += delta / numpoints

    k = 0
    while delta > ((BASE - TMIN) * TMAX) / 2
      delta /= BASE - TMIN
      k += BASE
    end

    k + (BASE - TMIN + 1) * delta / (delta + SKEW)
  end

  # *** Main encode function ***

  # Converts Unicode to Punycode.
  #
  # input_length:: number of code points to read from input.
  # input::        array of Unicode code points (integers).
  # case_flags::   nil, or an array with one flag per input code point.
  #                With flags, ASCII letters are forced to upper case
  #                (truthy) or lower case (falsy), and the flag of a
  #                non-basic code point selects the case of the last digit
  #                of its delta.  With nil, ASCII letters are copied
  #                unchanged and all digits are lower case.
  # output_length:: one-element array used as an in/out argument: on entry
  #                the maximum number of code points that may be written, on
  #                return the number actually written.
  # output::       array that receives the ASCII code points.
  #
  # Returns PunycodeSuccess.  Raises PunycodeBigOutput when the limit in
  # output_length is too small and PunycodeOverflow when a delta would
  # exceed MAXINT; output may then hold partial data.
  def punycode_encode(input_length, input, case_flags, output_length, output)
    n = INITIAL_N
    delta = 0
    out = 0
    max_out = output_length[0]
    bias = INITIAL_BIAS

    # Handle the basic code points: they are copied through literally.
    input_length.times do |j|
      next unless basic(input[j])
      raise PunycodeBigOutput if max_out - out < 2
      output[out] = case_flags ? encode_basic(input[j], case_flags[j]) : input[j]
      out += 1
    end

    # h is the number of code points that have been handled, b is the
    # number of basic code points, and out is the number of characters
    # that have been output.
    h = b = out

    if b > 0
      output[out] = DELIMITER
      out += 1
    end

    # Main encoding loop:
    while h < input_length
      # All non-basic code points < n have been handled already.
      # Find the next larger one:
      m = MAXINT
      input_length.times do |j|
        m = input[j] if (n...m) === input[j]
      end

      # Increase delta enough to advance the decoder's <n,i> state
      # to <m,0>, but guard against overflow:
      raise PunycodeOverflow if m - n > (MAXINT - delta) / (h + 1)
      delta += (m - n) * (h + 1)
      n = m

      input_length.times do |j|
        if input[j] < n
          delta += 1
          raise PunycodeOverflow if delta == 0
        end
        next unless input[j] == n

        # Represent delta as a generalized variable-length integer:
        q = delta
        k = BASE
        loop do
          raise PunycodeBigOutput if out >= max_out
          t = threshold(k, bias)
          break if q < t
          output[out] = encode_digit(t + (q - t) % (BASE - t), false)
          out += 1
          q = (q - t) / (BASE - t)
          k += BASE
        end

        output[out] = encode_digit(q, case_flags && case_flags[j])
        out += 1
        bias = adapt(delta, h + 1, h == b)
        delta = 0
        h += 1
      end

      delta += 1
      n += 1
    end

    output_length[0] = out
    PunycodeSuccess
  end

  # *** Main decode function ***

  # Converts Punycode to Unicode.
  #
  # input_length:: number of code points to read from input.
  # input::        array of ASCII code points (integers).
  # output_length:: one-element array used as an in/out argument: on entry
  #                the maximum number of code points that may be written, on
  #                return the number actually written.
  # output::       array that receives the Unicode code points.
  # case_flags::   nil, or an array that receives one flag per output code
  #                point (true when the corresponding input character was an
  #                uppercase letter).
  #
  # Returns PunycodeSuccess.  Raises PunycodeBadInput for malformed input,
  # PunycodeBigOutput when the limit in output_length is too small and
  # PunycodeOverflow when a value would exceed MAXINT; output and
  # case_flags may then hold partial data.
  def punycode_decode(input_length, input, output_length, output, case_flags)
    n = INITIAL_N
    out = 0
    i = 0
    max_out = output_length[0]
    bias = INITIAL_BIAS

    # Handle the basic code points: let b be the number of input code
    # points before the last delimiter, or 0 if there is none, then
    # copy the first b code points to the output.
    b = 0
    input_length.times { |j| b = j if delim(input[j]) }
    raise PunycodeBigOutput if b > max_out

    b.times do |j|
      case_flags[out] = flagged(input[j]) if case_flags
      raise PunycodeBadInput unless basic(input[j])
      output[out] = input[j]
      out += 1
    end

    # Main decoding loop: start just after the last delimiter if any
    # basic code points were copied; start at the beginning otherwise.
    # in_ is the index of the next character to be consumed.
    in_ = b > 0 ? b + 1 : 0
    while in_ < input_length
      # Decode a generalized variable-length integer into delta, which
      # gets added to i.  i is increased as we go and its starting value
      # is subtracted at the end to obtain delta.
      oldi = i
      w = 1
      k = BASE
      loop do
        raise PunycodeBadInput if in_ >= input_length
        digit = decode_digit(input[in_])
        in_ += 1
        raise PunycodeBadInput if digit >= BASE
        raise PunycodeOverflow if digit > (MAXINT - i) / w
        i += digit * w
        t = threshold(k, bias)
        break if digit < t
        raise PunycodeOverflow if w > MAXINT / (BASE - t)
        w *= BASE - t
        k += BASE
      end

      bias = adapt(i - oldi, out + 1, oldi == 0)

      # i was supposed to wrap around from out+1 to 0, incrementing n
      # each time, so we'll fix that now:
      raise PunycodeOverflow if i / (out + 1) > MAXINT - n
      n += i / (out + 1)
      i %= out + 1

      # Insert n at position i of the output:
      raise PunycodeBigOutput if out >= max_out

      if case_flags
        # Shift the tail one slot to the right (memmove in the C code).
        case_flags[i + 1, out - i] = case_flags[i, out - i]
        # Case of last character determines uppercase flag:
        case_flags[i] = flagged(input[in_ - 1])
      end

      output[i + 1, out - i] = output[i, out - i]
      output[i] = n
      i += 1
      out += 1
    end

    output_length[0] = out
    PunycodeSuccess
  end

  # Encodes a UTF-8 string as a Punycode string.
  #
  # unicode_string::   UTF-8 encoded input.
  # case_flags::       optional array of per-code-point case flags, passed
  #                    through to punycode_encode.
  # print_ascii_only:: when true, every output character is mapped through
  #                    PRINT_ASCII.
  #
  # Raises PunycodeBigOutput if the result would exceed ACE_MAX_LENGTH and
  # PunycodeOverflow as described for punycode_encode.
  def encode(unicode_string, case_flags=nil, print_ascii_only=false)
    input = unicode_string.unpack('U*')
    output = [0] * (ACE_MAX_LENGTH + 1)
    output_length = [ACE_MAX_LENGTH]

    punycode_encode(input.size, input, case_flags, output_length, output)

    # Take exactly the code points that were written.
    chars = output[0, output_length[0]].map do |c|
      unless (0..127).include?(c)
        raise Error, "assertion error: invalid output char"
      end
      print_ascii_only ? PRINT_ASCII[c, 1] : c.chr
    end
    chars.join('')
  end

  # Decodes a Punycode string into a UTF-8 string.
  #
  # punycode::   ASCII-only Punycode input.
  # case_flags:: array that receives the per-code-point case flags.
  #
  # Raises PunycodeBigOutput if the input is longer than 2 * ACE_MAX_LENGTH
  # or decodes to more than UNICODE_MAX_LENGTH code points, PunycodeBadInput
  # if the input contains non-ASCII bytes or is not valid Punycode, and
  # PunycodeOverflow as described for punycode_decode.
  def decode(punycode, case_flags=[])
    raise PunycodeBigOutput if ACE_MAX_LENGTH * 2 < punycode.size

    input = punycode.unpack('C*')
    raise PunycodeBadInput if input.any? { |c| c > 127 }

    output = []
    output_length = [UNICODE_MAX_LENGTH]
    punycode_decode(input.length, input, output_length, output, case_flags)
    output.pack('U*')
  end
end
425
+
426
# Command-line driver, modelled on the test program of RFC 3492:
#   punycode.rb -e   reads code points from stdin, writes a Punycode string
#   punycode.rb -d   reads a Punycode string from stdin, writes code points
# Failures are reported by raising (Kernel#fail) with one of the messages
# defined below.
if __FILE__ == $0
  UNICODE_MAX_LENGTH = Punycode::UNICODE_MAX_LENGTH
  ACE_MAX_LENGTH = Punycode::ACE_MAX_LENGTH

  # Prints the usage text to stderr and terminates with a failure status.
  # argv[0] is the program name.
  def usage(argv)
    STDERR.puts <<-USAGE
#{argv[0]} -e reads code points and writes a Punycode string.
#{argv[0]} -d reads a Punycode string and writes code points.

Input and output are plain text in the native character set.
Code points are in the form u+hex separated by whitespace.
Although the specification allows Punycode strings to contain
any characters from the ASCII repertoire, this test code
supports only the printable characters, and needs the Punycode
string to be followed by a newline.
The case of the u in u+hex is the force-to-uppercase flag.
    USAGE
    exit(false)
  end

  TOO_BIG = "input or output is too large, recompile with larger limits"
  INVALID_INPUT = "invalid input"
  OVERFLOW = "arithmetic overflow"
  IO_ERROR = "I/O error"

  PRINT_ASCII = Punycode::PRINT_ASCII

  # Runs the given conversion block and translates the library's
  # exceptions into the driver's failure messages.  Fails as well when the
  # block returns anything other than PunycodeSuccess.
  def run_checked
    status =
      begin
        yield
      rescue Punycode::Status::PunycodeBadInput
        fail(INVALID_INPUT)
      rescue Punycode::Status::PunycodeBigOutput
        fail(TOO_BIG)
      rescue Punycode::Status::PunycodeOverflow
        fail(OVERFLOW)
      end
    if status != Punycode::Status::PunycodeSuccess
      fail("assertion error: unknown status")
    end
  end

  # Entry point.  argv is the program name followed by the command-line
  # arguments; exactly one argument, "-e" or "-d", is accepted.
  def main(argv)
    case_flags = [0] * UNICODE_MAX_LENGTH

    usage(argv) if argv.size != 2
    usage(argv) if /\A-[de]\z/ !~ argv[1]

    if argv[1] == "-e"
      input = [0] * UNICODE_MAX_LENGTH
      output = [0] * (ACE_MAX_LENGTH + 1)

      # Read the input code points.  The regexp admits only "u+" / "U+"
      # prefixes; an uppercase U sets the force-to-uppercase flag.
      input_length = 0
      STDIN.read.scan(/([uU]\+)([0-9a-fA-F]+)/) do |uplus, hex|
        codept = hex.hex
        fail(INVALID_INPUT) if codept > Punycode::MAXINT
        fail(TOO_BIG) if input_length == UNICODE_MAX_LENGTH

        case_flags[input_length] = (uplus[0, 1] == "U")
        input[input_length] = codept
        input_length += 1
      end

      # Encode:
      output_length = [ACE_MAX_LENGTH]
      run_checked do
        Punycode.punycode_encode(input_length, input, case_flags,
                                 output_length, output)
      end

      # Convert to native charset and output exactly the code points
      # that were written:
      text = output[0, output_length[0]].map do |c|
        raise "assertion error: invalid output char" unless c >= 0 && c <= 127
        PRINT_ASCII[c, 1]
      end
      puts(text.join(''))
      exit(true)
    end

    if argv[1] == "-d"
      output = []

      # Read the Punycode string.  STDIN.gets returns nil when stdin is
      # empty; report that as an I/O error instead of crashing.
      line = STDIN.gets
      fail(IO_ERROR) unless line
      input = line.split(//)[0, ACE_MAX_LENGTH * 2]
      # A missing trailing newline means the line did not fit the limit.
      fail(TOO_BIG) if input[-1] != "\n"
      input = input[0...-1].map do |ch|
        PRINT_ASCII.index(ch) or fail(INVALID_INPUT)
      end

      # Decode:
      output_length = [UNICODE_MAX_LENGTH]
      run_checked do
        Punycode.punycode_decode(input.length, input, output_length,
                                 output, case_flags)
      end

      # Output the result:
      output_length[0].times do |j|
        printf("%s+%04X\n", case_flags[j] ? "U" : "u", output[j])
      end

      exit(true)
    end

    usage(argv)
    raise "not reached"
  end

  main([$0] + ARGV)
end
@@ -0,0 +1,311 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # test of punycode.rb
4
+ #
5
+ # copyright (c) 2005 Kazuhiro NISHIYAMA
6
+ # You can redistribute it and/or modify it under the same terms as Ruby.
7
+ #
8
+ require 'test/unit'
9
+
10
# Shared test cases: the sample strings of RFC 3492 section 7.1.
#
# Each test method hands one example to assert_punycode, which splits it
# into description, code points and expected Punycode and forwards them to
# assert_punycode_main.  The including test class must define
# assert_punycode_main(description, codepoints, punycode).
module AssertPunycode
  # Parses one example.  A backslash at the end of a line continues the
  # line; the first line is the description, the last line holds the
  # Punycode string and the lines in between hold the code points.
  def assert_punycode(example)
    lines = example.gsub(/\\\n\s*/, "").split(/\n/)
    description = lines[0]
    # The lines are joined without a separator; the heredoc indentation of
    # each line is what keeps the code points separated.
    codepoints = lines[1...-1].join("")
    punycode = lines[-1].strip.sub(/^Punycode: /, "")

    assert_punycode_main(description, codepoints, punycode)
  end

  def test_rfc3492_7_1_A
    assert_punycode(<<-EXAMPLE)
    (A) Arabic (Egyptian):
    u+0644 u+064A u+0647 u+0645 u+0627 u+0628 u+062A u+0643 u+0644
    u+0645 u+0648 u+0634 u+0639 u+0631 u+0628 u+064A u+061F
    Punycode: egbpdaj6bu4bxfgehfvwxn
    EXAMPLE
  end

  def test_rfc3492_7_1_B
    assert_punycode(<<-EXAMPLE)
    (B) Chinese (simplified):
    u+4ED6 u+4EEC u+4E3A u+4EC0 u+4E48 u+4E0D u+8BF4 u+4E2D u+6587
    Punycode: ihqwcrb4cv8a8dqg056pqjye
    EXAMPLE
  end

  def test_rfc3492_7_1_C
    assert_punycode(<<-EXAMPLE)
    (C) Chinese (traditional):
    u+4ED6 u+5011 u+7232 u+4EC0 u+9EBD u+4E0D u+8AAA u+4E2D u+6587
    Punycode: ihqwctvzc91f659drss3x8bo0yb
    EXAMPLE
  end

  def test_rfc3492_7_1_D
    assert_punycode(<<-EXAMPLE)
    (D) Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky
    U+0050 u+0072 u+006F u+010D u+0070 u+0072 u+006F u+0073 u+0074
    u+011B u+006E u+0065 u+006D u+006C u+0075 u+0076 u+00ED u+010D
    u+0065 u+0073 u+006B u+0079
    Punycode: Proprostnemluvesky-uyb24dma41a
    EXAMPLE
  end

  def test_rfc3492_7_1_E
    assert_punycode(<<-EXAMPLE)
    (E) Hebrew:
    u+05DC u+05DE u+05D4 u+05D4 u+05DD u+05E4 u+05E9 u+05D5 u+05D8
    u+05DC u+05D0 u+05DE u+05D3 u+05D1 u+05E8 u+05D9 u+05DD u+05E2
    u+05D1 u+05E8 u+05D9 u+05EA
    Punycode: 4dbcagdahymbxekheh6e0a7fei0b
    EXAMPLE
  end

  def test_rfc3492_7_1_F
    assert_punycode(<<-EXAMPLE)
    (F) Hindi (Devanagari):
    u+092F u+0939 u+0932 u+094B u+0917 u+0939 u+093F u+0928 u+094D
    u+0926 u+0940 u+0915 u+094D u+092F u+094B u+0902 u+0928 u+0939
    u+0940 u+0902 u+092C u+094B u+0932 u+0938 u+0915 u+0924 u+0947
    u+0939 u+0948 u+0902
    Punycode: i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd
    EXAMPLE
  end

  def test_rfc3492_7_1_G
    assert_punycode(<<-EXAMPLE)
    (G) Japanese (kanji and hiragana):
    u+306A u+305C u+307F u+3093 u+306A u+65E5 u+672C u+8A9E u+3092
    u+8A71 u+3057 u+3066 u+304F u+308C u+306A u+3044 u+306E u+304B
    Punycode: n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa
    EXAMPLE
  end

  def test_rfc3492_7_1_H
    assert_punycode(<<-EXAMPLE)
    (H) Korean (Hangul syllables):
    u+C138 u+ACC4 u+C758 u+BAA8 u+B4E0 u+C0AC u+B78C u+B4E4 u+C774
    u+D55C u+AD6D u+C5B4 u+B97C u+C774 u+D574 u+D55C u+B2E4 u+BA74
    u+C5BC u+B9C8 u+B098 u+C88B u+C744 u+AE4C
    Punycode: 989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j\\
    psd879ccm6fea98c
    EXAMPLE
  end

  # The expected string contains an uppercase "D" that can only be produced
  # with case flags, so the library encode test (which passes none) skips
  # this example.
  def test_rfc3492_7_1_I
    if self.class.to_s == 'TestPunycodeEncodeLib'
      if __FILE__ == $0 || $VERBOSE || $DEBUG
        STDERR.puts "SKIP KNOWN BUG: downcase D in Punycode in encode test without case_flags."
      end
      return
    end
    assert_punycode(<<-EXAMPLE)
    KNOWN BUG: downcase D in Punycode in encode test without case_flags. \\
    (I) Russian (Cyrillic):
    U+043F u+043E u+0447 u+0435 u+043C u+0443 u+0436 u+0435 u+043E
    u+043D u+0438 u+043D u+0435 u+0433 u+043E u+0432 u+043E u+0440
    u+044F u+0442 u+043F u+043E u+0440 u+0443 u+0441 u+0441 u+043A
    u+0438
    Punycode: b1abfaaepdrnnbgefbaDotcwatmq2g4l
    EXAMPLE
  end

  def test_rfc3492_7_1_I_downcase
    assert_punycode(<<-EXAMPLE)
    (I) Russian (Cyrillic): (downcase first U in Codepoints and D in Punycode)
    u+043F u+043E u+0447 u+0435 u+043C u+0443 u+0436 u+0435 u+043E
    u+043D u+0438 u+043D u+0435 u+0433 u+043E u+0432 u+043E u+0440
    u+044F u+0442 u+043F u+043E u+0440 u+0443 u+0441 u+0441 u+043A
    u+0438
    Punycode: b1abfaaepdrnnbgefbadotcwatmq2g4l
    EXAMPLE
  end

  def test_rfc3492_7_1_J
    assert_punycode(<<-EXAMPLE)
    (J) Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol
    U+0050 u+006F u+0072 u+0071 u+0075 u+00E9 u+006E u+006F u+0070
    u+0075 u+0065 u+0064 u+0065 u+006E u+0073 u+0069 u+006D u+0070
    u+006C u+0065 u+006D u+0065 u+006E u+0074 u+0065 u+0068 u+0061
    u+0062 u+006C u+0061 u+0072 u+0065 u+006E U+0045 u+0073 u+0070
    u+0061 u+00F1 u+006F u+006C
    Punycode: PorqunopuedensimplementehablarenEspaol-fmd56a
    EXAMPLE
  end

  def test_rfc3492_7_1_K
    assert_punycode(<<-EXAMPLE)
    (K) Vietnamese:\\
    T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch\\
    <ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t
    U+0054 u+1EA1 u+0069 u+0073 u+0061 u+006F u+0068 u+1ECD u+006B
    u+0068 u+00F4 u+006E u+0067 u+0074 u+0068 u+1EC3 u+0063 u+0068
    u+1EC9 u+006E u+00F3 u+0069 u+0074 u+0069 u+1EBF u+006E u+0067
    U+0056 u+0069 u+1EC7 u+0074
    Punycode: TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g
    EXAMPLE
  end

  def test_rfc3492_7_1_L
    assert_punycode(<<-EXAMPLE)
    (L) 3<nen>B<gumi><kinpachi><sensei>
    u+0033 u+5E74 U+0042 u+7D44 u+91D1 u+516B u+5148 u+751F
    Punycode: 3B-ww4c5e180e575a65lsy2b
    EXAMPLE
  end

  def test_rfc3492_7_1_M
    assert_punycode(<<-EXAMPLE)
    (M) <amuro><namie>-with-SUPER-MONKEYS
    u+5B89 u+5BA4 u+5948 u+7F8E u+6075 u+002D u+0077 u+0069 u+0074
    u+0068 u+002D U+0053 U+0055 U+0050 U+0045 U+0052 u+002D U+004D
    U+004F U+004E U+004B U+0045 U+0059 U+0053
    Punycode: -with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n
    EXAMPLE
  end

  def test_rfc3492_7_1_N
    assert_punycode(<<-EXAMPLE)
    (N) Hello-Another-Way-<sorezore><no><basho>
    U+0048 u+0065 u+006C u+006C u+006F u+002D U+0041 u+006E u+006F
    u+0074 u+0068 u+0065 u+0072 u+002D U+0057 u+0061 u+0079 u+002D
    u+305D u+308C u+305E u+308C u+306E u+5834 u+6240
    Punycode: Hello-Another-Way--fc4qua05auwb3674vfr0b
    EXAMPLE
  end

  def test_rfc3492_7_1_O
    assert_punycode(<<-EXAMPLE)
    (O) <hitotsu><yane><no><shita>2
    u+3072 u+3068 u+3064 u+5C4B u+6839 u+306E u+4E0B u+0032
    Punycode: 2-u9tlzr9756bt3uc0v
    EXAMPLE
  end

  def test_rfc3492_7_1_P
    assert_punycode(<<-EXAMPLE)
    (P) Maji<de>Koi<suru>5<byou><mae>
    U+004D u+0061 u+006A u+0069 u+3067 U+004B u+006F u+0069 u+3059
    u+308B u+0035 u+79D2 u+524D
    Punycode: MajiKoi5-783gue6qz075azm5e
    EXAMPLE
  end

  def test_rfc3492_7_1_Q
    assert_punycode(<<-EXAMPLE)
    (Q) <pafii>de<runba>
    u+30D1 u+30D5 u+30A3 u+30FC u+0064 u+0065 u+30EB u+30F3 u+30D0
    Punycode: de-jg4avhby1noc0d
    EXAMPLE
  end

  def test_rfc3492_7_1_R
    assert_punycode(<<-EXAMPLE)
    (R) <sono><supiido><de>
    u+305D u+306E u+30B9 u+30D4 u+30FC u+30C9 u+3067
    Punycode: d9juau41awczczp
    EXAMPLE
  end

  def test_rfc3492_7_1_S
    assert_punycode(<<-EXAMPLE)
    (S) -> $1.00 <-
    u+002D u+003E u+0020 u+0024 u+0031 u+002E u+0030 u+0030 u+0020
    u+003C u+002D
    Punycode: -> $1.00 <--
    EXAMPLE
  end

  # Path of the running Ruby interpreter, used to start punycode.rb in a
  # child process.  RbConfig is preferred: the older Config constant is
  # missing from current Rubies, and the resulting NameError would not be
  # caught by the LoadError rescue below.  Config stays as a fallback.
  RUBY_BIN =
    begin
      require "rbconfig"
      ruby_config = defined?(RbConfig) ? RbConfig::CONFIG : Config::CONFIG
      File.join(
        ruby_config["bindir"],
        ruby_config["ruby_install_name"] + ruby_config["EXEEXT"].to_s
      )
    rescue LoadError
      "ruby"
    end

  # Location of the script under test: punycode.rb in the current directory
  # if present, otherwise ../lib/punycode.rb relative to this file.
  PUNYCODE_RB =
    if File.exist?('punycode.rb')
      'punycode.rb'
    else
      File.expand_path(File.join('..', 'lib', 'punycode.rb'),
                       File.dirname(__FILE__))
    end
end
238
+
239
# Runs the RFC examples through `punycode.rb -e` in a child interpreter.
class TestPunycodeEncode < Test::Unit::TestCase
  include AssertPunycode

  # Writes the code points to the child's stdin and expects the first line
  # of its output to be the Punycode string.
  def assert_punycode_main(description, codepoints, punycode)
    command = "#{RUBY_BIN} '#{PUNYCODE_RB}' -e"
    IO.popen(command, "r+") do |pipe|
      pipe.puts codepoints
      pipe.close_write
      first_line = pipe.gets
      assert_equal(punycode, first_line.chomp, description)
    end
  end
end
250
+
251
# Runs the RFC examples through `punycode.rb -d` in a child interpreter.
class TestPunycodeDecode < Test::Unit::TestCase
  include AssertPunycode

  # Writes the Punycode string to the child's stdin and expects its output
  # to list the code points, one per line.
  def assert_punycode_main(description, codepoints, punycode)
    command = "#{RUBY_BIN} '#{PUNYCODE_RB}' -d"
    expected = codepoints.strip.gsub(/\s+/, "\n")
    IO.popen(command, "r+") do |pipe|
      pipe.puts punycode
      pipe.close_write
      assert_equal(expected, pipe.read.strip, description)
    end
  end
end
262
+ end
263
+
264
# The same examples against an executable named ./punycode, when one is
# present in the current directory.
if File.executable?("./punycode")
  class TestPunycodeEncodeBin < Test::Unit::TestCase
    include AssertPunycode

    # Expects the first output line of `./punycode -e` to be the Punycode
    # string for the given code points.
    def assert_punycode_main(description, codepoints, punycode)
      IO.popen("./punycode -e", "r+") do |pipe|
        pipe.puts codepoints
        pipe.close_write
        first_line = pipe.gets
        assert_equal(punycode, first_line.chomp, description)
      end
    end
  end

  class TestPunycodeDecodeBin < Test::Unit::TestCase
    include AssertPunycode

    # Expects `./punycode -d` to print the code points, one per line.
    def assert_punycode_main(description, codepoints, punycode)
      expected = codepoints.strip.gsub(/\s+/, "\n")
      IO.popen("./punycode -d", "r+") do |pipe|
        pipe.puts punycode
        pipe.close_write
        assert_equal(expected, pipe.read.strip, description)
      end
    end
  end
end
290
+
291
# In-process tests of the library API.  They are defined only when
# `require 'punycode'` succeeds; a LoadError is ignored.
begin
  require 'punycode'

  class TestPunycodeEncodeLib < Test::Unit::TestCase
    include AssertPunycode

    # Packs the hex code points into a UTF-8 string and checks
    # Punycode.encode against the expected Punycode.
    def assert_punycode_main(description, codepoints, punycode)
      values = codepoints.scan(/[0-9a-fA-F]+/).map { |hex| hex.hex }
      unistring = values.pack('U*')
      assert_equal(punycode, Punycode.encode(unistring), description)
    end
  end

  class TestPunycodeDecodeLib < Test::Unit::TestCase
    include AssertPunycode

    # Packs the hex code points into a UTF-8 string and checks that
    # Punycode.decode reproduces it.
    def assert_punycode_main(description, codepoints, punycode)
      values = codepoints.scan(/[0-9a-fA-F]+/).map { |hex| hex.hex }
      unistring = values.pack('U*')
      assert_equal(unistring, Punycode.decode(punycode), description)
    end
  end
rescue LoadError
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.2
3
+ specification_version: 1
4
+ name: punycode4r
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.2.0
7
+ date: 2007-02-21 00:00:00 +09:00
8
+ summary: pure Ruby implementing Punycode (RFC 3492)
9
+ require_paths:
10
+ - lib
11
+ email: zn@mbf.nifty.com
12
+ homepage:
13
+ rubyforge_project: rwiki
14
+ description:
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: false
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Kazuhiro NISHIYAMA
31
+ files:
32
+ - lib/punycode.rb
33
+ - test/test_punycode.rb
34
+ test_files: []
35
+
36
+ rdoc_options: []
37
+
38
+ extra_rdoc_files: []
39
+
40
+ executables: []
41
+
42
+ extensions: []
43
+
44
+ requirements: []
45
+
46
+ dependencies: []
47
+