punycode4r 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/lib/punycode.rb +565 -0
  2. data/test/test_punycode.rb +311 -0
  3. metadata +47 -0
@@ -0,0 +1,565 @@
1
+ #!/usr/bin/ruby -Ku
2
+ #
3
+ # This is pure Ruby implementing Punycode (RFC 3492).
4
+ # (original ANSI C code (C89) implementing Punycode is in RFC 3492)
5
+ #
6
+ # copyright (c) 2005 Kazuhiro NISHIYAMA
7
+ # You can redistribute it and/or modify it under the same terms as Ruby.
8
+ #
9
+ =begin
10
+ = punycode4r
11
+ == usage
12
+ === simple usage
13
+ require 'punycode'
14
+ utf8_string = "\346\226\207\345\255\227\345\210\227"
15
+ punycode_string = Punycode.encode(utf8_string)
16
+ p punycode_string #=> "1br58tspi"
17
+ p(Punycode.decode(punycode_string) == utf8_string) #=> true
18
+
19
+ == IDN (Internationalized Domain Name)
20
+ When you use punycode in IDN,
21
+ you must do NAMEPREP (RFC 3491) before Punycode.encode,
22
+ and add ACE Prefix (defined in RFC 3490) after Punycode.encode.
23
+
24
+ This library supports punycode only.
25
+ NAMEPREP requires other libraries.
26
+
27
+ =end
28
+
29
# Pure-Ruby port of the Punycode reference implementation in RFC 3492.
#
# punycode_encode / punycode_decode keep the C-style calling convention of
# the reference code (arrays of integer code points, a one-element
# output_length array used as an in/out argument).  encode / decode are the
# string-level wrappers.  Errors are reported by raising one of the
# Status::Error subclasses.
module Punycode
  module Status
    # Common ancestor of every error raised by this module.
    class Error < StandardError; end
    # Marker class returned by punycode_encode / punycode_decode on success.
    class PunycodeSuccess; end
    # Input is invalid.
    class PunycodeBadInput < Error; end
    # Output would exceed the space provided.
    class PunycodeBigOutput < Error; end
    # Input needs wider integers to process.
    class PunycodeOverflow < Error; end
  end
  include Status

  # *** Bootstring parameters for Punycode ***

  BASE = 36; TMIN = 1; TMAX = 26; SKEW = 38; DAMP = 700
  INITIAL_BIAS = 72; INITIAL_N = 0x80; DELIMITER = 0x2D

  module_function

  # basic(cp) tests whether cp is a basic code point:
  def basic(cp)
    cp < 0x80
  end

  # delim(cp) tests whether cp is a delimiter:
  def delim(cp)
    cp == DELIMITER
  end

  # decode_digit(cp) returns the numeric value of a basic code
  # point (for use in representing integers) in the range 0 to
  # base-1, or base if cp does not represent a value.
  #
  # The C original relies on unsigned wrap-around (cp - 48 < 10).  Ruby
  # integers are signed, so the ranges are tested explicitly; otherwise
  # every code point below '0' would be mistaken for a digit.
  def decode_digit(cp)
    if cp >= 48 && cp <= 57       # '0'..'9' map to 26..35
      cp - 22
    elsif cp >= 65 && cp <= 90    # 'A'..'Z' map to 0..25
      cp - 65
    elsif cp >= 97 && cp <= 122   # 'a'..'z' map to 0..25
      cp - 97
    else
      BASE
    end
  end

  # encode_digit(d,flag) returns the basic code point whose value
  # (when used for representing integers) is d, which needs to be in
  # the range 0 to base-1.  The lowercase form is used unless flag is
  # truthy (anything but nil/false), in which case the uppercase form
  # is used.  The behavior is undefined if flag is truthy and digit d
  # has no uppercase form.
  #   0..25 map to ASCII a..z or A..Z
  #  26..35 map to ASCII 0..9
  def encode_digit(d, flag)
    d + 22 + 75 * (d < 26 ? 1 : 0) - ((flag ? 1 : 0) << 5)
  end

  # flagged(bcp) tests whether a basic code point is flagged
  # (uppercase).  The behavior is undefined if bcp is not a
  # basic code point.
  def flagged(bcp)
    bcp >= 65 && bcp <= 90
  end

  # encode_basic(bcp,flag) forces a basic code point to lowercase
  # if flag is nil/false, uppercase otherwise, and returns the
  # resulting code point.  The code point is unchanged if it is
  # caseless.  The behavior is undefined if bcp is not a basic
  # code point.
  def encode_basic(bcp, flag)
    # Fold lowercase letters to uppercase first ...
    bcp -= 32 if bcp >= 97 && bcp <= 122
    # ... then go back to lowercase unless uppercase was requested.
    bcp += 32 if !flag && bcp >= 65 && bcp <= 90
    bcp
  end

  # *** Platform-specific constants ***

  # maxint is the limit used by the overflow checks below:
  MAXINT = 1 << 64

  # threshold(k, bias) returns the digit threshold t for position k of a
  # generalized variable-length integer, clamped to TMIN..TMAX.
  def threshold(k, bias)
    if k <= bias # + TMIN # +TMIN not needed
      TMIN
    elsif k >= bias + TMAX
      TMAX
    else
      k - bias
    end
  end

  # *** Bias adaptation function ***

  # Returns the new bias after a delta has been encoded or decoded.
  # numpoints is the number of code points handled so far (including the
  # current one); firsttime selects the stronger damping that is applied
  # to the very first delta.
  def adapt(delta, numpoints, firsttime)
    delta = firsttime ? delta / DAMP : delta >> 1
    # delta >> 1 is a faster way of doing delta / 2
    delta += delta / numpoints

    k = 0
    while delta > ((BASE - TMIN) * TMAX) / 2
      delta /= BASE - TMIN
      k += BASE
    end

    k + (BASE - TMIN + 1) * delta / (delta + SKEW)
  end

  # *** Main encode function ***

  # punycode_encode() converts Unicode to Punycode.
  #
  # input         :: array of Unicode code points (integers).
  # input_length  :: number of code points to read from input.
  # case_flags    :: nil, or an array with one flag per input code point.
  #                  A truthy flag forces an ASCII letter to uppercase and
  #                  makes the final digit of a non-basic code point
  #                  uppercase; a falsy flag forces lowercase.  When nil,
  #                  ASCII letters are copied unchanged.
  # output_length :: one-element array; on entry the maximum number of
  #                  code points that may be written, on return the number
  #                  actually written.
  # output        :: array receiving the ASCII code points.
  #
  # Returns PunycodeSuccess.  Raises PunycodeBigOutput when the output
  # does not fit and PunycodeOverflow when a delta exceeds MAXINT; after
  # an error output and output_length might contain garbage.
  def punycode_encode(input_length, input, case_flags, output_length, output)
    # Initialize the state:
    n = INITIAL_N
    delta = 0
    out = 0
    max_out = output_length[0]
    bias = INITIAL_BIAS

    # Handle the basic code points:
    input_length.times do |j|
      next unless basic(input[j])
      raise PunycodeBigOutput if max_out - out < 2
      output[out] =
        case_flags ? encode_basic(input[j], case_flags[j]) : input[j]
      out += 1
    end

    # h is the number of code points that have been handled, b is the
    # number of basic code points, and out is the number of characters
    # that have been output.
    h = b = out

    if b > 0
      output[out] = DELIMITER
      out += 1
    end

    # Main encoding loop:
    while h < input_length
      # All non-basic code points < n have been
      # handled already.  Find the next larger one:
      m = MAXINT
      input_length.times do |j|
        m = input[j] if input[j] >= n && input[j] < m
      end

      # Increase delta enough to advance the decoder's
      # <n,i> state to <m,0>, but guard against overflow:
      raise PunycodeOverflow if m - n > (MAXINT - delta) / (h + 1)
      delta += (m - n) * (h + 1)
      n = m

      input_length.times do |j|
        # Punycode does not need to check whether input[j] is basic:
        if input[j] < n
          delta += 1
          # The C code detects unsigned wrap-around with "delta == 0";
          # Ruby integers never wrap, so compare against the limit.
          raise PunycodeOverflow if delta > MAXINT
        end

        next unless input[j] == n

        # Represent delta as a generalized variable-length integer:
        q = delta
        k = BASE
        loop do
          raise PunycodeBigOutput if out >= max_out
          t = threshold(k, bias)
          break if q < t
          output[out] = encode_digit(t + (q - t) % (BASE - t), false)
          out += 1
          q = (q - t) / (BASE - t)
          k += BASE
        end

        # The case of the last digit carries the uppercase flag.
        output[out] = encode_digit(q, case_flags && case_flags[j])
        out += 1
        bias = adapt(delta, h + 1, h == b)
        delta = 0
        h += 1
      end

      delta += 1
      n += 1
    end

    output_length[0] = out
    PunycodeSuccess
  end

  # *** Main decode function ***

  # punycode_decode() converts Punycode to Unicode.
  #
  # input         :: array of ASCII code points (integers).
  # input_length  :: number of code points to read from input.
  # output_length :: one-element array; on entry the maximum number of
  #                  code points that may be written, on return the number
  #                  actually written.
  # output        :: array receiving the Unicode code points.
  # case_flags    :: nil, or an array that receives one boolean per output
  #                  code point (true when the corresponding Punycode
  #                  character was uppercase).
  #
  # Returns PunycodeSuccess.  Raises PunycodeBadInput for non-basic or
  # non-digit input, PunycodeBigOutput when the output does not fit and
  # PunycodeOverflow when a value exceeds MAXINT; after an error output,
  # output_length and case_flags might contain garbage.
  def punycode_decode(input_length, input, output_length, output, case_flags)
    # Initialize the state:
    n = INITIAL_N
    out = 0
    i = 0
    max_out = output_length[0]
    bias = INITIAL_BIAS

    # Handle the basic code points:  Let b be the number of input code
    # points before the last delimiter, or 0 if there is none, then
    # copy the first b code points to the output.
    b = 0
    input_length.times do |j|
      b = j if delim(input[j])
    end
    raise PunycodeBigOutput if b > max_out

    b.times do |j|
      case_flags[out] = flagged(input[j]) if case_flags
      raise PunycodeBadInput unless basic(input[j])
      output[out] = input[j]
      out += 1
    end

    # Main decoding loop:  Start just after the last delimiter if any
    # basic code points were copied; start at the beginning otherwise.
    # in_ is the index of the next character to be consumed, and
    # out is the number of code points in the output array.
    in_ = b > 0 ? b + 1 : 0
    while in_ < input_length
      # Decode a generalized variable-length integer into delta,
      # which gets added to i.  The overflow checking is easier
      # if we increase i as we go, then subtract off its starting
      # value at the end to obtain delta.
      oldi = i
      w = 1
      k = BASE
      loop do
        raise PunycodeBadInput if in_ >= input_length
        digit = decode_digit(input[in_])
        in_ += 1
        raise PunycodeBadInput if digit >= BASE
        raise PunycodeOverflow if digit > (MAXINT - i) / w
        i += digit * w
        t = threshold(k, bias)
        break if digit < t
        raise PunycodeOverflow if w > MAXINT / (BASE - t)
        w *= BASE - t
        k += BASE
      end

      bias = adapt(i - oldi, out + 1, oldi == 0)

      # i was supposed to wrap around from out+1 to 0,
      # incrementing n each time, so we'll fix that now:
      raise PunycodeOverflow if i / (out + 1) > MAXINT - n
      n += i / (out + 1)
      i %= out + 1

      # Insert n at position i of the output:
      raise PunycodeBigOutput if out >= max_out

      if case_flags
        # Shift the tail one slot to the right (memmove in the C code).
        case_flags[i + 1, out - i] = case_flags[i, out - i]
        # Case of last character determines uppercase flag:
        case_flags[i] = flagged(input[in_ - 1])
      end

      # Shift the tail one slot to the right (memmove in the C code).
      output[i + 1, out - i] = output[i, out - i]
      output[i] = n
      i += 1

      out += 1
    end

    output_length[0] = out
    PunycodeSuccess
  end

  # Encodes a UTF-8 string as a Punycode string.
  #
  # unicode_string   :: string unpacked with 'U*' into code points.
  # case_flags       :: optional array of flags, passed to punycode_encode.
  # print_ascii_only :: when true every output character is mapped through
  #                     PRINT_ASCII (control characters and DEL become "\n").
  #
  # Raises PunycodeBigOutput when the result needs more than
  # ACE_MAX_LENGTH characters, and Error if the encoder produced a value
  # outside 0..127.
  def encode(unicode_string, case_flags = nil, print_ascii_only = false)
    input = unicode_string.unpack('U*')
    output = [0] * (ACE_MAX_LENGTH + 1)
    output_length = [ACE_MAX_LENGTH]

    punycode_encode(input.size, input, case_flags, output_length, output)

    # Take exactly the code points that were written; the rest of the
    # buffer is zero padding.
    output[0, output_length[0]].map do |c|
      unless c >= 0 && c <= 127
        raise Error, "assertion error: invalid output char"
      end
      (print_ascii_only ? PRINT_ASCII[c] : c).chr
    end.join('')
  end

  # Decodes a Punycode string into a UTF-8 string.
  #
  # punycode   :: string of ASCII bytes.
  # case_flags :: optional array that receives one boolean per decoded
  #               code point (see punycode_decode).
  #
  # Raises PunycodeBigOutput when the input is longer than
  # 2 * ACE_MAX_LENGTH or decodes to more than UNICODE_MAX_LENGTH code
  # points, and PunycodeBadInput for non-ASCII bytes or invalid digits.
  def decode(punycode, case_flags = [])
    raise PunycodeBigOutput if ACE_MAX_LENGTH * 2 < punycode.size

    input = []
    punycode.each_byte do |c|
      raise PunycodeBadInput unless c >= 0 && c <= 127
      input.push(c)
    end

    output = []
    output_length = [UNICODE_MAX_LENGTH]
    punycode_decode(input.length, input, output_length, output, case_flags)
    output.pack('U*')
  end

  # Size limits used by the encode / decode wrappers.
  UNICODE_MAX_LENGTH = 256
  ACE_MAX_LENGTH = 256

  # The following string is used to convert printable
  # characters between ASCII and the native charset:

  PRINT_ASCII =
    "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" \
    "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" \
    " !\"\#$%&'()*+,-./" \
    "0123456789:;<=>?" \
    "@ABCDEFGHIJKLMNO" \
    "PQRSTUVWXYZ[\\]^_" \
    "`abcdefghijklmno" \
    "pqrstuvwxyz{|}~\n"
end
425
+
426
# Command-line driver, run only when this file is executed directly.
# It mirrors the test program of the reference implementation:
# "-e" reads u+hex code points and prints Punycode, "-d" does the reverse.
if __FILE__ == $0
  UNICODE_MAX_LENGTH = Punycode::UNICODE_MAX_LENGTH
  ACE_MAX_LENGTH = Punycode::ACE_MAX_LENGTH

  # Prints the usage text to STDERR and exits with a failure status.
  # argv[0] is shown as the program name.
  def usage(argv)
    STDERR.puts <<-USAGE
#{argv[0]} -e reads code points and writes a Punycode string.
#{argv[0]} -d reads a Punycode string and writes code points.

Input and output are plain text in the native character set.
Code points are in the form u+hex separated by whitespace.
Although the specification allows Punycode strings to contain
any characters from the ASCII repertoire, this test code
supports only the printable characters, and needs the Punycode
string to be followed by a newline.
The case of the u in u+hex is the force-to-uppercase flag.
    USAGE
    exit(false)
  end

  TOO_BIG = "input or output is too large, recompile with larger limits"
  INVALID_INPUT = "invalid input"
  OVERFLOW = "arithmetic overflow"
  IO_ERROR = "I/O error"

  PRINT_ASCII = Punycode::PRINT_ASCII

  # Yields to a block that calls punycode_encode / punycode_decode and
  # turns the Punycode exceptions into the matching failure messages.
  # Fails as well when the block returns anything but PunycodeSuccess.
  def check_status
    status =
      begin
        yield
      rescue Punycode::Status::PunycodeBadInput
        fail(INVALID_INPUT)
      rescue Punycode::Status::PunycodeBigOutput
        fail(TOO_BIG)
      rescue Punycode::Status::PunycodeOverflow
        fail(OVERFLOW)
      end
    if status != Punycode::Status::PunycodeSuccess
      fail("assertion error: unknown status")
    end
  end

  # "-e": reads u+hex code points from STDIN, prints the Punycode string
  # and exits successfully.  case_flags receives the uppercase flags.
  def run_encode(case_flags)
    input = [0] * UNICODE_MAX_LENGTH
    output = [0] * (ACE_MAX_LENGTH + 1)

    # Read the input code points:
    input_length = 0
    STDIN.read.scan(/([uU]\+)([0-9a-fA-F]+)/) do |uplus, hex|
      codept = hex.hex
      if uplus[1] != ?+ || codept > Punycode::MAXINT
        fail(INVALID_INPUT)
      end

      fail(TOO_BIG) if input_length == UNICODE_MAX_LENGTH

      # The case of the leading u is the force-to-uppercase flag.
      if uplus[0] == ?u
        case_flags[input_length] = false
      elsif uplus[0] == ?U
        case_flags[input_length] = true
      else
        fail(INVALID_INPUT)
      end

      input[input_length] = codept
      input_length += 1
    end

    # Encode:
    output_length = [ACE_MAX_LENGTH]
    check_status do
      Punycode.punycode_encode(input_length, input, case_flags,
                               output_length, output)
    end

    # Convert to native charset and output.  Only the first
    # output_length[0] entries were written by the encoder.
    text = output[0, output_length[0]].map do |c|
      raise "assertion error: invalid output char" unless c >= 0 && c <= 127
      PRINT_ASCII[c].chr
    end.join('')
    puts(text)
    exit(true)
  end

  # "-d": reads one newline-terminated Punycode line from STDIN, prints
  # the decoded code points as u+hex / U+hex lines and exits successfully.
  def run_decode(case_flags)
    output = []

    line = STDIN.gets
    # gets returns nil at end of file; report it instead of crashing.
    fail(IO_ERROR) unless line
    chars = line.split(//)[0, ACE_MAX_LENGTH * 2]
    fail(TOO_BIG) if chars[-1] != "\n"
    input = chars[0...-1].map do |c|
      PRINT_ASCII.index(c) || fail(INVALID_INPUT)
    end

    # Decode:
    output_length = [UNICODE_MAX_LENGTH]
    check_status do
      Punycode.punycode_decode(input.length, input, output_length,
                               output, case_flags)
    end

    # Output the result:
    output_length[0].times do |j|
      printf("%s+%04X\n", case_flags[j] ? "U" : "u", output[j])
    end

    exit(true)
  end

  # Entry point.  argv is the program name followed by the command-line
  # arguments; exactly one argument, -e or -d, is accepted.
  def main(argv)
    case_flags = [0] * UNICODE_MAX_LENGTH

    usage(argv) if argv.size != 2
    usage(argv) if /\A-[de]\z/ !~ argv[1]

    run_encode(case_flags) if argv[1][1] == ?e
    run_decode(case_flags) if argv[1][1] == ?d

    usage(argv)
    raise "not reached"
  end
  main([$0] + ARGV)
end
@@ -0,0 +1,311 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # test of punycode.rb
4
+ #
5
+ # copyright (c) 2005 Kazuhiro NISHIYAMA
6
+ # You can redistribute it and/or modify it under the same terms as Ruby.
7
+ #
8
+ require 'test/unit'
9
+
10
+ module AssertPunycode
11
# Splits one RFC 3492 sample (a heredoc) into its parts and hands them to
# assert_punycode_main, which the including test case supplies.
# Backslash-newline continuations are joined first; the first line is the
# description, the last line holds "Punycode: ..." and everything in
# between is the code point list.
def assert_punycode(example)
  lines = example.gsub(/\\\n\s*/, "").split(/\n/)
  description = lines[0]
  codepoints = lines[1...-1].join("")
  punycode = lines[-1].strip.sub(/^Punycode: /, "")

  assert_punycode_main(description, codepoints, punycode)
end
19
+
20
+ def test_rfc3492_7_1_A
21
+ assert_punycode(<<-EXAMPLE)
22
+ (A) Arabic (Egyptian):
23
+ u+0644 u+064A u+0647 u+0645 u+0627 u+0628 u+062A u+0643 u+0644
24
+ u+0645 u+0648 u+0634 u+0639 u+0631 u+0628 u+064A u+061F
25
+ Punycode: egbpdaj6bu4bxfgehfvwxn
26
+ EXAMPLE
27
+ end
28
+
29
+ def test_rfc3492_7_1_B
30
+ assert_punycode(<<-EXAMPLE)
31
+ (B) Chinese (simplified):
32
+ u+4ED6 u+4EEC u+4E3A u+4EC0 u+4E48 u+4E0D u+8BF4 u+4E2D u+6587
33
+ Punycode: ihqwcrb4cv8a8dqg056pqjye
34
+ EXAMPLE
35
+ end
36
+
37
+ def test_rfc3492_7_1_C
38
+ assert_punycode(<<-EXAMPLE)
39
+ (C) Chinese (traditional):
40
+ u+4ED6 u+5011 u+7232 u+4EC0 u+9EBD u+4E0D u+8AAA u+4E2D u+6587
41
+ Punycode: ihqwctvzc91f659drss3x8bo0yb
42
+ EXAMPLE
43
+ end
44
+
45
+ def test_rfc3492_7_1_D
46
+ assert_punycode(<<-EXAMPLE)
47
+ (D) Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky
48
+ U+0050 u+0072 u+006F u+010D u+0070 u+0072 u+006F u+0073 u+0074
49
+ u+011B u+006E u+0065 u+006D u+006C u+0075 u+0076 u+00ED u+010D
50
+ u+0065 u+0073 u+006B u+0079
51
+ Punycode: Proprostnemluvesky-uyb24dma41a
52
+ EXAMPLE
53
+ end
54
+
55
+ def test_rfc3492_7_1_E
56
+ assert_punycode(<<-EXAMPLE)
57
+ (E) Hebrew:
58
+ u+05DC u+05DE u+05D4 u+05D4 u+05DD u+05E4 u+05E9 u+05D5 u+05D8
59
+ u+05DC u+05D0 u+05DE u+05D3 u+05D1 u+05E8 u+05D9 u+05DD u+05E2
60
+ u+05D1 u+05E8 u+05D9 u+05EA
61
+ Punycode: 4dbcagdahymbxekheh6e0a7fei0b
62
+ EXAMPLE
63
+ end
64
+
65
+ def test_rfc3492_7_1_F
66
+ assert_punycode(<<-EXAMPLE)
67
+ (F) Hindi (Devanagari):
68
+ u+092F u+0939 u+0932 u+094B u+0917 u+0939 u+093F u+0928 u+094D
69
+ u+0926 u+0940 u+0915 u+094D u+092F u+094B u+0902 u+0928 u+0939
70
+ u+0940 u+0902 u+092C u+094B u+0932 u+0938 u+0915 u+0924 u+0947
71
+ u+0939 u+0948 u+0902
72
+ Punycode: i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd
73
+ EXAMPLE
74
+ end
75
+
76
+ def test_rfc3492_7_1_G
77
+ assert_punycode(<<-EXAMPLE)
78
+ (G) Japanese (kanji and hiragana):
79
+ u+306A u+305C u+307F u+3093 u+306A u+65E5 u+672C u+8A9E u+3092
80
+ u+8A71 u+3057 u+3066 u+304F u+308C u+306A u+3044 u+306E u+304B
81
+ Punycode: n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa
82
+ EXAMPLE
83
+ end
84
+
85
+ def test_rfc3492_7_1_H
86
+ assert_punycode(<<-EXAMPLE)
87
+ (H) Korean (Hangul syllables):
88
+ u+C138 u+ACC4 u+C758 u+BAA8 u+B4E0 u+C0AC u+B78C u+B4E4 u+C774
89
+ u+D55C u+AD6D u+C5B4 u+B97C u+C774 u+D574 u+D55C u+B2E4 u+BA74
90
+ u+C5BC u+B9C8 u+B098 u+C88B u+C744 u+AE4C
91
+ Punycode: 989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j\\
92
+ psd879ccm6fea98c
93
+ EXAMPLE
94
+ end
95
+
96
+ def test_rfc3492_7_1_I
97
+ if self.class.to_s == 'TestPunycodeEncodeLib'
98
+ if __FILE__ == $0 || $VERBOSE || $DEBUG
99
+ STDERR.puts "SKIP KNOWN BUG: downcase D in Punycode in encode test without case_flags."
100
+ end
101
+ return
102
+ end
103
+ assert_punycode(<<-EXAMPLE)
104
+ KNOWN BUG: downcase D in Punycode in encode test without case_flags. \\
105
+ (I) Russian (Cyrillic):
106
+ U+043F u+043E u+0447 u+0435 u+043C u+0443 u+0436 u+0435 u+043E
107
+ u+043D u+0438 u+043D u+0435 u+0433 u+043E u+0432 u+043E u+0440
108
+ u+044F u+0442 u+043F u+043E u+0440 u+0443 u+0441 u+0441 u+043A
109
+ u+0438
110
+ Punycode: b1abfaaepdrnnbgefbaDotcwatmq2g4l
111
+ EXAMPLE
112
+ end
113
+
114
+ def test_rfc3492_7_1_I_downcase
115
+ assert_punycode(<<-EXAMPLE)
116
+ (I) Russian (Cyrillic): (downcase first U in Codepoints and D in Punycode)
117
+ u+043F u+043E u+0447 u+0435 u+043C u+0443 u+0436 u+0435 u+043E
118
+ u+043D u+0438 u+043D u+0435 u+0433 u+043E u+0432 u+043E u+0440
119
+ u+044F u+0442 u+043F u+043E u+0440 u+0443 u+0441 u+0441 u+043A
120
+ u+0438
121
+ Punycode: b1abfaaepdrnnbgefbadotcwatmq2g4l
122
+ EXAMPLE
123
+ end
124
+
125
+ def test_rfc3492_7_1_J
126
+ assert_punycode(<<-EXAMPLE)
127
+ (J) Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol
128
+ U+0050 u+006F u+0072 u+0071 u+0075 u+00E9 u+006E u+006F u+0070
129
+ u+0075 u+0065 u+0064 u+0065 u+006E u+0073 u+0069 u+006D u+0070
130
+ u+006C u+0065 u+006D u+0065 u+006E u+0074 u+0065 u+0068 u+0061
131
+ u+0062 u+006C u+0061 u+0072 u+0065 u+006E U+0045 u+0073 u+0070
132
+ u+0061 u+00F1 u+006F u+006C
133
+ Punycode: PorqunopuedensimplementehablarenEspaol-fmd56a
134
+ EXAMPLE
135
+ end
136
+
137
+ def test_rfc3492_7_1_K
138
+ assert_punycode(<<-EXAMPLE)
139
+ (K) Vietnamese:\\
140
+ T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch\\
141
+ <ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t
142
+ U+0054 u+1EA1 u+0069 u+0073 u+0061 u+006F u+0068 u+1ECD u+006B
143
+ u+0068 u+00F4 u+006E u+0067 u+0074 u+0068 u+1EC3 u+0063 u+0068
144
+ u+1EC9 u+006E u+00F3 u+0069 u+0074 u+0069 u+1EBF u+006E u+0067
145
+ U+0056 u+0069 u+1EC7 u+0074
146
+ Punycode: TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g
147
+ EXAMPLE
148
+ end
149
+
150
+ def test_rfc3492_7_1_L
151
+ assert_punycode(<<-EXAMPLE)
152
+ (L) 3<nen>B<gumi><kinpachi><sensei>
153
+ u+0033 u+5E74 U+0042 u+7D44 u+91D1 u+516B u+5148 u+751F
154
+ Punycode: 3B-ww4c5e180e575a65lsy2b
155
+ EXAMPLE
156
+ end
157
+
158
+ def test_rfc3492_7_1_M
159
+ assert_punycode(<<-EXAMPLE)
160
+ (M) <amuro><namie>-with-SUPER-MONKEYS
161
+ u+5B89 u+5BA4 u+5948 u+7F8E u+6075 u+002D u+0077 u+0069 u+0074
162
+ u+0068 u+002D U+0053 U+0055 U+0050 U+0045 U+0052 u+002D U+004D
163
+ U+004F U+004E U+004B U+0045 U+0059 U+0053
164
+ Punycode: -with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n
165
+ EXAMPLE
166
+ end
167
+
168
+ def test_rfc3492_7_1_N
169
+ assert_punycode(<<-EXAMPLE)
170
+ (N) Hello-Another-Way-<sorezore><no><basho>
171
+ U+0048 u+0065 u+006C u+006C u+006F u+002D U+0041 u+006E u+006F
172
+ u+0074 u+0068 u+0065 u+0072 u+002D U+0057 u+0061 u+0079 u+002D
173
+ u+305D u+308C u+305E u+308C u+306E u+5834 u+6240
174
+ Punycode: Hello-Another-Way--fc4qua05auwb3674vfr0b
175
+ EXAMPLE
176
+ end
177
+
178
+ def test_rfc3492_7_1_O
179
+ assert_punycode(<<-EXAMPLE)
180
+ (O) <hitotsu><yane><no><shita>2
181
+ u+3072 u+3068 u+3064 u+5C4B u+6839 u+306E u+4E0B u+0032
182
+ Punycode: 2-u9tlzr9756bt3uc0v
183
+ EXAMPLE
184
+ end
185
+
186
+ def test_rfc3492_7_1_P
187
+ assert_punycode(<<-EXAMPLE)
188
+ (P) Maji<de>Koi<suru>5<byou><mae>
189
+ U+004D u+0061 u+006A u+0069 u+3067 U+004B u+006F u+0069 u+3059
190
+ u+308B u+0035 u+79D2 u+524D
191
+ Punycode: MajiKoi5-783gue6qz075azm5e
192
+ EXAMPLE
193
+ end
194
+
195
+ def test_rfc3492_7_1_Q
196
+ assert_punycode(<<-EXAMPLE)
197
+ (Q) <pafii>de<runba>
198
+ u+30D1 u+30D5 u+30A3 u+30FC u+0064 u+0065 u+30EB u+30F3 u+30D0
199
+ Punycode: de-jg4avhby1noc0d
200
+ EXAMPLE
201
+ end
202
+
203
+ def test_rfc3492_7_1_R
204
+ assert_punycode(<<-EXAMPLE)
205
+ (R) <sono><supiido><de>
206
+ u+305D u+306E u+30B9 u+30D4 u+30FC u+30C9 u+3067
207
+ Punycode: d9juau41awczczp
208
+ EXAMPLE
209
+ end
210
+
211
+ def test_rfc3492_7_1_S
212
+ assert_punycode(<<-EXAMPLE)
213
+ (S) -> $1.00 <-
214
+ u+002D u+003E u+0020 u+0024 u+0031 u+002E u+0030 u+0030 u+0020
215
+ u+003C u+002D
216
+ Punycode: -> $1.00 <--
217
+ EXAMPLE
218
+ end
219
+
220
# Path of the running Ruby interpreter, used to launch punycode.rb in a
# child process.  Falls back to plain "ruby" when rbconfig is unavailable.
RUBY_BIN =
  begin
    require "rbconfig"
    # Prefer RbConfig; the older top-level Config alias is not defined by
    # current Rubies, and the resulting NameError would not be caught by
    # the LoadError rescue below.
    config = defined?(RbConfig) ? RbConfig::CONFIG : Config::CONFIG
    File.join(
      config["bindir"],
      config["ruby_install_name"] + config["EXEEXT"]
    )
  rescue LoadError
    "ruby"
  end
230
+ PUNYCODE_RB =
231
+ if File.exist?('punycode.rb')
232
+ 'punycode.rb'
233
+ else
234
+ File.expand_path(File.join('..', 'lib', 'punycode.rb'),
235
+ File.dirname(__FILE__))
236
+ end
237
+ end
238
+
239
# Runs the RFC samples through "punycode.rb -e" in a child interpreter.
class TestPunycodeEncode < Test::Unit::TestCase
  include AssertPunycode

  # Writes codepoints to the child's STDIN and compares the first line it
  # prints with the expected punycode string.
  def assert_punycode_main(description, codepoints, punycode)
    IO.popen("#{RUBY_BIN} '#{PUNYCODE_RB}' -e", "r+") do |io|
      io.puts codepoints
      io.close_write
      # gets returns nil when the child printed nothing; to_s turns that
      # into an ordinary assertion failure that still shows description.
      assert_equal(punycode, io.gets.to_s.chomp, description)
    end
  end
end
250
+
251
# Runs the RFC samples through "punycode.rb -d" in a child interpreter.
class TestPunycodeDecode < Test::Unit::TestCase
  include AssertPunycode

  # Writes punycode to the child's STDIN and compares everything it prints
  # with the code point list, normalised to one code point per line.
  def assert_punycode_main(description, codepoints, punycode)
    expected = codepoints.strip.gsub(/\s+/, "\n")
    IO.popen("#{RUBY_BIN} '#{PUNYCODE_RB}' -d", "r+") do |child|
      child.puts punycode
      child.close_write
      assert_equal(expected, child.read.strip, description)
    end
  end
end
263
+
264
# The same samples against an executable named ./punycode, defined only
# when such an executable exists in the current directory.
if File.executable?("./punycode")
  class TestPunycodeEncodeBin < Test::Unit::TestCase
    include AssertPunycode

    # Writes codepoints to "./punycode -e" and compares the first line it
    # prints with the expected punycode string.
    def assert_punycode_main(description, codepoints, punycode)
      IO.popen("./punycode -e", "r+") do |io|
        io.puts codepoints
        io.close_write
        # gets returns nil when the child printed nothing; to_s turns that
        # into an ordinary assertion failure that still shows description.
        assert_equal(punycode, io.gets.to_s.chomp, description)
      end
    end
  end

  class TestPunycodeDecodeBin < Test::Unit::TestCase
    include AssertPunycode

    # Writes punycode to "./punycode -d" and compares everything it prints
    # with the code point list, normalised to one code point per line.
    def assert_punycode_main(description, codepoints, punycode)
      IO.popen("./punycode -d", "r+") do |io|
        io.puts punycode
        io.close_write
        assert_equal(codepoints.strip.gsub(/\s+/, "\n"),
                     io.read.strip, description)
      end
    end
  end
end
290
+
291
# Library-level tests calling Punycode.encode / Punycode.decode directly.
# When 'punycode' cannot be required the LoadError is swallowed and these
# two test cases are simply not defined.
begin
  require 'punycode'

  class TestPunycodeEncodeLib < Test::Unit::TestCase
    include AssertPunycode

    # Packs the hex code points into a UTF-8 string and checks its encoding.
    def assert_punycode_main(description, codepoints, punycode)
      values = codepoints.scan(/[0-9a-fA-F]+/).map { |digits| digits.hex }
      utf8 = values.pack('U*')
      assert_equal(punycode, Punycode.encode(utf8), description)
    end
  end

  class TestPunycodeDecodeLib < Test::Unit::TestCase
    include AssertPunycode

    # Packs the hex code points into a UTF-8 string and checks that the
    # punycode string decodes to it.
    def assert_punycode_main(description, codepoints, punycode)
      values = codepoints.scan(/[0-9a-fA-F]+/).map { |digits| digits.hex }
      utf8 = values.pack('U*')
      assert_equal(utf8, Punycode.decode(punycode), description)
    end
  end
rescue LoadError
end
metadata ADDED
@@ -0,0 +1,47 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.2
3
+ specification_version: 1
4
+ name: punycode4r
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.2.0
7
+ date: 2007-02-21 00:00:00 +09:00
8
+ summary: pure Ruby implementing Punycode (RFC 3492)
9
+ require_paths:
10
+ - lib
11
+ email: zn@mbf.nifty.com
12
+ homepage:
13
+ rubyforge_project: rwiki
14
+ description:
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: false
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - Kazuhiro NISHIYAMA
31
+ files:
32
+ - lib/punycode.rb
33
+ - test/test_punycode.rb
34
+ test_files: []
35
+
36
+ rdoc_options: []
37
+
38
+ extra_rdoc_files: []
39
+
40
+ executables: []
41
+
42
+ extensions: []
43
+
44
+ requirements: []
45
+
46
+ dependencies: []
47
+