RubyGems - punycode4r - Versions diffs - 0.2.0 - Mend

punycode4r 0.2.0

Files changed (3) hide show

@@ -0,0 +1,565 @@
+#!/usr/bin/ruby -Ku
+#
+# This is a pure Ruby implementation of Punycode (RFC 3492).
+# (original ANSI C code (C89) implementing Punycode is in RFC 3492)
+#
+# copyright (c) 2005 Kazuhiro NISHIYAMA
+# You can redistribute it and/or modify it under the same terms as Ruby.
+#
+=begin
+= punycode4r
+== usage
+=== simple usage
+  require 'punycode'
+  utf8_string = "\346\226\207\345\255\227\345\210\227"
+  punycode_string = Punycode.encode(utf8_string)
+  p punycode_string #=> "1br58tspi"
+  p(Punycode.decode(punycode_string) == utf8_string) #=> true
+== IDN (Internationalized Domain Name)
+When you use punycode in IDN,
+you must apply NAMEPREP (RFC 3491) before Punycode.encode,
+and add ACE Prefix (defined in RFC 3490) after Punycode.encode.
+This library supports punycode only.
+NAMEPREP requires other libraries.
+=end
+module Punycode
+  module Status
+    class Error < StandardError; end
+    class PunycodeSuccess; end
+    # Input is invalid.
+    class PunycodeBadInput < Error; end
+    # Output would exceed the space provided.
+    class PunycodeBigOutput< Error; end
+    # Input needs wider integers to process.
+    class PunycodeOverflow < Error; end
+  end
+  include Status
+  # *** Bootstring parameters for Punycode ***
+  BASE = 36; TMIN = 1; TMAX = 26; SKEW = 38; DAMP = 700
+  INITIAL_BIAS = 72; INITIAL_N = 0x80; DELIMITER = 0x2D
+  module_function
+  # basic(cp) tests whether cp is a basic code point:
+  def basic(cp)
+    cp < 0x80
+  end
+  # delim(cp) tests whether cp is a delimiter:
+  def delim(cp)
+    cp == DELIMITER
+  end
+  # decode_digit(cp) returns the numeric value of a basic code
+  # point (for use in representing integers) in the range 0 to
+  # base-1, or base if cp does not represent a value.
+  def decode_digit(cp)
+    cp - 48 < 10 ? cp - 22 :  cp - 65 < 26 ? cp - 65 :
+      cp - 97 < 26 ? cp - 97 : BASE
+  end
+  # encode_digit(d,flag) returns the basic code point whose value
+  # (when used for representing integers) is d, which needs to be in
+  # the range 0 to base-1.  The lowercase form is used unless flag is
+  # nonzero, in which case the uppercase form is used.  The behavior
+  # is undefined if flag is nonzero and digit d has no uppercase form.
+  def encode_digit(d, flag)
+    return d + 22 + 75 * ((d < 26) ? 1 : 0) - ((flag ? 1 : 0) << 5)
+    #  0..25 map to ASCII a..z or A..Z
+    # 26..35 map to ASCII 0..9
+  end
+  # flagged(bcp) tests whether a basic code point is flagged
+  # (uppercase).  The behavior is undefined if bcp is not a
+  # basic code point.
+  def flagged(bcp)
+    (0...26) === (bcp - 65)
+  end
+  # encode_basic(bcp,flag) forces a basic code point to lowercase
+  # if flag is zero, uppercase if flag is nonzero, and returns
+  # the resulting code point.  The code point is unchanged if it
+  # is caseless.  The behavior is undefined if bcp is not a basic
+  # code point.
+  def encode_basic(bcp, flag)
+    # bcp -= (bcp - 97 < 26) << 5;
+    if (0...26) === (bcp - 97)
+      bcp -= 1 << 5
+    end
+    # return bcp + ((!flag && (bcp - 65 < 26)) << 5);
+    if !flag and (0...26) === (bcp - 65)
+      bcp += 1 << 5
+    end
+    bcp
+  end
+  # *** Platform-specific constants ***
+  # maxint is the maximum value of a punycode_uint variable:
+  MAXINT = 1 << 64
+  # *** Bias adaptation function ***
+  def adapt(delta, numpoints, firsttime)
+    delta = firsttime ? delta / DAMP : delta >> 1
+    # delta >> 1 is a faster way of doing delta / 2
+    delta += delta / numpoints
+    k = 0
+    while delta > ((BASE - TMIN) * TMAX) / 2
+      delta /= BASE - TMIN
+      k += BASE
+    end
+    k + (BASE - TMIN + 1) * delta / (delta + SKEW)
+  end
+  # *** Main encode function ***
+  # punycode_encode() converts Unicode to Punycode.  The input
+  # is represented as an array of Unicode code points (not code
+  # units; surrogate pairs are not allowed), and the output
+  # will be represented as an array of ASCII code points.  The
+  # output string is *not* null-terminated; it will contain
+  # zeros if and only if the input contains zeros.  (Of course
+  # the caller can leave room for a terminator and add one if
+  # needed.)  The input_length is the number of code points in
+  # the input.  The output_length is an in/out argument: the
+  # caller passes in the maximum number of code points that it
+  # can receive, and on successful return it will contain the
+  # number of code points actually output.  The case_flags array
+  # holds input_length boolean values, where nonzero suggests that
+  # the corresponding Unicode character be forced to uppercase
+  # after being decoded (if possible), and zero suggests that
+  # it be forced to lowercase (if possible).  ASCII code points
+  # are encoded literally, except that ASCII letters are forced
+  # to uppercase or lowercase according to the corresponding
+  # uppercase flags.  If case_flags is a null pointer then ASCII
+  # letters are left as they are, and other code points are
+  # treated as if their uppercase flags were zero.  The return
+  # value can be any of the punycode_status values defined above
+  # except punycode_bad_input; if not punycode_success, then
+  # output_size and output might contain garbage.
+  def punycode_encode(input_length, input, case_flags, output_length, output)
+    # Notes on this Ruby port:
+    # * input is indexed from 0 to input_length - 1 and its entries are
+    #   compared numerically, i.e. they are integer code points.
+    # * case_flags may be nil; otherwise it is indexed like input.
+    # * output_length is a one-element array used as an in/out parameter:
+    #   output_length[0] is read as the capacity and is overwritten with
+    #   the number of entries stored in output.
+    # * Failures are raised as PunycodeBigOutput or PunycodeOverflow;
+    #   PunycodeSuccess is returned when encoding completes.
+    # Initialize the state:
+    n = INITIAL_N
+    delta = out = 0
+    max_out = output_length[0]
+    bias = INITIAL_BIAS
+    # Handle the basic code points:
+    input_length.times do |j|
+      if basic(input[j])
+        raise PunycodeBigOutput if max_out - out < 2
+        output[out] =
+          if case_flags
+            encode_basic(input[j], case_flags[j])
+          else
+            input[j]
+          end
+        out+=1
+      # elsif (input[j] < n)
+      #   raise PunycodeBadInput
+      # (not needed for Punycode with unsigned code points)
+      end
+    end
+    h = b = out
+    # h is the number of code points that have been handled, b is the
+    # number of basic code points, and out is the number of characters
+    # that have been output.
+    if b > 0
+      output[out] = DELIMITER
+      out+=1
+    end
+    # Main encoding loop:
+    while h < input_length
+      # All non-basic code points < n have been
+      # handled already.  Find the next larger one:
+      m = MAXINT
+      input_length.times do |j|
+        # next if basic(input[j])
+        # (not needed for Punycode)
+        m = input[j] if (n...m) === input[j]
+      end
+      # Increase delta enough to advance the decoder's
+      # <n,i> state to <m,0>, but guard against overflow:
+      raise PunycodeOverflow if m - n > (MAXINT - delta) / (h + 1)
+      delta += (m - n) * (h + 1)
+      n = m
+      input_length.times do |j|
+        # Punycode does not need to check whether input[j] is basic:
+        if input[j] < n # || basic(input[j])
+          delta+=1
+          # NOTE(review): wrap-around check carried over from the C code;
+          # it looks unreachable with Ruby's unbounded integers - confirm.
+          raise PunycodeOverflow if delta == 0
+        end
+        if input[j] == n
+          # Represent delta as a generalized variable-length integer:
+          q = delta; k = BASE
+          while true
+            raise PunycodeBigOutput if out >= max_out
+            t = if k <= bias # + TMIN # +TMIN not needed
+                  TMIN
+                elsif k >= bias + TMAX
+                  TMAX
+                else
+                  k - bias
+                end
+            break if q < t
+            output[out] = encode_digit(t + (q - t) % (BASE - t), false)
+            out+=1
+            q = (q - t) / (BASE - t)
+            k += BASE
+          end
+          # The final digit carries the case flag of the encoded code point.
+          output[out] = encode_digit(q, case_flags && case_flags[j])
+          out+=1
+          bias = adapt(delta, h + 1, h == b)
+          delta = 0
+          h+=1
+        end
+      end
+      delta+=1; n+=1
+    end
+    output_length[0] = out
+    return PunycodeSuccess
+  end
+  # *** Main decode function ***
+  # punycode_decode() converts Punycode to Unicode.  The input is
+  # represented as an array of ASCII code points, and the output
+  # will be represented as an array of Unicode code points.  The
+  # input_length is the number of code points in the input.  The
+  # output_length is an in/out argument: the caller passes in
+  # the maximum number of code points that it can receive, and
+  # on successful return it will contain the actual number of
+  # code points output.  The case_flags array needs room for at
+  # least output_length values, or it can be a null pointer if the
+  # case information is not needed.  A nonzero flag suggests that
+  # the corresponding Unicode character be forced to uppercase
+  # by the caller (if possible), while zero suggests that it be
+  # forced to lowercase (if possible).  ASCII code points are
+  # output already in the proper case, but their flags will be set
+  # appropriately so that applying the flags would be harmless.
+  # The return value can be any of the punycode_status values
+  # defined above; if not punycode_success, then output_length,
+  # output, and case_flags might contain garbage.  On success, the
+  # decoder will never need to write an output_length greater than
+  # input_length, because of how the encoding is defined.
+  def punycode_decode(input_length, input, output_length, output, case_flags)
+    # Notes on this Ruby port:
+    # * input is indexed from 0 to input_length - 1 and holds integer
+    #   code points.
+    # * output_length is a one-element array used as an in/out parameter:
+    #   output_length[0] is read as the capacity and is overwritten with
+    #   the number of code points stored in output.
+    # * case_flags may be nil; otherwise it is filled with booleans
+    #   produced by flagged().
+    # * Failures are raised as PunycodeBadInput, PunycodeBigOutput or
+    #   PunycodeOverflow; PunycodeSuccess is returned on completion.
+    # Initialize the state:
+    n = INITIAL_N
+    out = i = 0
+    max_out = output_length[0]
+    bias = INITIAL_BIAS
+    # Handle the basic code points:  Let b be the number of input code
+    # points before the last delimiter, or 0 if there is none, then
+    # copy the first b code points to the output.
+    b = 0
+    input_length.times do |j|
+      b = j if delim(input[j])
+    end
+    raise PunycodeBigOutput if b > max_out
+    b.times do |j|
+      case_flags[out] = flagged(input[j]) if case_flags
+      raise PunycodeBadInput unless basic(input[j])
+      output[out] = input[j]
+      out+=1
+    end
+    # Main decoding loop:  Start just after the last delimiter if any
+    # basic code points were copied; start at the beginning otherwise.
+    # (b is also 0 when the only delimiter sits at index 0, so that
+    # delimiter is then handed to decode_digit like any other character.)
+    in_ = b > 0 ? b + 1 : 0
+    while in_ < input_length
+      # in_ is the index of the next character to be consumed, and
+      # out is the number of code points in the output array.
+      # Decode a generalized variable-length integer into delta,
+      # which gets added to i.  The overflow checking is easier
+      # if we increase i as we go, then subtract off its starting
+      # value at the end to obtain delta.
+      oldi = i; w = 1; k = BASE
+      while true
+        raise PunycodeBadInput if in_ >= input_length
+        digit = decode_digit(input[in_])
+        in_+=1
+        raise PunycodeBadInput if digit >= BASE
+        raise PunycodeOverflow if digit > (MAXINT - i) / w
+        i += digit * w
+        t = if k <= bias # + TMIN # +TMIN not needed
+              TMIN
+            elsif k >= bias + TMAX
+              TMAX
+            else
+              k - bias
+            end
+        break if digit < t
+        raise PunycodeOverflow if w > MAXINT / (BASE - t)
+        w *= BASE - t
+        k += BASE
+      end
+      bias = adapt(i - oldi, out + 1, oldi == 0)
+      # i was supposed to wrap around from out+1 to 0,
+      # incrementing n each time, so we'll fix that now:
+      raise PunycodeOverflow if i / (out + 1) > MAXINT - n
+      n += i / (out + 1)
+      i %= out + 1
+      # Insert n at position i of the output:
+      # not needed for Punycode:
+      # raise PUNYCODE_INVALID_INPUT if decode_digit(n) <= base
+      raise PunycodeBigOutput if out >= max_out
+      if case_flags
+        #memmove(case_flags + i + 1, case_flags + i, out - i)
+        # Slice assignment shifts the tail one slot to the right.
+        case_flags[i + 1, out - i] = case_flags[i, out - i]
+        # Case of last character determines uppercase flag:
+        case_flags[i] = flagged(input[in_ - 1])
+      end
+      #memmove(output + i + 1, output + i, (out - i) * sizeof *output)
+      # Same right-shift for the code points, then store n in the gap.
+      output[i + 1, out - i] = output[i, out - i]
+      output[i] = n
+      i+=1
+      out+=1
+    end
+    output_length[0] = out
+    return PunycodeSuccess
+  end
+  def encode(unicode_string, case_flags=nil, print_ascii_only=false)
+    # Turn the UTF-8 string into code points and hand the encoder a
+    # zero-filled buffer with one slot more than the advertised capacity.
+    codepoints = unicode_string.unpack('U*')
+    buffer = Array.new(ACE_MAX_LENGTH + 1, 0)
+    length_box = [ACE_MAX_LENGTH]
+    punycode_encode(codepoints.size, codepoints, case_flags, length_box, buffer)
+    written = length_box[0]
+    (0...written).each do |idx|
+      c = buffer[idx]
+      if c < 0 || c > 127
+        raise Error, "assertion error: invalid output char"
+      end
+      raise PunycodeBadInput unless PRINT_ASCII[c]
+      # Optionally substitute the PRINT_ASCII entry for the raw code.
+      buffer[idx] = PRINT_ASCII[c] if print_ascii_only
+    end
+    # The slice includes the slot after the last written entry; trailing
+    # NUL characters are stripped from the joined string.
+    buffer[0..written].map { |x| x.chr }.join('').sub(/\0+\z/, '')
+  end
+  def decode(punycode, case_flags=[])
+    input = []
+    output = []
+    if ACE_MAX_LENGTH*2 < punycode.size
+      raise PunycodeBigOutput
+    end
+    punycode.each_byte do |c|
+      unless c >= 0 && c <= 127
+        raise PunycodeBadInput
+      end
+      input.push(c)
+    end
+    output_length = [UNICODE_MAX_LENGTH]
+    Punycode.punycode_decode(input.length, input, output_length,
+                             output, case_flags)
+    output.pack('U*')
+  end
+  UNICODE_MAX_LENGTH = 256
+  ACE_MAX_LENGTH = 256
+  # The following string is used to convert printable
+  # characters between ASCII and the native charset:
+  PRINT_ASCII =
+    "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" \
+    "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" \
+    " !\"\#$%&'()*+,-./" \
+    "0123456789:;<=>?" \
+    "@ABCDEFGHIJKLMNO" \
+    "PQRSTUVWXYZ[\\]^_" \
+    "`abcdefghijklmno" \
+    "pqrstuvwxyz{|}~\n"
+end
+if __FILE__ == $0
+  UNICODE_MAX_LENGTH = Punycode::UNICODE_MAX_LENGTH
+  ACE_MAX_LENGTH = Punycode::ACE_MAX_LENGTH
+  def usage(argv)
+    # Writes the command-line help to STDERR, using argv[0] as the program
+    # name, then terminates the process via exit(false).
+    STDERR.puts <<-USAGE
+#{argv[0]} -e reads code points and writes a Punycode string.
+#{argv[0]} -d reads a Punycode string and writes code points.
+Input and output are plain text in the native character set.
+Code points are in the form u+hex separated by whitespace.
+Although the specification allows Punycode strings to contain
+any characters from the ASCII repertoire, this test code
+supports only the printable characters, and needs the Punycode
+string to be followed by a newline.
+The case of the u in u+hex is the force-to-uppercase flag.
+    USAGE
+    exit(false)
+  end
+  TOO_BIG = "input or output is too large, recompile with larger limits"
+  INVALID_INPUT = "invalid input"
+  OVERFLOW = "arithmetic overflow"
+  IO_ERROR = "I/O error"
+  PRINT_ASCII = Punycode::PRINT_ASCII
+  # Command-line driver.  argv[0] is the program name and argv[1] must be
+  # "-e" (read u+hex code points from STDIN, print the Punycode string) or
+  # "-d" (read one Punycode line from STDIN, print one code point per
+  # line).  Anything else prints the usage text.  Problems are reported
+  # through Kernel#fail, i.e. by raising a RuntimeError with the message.
+  def main(argv)
+    case_flags = [0] * UNICODE_MAX_LENGTH
+    usage(argv) if argv.size != 2
+    usage(argv) if /\A-[de]\z/ !~ argv[1]
+    if argv[1][1] == ?e
+      input = [0] * UNICODE_MAX_LENGTH
+      output = [0] * (ACE_MAX_LENGTH+1)
+      # Read the input code points:
+      input_length = 0
+      STDIN.read.scan(/([uU]\+)([0-9a-fA-F]+)/) do |uplus, codept|
+        codept = codept.hex
+        if uplus[1] != ?+ || codept > Punycode::MAXINT
+          fail(INVALID_INPUT)
+        end
+        fail(TOO_BIG) if input_length == UNICODE_MAX_LENGTH
+        # The case of the "u" is the force-to-uppercase flag.
+        if uplus[0] == ?u
+          case_flags[input_length] = false
+        elsif uplus[0] == ?U
+          case_flags[input_length] = true
+        else
+          fail(INVALID_INPUT)
+        end
+        input[input_length] = codept
+        input_length+=1
+      end
+      # Encode:
+      output_length = [ACE_MAX_LENGTH]
+      begin
+        status = Punycode.punycode_encode(input_length, input, case_flags,
+                                          output_length, output)
+      rescue Punycode::Status::PunycodeBadInput
+        fail(INVALID_INPUT)
+      rescue Punycode::Status::PunycodeBigOutput
+        fail(TOO_BIG)
+      rescue Punycode::Status::PunycodeOverflow
+        fail(OVERFLOW)
+      end
+      if status != Punycode::Status::PunycodeSuccess
+        fail("assertion error: unknown status")
+      end
+      # Convert to native charset and output:
+      outlen = output_length[0]
+      outlen.times do |j|
+        c = output[j]
+        raise  "assertion error: invalid output char" unless c >= 0 && c <= 127
+        unless PRINT_ASCII[c]
+          fail(INVALID_INPUT)
+        end
+        output[j] = PRINT_ASCII[c]
+      end
+      output = output[0..outlen].map{|x|x.chr}.join('').sub(/\0+\z/, '')
+      puts(output)
+      exit(true)
+    end
+    if argv[1][1] == ?d
+      #input = [0] * ACE_MAX_LENGTH*2
+      #output = [0] * UNICODE_MAX_LENGTH
+      output = []
+      line = STDIN.gets
+      # gets returns nil when STDIN is already at end of file; report that
+      # as invalid input instead of crashing with NoMethodError on nil.
+      fail(INVALID_INPUT) unless line
+      input = line.split(//)[0,ACE_MAX_LENGTH*2]
+      # A missing trailing newline means the line was truncated above (or
+      # was not newline-terminated at all).
+      fail(TOO_BIG) if input[-1] != "\n"
+      input = input[0...-1]
+      # Map every character to its index in PRINT_ASCII.
+      input.each_with_index do |c, i|
+        print_ascii_index = PRINT_ASCII.index(c)
+        fail(INVALID_INPUT) unless print_ascii_index
+        input[i] = print_ascii_index
+      end
+      # Decode:
+      output_length = [UNICODE_MAX_LENGTH]
+      begin
+        status = Punycode.punycode_decode(input.length, input, output_length,
+                                          output, case_flags)
+      rescue Punycode::Status::PunycodeBadInput
+        fail(INVALID_INPUT)
+      rescue Punycode::Status::PunycodeBigOutput
+        fail(TOO_BIG)
+      rescue Punycode::Status::PunycodeOverflow
+        fail(OVERFLOW)
+      end
+      if status != Punycode::Status::PunycodeSuccess
+        fail("assertion error: unknown status")
+      end
+      # Output the result:
+      output_length[0].times do |j|
+        printf("%s+%04X\n", case_flags[j] ? "U" : "u", output[j])
+      end
+      exit(true)
+    end
+    usage(argv)
+    raise "not reached"
+  end
+  main([$0]+ARGV)
+end

data/test/test_punycode.rb ADDED

@@ -0,0 +1,311 @@
+#!/usr/bin/ruby
+#
+# test of punycode.rb
+#
+# copyright (c) 2005 Kazuhiro NISHIYAMA
+# You can redistribute it and/or modify it under the same terms as Ruby.
+#
+require 'test/unit'
+module AssertPunycode
+  def assert_punycode(example)
+    example = example.gsub(/\\\n\s*/, "").split(/\n/)
+    description = example[0]
+    codepoints = example[1...-1].join("")
+    punycode = example[-1].strip.sub(/^Punycode: /, "")
+    assert_punycode_main(description, codepoints, punycode)
+  end
+  def test_rfc3492_7_1_A
+    assert_punycode(<<-EXAMPLE)
+    (A) Arabic (Egyptian):
+        u+0644 u+064A u+0647 u+0645 u+0627 u+0628 u+062A u+0643 u+0644
+        u+0645 u+0648 u+0634 u+0639 u+0631 u+0628 u+064A u+061F
+        Punycode: egbpdaj6bu4bxfgehfvwxn
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_B
+    assert_punycode(<<-EXAMPLE)
+    (B) Chinese (simplified):
+        u+4ED6 u+4EEC u+4E3A u+4EC0 u+4E48 u+4E0D u+8BF4 u+4E2D u+6587
+        Punycode: ihqwcrb4cv8a8dqg056pqjye
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_C
+    assert_punycode(<<-EXAMPLE)
+    (C) Chinese (traditional):
+        u+4ED6 u+5011 u+7232 u+4EC0 u+9EBD u+4E0D u+8AAA u+4E2D u+6587
+        Punycode: ihqwctvzc91f659drss3x8bo0yb
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_D
+    assert_punycode(<<-EXAMPLE)
+    (D) Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky
+        U+0050 u+0072 u+006F u+010D u+0070 u+0072 u+006F u+0073 u+0074
+        u+011B u+006E u+0065 u+006D u+006C u+0075 u+0076 u+00ED u+010D
+        u+0065 u+0073 u+006B u+0079
+        Punycode: Proprostnemluvesky-uyb24dma41a
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_E
+    assert_punycode(<<-EXAMPLE)
+    (E) Hebrew:
+        u+05DC u+05DE u+05D4 u+05D4 u+05DD u+05E4 u+05E9 u+05D5 u+05D8
+        u+05DC u+05D0 u+05DE u+05D3 u+05D1 u+05E8 u+05D9 u+05DD u+05E2
+        u+05D1 u+05E8 u+05D9 u+05EA
+        Punycode: 4dbcagdahymbxekheh6e0a7fei0b
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_F
+    assert_punycode(<<-EXAMPLE)
+    (F) Hindi (Devanagari):
+        u+092F u+0939 u+0932 u+094B u+0917 u+0939 u+093F u+0928 u+094D
+        u+0926 u+0940 u+0915 u+094D u+092F u+094B u+0902 u+0928 u+0939
+        u+0940 u+0902 u+092C u+094B u+0932 u+0938 u+0915 u+0924 u+0947
+        u+0939 u+0948 u+0902
+        Punycode: i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_G
+    assert_punycode(<<-EXAMPLE)
+    (G) Japanese (kanji and hiragana):
+        u+306A u+305C u+307F u+3093 u+306A u+65E5 u+672C u+8A9E u+3092
+        u+8A71 u+3057 u+3066 u+304F u+308C u+306A u+3044 u+306E u+304B
+        Punycode: n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_H
+    assert_punycode(<<-EXAMPLE)
+    (H) Korean (Hangul syllables):
+        u+C138 u+ACC4 u+C758 u+BAA8 u+B4E0 u+C0AC u+B78C u+B4E4 u+C774
+        u+D55C u+AD6D u+C5B4 u+B97C u+C774 u+D574 u+D55C u+B2E4 u+BA74
+        u+C5BC u+B9C8 u+B098 u+C88B u+C744 u+AE4C
+        Punycode: 989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j\\
+                  psd879ccm6fea98c
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_I
+    if self.class.to_s == 'TestPunycodeEncodeLib'
+      if __FILE__ == $0 || $VERBOSE || $DEBUG
+        STDERR.puts "SKIP KNOWN BUG: downcase D in Punycode in encode test without case_flags."
+      end
+      return
+    end
+    assert_punycode(<<-EXAMPLE)
+    KNOWN BUG: downcase D in Punycode in encode test without case_flags. \\
+    (I) Russian (Cyrillic):
+        U+043F u+043E u+0447 u+0435 u+043C u+0443 u+0436 u+0435 u+043E
+        u+043D u+0438 u+043D u+0435 u+0433 u+043E u+0432 u+043E u+0440
+        u+044F u+0442 u+043F u+043E u+0440 u+0443 u+0441 u+0441 u+043A
+        u+0438
+        Punycode: b1abfaaepdrnnbgefbaDotcwatmq2g4l
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_I_downcase
+    assert_punycode(<<-EXAMPLE)
+    (I) Russian (Cyrillic): (downcase first U in Codepoints and D in Punycode)
+        u+043F u+043E u+0447 u+0435 u+043C u+0443 u+0436 u+0435 u+043E
+        u+043D u+0438 u+043D u+0435 u+0433 u+043E u+0432 u+043E u+0440
+        u+044F u+0442 u+043F u+043E u+0440 u+0443 u+0441 u+0441 u+043A
+        u+0438
+        Punycode: b1abfaaepdrnnbgefbadotcwatmq2g4l
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_J
+    assert_punycode(<<-EXAMPLE)
+    (J) Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol
+        U+0050 u+006F u+0072 u+0071 u+0075 u+00E9 u+006E u+006F u+0070
+        u+0075 u+0065 u+0064 u+0065 u+006E u+0073 u+0069 u+006D u+0070
+        u+006C u+0065 u+006D u+0065 u+006E u+0074 u+0065 u+0068 u+0061
+        u+0062 u+006C u+0061 u+0072 u+0065 u+006E U+0045 u+0073 u+0070
+        u+0061 u+00F1 u+006F u+006C
+        Punycode: PorqunopuedensimplementehablarenEspaol-fmd56a
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_K
+    assert_punycode(<<-EXAMPLE)
+    (K) Vietnamese:\\
+        T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch\\
+        <ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t
+        U+0054 u+1EA1 u+0069 u+0073 u+0061 u+006F u+0068 u+1ECD u+006B
+        u+0068 u+00F4 u+006E u+0067 u+0074 u+0068 u+1EC3 u+0063 u+0068
+        u+1EC9 u+006E u+00F3 u+0069 u+0074 u+0069 u+1EBF u+006E u+0067
+        U+0056 u+0069 u+1EC7 u+0074
+        Punycode: TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_L
+    assert_punycode(<<-EXAMPLE)
+    (L) 3<nen>B<gumi><kinpachi><sensei>
+        u+0033 u+5E74 U+0042 u+7D44 u+91D1 u+516B u+5148 u+751F
+        Punycode: 3B-ww4c5e180e575a65lsy2b
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_M
+    assert_punycode(<<-EXAMPLE)
+    (M) <amuro><namie>-with-SUPER-MONKEYS
+        u+5B89 u+5BA4 u+5948 u+7F8E u+6075 u+002D u+0077 u+0069 u+0074
+        u+0068 u+002D U+0053 U+0055 U+0050 U+0045 U+0052 u+002D U+004D
+        U+004F U+004E U+004B U+0045 U+0059 U+0053
+        Punycode: -with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_N
+    assert_punycode(<<-EXAMPLE)
+    (N) Hello-Another-Way-<sorezore><no><basho>
+        U+0048 u+0065 u+006C u+006C u+006F u+002D U+0041 u+006E u+006F
+        u+0074 u+0068 u+0065 u+0072 u+002D U+0057 u+0061 u+0079 u+002D
+        u+305D u+308C u+305E u+308C u+306E u+5834 u+6240
+        Punycode: Hello-Another-Way--fc4qua05auwb3674vfr0b
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_O
+    assert_punycode(<<-EXAMPLE)
+    (O) <hitotsu><yane><no><shita>2
+        u+3072 u+3068 u+3064 u+5C4B u+6839 u+306E u+4E0B u+0032
+        Punycode: 2-u9tlzr9756bt3uc0v
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_P
+    assert_punycode(<<-EXAMPLE)
+    (P) Maji<de>Koi<suru>5<byou><mae>
+        U+004D u+0061 u+006A u+0069 u+3067 U+004B u+006F u+0069 u+3059
+        u+308B u+0035 u+79D2 u+524D
+        Punycode: MajiKoi5-783gue6qz075azm5e
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_Q
+    assert_punycode(<<-EXAMPLE)
+    (Q) <pafii>de<runba>
+        u+30D1 u+30D5 u+30A3 u+30FC u+0064 u+0065 u+30EB u+30F3 u+30D0
+        Punycode: de-jg4avhby1noc0d
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_R
+    assert_punycode(<<-EXAMPLE)
+    (R) <sono><supiido><de>
+        u+305D u+306E u+30B9 u+30D4 u+30FC u+30C9 u+3067
+        Punycode: d9juau41awczczp
+    EXAMPLE
+  end
+  def test_rfc3492_7_1_S
+    assert_punycode(<<-EXAMPLE)
+    (S) -> $1.00 <-
+        u+002D u+003E u+0020 u+0024 u+0031 u+002E u+0030 u+0030 u+0020
+        u+003C u+002D
+        Punycode: -> $1.00 <--
+    EXAMPLE
+  end
+  RUBY_BIN =
+    begin
+      require "rbconfig"
+      File.join(
+        Config::CONFIG["bindir"],
+        Config::CONFIG["ruby_install_name"] + Config::CONFIG["EXEEXT"]
+      )
+    rescue LoadError
+      "ruby"
+    end
+  PUNYCODE_RB =
+    if File.exist?('punycode.rb')
+      'punycode.rb'
+    else
+      File.expand_path(File.join('..', 'lib', 'punycode.rb'),
+                       File.dirname(__FILE__))
+    end
+end
+class TestPunycodeEncode < Test::Unit::TestCase
+  include AssertPunycode
+  def assert_punycode_main(description, codepoints, punycode)
+    IO.popen("#{RUBY_BIN} '#{PUNYCODE_RB}' -e", "r+") do |io|
+      io.puts codepoints
+      io.close_write
+      assert_equal(punycode, io.gets.chomp, description)
+    end
+  end
+end
+class TestPunycodeDecode < Test::Unit::TestCase
+  include AssertPunycode
+  def assert_punycode_main(description, codepoints, punycode)
+    IO.popen("#{RUBY_BIN} '#{PUNYCODE_RB}' -d", "r+") do |io|
+      io.puts punycode
+      io.close_write
+      assert_equal(codepoints.strip.gsub(/\s+/, "\n"),
+                   io.read.strip, description)
+    end
+  end
+end
+# The two classes below are only defined when an executable named
+# "punycode" exists in the current directory; they run the same examples
+# against that program instead of punycode.rb.
+if File.executable?("./punycode")
+  class TestPunycodeEncodeBin < Test::Unit::TestCase
+    include AssertPunycode
+    # Sends the code points to "./punycode -e" and compares the first
+    # output line with the expected Punycode string.
+    def assert_punycode_main(description, codepoints, punycode)
+      IO.popen("./punycode -e", "r+") do |io|
+        io.puts codepoints
+        io.close_write
+        assert_equal(punycode, io.gets.chomp, description)
+      end
+    end
+  end
+  class TestPunycodeDecodeBin < Test::Unit::TestCase
+    include AssertPunycode
+    # Sends the Punycode string to "./punycode -d" and compares the output
+    # with the expected code points, one per line.
+    def assert_punycode_main(description, codepoints, punycode)
+      IO.popen("./punycode -d", "r+") do |io|
+        io.puts punycode
+        io.close_write
+        assert_equal(codepoints.strip.gsub(/\s+/, "\n"),
+                     io.read.strip, description)
+      end
+    end
+  end
+end
+# In-process tests of the library API.  If 'punycode' cannot be required
+# the LoadError is swallowed and these two classes are not defined.
+begin
+  require 'punycode'
+  class TestPunycodeEncodeLib < Test::Unit::TestCase
+    include AssertPunycode
+    # Builds a UTF-8 string from the hex code points and checks the result
+    # of Punycode.encode (called without case flags).
+    def assert_punycode_main(description, codepoints, punycode)
+      unistring = codepoints.scan(/[0-9a-fA-F]+/).map{|x|x.hex}.pack('U*')
+      assert_equal(punycode, Punycode.encode(unistring), description)
+    end
+  end
+  class TestPunycodeDecodeLib < Test::Unit::TestCase
+    include AssertPunycode
+    # Checks that Punycode.decode returns the UTF-8 string built from the
+    # hex code points.
+    def assert_punycode_main(description, codepoints, punycode)
+      unistring = codepoints.scan(/[0-9a-fA-F]+/).map{|x|x.hex}.pack('U*')
+      assert_equal(unistring, Punycode.decode(punycode), description)
+    end
+  end
+rescue LoadError
+end

metadata ADDED

@@ -0,0 +1,47 @@
+--- !ruby/object:Gem::Specification
+rubygems_version: 0.9.2
+specification_version: 1
+name: punycode4r
+version: !ruby/object:Gem::Version
+  version: 0.2.0
+date: 2007-02-21 00:00:00 +09:00
+summary: pure Ruby implementing Punycode (RFC 3492)
+require_paths:
+- lib
+email: zn@mbf.nifty.com
+homepage:
+rubyforge_project: rwiki
+description:
+autorequire:
+default_executable:
+bindir: bin
+has_rdoc: false
+required_ruby_version: !ruby/object:Gem::Version::Requirement
+  requirements:
+  - - ">"
+    - !ruby/object:Gem::Version
+      version: 0.0.0
+  version:
+platform: ruby
+signing_key:
+cert_chain:
+post_install_message:
+authors:
+- Kazuhiro NISHIYAMA
+files:
+- lib/punycode.rb
+- test/test_punycode.rb
+test_files: []
+rdoc_options: []
+extra_rdoc_files: []
+executables: []
+extensions: []
+requirements: []
+dependencies: []