RubyGems - domain_name - Versions diffs - 0.5.2 → 0.5.3 - Mend

domain_name 0.5.2 → 0.5.3

Files changed (9) hide show

data/Gemfile +1 -2
data/Rakefile +0 -8
data/VERSION +1 -1
data/domain_name.gemspec +6 -9
data/lib/domain_name/etld_data.rb +4300 -4298
data/lib/domain_name/etld_data.rb.erb +5 -3
data/lib/domain_name/punycode.rb +198 -93
data/test/test_domain_name-punycode.rb +1 -0
metadata +37 -23

data/lib/domain_name/etld_data.rb.erb CHANGED

@@ -1,7 +1,9 @@
 class DomainName
+  ETLD_DATA = {
+<% etld_data.each_pair { |key, value| %>    <%= key.inspect %> => <%= value.inspect %>,
+<% } %>  }
   def self.etld_data
-    @@etld_data ||= {
-<% etld_data.each_pair { |key, value| %>      <%= key.inspect %> => <%= value.inspect %>,
-<% } %>    }
+    ETLD_DATA
   end
 end

data/lib/domain_name/punycode.rb CHANGED

@@ -2,7 +2,7 @@
 #
 # punycode.rb - PunyCode encoder for the Domain Name library
 #
-# Copyright (C) 2011 Akinori MUSHA, All rights reserved.
+# Copyright (C) 2011, 2012 Akinori MUSHA, All rights reserved.
 #
 # Ported from puny.c, a part of VeriSign XCode (encode/decode) IDN
 # Library.
@@ -58,125 +58,230 @@ class DomainName
     INITIAL_N = 0x80
     DELIMITER = '-'
-    # The maximum value of a DWORD variable
-    MAXINT = (1 << 64) - 1
+    MAXINT = (1 << 32) - 1
-    # Used in the calculation of bias:
     LOBASE = BASE - TMIN
-    # Used in the calculation of bias:
     CUTOFF = LOBASE * TMAX / 2
-    class Error < StandardError; end
-    class BufferOverflowError < Error; end
-    # Returns the basic code point whose value (when used for
-    # representing integers) is d, which must be in the range 0 to
-    # BASE-1.  The lowercase form is used unless flag is true, in
-    # which case the uppercase form is used.  The behavior is
-    # undefined if flag is nonzero and digit d has no uppercase form.
-    def encode_digit(d, flag)
-      (d + 22 + (d < 26 ? 75 : 0) - (flag ? (1 << 5) : 0)).chr
-      #  0..25 map to ASCII a..z or A..Z
-      # 26..35 map to ASCII 0..9
-    end
-    module_function :encode_digit
+    RE_NONBASIC = /[^\x00-\x7f]/
+    DECODE_DIGIT = {}.tap { |map|
+      # ASCII A..Z map to 0..25
+      # ASCII a..z map to 0..25
+      (0..25).each { |i| map[65 + i] = map[97 + i] = i }
+      # ASCII 0..9 map to 26..35
+      (26..35).each { |i| map[22 + i] = i }
+    }
-    # Main encode function
-    def encode(string)
-      input = string.unpack('U*')
-      output = ''
+    # Most errors we raise are basically kind of ArgumentError.
+    class ArgumentError < ::ArgumentError; end
+    class BufferOverflowError < ArgumentError; end
-      # Initialize the state
-      n = INITIAL_N
-      delta = 0
-      bias = INITIAL_BIAS;
+    class << self
+      private
-      # Handle the basic code points
-      input.each { |cp| output << cp.chr if cp < 0x80 }
+      # Returns the basic code point whose value (when used for
+      # representing integers) is d, which must be in the range 0 to
+      # BASE-1.  The lowercase form is used unless flag is true, in
+      # which case the uppercase form is used.  The behavior is
+      # undefined if flag is nonzero and digit d has no uppercase
+      # form.
+      def encode_digit(d, flag)
+        (d + 22 + (d < 26 ? 75 : 0) - (flag ? (1 << 5) : 0)).chr
+        #  0..25 map to ASCII a..z or A..Z
+        # 26..35 map to ASCII 0..9
+      end
-      h = b = output.length
+      # Returns the numeric value of a basic code point (for use in
+      # representing integers) in the range 0 to base-1, or nil if cp
+      # does not represent a value.
+      def decode_digit(cp)
+        DECODE_DIGIT[cp]
+      end
-      # h is the number of code points that have been handled, b is the
-      # number of basic code points, and out is the number of characters
-      # that have been output.
+      public
-      output << DELIMITER if b > 0
+      # Encode a +string+ in Punycode
+      def encode(string)
+        input = string.unpack('U*')
+        output = ''
-      # Main encoding loop
+        # Initialize the state
+        n = INITIAL_N
+        delta = 0
+        bias = INITIAL_BIAS
-      while h < input.length
-        # All non-basic code points < n have been handled already.  Find
-        # the next larger one
+        # Handle the basic code points
+        input.each { |cp| output << cp.chr if cp < 0x80 }
-        m = MAXINT
-        input.each { |cp|
-          m = cp if (n...m) === cp
-        }
+        h = b = output.length
+        # h is the number of code points that have been handled, b is the
+        # number of basic code points, and out is the number of characters
+        # that have been output.
+        output << DELIMITER if b > 0
+        # Main encoding loop
+        while h < input.length
+          # All non-basic code points < n have been handled already.  Find
+          # the next larger one
+          m = MAXINT
+          input.each { |cp|
+            m = cp if (n...m) === cp
+          }
-        # Increase delta enough to advance the decoder's <n,i> state to
-        # <m,0>, but guard against overflow
+          # Increase delta enough to advance the decoder's <n,i> state to
+          # <m,0>, but guard against overflow
-        if m - n > (MAXINT - delta) / (h + 1)
-          raise BufferOverflowError
+          delta += (m - n) * (h + 1)
+          raise BufferOverflowError if delta > MAXINT
+          n = m
+          input.each { |cp|
+            # AMC-ACE-Z can use this simplified version instead
+            if cp < n
+              delta += 1
+              raise BufferOverflowError if delta > MAXINT
+            elsif cp == n
+              # Represent delta as a generalized variable-length integer
+              q = delta
+              k = BASE
+              loop {
+                t = k <= bias ? TMIN : k - bias >= TMAX ? TMAX : k - bias
+                break if q < t
+                q, r = (q - t).divmod(BASE - t)
+                output << encode_digit(t + r, false)
+                k += BASE
+              }
+              output << encode_digit(q, false)
+              # Adapt the bias
+              delta = h == b ? delta / DAMP : delta >> 1
+              delta += delta / (h + 1)
+              bias = 0
+              while delta > CUTOFF
+                delta /= LOBASE
+                bias += BASE
+              end
+              bias += (LOBASE + 1) * delta / (delta + SKEW)
+              delta = 0
+              h += 1
+            end
+          }
+          delta += 1
+          n += 1
         end
-        delta += (m - n) * (h + 1)
-        n = m
-        input.each { |cp|
-          # AMC-ACE-Z can use this simplified version instead
-          if cp < n && (delta += 1) == 0
-            raise BufferOverflowError
+        output
+      end
+      # Encode a hostname using IDN/Punycode algorithms
+      def encode_hostname(hostname)
+        hostname.match(RE_NONBASIC) or return hostname
+        hostname.split('.').map { |name|
+          if name.match(RE_NONBASIC)
+            'xn--' << encode(name)
+          else
+            name
           end
+        }.join('.')
+      end
-          if cp == n
-            # Represent delta as a generalized variable-length integer
-            q = delta
-            k = BASE
-            loop {
-              t = k <= bias ? TMIN : k - bias >= TMAX ? TMAX : k - bias;
-              break if q < t
-              output << encode_digit(t + (q - t) % (BASE - t), false)
-              q = (q - t) / (BASE - t)
-              k += BASE
-            }
-            output << encode_digit(q, false)
-            # Adapt the bias
-            delta = h == b ? delta / DAMP : delta >> 1
-            delta += delta / (h + 1)
-            bias = 0
-            while delta > CUTOFF
-              delta /= LOBASE
-              bias += BASE
-            end
-            bias += (LOBASE + 1) * delta / (delta + SKEW)
+      # Decode a +string+ encoded in Punycode
+      def decode(string)
+        # Initialize the state
+        n = INITIAL_N
+        i = 0
+        bias = INITIAL_BIAS
+        if j = string.rindex(DELIMITER)
+          b = string[0...j]
+          b.match(RE_NONBASIC) and
+            raise ArgumentError, "Illegal character is found in basic part: #{string.inspect}"
+          # Handle the basic code points
+          output = b.unpack('U*')
+          u = string[(j + 1)..-1]
+        else
+          output = []
+          u = string
+        end
+        # Main decoding loop: Start just after the last delimiter if any
+        # basic code points were copied; start at the beginning
+        # otherwise.
+        input = u.unpack('C*')
+        input_length = input.length
+        h = 0
+        out = output.length
+        while h < input_length
+          # Decode a generalized variable-length integer into delta,
+          # which gets added to i.  The overflow checking is easier
+          # if we increase i as we go, then subtract off its starting
+          # value at the end to obtain delta.
+          oldi = i
+          w = 1
+          k = BASE
-            delta = 0
+          loop {
+            digit = decode_digit(input[h]) or
+            raise ArgumentError, "Illegal character is found in non-basic part: #{string.inspect}"
             h += 1
+            i += digit * w
+            raise BufferOverflowError if i > MAXINT
+            t = k <= bias ? TMIN : k - bias >= TMAX ? TMAX : k - bias
+            break if digit < t
+            w *= BASE - t
+            raise BufferOverflowError if w > MAXINT
+            k += BASE
+            h < input_length or raise ArgumentError, "Malformed input given: #{string.inspect}"
+          }
+          # Adapt the bias
+          delta = oldi == 0 ? i / DAMP : (i - oldi) >> 1
+          delta += delta / (out + 1)
+          bias = 0
+          while delta > CUTOFF
+            delta /= LOBASE
+            bias += BASE
           end
-        }
+          bias += (LOBASE + 1) * delta / (delta + SKEW)
-        delta += 1
-        n += 1
-      end
+          # i was supposed to wrap around from out+1 to 0, incrementing
+          # n each time, so we'll fix that now:
-      output
-    end
-    module_function :encode
+          q, i = i.divmod(out + 1)
+          n += q
+          raise BufferOverflowError if n > MAXINT
-    def encode_hostname(hostname)
-      hostname.match(/[^\x00-\x7f]/) or return hostname
+          # Insert n at position i of the output:
-      hostname.split('.').map { |name|
-        if name.match(/[^\x00-\x7f]/)
-          'xn--' << encode(name)
-        else
-          name
+          output[i, 0] = n
+          out += 1
+          i += 1
         end
-      }.join('.')
+        output.pack('U*')
+      end
+      # Decode a hostname using IDN/Punycode algorithms
+      def decode_hostname(hostname)
+        hostname.gsub(/(\A|\.)xn--([^.]*)/) {
+          $1 << decode($2)
+        }
+      end
     end
-    module_function :encode_hostname
   end
 end

data/test/test_domain_name-punycode.rb CHANGED

@@ -91,6 +91,7 @@ class TestDomainName < Test::Unit::TestCase
         '-> $1.00 <--']
     ].each { |title, cps, punycode|
       assert_equal punycode, DomainName::Punycode.encode(cps.pack('U*')), title
+      assert_equal cps.pack('U*').to_nfc, DomainName::Punycode.decode(punycode), title
     }
   end
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: domain_name
 version: !ruby/object:Gem::Version
-  version: 0.5.2
+  version: 0.5.3
   prerelease:
 platform: ruby
 authors:
@@ -9,11 +9,11 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-01-18 00:00:00.000000000 Z
+date: 2012-04-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: unf
-  requirement: &70324603442080 !ruby/object:Gem::Requirement
+  requirement: !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
@@ -21,10 +21,15 @@ dependencies:
         version: 0.0.3
   type: :runtime
   prerelease: false
-  version_requirements: *70324603442080
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 0.0.3
 - !ruby/object:Gem::Dependency
   name: shoulda
-  requirement: &70324603441520 !ruby/object:Gem::Requirement
+  requirement: !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -32,21 +37,31 @@ dependencies:
         version: '0'
   type: :development
   prerelease: false
-  version_requirements: *70324603441520
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: bundler
-  requirement: &70324603440960 !ruby/object:Gem::Requirement
+  requirement: !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
       - !ruby/object:Gem::Version
-        version: 1.0.0
+        version: 1.1.0
   type: :development
   prerelease: false
-  version_requirements: *70324603440960
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: 1.1.0
 - !ruby/object:Gem::Dependency
   name: jeweler
-  requirement: &70324603440180 !ruby/object:Gem::Requirement
+  requirement: !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ~>
@@ -54,21 +69,15 @@ dependencies:
         version: 1.6.4
   type: :development
   prerelease: false
-  version_requirements: *70324603440180
-- !ruby/object:Gem::Dependency
-  name: rcov
-  requirement: &70324603439500 !ruby/object:Gem::Requirement
+  version_requirements: !ruby/object:Gem::Requirement
     none: false
     requirements:
-    - - ! '>='
+    - - ~>
       - !ruby/object:Gem::Version
-        version: '0'
-  type: :development
-  prerelease: false
-  version_requirements: *70324603439500
+        version: 1.6.4
 - !ruby/object:Gem::Dependency
   name: rdoc
-  requirement: &70324603438960 !ruby/object:Gem::Requirement
+  requirement: !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -76,7 +85,12 @@ dependencies:
         version: 2.4.2
   type: :development
   prerelease: false
-  version_requirements: *70324603438960
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ! '>='
+      - !ruby/object:Gem::Version
+        version: 2.4.2
 description: ! 'This is a Domain Name manipulation library for Ruby.
@@ -123,7 +137,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: 79833043846430816
+      hash: -1258501941076469497
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
@@ -132,7 +146,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.11
+rubygems_version: 1.8.21
 signing_key:
 specification_version: 3
 summary: Domain Name manipulation library for Ruby