RubyGems - simpleidn - Versions diffs - 0.0.7 → 0.0.9 - Mend

simpleidn 0.0.7 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: bbff0e8f2c02135040f6ce79aa7400f1fb4e4329
-  data.tar.gz: 755dfe2f0c621b57080bea4c5793816e92ced4c9
+  metadata.gz: 91419f6658f1aa0e2efd5f1774879994f822a3d7
+  data.tar.gz: 60663039b50c49baa983dc7d51179dae820d839d
 SHA512:
-  metadata.gz: 670cbaa7ada6f97efcfa5170e64cde78d5d43ae054ab32fec1bd77c669f32888c69f4af54d8109ae6ce29afc1282b2e325174f2d14aee6960991c13498416696
-  data.tar.gz: 023b37009fe2128e99da467cb9771a195dcb8523430b7008c4417450e8dc1a9cef53caa1e2d37441e54ab4c6a2c4f71f28746e86cef24c059030b7e9f241efc6
+  metadata.gz: 1eca7fc205bdd66663334b91786029b645637d501acb8b61e8661b2298c751a2df39c4854b6f926f29dec505b5530f86c1159b7a3e0a6a8ffa4ef411fd59600d
+  data.tar.gz: 5ca7274d2115e9e3a65f992721df2ec748719754e2ceccce3e03b1f4328c0a795a8fa85f90010d208fbb6f87d119b7f1154ae3d92e099b09d28957e0599e9a68

data/LICENCE CHANGED Viewed

@@ -1,6 +1,6 @@
 The MIT License
-Copyright (c) 2011-2013 Morten Møller Riis
+Copyright (c) 2011-2017 Morten Møller Riis
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
+THE SOFTWARE.

data/README.rdoc CHANGED Viewed

@@ -4,7 +4,7 @@ This gem allows easy conversion from punycode ACE strings to unicode UTF-8 strin
 The implementation is heavily based on the RFC3492 C example implementation but simplified since it does not preserve case.
-This gem works with Ruby 1.8.7, 1.9.2, 1.9.3, 2.0, 2.1, 2.2.
+This gem works with Ruby 1.9.2, 1.9.3, 2.0, 2.1, 2.2.
 * http://www.whatastruggle.com
@@ -18,13 +18,13 @@ In your Ruby script you can now.
   require 'rubygems'
   require 'simpleidn'
   SimpleIDN.to_unicode("xn--mllerriis-l8a.com")
-  => "møllerriis.com"
+  => "møllerriis.com"
 	SimpleIDN.to_ascii("møllerriis.com")
-  => "xn--mllerriis-l8a.com"
+  => "xn--mllerriis-l8a.com"
 == Testing / RSpec
 In order to run the test suite you must have <tt>rspec</tt> installed.
@@ -36,4 +36,4 @@ http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
 Does not preserve uppercase. So if, for some reason, you use uppercase characters (eg. Ø instead of ø), please take note of that.
-Please report any issues!
+Please report any issues!

data/lib/simpleidn.rb CHANGED Viewed

@@ -1,24 +1,5 @@
-# encoding: UTF-8
-if RUBY_VERSION =~ /^1\.8/
-  $KCODE = "UTF-8"
-  class String
-    def ord
-      self[0]
-    end
-  end
-else
-  Encoding.default_internal = "UTF-8"
-end
-class Integer
-  def to_utf8_character
-    [self].pack("U*")
-  end
-end
 module SimpleIDN
-  VERSION = "0.0.7"
+  VERSION = "0.0.9"
   # The ConversionError is raised when an error occurs during a
   # Punycode <-> Unicode conversion.
@@ -26,7 +7,6 @@ module SimpleIDN
   end
   module Punycode
     INITIAL_N = 0x80
     INITIAL_BIAS = 72
     DELIMITER = 0x2D
@@ -36,6 +16,9 @@ module SimpleIDN
     TMAX = 26
     SKEW = 38
     MAXINT = 0x7FFFFFFF
+    ASCII_MAX = 0x7F
+    EMPTY = ''.encode(Encoding::UTF_8).freeze
     module_function
@@ -46,14 +29,12 @@ module SimpleIDN
       cp - 48 < 10 ? cp - 22 : cp - 65 < 26 ? cp - 65 : cp - 97 < 26 ? cp - 97 : BASE
     end
-    # encode_digit(d,flag) returns the basic code point whose value
+    # encode_digit(d) returns the basic code point whose value
     # (when used for representing integers) is d, which needs to be in
-    # the range 0 to base-1. The lowercase form is used unless flag is
-    # nonzero, in which case the uppercase form is used. The behavior
-    # is undefined if flag is nonzero and digit d has no uppercase form.
+    # the range 0 to base-1.
     def encode_digit(d)
       d + 22 + 75 * (d < 26 ? 1 : 0)
-      #  0..25 map to ASCII a..z or A..Z
+      #  0..25 map to ASCII a..z
       # 26..35 map to ASCII 0..9
     end
@@ -63,24 +44,17 @@ module SimpleIDN
       delta += (delta / numpoints)
       k = 0
-      while delta > (((BASE - TMIN) * TMAX) / 2) do
+      while delta > (((BASE - TMIN) * TMAX) / 2)
         delta /= BASE - TMIN
         k += BASE
       end
-      return k + (BASE - TMIN + 1) * delta / (delta + SKEW)
-    end
-    # encode_basic(bcp,flag) forces a basic code point to lowercase if flag is zero,
-    # uppercase if flag is nonzero, and returns the resulting code point.
-    # The code point is unchanged if it is caseless.
-    # The behavior is undefined if bcp is not a basic code point.
-    def encode_basic(bcp, flag)
-      bcp -= (bcp - 97 < 26 ? 1 : 0) << 5
-      return bcp + ((!flag && (bcp - 65 < 26 ? 1 : 0)) << 5)
+      k + (BASE - TMIN + 1) * delta / (delta + SKEW)
     end
     # Main decode
     def decode(input)
+      input_encoding = input.encoding
+      input = input.encode(Encoding::UTF_8).codepoints.to_a
       output = []
       # Initialize the state:
@@ -91,18 +65,18 @@ module SimpleIDN
       # Handle the basic code points: Let basic be the number of input code
       # points before the last delimiter, or 0 if there is none, then
       # copy the first basic code points to the output.
-      basic = input.rindex(DELIMITER.to_utf8_character) || 0
+      basic = input.rindex(DELIMITER) || 0
-      input.unpack("U*")[0, basic].each do |char|
-        raise(ConversionError, "Illegal input >= 0x80") if char >= 0x80
-        output << char.chr # to_utf8_character not needed her because ord < 0x80 (128) which is within US-ASCII.
+      input[0, basic].each do |char|
+        raise(ConversionError, "Illegal input >= 0x80") if char > ASCII_MAX
+        output << char
       end
       # Main decoding loop: Start just after the last delimiter if any
       # basic code points were copied; start at the beginning otherwise.
       ic = basic > 0 ? basic + 1 : 0
-      while ic < input.length do
+      while ic < input.length
         # ic is the index of the next character to be consumed,
         # Decode a generalized variable-length integer into delta,
@@ -112,10 +86,10 @@ module SimpleIDN
         oldi = i
         w = 1
         k = BASE
-        while true do
+        loop do
           raise(ConversionError, "punycode_bad_input(1)") if ic >= input.length
-          digit = decode_digit(input[ic].ord)
+          digit = decode_digit(input[ic])
           ic += 1
           raise(ConversionError, "punycode_bad_input(2)") if digit >= BASE
@@ -142,16 +116,17 @@ module SimpleIDN
         i %= out
         # Insert n at position i of the output:
-        output.insert(i, n.to_utf8_character)
+        output.insert(i, n)
         i += 1
       end
-      return output.join
+      output.collect {|c| c.chr(Encoding::UTF_8)}.join(EMPTY).encode(input_encoding)
     end
     # Main encode function
     def encode(input)
-      input = input.unpack("U*")
+      input_encoding = input.encoding
+      input = input.encode(Encoding::UTF_8).codepoints.to_a
       output = []
       # Initialize the state:
@@ -160,9 +135,7 @@ module SimpleIDN
       bias = INITIAL_BIAS
       # Handle the basic code points:
-      output = input.select do |char|
-        char if char < 0x80
-      end
+      output = input.select { |char| char <= ASCII_MAX }
       h = b = output.length
@@ -172,7 +145,7 @@ module SimpleIDN
       output << DELIMITER if b > 0
       # Main encoding loop:
-      while h < input.length do
+      while h < input.length
         # All non-basic code points < n have been
         # handled already. Find the next larger one:
@@ -190,38 +163,42 @@ module SimpleIDN
         delta += (m - n) * (h + 1)
         n = m
-        input.each_with_index do |char, j|
+        input.each_with_index do |char, _|
           if char < n
             delta += 1
             raise(ConversionError, "punycode_overflow(2)") if delta > MAXINT
           end
-          if (char == n)
-              # Represent delta as a generalized variable-length integer:
-              q = delta
-              k = BASE
-              while true do
-                  t = k <= bias ? TMIN : k >= bias + TMAX ? TMAX : k - bias
-                  break if q < t
-                  output << encode_digit(t + (q - t) % (BASE - t))
-                  q = ( (q - t) / (BASE - t) ).floor
-                  k += BASE
-              end
-              output << encode_digit(q)
-              bias = adapt(delta, h + 1, h == b)
-              delta = 0
-              h += 1
+          next unless char == n
+          # Represent delta as a generalized variable-length integer:
+          q = delta
+          k = BASE
+          loop do
+            t = k <= bias ? TMIN : k >= bias + TMAX ? TMAX : k - bias
+            break if q < t
+            output << encode_digit(t + (q - t) % (BASE - t))
+            q = ((q - t) / (BASE - t)).floor
+            k += BASE
           end
+          output << encode_digit(q)
+          bias = adapt(delta, h + 1, h == b)
+          delta = 0
+          h += 1
         end
         delta += 1
         n += 1
       end
-      return output.collect {|c| c.to_utf8_character}.join
+      output.collect {|c| c.chr(Encoding::UTF_8)}.join(EMPTY).encode(input_encoding)
     end
   end
+  ACE_PREFIX = 'xn--'.encode(Encoding::UTF_8).freeze
+  ASCII_MAX = 0x7F
+  DOT = 0x2E.chr(Encoding::UTF_8).freeze
+  LABEL_SEPERATOR_RE = /[\u002e]/
   module_function
   # Converts a UTF-8 unicode string to a punycode ACE string.
@@ -229,16 +206,14 @@ module SimpleIDN
   #   SimpleIDN.to_ascii("møllerriis.com")
   #    => "xn--mllerriis-l8a.com"
   def to_ascii(domain)
-    domain_array = domain.split(".") rescue []
+    return nil if domain.nil?
+    domain_array = domain.encode(Encoding::UTF_8).split(LABEL_SEPERATOR_RE) rescue []
     return domain if domain_array.length == 0
     out = []
-    i = 0
-    while i < domain_array.length
-      s = domain_array[i]
-      out << (s =~ /[^A-Z0-9@\-*_]/i ? "xn--" + Punycode.encode(s) : s)
-      i += 1
+    domain_array.each do |s|
+      out << (s.codepoints.any? { |cp| cp > ASCII_MAX } ? ACE_PREFIX + Punycode.encode(s) : s)
     end
-    return out.join(".")
+    out.join(DOT).encode(domain.encoding)
   end
   # Converts a punycode ACE string to a UTF-8 unicode string.
@@ -246,15 +221,13 @@ module SimpleIDN
   #   SimpleIDN.to_unicode("xn--mllerriis-l8a.com")
   #    => "møllerriis.com"
   def to_unicode(domain)
-    domain_array = domain.split(".") rescue []
+    return nil if domain.nil?
+    domain_array = domain.encode(Encoding::UTF_8).split(LABEL_SEPERATOR_RE) rescue []
     return domain if domain_array.length == 0
     out = []
-    i = 0
-    while i < domain_array.length
-      s = domain_array[i]
-      out << (s =~ /^xn\-\-/i ? Punycode.decode(s.gsub('xn--','')) : s)
-      i += 1
+    domain_array.each do |s|
+      out << (s.downcase.start_with?(ACE_PREFIX) ? Punycode.decode(s[ACE_PREFIX.length..-1]) : s)
     end
-    return out.join(".")
+    out.join(DOT).encode(domain.encoding)
   end
 end

data/simpleidn.gemspec CHANGED Viewed

@@ -1,4 +1,3 @@
-# coding: utf-8
 lib = File.expand_path('../lib', __FILE__)
 $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 require 'simpleidn'
@@ -20,4 +19,6 @@ Gem::Specification.new do |spec|
   spec.add_development_dependency "bundler", "~> 1.11"
   spec.add_development_dependency "rake", "~> 10.0"
   spec.add_development_dependency "rspec", "~> 3.0"
+  spec.required_ruby_version = '>1.9'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: simpleidn
 version: !ruby/object:Gem::Version
-  version: 0.0.7
+  version: 0.0.9
 platform: ruby
 authors:
 - Morten Møller Riis
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-04-12 00:00:00.000000000 Z
+date: 2017-06-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -77,9 +77,9 @@ require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ">="
+  - - ">"
     - !ruby/object:Gem::Version
-      version: '0'
+      version: '1.9'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
@@ -87,7 +87,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.4.5
+rubygems_version: 2.5.1
 signing_key:
 specification_version: 4
 summary: Punycode ACE to unicode UTF-8 (and vice-versa) string conversion.