RubyGems - uri-idna - Versions diffs - 0.1.0 - Mend

uri-idna 0.1.0

Files changed (15) hide show

checksums.yaml +7 -0
data/CHANGELOG.md +22 -0
data/LICENSE.txt +21 -0
data/README.md +184 -0
data/lib/uri/idna/data/idna.rb +4692 -0
data/lib/uri/idna/data/uts46.rb +8190 -0
data/lib/uri/idna/intranges.rb +49 -0
data/lib/uri/idna/process.rb +139 -0
data/lib/uri/idna/punycode.rb +174 -0
data/lib/uri/idna/uts46.rb +60 -0
data/lib/uri/idna/validation/bidi.rb +93 -0
data/lib/uri/idna/validation.rb +199 -0
data/lib/uri/idna/version.rb +7 -0
data/lib/uri/idna.rb +60 -0
metadata +62 -0

data/lib/uri/idna/intranges.rb ADDED Viewed

@@ -0,0 +1,49 @@
+# frozen_string_literal: true
module URI
  module IDNA
    # Compact storage for sets of integers (code points) as packed ranges.
    #
    # Every half-open range [start, finish) is packed into a single Integer as
    # (start << 32) | finish, so a set is a sorted Array of Integers that can
    # be binary-searched. The finish value is kept in the low 32 bits.
    module Intranges
      class << self
        # Builds the packed range list that covers every integer in +list+.
        #
        # @param list [Array<Integer>] values in any order; duplicates are ignored
        # @return [Array<Integer>] packed, non-overlapping ranges in ascending order
        def from_list(list)
          # slice_when cuts the sorted values wherever two neighbours are not
          # consecutive, leaving one chunk per contiguous run.
          list.uniq.sort.slice_when { |prev, curr| curr != prev + 1 }.map do |run|
            encode_range(run.first, run.last + 1)
          end
        end

        # Tests whether +int+ falls inside any of the packed +ranges+.
        #
        # @param int [Integer] value to look up
        # @param ranges [Array<Integer>] sorted packed ranges, as built by {from_list}
        # @return [Boolean]
        def contain?(int, ranges)
          probe = encode_range(int, 0)
          pos = ranges.bsearch_index { |packed| packed > probe } || ranges.length
          # The range just before the insertion point may span the value:
          # start <= int < finish (finish is exclusive).
          if pos > 0
            left, right = decode_range(ranges[pos - 1])
            return true if left <= int && int < right
          end
          # Or the range at the insertion point may start exactly at the value.
          if pos < ranges.length
            left, = decode_range(ranges[pos])
            return true if left == int
          end
          false
        end

        private

        # Packs a half-open range into one Integer.
        def encode_range(start, finish)
          (start << 32) | finish
        end

        # Unpacks an Integer produced by encode_range into [start, finish].
        def decode_range(r)
          [(r >> 32), (r & ((1 << 32) - 1))]
        end
      end
    end
  end
end

data/lib/uri/idna/process.rb ADDED Viewed

@@ -0,0 +1,139 @@
+# frozen_string_literal: true
+require_relative "punycode"
+require_relative "validation"
+require_relative "uts46"
module URI
  module IDNA
    # Converts domain names between their Unicode and ASCII-compatible forms,
    # label by label.
    #
    # Options read by this class: :uts46, :uts46_std3, :uts46_transitional,
    # :dns_length and :bidi. The full options hash is also handed to Validation.
    class Process
      UNICODE_DOTS_REGEX = /[\u002e\u3002\uff0e\uff61]/.freeze

      # @param options [Hash] processing flags (see class comment)
      def initialize(**options)
        @options = options
      end

      # Registration conversion. At least one of +alabel+ / +ulabel+ is required.
      # With only +ulabel+ this is the same as {#encode}. With +alabel+ the value
      # must start with the ACE prefix, is round-trip checked, and, when
      # +ulabel+ is also given, must decode to that +ulabel+.
      #
      # @raise [ArgumentError] when both arguments are nil or alabel is not a String
      # @raise [Error] when the alabel lacks the ACE prefix or conversion fails
      # @return [String] the ASCII form
      def register(alabel: nil, ulabel: nil)
        raise ArgumentError, "Provide alabel or ulabel" if alabel.nil? && ulabel.nil?
        return encode(ulabel) if alabel.nil?
        raise ArgumentError, "String expected" unless alabel.is_a?(String)
        raise Error, "Invalid alabel #{alabel}" unless alabel.start_with?(ALABEL_PREFIX)

        process_labels(alabel) do |l|
          to_alabel(l, roundtrip: true, ulabel: ulabel)
        end
      end

      # Lookup conversion: like {#encode}, but ASCII input labels must survive a
      # decode/encode round trip unchanged.
      #
      # @raise [ArgumentError] when +s+ is not a String
      # @return [String] the ASCII form
      def lookup(s)
        raise ArgumentError, "String expected" unless s.is_a?(String)

        s = process_labels(s) do |l|
          to_alabel(l, roundtrip: true)
        end
        validate_domain_length(s) if options.fetch(:dns_length, true)
        s
      end

      # Converts every label of +s+ to its ASCII form.
      #
      # @raise [ArgumentError] when +s+ is not a String
      # @return [String]
      def encode(s)
        raise ArgumentError, "String expected" unless s.is_a?(String)

        s = process_labels(s) { |l| to_alabel(l) }
        validate_domain_length(s) if options.fetch(:dns_length, true)
        s
      end

      # Converts every label of +s+ to its Unicode form.
      #
      # @raise [ArgumentError] when +s+ is not a String
      # @return [String]
      def decode(s)
        raise ArgumentError, "String expected" unless s.is_a?(String)

        process_labels(s) { |l| to_ulabel(l) }
      end

      private

      attr_reader :labels, :options

      # Label separator: a plain dot once UTS46 mapping has run, otherwise any
      # of the dot characters in UNICODE_DOTS_REGEX.
      def splitter
        @splitter ||= options.fetch(:uts46, false) ? "." : UNICODE_DOTS_REGEX
      end

      # Splits +s+ into labels, yields each one for conversion and joins the
      # results with ".". A single trailing dot is preserved.
      #
      # @raise [Error] on an empty domain or when a converted label is empty
      def process_labels(s)
        s = UTS46.map_string(s, **options.slice(:uts46_std3, :uts46_transitional)) if options.fetch(:uts46, false)
        # The label list and the validator (whose Bidi flag is derived from the
        # labels) belong to this call only; rebuilding them keeps an instance
        # reusable instead of silently reprocessing the first domain it saw.
        @labels = s.split(splitter, -1)
        @validation = nil
        trailing_dot = labels.last&.empty?
        labels.pop if trailing_dot
        raise Error, "Empty domain" if labels.empty? || labels == [""]

        converted = labels.map do |label|
          str = yield(label)
          raise Error, "Empty label" if str.empty?

          str
        end
        converted << "" if trailing_dot
        converted.join(".")
      end

      # Validates +label+ as a U-label and returns its ASCII form.
      #
      # @param roundtrip [Boolean] require ASCII input to equal the re-encoded result
      # @param ulabel [String, nil] expected Unicode form, if the caller supplied one
      def to_alabel(label, roundtrip: false, ulabel: nil)
        orig_label = label
        # validate label is a valid U-label
        label = to_ulabel(label)
        if ulabel && ulabel != label
          raise Error, "Provided ulabel does not match conversion of alabel, #{ulabel.inspect} != #{label.inspect}"
        end

        label = encode_punycode_label(label) unless label.ascii_only?
        validate_label_length(label)
        if roundtrip && orig_label.ascii_only? && orig_label != label
          raise Error, "Roundtrip encoding failed, #{orig_label.inspect} != #{label.inspect}"
        end

        label
      end

      # Decodes ASCII labels (when they carry the ACE prefix) and validates the result.
      # https://datatracker.ietf.org/doc/html/rfc5891#section-5.3
      def to_ulabel(label)
        decoded = false
        label, decoded = decode_punycode_label(label) if label.ascii_only?
        validation.call(label, decoded: decoded)
        label
      end

      def encode_punycode_label(label)
        ALABEL_PREFIX + Punycode.encode(label)
      end

      # Lower-cases +label+ and, if it carries the ACE prefix, Punycode-decodes it.
      #
      # @return [Array(String, Boolean)] the label and whether decoding took place
      def decode_punycode_label(label)
        label = label.downcase
        return [label, false] unless label.start_with?(ALABEL_PREFIX)

        code = label[ALABEL_PREFIX.length..]
        raise Error, "Malformed A-label, no Punycode eligible content found" if code.empty?
        raise Error, "A-label must not end with a hyphen" if code[-1] == "-"

        [URI::IDNA::Punycode.decode(code), true]
      end

      # A label may be at most 63 characters long.
      def validate_label_length(label)
        raise Error, "Label too long" unless label.length < 64
      end

      # At most 253 characters, or 254 when the domain ends with a dot.
      def validate_domain_length(s)
        raise Error, "Domain too long" unless s.length < (s[-1] == "." ? 255 : 254)
      end

      # Validator for the labels of the current process_labels call.
      def validation
        @validation ||= Validation.new(options.merge(bidi: check_bidi?))
      end

      # Bidi checks run only when enabled and Bidi.check? asks for them.
      def check_bidi?
        options.fetch(:bidi, true) && Validation::Bidi.check?(labels)
      end
    end
  end
end

data/lib/uri/idna/punycode.rb ADDED Viewed

@@ -0,0 +1,174 @@
+# frozen_string_literal: true
module URI
  module IDNA
    # Punycode implementation based on a simplified version of RFC 3492
    # https://datatracker.ietf.org/doc/html/rfc3492#appendix-C
    module Punycode
      class << self
        BASE = 36
        TMIN = 1
        TMAX = 26
        SKEW = 38
        DAMP = 700
        INITIAL_BIAS = 72
        INITIAL_N = 0x80
        DELIMITER = 0x2D
        MAXINT = 0x7FFFFFFF

        # Maps the code point of a basic character to its digit value.
        #
        # Ruby integers are signed, so the ranges are spelled out explicitly:
        # a bare `cp - 48 < 10` test would also accept every code point below
        # "0" (for example "-") and produce bogus or negative digits.
        #
        # @param cp [Integer] code point
        # @return [Integer] 0..35 for a letter or digit, BASE for anything else
        def decode_digit(cp)
          case cp
          when 48..57 then cp - 22   # "0".."9" => 26..35
          when 65..90 then cp - 65   # "A".."Z" => 0..25
          when 97..122 then cp - 97  # "a".."z" => 0..25
          else BASE
          end
        end

        # Maps a digit value 0..35 to the code point of "a".."z" / "0".."9".
        def encode_digit(d)
          d + 22 + 75 * (d < 26 ? 1 : 0)
        end

        # Bias adaptation function.
        #
        # @param delta [Integer] delta that was just encoded or decoded
        # @param num_points [Integer] number of code points handled so far
        # @param first_time [Boolean] true for the very first adaptation
        # @return [Integer] the new bias
        def adapt(delta, num_points, first_time)
          delta = first_time ? (delta / DAMP) : (delta >> 1)
          delta += (delta / num_points)
          k = 0
          while delta > (((BASE - TMIN) * TMAX) / 2)
            delta /= BASE - TMIN
            k += BASE
          end
          k + ((BASE - TMIN + 1) * delta / (delta + SKEW))
        end

        # Encodes a Unicode string as Punycode (without any ACE prefix).
        #
        # @param input [String]
        # @return [String]
        # @raise [PunycodeError] on arithmetic overflow
        def encode(input)
          code_points = input.codepoints
          n = INITIAL_N
          delta = 0
          bias = INITIAL_BIAS
          # Basic code points are copied verbatim, followed by the delimiter.
          output = code_points.select { |cp| basic?(cp) }
          h = b = output.length
          output << DELIMITER if b > 0
          while h < code_points.length
            # Smallest code point that has not been handled yet.
            m = code_points.select { |cp| cp >= n }.min
            raise PunycodeError, "Arithmetic overflow" if m - n > (MAXINT - delta) / (h + 1)

            delta += (m - n) * (h + 1)
            n = m
            code_points.each do |cp|
              if cp < n
                delta += 1
                raise PunycodeError, "Arithmetic overflow" if delta > MAXINT
              end
              next unless cp == n

              # Emit delta as a generalized variable-length integer.
              q = delta
              k = BASE
              loop do
                t = threshold(k, bias)
                break if q < t

                output << encode_digit(t + ((q - t) % (BASE - t)))
                q = (q - t) / (BASE - t)
                k += BASE
              end
              output << encode_digit(q)
              bias = adapt(delta, h + 1, h == b)
              delta = 0
              h += 1
            end
            delta += 1
            n += 1
          end
          output.pack("U*")
        end

        # Decodes a Punycode string (without any ACE prefix) to Unicode.
        #
        # @param input [String]
        # @return [String]
        # @raise [PunycodeError] on invalid digits, truncated input or overflow
        def decode(input)
          code_points = input.codepoints
          output = []
          n = INITIAL_N
          i = 0
          bias = INITIAL_BIAS
          # Everything before the last delimiter is literal basic code points.
          b = code_points.rindex(DELIMITER) || 0
          code_points[0, b].each do |cp|
            raise PunycodeError, "Invalid input" unless basic?(cp)

            output << cp
          end
          inc = b > 0 ? b + 1 : 0
          while inc < code_points.length
            old_i = i
            w = 1
            k = BASE
            # Read one generalized variable-length integer into i.
            loop do
              raise PunycodeError, "Invalid input" if inc >= code_points.length

              digit = decode_digit(code_points[inc])
              inc += 1
              raise PunycodeError, "Invalid input" if digit >= BASE
              raise PunycodeError, "Arithmetic overflow" if digit > (MAXINT - i) / w

              i += digit * w
              t = threshold(k, bias)
              break if digit < t
              raise PunycodeError, "Arithmetic overflow" if w > MAXINT / (BASE - t)

              w *= BASE - t
              k += BASE
            end
            out = output.length
            bias = adapt(i - old_i, out + 1, old_i == 0)
            raise PunycodeError, "Arithmetic overflow" if (i / (out + 1)) > MAXINT - n

            # i encodes both the code point increment and the insert position.
            n += i / (out + 1)
            i %= (out + 1)
            output.insert(i, n)
            i += 1
          end
          output.pack("U*")
        end

        private

        def basic?(codepoint)
          codepoint < 0x80
        end

        # Digit threshold for position k: k - bias limited to TMIN..TMAX.
        def threshold(k, bias)
          (k - bias).clamp(TMIN, TMAX)
        end
      end
    end
  end
end

data/lib/uri/idna/uts46.rb ADDED Viewed

@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+require_relative "data/uts46"
module URI
  module IDNA
    # UTS #46 mapping and status lookup backed by the UTS46_DATA table.
    module UTS46
      class << self
        # Maps every character of +domain+ according to its UTS46_DATA status
        # and returns the result in NFC.
        # https://unicode.org/reports/tr46/#ProcessingStepMap
        #
        # Status handling: "I" characters are dropped, "V" are kept, "M" are
        # replaced, "D" are replaced only when +uts46_transitional+ is set, and
        # "3" are rejected under +uts46_std3+ and otherwise replaced or kept.
        #
        # @param domain [String]
        # @param uts46_std3 [Boolean] reject characters with status "3"
        # @param uts46_transitional [Boolean] replace characters with status "D"
        # @return [String]
        # @raise [InvalidCodepointError] for any other status, or "3" under std3
        def map_string(domain, uts46_std3: true, uts46_transitional: false)
          # One unfrozen buffer appended in place; `+=` would allocate a new
          # string for every character.
          output = +""
          domain.each_char do |char|
            code_point = char.ord
            _, status, replacement = uts46_status(code_point)
            case status
            when "I"
              next
            when "V"
              output << char
            when "M"
              output << replacement
            when "D"
              output << (uts46_transitional ? replacement : char)
            when "3"
              if uts46_std3
                raise InvalidCodepointError,
                      "Codepoint #{code_point} not allowed in #{domain} via STD3 rules"
              end
              output << (replacement || char)
            else
              raise InvalidCodepointError, "Codepoint #{code_point} not allowed in #{domain}"
            end
          end
          output.unicode_normalize(:nfc)
        end

        # Whether +char+ has status "V", or status "D" when +uts46_transitional+ is set.
        #
        # NOTE(review): the UTS #46 validity criteria accept deviation characters
        # for nontransitional processing, which is the opposite of this condition;
        # confirm against the caller before changing it.
        #
        # @param char [String] a single character
        # @return [Boolean]
        def valid?(char, uts46_transitional: false)
          _, status, = uts46_status(char.ord)
          return true if status == "V"
          return true if uts46_transitional && status == "D"

          false
        end

        private

        # Returns the UTS46_DATA row that applies to +code_point+ ([] if none).
        #
        # Code points below 256 are used directly as the row index; above that,
        # the row is the last one whose first element is <= code_point.
        def uts46_status(code_point)
          index =
            if code_point < 256
              code_point
            else
              (UTS46_DATA.bsearch_index { |x| x[0] > code_point } || UTS46_DATA.length) - 1
            end
          UTS46_DATA[index] || []
        end
      end
    end
  end
end

data/lib/uri/idna/validation/bidi.rb ADDED Viewed

@@ -0,0 +1,93 @@
+# frozen_string_literal: true
module URI
  module IDNA
    class Validation
      # 4.2.3.4. Labels Containing Characters Written Right to Left
      # https://datatracker.ietf.org/doc/html/rfc5891#section-4.2.3.4
      # https://datatracker.ietf.org/doc/html/rfc5893#section-2
      module Bidi
        class << self
          # Checks one label against the Bidi rules.
          #
          # @param label [String] label in Unicode form
          # @return [true] when the label passes
          # @raise [BidiError] when the label is empty or breaks a rule
          def call(label)
            # An empty label has no first code point to classify.
            raise BidiError, "Label must not be empty" if label.empty?

            # Bidi rule 1
            if bidi_class(label[0], "RTL")
              rtl = true
            elsif bidi_class(label[0], "L")
              rtl = false
            else
              raise BidiError, "First codepoint in label #{label} must be directionality L, R or AL"
            end

            valid_ending = false
            number_type = nil
            label.each_char.with_index do |cp, idx|
              if rtl
                # Bidi rule 2
                if bidi_class(cp, "L") || bidi_class(cp, "UNUSED")
                  raise BidiError, "Invalid direction for codepoint at position #{idx + 1} in a right-to-left label"
                end

                # Bidi rule 3: NSM characters leave the pending verdict untouched.
                direction = bidi_class(cp, "RTL") || bidi_class(cp, "EN") || bidi_class(cp, "AN")
                if direction
                  valid_ending = true
                elsif !bidi_class(cp, "NSM")
                  valid_ending = false
                end
                # Bidi rule 4
                if %w[EN AN].include?(direction)
                  number_type ||= direction
                  raise BidiError, "Can not mix numeral types in a right-to-left label" if number_type != direction
                end
              else
                # Bidi rule 5
                if bidi_class(cp, "RTL") || bidi_class(cp, "AN")
                  raise BidiError, "Invalid direction for codepoint at position #{idx + 1} in a left-to-right label"
                end

                # Bidi rule 6
                if bidi_class(cp, "L") || bidi_class(cp, "EN")
                  valid_ending = true
                elsif !bidi_class(cp, "NSM")
                  valid_ending = false
                end
              end
            end
            raise BidiError, "Label ends with illegal codepoint directionality" unless valid_ending

            true
          end

          # Whether any label, after decoding A-labels, contains a character of
          # class "RTL" or "AN", i.e. whether the Bidi rules need to be applied.
          # https://www.rfc-editor.org/rfc/rfc5891.html#section-4.2.3.4
          #
          # @param labels [Array<String>]
          # @return [Boolean]
          def check?(labels)
            domain = labels.map { |label| unicode_form(label) }.join(".")
            domain.each_char.any? { |cp| bidi_class(cp, "RTL") || bidi_class(cp, "AN") }
          end

          private

          # Returns +name+ when +cp+ belongs to that Bidi class, false otherwise.
          def bidi_class(cp, name)
            return name if Intranges.contain?(cp.ord, BIDI_CLASSES[name])

            false
          end

          # Decodes an A-label for inspection; other labels are returned as is.
          #
          # The ACE prefix is matched on the lower-cased form of ASCII-only
          # labels, so "XN--..." is inspected like "xn--...". A label that
          # fails to decode contributes nothing (best effort).
          def unicode_form(label)
            return label unless label.ascii_only?

            lowered = label.downcase
            return label unless lowered.start_with?(ALABEL_PREFIX)

            Punycode.decode(lowered[ALABEL_PREFIX.length..])
          rescue StandardError
            ""
          end
        end
      end
    end
  end
end