RubyGems - addressable - Versions diffs - 2.8.0 → 2.8.2 - Mend

addressable 2.8.0 → 2.8.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

checksums.yaml +4 -4
data/CHANGELOG.md +25 -0
data/Gemfile +4 -2
data/Rakefile +2 -1
data/addressable.gemspec +9 -18
data/lib/addressable/idna/native.rb +0 -5
data/lib/addressable/idna/pure.rb +2 -185
data/lib/addressable/idna.rb +0 -1
data/lib/addressable/template.rb +10 -9
data/lib/addressable/uri.rb +168 -148
data/lib/addressable/version.rb +1 -2
data/spec/addressable/idna_spec.rb +6 -6
data/spec/addressable/net_http_compat_spec.rb +0 -1
data/spec/addressable/security_spec.rb +0 -1
data/spec/addressable/template_spec.rb +33 -1
data/spec/addressable/uri_spec.rb +137 -1
data/tasks/gem.rake +5 -2
metadata +11 -10

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 03a21b1eab156a16e90bd7963af85980edfbddc8f3dbe052766303dba76cc000
-  data.tar.gz: 03eca5d86f4c70f9320000f36e3cff4fd8023342a4e0ac855d0ef1ec89ee6183
+  metadata.gz: b18375911dede706411b9ec0d6c8b104e9f49a2ebf53ea18d89aec89a349d8f2
+  data.tar.gz: 4701c220482e3e92a10ebe9605ecb03fcf5d295bbef20346aeb49fdfbab4bbb3
 SHA512:
-  metadata.gz: d504f9475ad823f5bb077b9c039a2c91c83e52c20896247a7289b61725c61b1ddefe8ae06155fb018fc67087cf04276081b42105a18394b45e2374ad0b2fadb0
-  data.tar.gz: b81766fbcb9335d5ca94403b62d3b2a6fae31b66cd3c05f48e1885eaf07883bfa1321b6930271fe1415135aec687af51312a26ce27bd4b83b2ac6424dec597c9
+  metadata.gz: 8e6c9605ceec0aa65ceb3b4a1152d9c89830f1748e20c6667746634a9952a13fcff404ec1783f904e9240d642b11e2907914d43581fd8c451971c48106e70710
+  data.tar.gz: 7865e8accde73f97022353c51e9556f7f1f1786ed2e004789a9c4e5ffa82486ae2abe17166fa724f2bf17b8e853e40f185abf0ee16a4108aca712c202540b3e2

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,28 @@
+# Addressable 2.8.2
+- Improve cache hits and JIT friendliness ([#486](https://github.com/sporkmonger/addressable/pull/486))
+- Improve code style and test coverage ([#482](https://github.com/sporkmonger/addressable/pull/482))
+- Ensure reset of deferred validation ([#481](https://github.com/sporkmonger/addressable/pull/481))
+- Resolve normalization differences between `IDNA::Native` and `IDNA::Pure` ([#408](https://github.com/sporkmonger/addressable/issues/408), [#492])
+- Remove redundant colon in `Addressable::URI::CharacterClasses::AUTHORITY` regex ([#438](https://github.com/sporkmonger/addressable/pull/438)) (accidentally reverted by [#449] merge but [added back](https://github.com/sporkmonger/addressable/pull/492#discussion_r1105125280) in [#492])
+[#492]: https://github.com/sporkmonger/addressable/pull/492
+# Addressable 2.8.1
+- refactor `Addressable::URI.normalize_path` to address linter offenses ([#430](https://github.com/sporkmonger/addressable/pull/430))
+- update gemspec to reflect supported Ruby versions ([#466], [#464], [#463])
+- compatibility w/ public_suffix 5.x ([#466], [#465], [#460])
+- fixes "invalid byte sequence in UTF-8" exception when unencoding URLs containing non UTF-8 characters ([#459](https://github.com/sporkmonger/addressable/pull/459))
+- `Ractor` compatibility ([#449])
+- use the whole string instead of a single line for template match ([#431](https://github.com/sporkmonger/addressable/pull/431))
+- force UTF-8 encoding only if needed ([#341](https://github.com/sporkmonger/addressable/pull/341))
+[#449]: https://github.com/sporkmonger/addressable/pull/449
+[#460]: https://github.com/sporkmonger/addressable/pull/460
+[#463]: https://github.com/sporkmonger/addressable/pull/463
+[#464]: https://github.com/sporkmonger/addressable/pull/464
+[#465]: https://github.com/sporkmonger/addressable/pull/465
+[#466]: https://github.com/sporkmonger/addressable/pull/466
 # Addressable 2.8.0
 - fixes ReDoS vulnerability in Addressable::Template#match
 - no longer replaces `+` with spaces in queries for non-http(s) schemes

data/Gemfile CHANGED Viewed

@@ -2,7 +2,7 @@
 source 'https://rubygems.org'
-gemspec(path: __FILE__ == "(eval)" ? ".." : ".")
+gemspec
 group :test do
   gem 'rspec', '~> 3.8'
@@ -25,4 +25,6 @@ group :test, :development do
   gem "rake", ">= 12.3.3"
 end
-gem "idn-ruby", platform: :mri
+unless ENV["IDNA_MODE"] == "pure"
+  gem "idn-ruby", platform: :mri
+end

data/Rakefile CHANGED Viewed

@@ -24,7 +24,8 @@ PKG_FILES = FileList[
     "tasks/**/*",
     "[A-Z]*", "Rakefile"
 ].exclude(/pkg/).exclude(/database\.yml/).
-  exclude(/Gemfile\.lock/).exclude(/[_\.]git$/)
+  exclude(/Gemfile\.lock/).exclude(/[_\.]git$/).
+  exclude(/coverage/)
 task :default => "spec"

data/addressable.gemspec CHANGED Viewed

@@ -1,14 +1,15 @@
 # -*- encoding: utf-8 -*-
-# stub: addressable 2.8.0 ruby lib
+# stub: addressable 2.8.2 ruby lib
 Gem::Specification.new do |s|
   s.name = "addressable".freeze
-  s.version = "2.8.0"
+  s.version = "2.8.2"
   s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
+  s.metadata = { "changelog_uri" => "https://github.com/sporkmonger/addressable/blob/main/CHANGELOG.md" } if s.respond_to? :metadata=
   s.require_paths = ["lib".freeze]
   s.authors = ["Bob Aman".freeze]
-  s.date = "2021-07-03"
+  s.date = "2023-04-01"
   s.description = "Addressable is an alternative implementation to the URI implementation that is\npart of Ruby's standard library. It is flexible, offers heuristic parsing, and\nadditionally provides extensive support for IRIs and URI templates.\n".freeze
   s.email = "bob@sporkmonger.com".freeze
   s.extra_rdoc_files = ["README.md".freeze]
@@ -16,22 +17,12 @@ Gem::Specification.new do |s|
   s.homepage = "https://github.com/sporkmonger/addressable".freeze
   s.licenses = ["Apache-2.0".freeze]
   s.rdoc_options = ["--main".freeze, "README.md".freeze]
-  s.required_ruby_version = Gem::Requirement.new(">= 2.0".freeze)
-  s.rubygems_version = "3.0.3".freeze
+  s.required_ruby_version = Gem::Requirement.new(">= 2.2".freeze)
+  s.rubygems_version = "3.4.8".freeze
   s.summary = "URI Implementation".freeze
-  if s.respond_to? :specification_version then
-    s.specification_version = 4
+  s.specification_version = 4
-    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
-      s.add_runtime_dependency(%q<public_suffix>.freeze, [">= 2.0.2", "< 5.0"])
-      s.add_development_dependency(%q<bundler>.freeze, [">= 1.0", "< 3.0"])
-    else
-      s.add_dependency(%q<public_suffix>.freeze, [">= 2.0.2", "< 5.0"])
-      s.add_dependency(%q<bundler>.freeze, [">= 1.0", "< 3.0"])
-    end
-  else
-    s.add_dependency(%q<public_suffix>.freeze, [">= 2.0.2", "< 5.0"])
-    s.add_dependency(%q<bundler>.freeze, [">= 1.0", "< 3.0"])
-  end
+  s.add_runtime_dependency(%q<public_suffix>.freeze, [">= 2.0.2", "< 6.0"])
+  s.add_development_dependency(%q<bundler>.freeze, [">= 1.0", "< 3.0"])
 end

data/lib/addressable/idna/native.rb CHANGED Viewed

@@ -1,6 +1,5 @@
 # frozen_string_literal: true
-# encoding:utf-8
 #--
 # Copyright (C) Bob Aman
 #
@@ -30,10 +29,6 @@ module Addressable
        IDN::Punycode.decode(value.to_s)
      end
-    def self.unicode_normalize_kc(value)
-      IDN::Stringprep.nfkc_normalize(value.to_s)
-    end
     def self.to_ascii(value)
       value.to_s.split('.', -1).map do |segment|
         if segment.size > 0 && segment.size < 64

data/lib/addressable/idna/pure.rb CHANGED Viewed

@@ -1,6 +1,5 @@
 # frozen_string_literal: true
-# encoding:utf-8
 #--
 # Copyright (C) Bob Aman
 #
@@ -67,7 +66,7 @@ module Addressable
     # domain name as described in RFC 3490.
     def self.to_ascii(input)
       input = input.to_s unless input.is_a?(String)
-      input = input.dup
+      input = input.dup.force_encoding(Encoding::UTF_8).unicode_normalize(:nfkc)
       if input.respond_to?(:force_encoding)
         input.force_encoding(Encoding::ASCII_8BIT)
       end
@@ -78,7 +77,7 @@ module Addressable
             part.force_encoding(Encoding::ASCII_8BIT)
           end
           if part =~ UTF8_REGEX && part =~ UTF8_REGEX_MULTIBYTE
-            ACE_PREFIX + punycode_encode(unicode_normalize_kc(part))
+            ACE_PREFIX + punycode_encode(part)
           else
             part
           end
@@ -113,15 +112,6 @@ module Addressable
       output
     end
-    # Unicode normalization form KC.
-    def self.unicode_normalize_kc(input)
-      input = input.to_s unless input.is_a?(String)
-      unpacked = input.unpack("U*")
-      unpacked =
-        unicode_compose(unicode_sort_canonical(unicode_decompose(unpacked)))
-      return unpacked.pack("U*")
-    end
     ##
     # Unicode aware downcase method.
     #
@@ -137,164 +127,6 @@ module Addressable
     end
     private_class_method :unicode_downcase
-    def self.unicode_compose(unpacked)
-      unpacked_result = []
-      length = unpacked.length
-      return unpacked if length == 0
-      starter = unpacked[0]
-      starter_cc = lookup_unicode_combining_class(starter)
-      starter_cc = 256 if starter_cc != 0
-      for i in 1...length
-        ch = unpacked[i]
-        if (starter_cc == 0 &&
-            (composite = unicode_compose_pair(starter, ch)) != nil)
-          starter = composite
-        else
-          unpacked_result << starter
-          starter = ch
-        end
-      end
-      unpacked_result << starter
-      return unpacked_result
-    end
-    private_class_method :unicode_compose
-    def self.unicode_compose_pair(ch_one, ch_two)
-      if ch_one >= HANGUL_LBASE && ch_one < HANGUL_LBASE + HANGUL_LCOUNT &&
-          ch_two >= HANGUL_VBASE && ch_two < HANGUL_VBASE + HANGUL_VCOUNT
-        # Hangul L + V
-        return HANGUL_SBASE + (
-          (ch_one - HANGUL_LBASE) * HANGUL_VCOUNT + (ch_two - HANGUL_VBASE)
-        ) * HANGUL_TCOUNT
-      elsif ch_one >= HANGUL_SBASE &&
-          ch_one < HANGUL_SBASE + HANGUL_SCOUNT &&
-          (ch_one - HANGUL_SBASE) % HANGUL_TCOUNT == 0 &&
-          ch_two >= HANGUL_TBASE && ch_two < HANGUL_TBASE + HANGUL_TCOUNT
-           # Hangul LV + T
-        return ch_one + (ch_two - HANGUL_TBASE)
-      end
-      p = []
-      ucs4_to_utf8(ch_one, p)
-      ucs4_to_utf8(ch_two, p)
-      return lookup_unicode_composition(p)
-    end
-    private_class_method :unicode_compose_pair
-    def self.ucs4_to_utf8(char, buffer)
-      if char < 128
-        buffer << char
-      elsif char < 2048
-        buffer << (char >> 6 | 192)
-        buffer << (char & 63 | 128)
-      elsif char < 0x10000
-        buffer << (char >> 12 | 224)
-        buffer << (char >> 6 & 63 | 128)
-        buffer << (char & 63 | 128)
-      elsif char < 0x200000
-        buffer << (char >> 18 | 240)
-        buffer << (char >> 12 & 63 | 128)
-        buffer << (char >> 6 & 63 | 128)
-        buffer << (char & 63 | 128)
-      elsif char < 0x4000000
-        buffer << (char >> 24 | 248)
-        buffer << (char >> 18 & 63 | 128)
-        buffer << (char >> 12 & 63 | 128)
-        buffer << (char >> 6 & 63 | 128)
-        buffer << (char & 63 | 128)
-      elsif char < 0x80000000
-        buffer << (char >> 30 | 252)
-        buffer << (char >> 24 & 63 | 128)
-        buffer << (char >> 18 & 63 | 128)
-        buffer << (char >> 12 & 63 | 128)
-        buffer << (char >> 6 & 63 | 128)
-        buffer << (char & 63 | 128)
-      end
-    end
-    private_class_method :ucs4_to_utf8
-    def self.unicode_sort_canonical(unpacked)
-      unpacked = unpacked.dup
-      i = 1
-      length = unpacked.length
-      return unpacked if length < 2
-      while i < length
-        last = unpacked[i-1]
-        ch = unpacked[i]
-        last_cc = lookup_unicode_combining_class(last)
-        cc = lookup_unicode_combining_class(ch)
-        if cc != 0 && last_cc != 0 && last_cc > cc
-          unpacked[i] = last
-          unpacked[i-1] = ch
-          i -= 1 if i > 1
-        else
-          i += 1
-        end
-      end
-      return unpacked
-    end
-    private_class_method :unicode_sort_canonical
-    def self.unicode_decompose(unpacked)
-      unpacked_result = []
-      for cp in unpacked
-        if cp >= HANGUL_SBASE && cp < HANGUL_SBASE + HANGUL_SCOUNT
-          l, v, t = unicode_decompose_hangul(cp)
-          unpacked_result << l
-          unpacked_result << v if v
-          unpacked_result << t if t
-        else
-          dc = lookup_unicode_compatibility(cp)
-          unless dc
-            unpacked_result << cp
-          else
-            unpacked_result.concat(unicode_decompose(dc.unpack("U*")))
-          end
-        end
-      end
-      return unpacked_result
-    end
-    private_class_method :unicode_decompose
-    def self.unicode_decompose_hangul(codepoint)
-      sindex = codepoint - HANGUL_SBASE;
-      if sindex < 0 || sindex >= HANGUL_SCOUNT
-        l = codepoint
-        v = t = nil
-        return l, v, t
-      end
-      l = HANGUL_LBASE + sindex / HANGUL_NCOUNT
-      v = HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT
-      t = HANGUL_TBASE + sindex % HANGUL_TCOUNT
-      if t == HANGUL_TBASE
-        t = nil
-      end
-      return l, v, t
-    end
-    private_class_method :unicode_decompose_hangul
-    def self.lookup_unicode_combining_class(codepoint)
-      codepoint_data = UNICODE_DATA[codepoint]
-      (codepoint_data ?
-        (codepoint_data[UNICODE_DATA_COMBINING_CLASS] || 0) :
-        0)
-    end
-    private_class_method :lookup_unicode_combining_class
-    def self.lookup_unicode_compatibility(codepoint)
-      codepoint_data = UNICODE_DATA[codepoint]
-      (codepoint_data ?
-        codepoint_data[UNICODE_DATA_COMPATIBILITY] : nil)
-    end
-    private_class_method :lookup_unicode_compatibility
     def self.lookup_unicode_lowercase(codepoint)
       codepoint_data = UNICODE_DATA[codepoint]
       (codepoint_data ?
@@ -303,21 +135,6 @@ module Addressable
     end
     private_class_method :lookup_unicode_lowercase
-    def self.lookup_unicode_composition(unpacked)
-      return COMPOSITION_TABLE[unpacked]
-    end
-    private_class_method :lookup_unicode_composition
-    HANGUL_SBASE =  0xac00
-    HANGUL_LBASE =  0x1100
-    HANGUL_LCOUNT = 19
-    HANGUL_VBASE =  0x1161
-    HANGUL_VCOUNT = 21
-    HANGUL_TBASE =  0x11a7
-    HANGUL_TCOUNT = 28
-    HANGUL_NCOUNT = HANGUL_VCOUNT * HANGUL_TCOUNT # 588
-    HANGUL_SCOUNT = HANGUL_LCOUNT * HANGUL_NCOUNT # 11172
     UNICODE_DATA_COMBINING_CLASS = 0
     UNICODE_DATA_EXCLUSION = 1
     UNICODE_DATA_CANONICAL = 2

data/lib/addressable/idna.rb CHANGED Viewed

@@ -1,6 +1,5 @@
 # frozen_string_literal: true
-# encoding:utf-8
 #--
 # Copyright (C) Bob Aman
 #

data/lib/addressable/template.rb CHANGED Viewed

@@ -1,6 +1,5 @@
 # frozen_string_literal: true
-# encoding:utf-8
 #--
 # Copyright (C) Bob Aman
 #
@@ -657,12 +656,12 @@ module Addressable
     def ordered_variable_defaults
       @ordered_variable_defaults ||= begin
         expansions, _ = parse_template_pattern(pattern)
-        expansions.map do |capture|
+        expansions.flat_map do |capture|
           _, _, varlist = *capture.match(EXPRESSION)
           varlist.split(',').map do |varspec|
             varspec[VARSPEC, 1]
           end
-        end.flatten
+        end
       end
     end
@@ -893,7 +892,7 @@ module Addressable
     # operator.
     #
     # @param [Hash, Array, String] value
-    #   Normalizes keys and values with IDNA#unicode_normalize_kc
+    #   Normalizes unicode keys and values with String#unicode_normalize (NFC)
     #
     # @return [Hash, Array, String] The normalized values
     def normalize_value(value)
@@ -903,15 +902,17 @@ module Addressable
       # Handle unicode normalization
       if value.kind_of?(Array)
-        value.map! { |val| Addressable::IDNA.unicode_normalize_kc(val) }
+        value.map! { |val| normalize_value(val) }
       elsif value.kind_of?(Hash)
         value = value.inject({}) { |acc, (k, v)|
-          acc[Addressable::IDNA.unicode_normalize_kc(k)] =
-            Addressable::IDNA.unicode_normalize_kc(v)
+          acc[normalize_value(k)] = normalize_value(v)
           acc
         }
       else
-        value = Addressable::IDNA.unicode_normalize_kc(value)
+        if value.encoding != Encoding::UTF_8
+          value = value.dup.force_encoding(Encoding::UTF_8)
+        end
+        value = value.unicode_normalize(:nfc)
       end
       value
     end
@@ -1023,7 +1024,7 @@ module Addressable
       end
       # Ensure that the regular expression matches the whole URI.
-      regexp_string = "^#{regexp_string}$"
+      regexp_string = "\\A#{regexp_string}\\z"
       return expansions, Regexp.new(regexp_string)
     end