RubyGems - sanitize - Versions diffs - 2.1.0 → 2.1.1 - Mend

sanitize 2.1.0 → 2.1.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6)

checksums.yaml +5 -5
data/HISTORY.md +21 -0
data/lib/sanitize/transformers/clean_element.rb +53 -1
data/lib/sanitize/version.rb +1 -1
data/test/test_sanitize.rb +79 -5
metadata +3 -3

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: a1be4f7e5790c7e0fa8943b793803e507bbaa2ce
-  data.tar.gz: a879b798b76f4bfff12532e4779bb418a89d4500
+SHA256:
+  metadata.gz: 9e75a99c50777d57667e66bee0787e3259085c38ec616bb14f13f3c2d8673827
+  data.tar.gz: 6819e4c2c9730157b5b0acb4571c72dfecf02d9e5e1f0e31425cf8fdcfb8af97
 SHA512:
-  metadata.gz: ecdbc579a9ed3f737539118ac5b6c17612a736268263fafd03b9daf39da433309a11e090494c2008859edc16c278dcc1ea63ea52b5693479c625b825bbbfbc80
-  data.tar.gz: 4fff69ad6c6812fb6aac4c492a7644f196faeb82039096dcd204461b07872a05d97c02e0b92237fc65b36891783256e84ee335fc83b03365e92ec5e07a2af57e
+  metadata.gz: '08a334cb79442b561ce83bcacc1f4401907b47c3802d91934abf5ae3b13fe91c760df21402f30b126d489815c06eac8d055d65924b826e7e4c41017e08c96ad9'
+  data.tar.gz: '0858de4b6b03e1c41c079e96165d3714a4a8b8512deb85b2d7cb837d45a76f2503e1c16f1e8eb18d775a2f5c4337966c36a62a4d98a2162e4301a83338bee595'

data/HISTORY.md CHANGED

@@ -1,6 +1,27 @@
 Sanitize History
 ================================================================================
+Version 2.1.1 (2018-09-30)
+--------------------------
+* [CVE-2018-3740][176]: Fixed an HTML injection vulnerability that could allow
+  XSS (backported from Sanitize 4.6.3). [@dometto - #188][188]
+  When Sanitize <= 2.1.0 is used in combination with libxml2 >= 2.9.2, a
+  specially crafted HTML fragment can cause libxml2 to generate improperly
+  escaped output, allowing non-whitelisted attributes to be used on whitelisted
+  elements.
+  Sanitize now performs additional escaping on affected attributes to prevent
+  this.
+  Many thanks to the Shopify Application Security Team for responsibly reporting
+  this issue.
+[176]:https://github.com/rgrove/sanitize/issues/176
+[188]:https://github.com/rgrove/sanitize/pull/188
 Version 2.1.0 (2014-01-13)
 --------------------------

data/lib/sanitize/transformers/clean_element.rb CHANGED

@@ -1,6 +1,32 @@
 class Sanitize; module Transformers
   class CleanElement
+    # Attributes that need additional escaping on `<a>` elements due to unsafe
+    # libxml2 behavior.
+    UNSAFE_LIBXML_ATTRS_A = Set.new(%w[
+      name
+    ])
+    # Attributes that need additional escaping on all elements due to unsafe
+    # libxml2 behavior.
+    UNSAFE_LIBXML_ATTRS_GLOBAL = Set.new(%w[
+      action
+      href
+      src
+    ])
+    # Mapping of original characters to escape sequences for characters that
+    # should be escaped in attributes affected by unsafe libxml2 behavior.
+    UNSAFE_LIBXML_ESCAPE_CHARS = {
+      ' ' => '%20',
+      '"' => '%22'
+    }
+    # Regex that matches any single character that needs to be escaped in
+    # attributes affected by unsafe libxml2 behavior.
+    UNSAFE_LIBXML_ESCAPE_REGEX = /[ "]/
     def initialize(config)
       @config = config
@@ -88,11 +114,37 @@ class Sanitize; module Transformers
               !protocol[attr_name].include?(:relative)
             end
-            attr.unlink if del
+            if del
+              attr.unlink
+            else
+              # Leading and trailing whitespace around URLs is ignored at parse
+              # time. Stripping it here prevents it from being escaped by the
+              # libxml2 workaround below.
+              attr.value = attr.value.strip
+            end
           end
         end
       end
+      # libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
+      # attempt to preserve server-side includes. This can result in XSS since
+      # an unescaped double quote can allow an attacker to inject a
+      # non-whitelisted attribute.
+      #
+      # Sanitize works around this by implementing its own escaping for
+      # affected attributes, some of which can exist on any element and some
+      # of which can only exist on `<a>` elements.
+      #
+      # The relevant libxml2 code is here:
+      # <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
+      node.attribute_nodes.each do |attr|
+        attr_name = attr.name.downcase
+        if UNSAFE_LIBXML_ATTRS_GLOBAL.include?(attr_name) ||
+          (name == 'a' && UNSAFE_LIBXML_ATTRS_A.include?(attr_name))
+            attr.value = attr.value.gsub(UNSAFE_LIBXML_ESCAPE_REGEX, UNSAFE_LIBXML_ESCAPE_CHARS)
+        end
+      end
       # Add required attributes.
       if @add_attributes.has_key?(name)
         @add_attributes[name].each {|key, val| node[key] = val }

data/lib/sanitize/version.rb CHANGED

@@ -1,3 +1,3 @@
 class Sanitize
-  VERSION = '2.1.0'
+  VERSION = '2.1.1'
 end

data/test/test_sanitize.rb CHANGED

@@ -55,10 +55,10 @@ strings = {
   :malicious => {
     :html       => '<b>Lo<!-- comment -->rem</b> <a href="javascript:pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert("hello world");</script>',
-    :default    => 'Lorem ipsum dolor sit amet script&gt;alert("hello world");',
-    :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet script&gt;alert("hello world");',
-    :basic      => '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet script&gt;alert("hello world");',
-    :relaxed    => '<b>Lorem</b> <a title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet script&gt;alert("hello world");'
+    :default    => 'Lorem ipsum dolor sit amet &lt;script&gt;alert("hello world");',
+    :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet &lt;script&gt;alert("hello world");',
+    :basic      => '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert("hello world");',
+    :relaxed    => '<b>Lorem</b> <a title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert("hello world");'
   },
   :raw_comment => {
@@ -181,7 +181,7 @@ tricky = {
     :default    => '',
     :restricted => '',
     :basic      => '',
-    :relaxed    => '<img src="">'
+    :relaxed    => '<img src>'
   }
 }
@@ -645,3 +645,77 @@ describe 'bugs' do
     Sanitize.clean!('foo <style>bar').must_equal('foo bar')
   end
 end
+describe 'Malicious HTML' do
+  make_my_diffs_pretty!
+  parallelize_me!
+  before do
+    @s = Sanitize.new(Sanitize::Config::RELAXED)
+  end
+  # libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
+  # attempt to preserve server-side includes. This can result in XSS since an
+  # unescaped double quote can allow an attacker to inject a non-whitelisted
+  # attribute. Sanitize works around this by implementing its own escaping for
+  # affected attributes.
+  #
+  # The relevant libxml2 code is here:
+  # <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
+  describe 'unsafe libxml2 server-side includes in attributes' do
+    tag_configs = [
+      {
+        tag_name: 'a',
+        escaped_attrs: %w[ action href src name ],
+        unescaped_attrs: []
+      },
+      {
+        tag_name: 'div',
+        escaped_attrs: %w[ action href src ],
+        unescaped_attrs: %w[ name ]
+      }
+    ]
+    before do
+      @s = Sanitize.new({
+        elements: %w[ a div ],
+        attributes: {
+          all: %w[ action href src name ]
+        }
+      })
+    end
+    tag_configs.each do |tag_config|
+      tag_name = tag_config[:tag_name]
+      tag_config[:escaped_attrs].each do |attr_name|
+        input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
+        it 'should escape unsafe characters in attributes' do
+          @s.clean(input).must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
+        end
+        it 'should round-trip to the same output' do
+          output = @s.clean(input)
+          @s.clean(output).must_equal(output)
+        end
+      end
+      tag_config[:unescaped_attrs].each do |attr_name|
+        input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
+        it 'should not escape characters unnecessarily' do
+          @s.clean(input).must_equal(input)
+        end
+        it 'should round-trip to the same output' do
+          output = @s.clean(input)
+          @s.clean(output).must_equal(output)
+        end
+      end
+    end
+  end
+end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: sanitize
 version: !ruby/object:Gem::Version
-  version: 2.1.0
+  version: 2.1.1
 platform: ruby
 authors:
 - Ryan Grove
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-01-13 00:00:00.000000000 Z
+date: 2018-09-30 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -118,7 +118,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: 1.2.0
 requirements: []
 rubyforge_project:
-rubygems_version: 2.2.0
+rubygems_version: 2.7.3
 signing_key:
 specification_version: 4
 summary: Whitelist-based HTML sanitizer.