sanitize 2.1.0 → 2.1.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sanitize might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: a1be4f7e5790c7e0fa8943b793803e507bbaa2ce
4
- data.tar.gz: a879b798b76f4bfff12532e4779bb418a89d4500
2
+ SHA256:
3
+ metadata.gz: 9e75a99c50777d57667e66bee0787e3259085c38ec616bb14f13f3c2d8673827
4
+ data.tar.gz: 6819e4c2c9730157b5b0acb4571c72dfecf02d9e5e1f0e31425cf8fdcfb8af97
5
5
  SHA512:
6
- metadata.gz: ecdbc579a9ed3f737539118ac5b6c17612a736268263fafd03b9daf39da433309a11e090494c2008859edc16c278dcc1ea63ea52b5693479c625b825bbbfbc80
7
- data.tar.gz: 4fff69ad6c6812fb6aac4c492a7644f196faeb82039096dcd204461b07872a05d97c02e0b92237fc65b36891783256e84ee335fc83b03365e92ec5e07a2af57e
6
+ metadata.gz: '08a334cb79442b561ce83bcacc1f4401907b47c3802d91934abf5ae3b13fe91c760df21402f30b126d489815c06eac8d055d65924b826e7e4c41017e08c96ad9'
7
+ data.tar.gz: '0858de4b6b03e1c41c079e96165d3714a4a8b8512deb85b2d7cb837d45a76f2503e1c16f1e8eb18d775a2f5c4337966c36a62a4d98a2162e4301a83338bee595'
data/HISTORY.md CHANGED
@@ -1,6 +1,27 @@
1
1
  Sanitize History
2
2
  ================================================================================
3
3
 
4
+ Version 2.1.1 (2018-09-30)
5
+ --------------------------
6
+
7
+ * [CVE-2018-3740][176]: Fixed an HTML injection vulnerability that could allow
8
+ XSS (backported from Sanitize 4.6.3). [@dometto - #188][188]
9
+
10
+ When Sanitize <= 2.1.0 is used in combination with libxml2 >= 2.9.2, a
11
+ specially crafted HTML fragment can cause libxml2 to generate improperly
12
+ escaped output, allowing non-whitelisted attributes to be used on whitelisted
13
+ elements.
14
+
15
+ Sanitize now performs additional escaping on affected attributes to prevent
16
+ this.
17
+
18
+ Many thanks to the Shopify Application Security Team for responsibly reporting
19
+ this issue.
20
+
21
+ [176]:https://github.com/rgrove/sanitize/issues/176
22
+ [188]:https://github.com/rgrove/sanitize/pull/188
23
+
24
+
4
25
  Version 2.1.0 (2014-01-13)
5
26
  --------------------------
6
27
 
data/lib/sanitize/transformers/clean_element.rb CHANGED
@@ -1,6 +1,32 @@
1
1
  class Sanitize; module Transformers
2
2
 
3
3
  class CleanElement
4
+
5
+ # Attributes that need additional escaping on `<a>` elements due to unsafe
6
+ # libxml2 behavior.
7
+ UNSAFE_LIBXML_ATTRS_A = Set.new(%w[
8
+ name
9
+ ])
10
+
11
+ # Attributes that need additional escaping on all elements due to unsafe
12
+ # libxml2 behavior.
13
+ UNSAFE_LIBXML_ATTRS_GLOBAL = Set.new(%w[
14
+ action
15
+ href
16
+ src
17
+ ])
18
+
19
+ # Mapping of original characters to escape sequences for characters that
20
+ # should be escaped in attributes affected by unsafe libxml2 behavior.
21
+ UNSAFE_LIBXML_ESCAPE_CHARS = {
22
+ ' ' => '%20',
23
+ '"' => '%22'
24
+ }
25
+
26
+ # Regex that matches any single character that needs to be escaped in
27
+ # attributes affected by unsafe libxml2 behavior.
28
+ UNSAFE_LIBXML_ESCAPE_REGEX = /[ "]/
29
+
4
30
  def initialize(config)
5
31
  @config = config
6
32
 
@@ -88,11 +114,37 @@ class Sanitize; module Transformers
88
114
  !protocol[attr_name].include?(:relative)
89
115
  end
90
116
 
91
- attr.unlink if del
117
+ if del
118
+ attr.unlink
119
+ else
120
+ # Leading and trailing whitespace around URLs is ignored at parse
121
+ # time. Stripping it here prevents it from being escaped by the
122
+ # libxml2 workaround below.
123
+ attr.value = attr.value.strip
124
+ end
92
125
  end
93
126
  end
94
127
  end
95
128
 
129
+ # libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
130
+ # attempt to preserve server-side includes. This can result in XSS since
131
+ # an unescaped double quote can allow an attacker to inject a
132
+ # non-whitelisted attribute.
133
+ #
134
+ # Sanitize works around this by implementing its own escaping for
135
+ # affected attributes, some of which can exist on any element and some
136
+ # of which can only exist on `<a>` elements.
137
+ #
138
+ # The relevant libxml2 code is here:
139
+ # <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
140
+ node.attribute_nodes.each do |attr|
141
+ attr_name = attr.name.downcase
142
+ if UNSAFE_LIBXML_ATTRS_GLOBAL.include?(attr_name) ||
143
+ (name == 'a' && UNSAFE_LIBXML_ATTRS_A.include?(attr_name))
144
+ attr.value = attr.value.gsub(UNSAFE_LIBXML_ESCAPE_REGEX, UNSAFE_LIBXML_ESCAPE_CHARS)
145
+ end
146
+ end
147
+
96
148
  # Add required attributes.
97
149
  if @add_attributes.has_key?(name)
98
150
  @add_attributes[name].each {|key, val| node[key] = val }
data/lib/sanitize/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class Sanitize
2
- VERSION = '2.1.0'
2
+ VERSION = '2.1.1'
3
3
  end
data/test/test_sanitize.rb CHANGED
@@ -55,10 +55,10 @@ strings = {
55
55
 
56
56
  :malicious => {
57
57
  :html => '<b>Lo<!-- comment -->rem</b> <a href="javascript:pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert("hello world");</script>',
58
- :default => 'Lorem ipsum dolor sit amet script&gt;alert("hello world");',
59
- :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet script&gt;alert("hello world");',
60
- :basic => '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet script&gt;alert("hello world");',
61
- :relaxed => '<b>Lorem</b> <a title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet script&gt;alert("hello world");'
58
+ :default => 'Lorem ipsum dolor sit amet &lt;script&gt;alert("hello world");',
59
+ :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet &lt;script&gt;alert("hello world");',
60
+ :basic => '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert("hello world");',
61
+ :relaxed => '<b>Lorem</b> <a title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert("hello world");'
62
62
  },
63
63
 
64
64
  :raw_comment => {
@@ -181,7 +181,7 @@ tricky = {
181
181
  :default => '',
182
182
  :restricted => '',
183
183
  :basic => '',
184
- :relaxed => '<img src="">'
184
+ :relaxed => '<img src>'
185
185
  }
186
186
  }
187
187
 
@@ -645,3 +645,77 @@ describe 'bugs' do
645
645
  Sanitize.clean!('foo <style>bar').must_equal('foo bar')
646
646
  end
647
647
  end
648
+
649
+ describe 'Malicious HTML' do
650
+ make_my_diffs_pretty!
651
+ parallelize_me!
652
+
653
+ before do
654
+ @s = Sanitize.new(Sanitize::Config::RELAXED)
655
+ end
656
+
657
+ # libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
658
+ # attempt to preserve server-side includes. This can result in XSS since an
659
+ # unescaped double quote can allow an attacker to inject a non-whitelisted
660
+ # attribute. Sanitize works around this by implementing its own escaping for
661
+ # affected attributes.
662
+ #
663
+ # The relevant libxml2 code is here:
664
+ # <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
665
+ describe 'unsafe libxml2 server-side includes in attributes' do
666
+ tag_configs = [
667
+ {
668
+ tag_name: 'a',
669
+ escaped_attrs: %w[ action href src name ],
670
+ unescaped_attrs: []
671
+ },
672
+
673
+ {
674
+ tag_name: 'div',
675
+ escaped_attrs: %w[ action href src ],
676
+ unescaped_attrs: %w[ name ]
677
+ }
678
+ ]
679
+
680
+ before do
681
+ @s = Sanitize.new({
682
+ elements: %w[ a div ],
683
+
684
+ attributes: {
685
+ all: %w[ action href src name ]
686
+ }
687
+ })
688
+ end
689
+
690
+ tag_configs.each do |tag_config|
691
+ tag_name = tag_config[:tag_name]
692
+
693
+ tag_config[:escaped_attrs].each do |attr_name|
694
+ input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
695
+
696
+ it 'should escape unsafe characters in attributes' do
697
+ @s.clean(input).must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
698
+ end
699
+
700
+ it 'should round-trip to the same output' do
701
+ output = @s.clean(input)
702
+ @s.clean(output).must_equal(output)
703
+ end
704
+ end
705
+
706
+ tag_config[:unescaped_attrs].each do |attr_name|
707
+ input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
708
+
709
+ it 'should not escape characters unnecessarily' do
710
+ @s.clean(input).must_equal(input)
711
+ end
712
+
713
+ it 'should round-trip to the same output' do
714
+ output = @s.clean(input)
715
+ @s.clean(output).must_equal(output)
716
+ end
717
+ end
718
+ end
719
+ end
720
+ end
721
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sanitize
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.0
4
+ version: 2.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Grove
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-01-13 00:00:00.000000000 Z
11
+ date: 2018-09-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -118,7 +118,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
118
118
  version: 1.2.0
119
119
  requirements: []
120
120
  rubyforge_project:
121
- rubygems_version: 2.2.0
121
+ rubygems_version: 2.7.3
122
122
  signing_key:
123
123
  specification_version: 4
124
124
  summary: Whitelist-based HTML sanitizer.