sanitize 4.6.2 → 4.6.3

Sign up for free protection for your applications and access to all features.

Potentially problematic release.


This version of sanitize might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fb775bea4edea52d04bbfca1b95cd52387951da8a257277c2a95e6371d59ef43
4
- data.tar.gz: 7a94074ef83e2acecf446bed31fb2de9d5f5a164409481f9af50b5cc85b17608
3
+ metadata.gz: 04fe170a57bfd67e2e2f40e19e6add8cd777a9d812f24b66a4350d0cefe9f803
4
+ data.tar.gz: fb848fbc8cf1878905378f2795c9ad012d4247a1a5491ec4735994902544840d
5
5
  SHA512:
6
- metadata.gz: b2c52aea4bd23c99c3cb5e55bcb2e6b63746d02532c19e8c7d5a2bc72e1e3ab571ac03c2aaf7ba4f8d02ab77cdf2be92407069670c3ff9878de6b54675fa5c6e
7
- data.tar.gz: d384b9754d718205a5cb2532699d2c71f48d3e4e0854fdbff29cc8bd3f559a4e4b828e6311726605de85906c95d0342b960d67cc9e7426a4c5c00e0eb8b3b946
6
+ metadata.gz: dde1af17f562062ea7136d8033df17ed2aeaf39fdc1d037e75118c1ed9718d6ae50f29f9bb1165b0057810fed7a8bcac303e9e0687c3f98dbe514f6cb768cae5
7
+ data.tar.gz: 408533cd205ec1570041a6c029bb639978aa1785824a2e69df9d695331f274e7cda4e024934300ba35b8792fb39f4a94bd780dc6365cc2eaade06cdc32d3299e
data/HISTORY.md CHANGED
@@ -1,5 +1,20 @@
1
1
  # Sanitize History
2
2
 
3
+ ## 4.6.3 (2018-03-19)
4
+
5
+ * Fixed an HTML injection vulnerability that could allow XSS.
6
+
7
+ When Sanitize <= 4.6.2 is used in combination with libxml2 >= 2.9.2, a
8
+ specially crafted HTML fragment can cause libxml2 to generate improperly
9
+ escaped output, allowing non-whitelisted attributes to be used on whitelisted
10
+ elements.
11
+
12
+ Sanitize now performs additional escaping on affected attributes to prevent
13
+ this.
14
+
15
+ Many thanks to the Shopify Application Security Team for responsibly reporting
16
+ this issue.
17
+
3
18
  ## 4.6.2 (2018-03-19)
4
19
 
5
20
  * Reduced string allocations to optimize memory usage. [@janklimo - #175][175]
@@ -18,6 +18,31 @@ class Sanitize; module Transformers; class CleanElement
18
18
  # http://www.whatwg.org/specs/web-apps/current-work/multipage/elements.html#embedding-custom-non-visible-data-with-the-data-*-attributes
19
19
  REGEX_DATA_ATTR = /\Adata-(?!xml)[a-z_][\w.\u00E0-\u00F6\u00F8-\u017F\u01DD-\u02AF-]*\z/u
20
20
 
21
+ # Attributes that need additional escaping on `<a>` elements due to unsafe
22
+ # libxml2 behavior.
23
+ UNSAFE_LIBXML_ATTRS_A = Set.new(%w[
24
+ name
25
+ ])
26
+
27
+ # Attributes that need additional escaping on all elements due to unsafe
28
+ # libxml2 behavior.
29
+ UNSAFE_LIBXML_ATTRS_GLOBAL = Set.new(%w[
30
+ action
31
+ href
32
+ src
33
+ ])
34
+
35
+ # Mapping of original characters to escape sequences for characters that
36
+ # should be escaped in attributes affected by unsafe libxml2 behavior.
37
+ UNSAFE_LIBXML_ESCAPE_CHARS = {
38
+ ' ' => '%20',
39
+ '"' => '%22'
40
+ }
41
+
42
+ # Regex that matches any single character that needs to be escaped in
43
+ # attributes affected by unsafe libxml2 behavior.
44
+ UNSAFE_LIBXML_ESCAPE_REGEX = /[ "]/
45
+
21
46
  def initialize(config)
22
47
  @add_attributes = config[:add_attributes]
23
48
  @attributes = config[:attributes].dup
@@ -92,31 +117,61 @@ class Sanitize; module Transformers; class CleanElement
92
117
  node.attribute_nodes.each do |attr|
93
118
  attr_name = attr.name.downcase
94
119
 
95
- if attr_whitelist.include?(attr_name)
96
- # The attribute is whitelisted.
120
+ unless attr_whitelist.include?(attr_name)
121
+ # The attribute isn't whitelisted.
122
+
123
+ if allow_data_attributes && attr_name.start_with?('data-')
124
+ # Arbitrary data attributes are allowed. If this is a data
125
+ # attribute, continue.
126
+ next if attr_name =~ REGEX_DATA_ATTR
127
+ end
128
+
129
+ # Either the attribute isn't a data attribute or arbitrary data
130
+ # attributes aren't allowed. Remove the attribute.
131
+ attr.unlink
132
+ next
133
+ end
134
+
135
+ # The attribute is whitelisted.
97
136
 
98
- # Remove any attributes that use unacceptable protocols.
99
- if @protocols.include?(name) && @protocols[name].include?(attr_name)
100
- attr_protocols = @protocols[name][attr_name]
137
+ # Remove any attributes that use unacceptable protocols.
138
+ if @protocols.include?(name) && @protocols[name].include?(attr_name)
139
+ attr_protocols = @protocols[name][attr_name]
101
140
 
102
- if attr.value =~ REGEX_PROTOCOL
103
- attr.unlink unless attr_protocols.include?($1.downcase)
104
- else
105
- attr.unlink unless attr_protocols.include?(:relative)
141
+ if attr.value =~ REGEX_PROTOCOL
142
+ unless attr_protocols.include?($1.downcase)
143
+ attr.unlink
144
+ next
106
145
  end
107
- end
108
- else
109
- # The attribute isn't whitelisted.
110
146
 
111
- if allow_data_attributes && attr_name.start_with?('data-')
112
- # Arbitrary data attributes are allowed. Verify that the attribute
113
- # is a valid data attribute.
114
- attr.unlink unless attr_name =~ REGEX_DATA_ATTR
115
147
  else
116
- # Either the attribute isn't a data attribute, or arbitrary data
117
- # attributes aren't allowed. Remove the attribute.
118
- attr.unlink
148
+ unless attr_protocols.include?(:relative)
149
+ attr.unlink
150
+ next
151
+ end
119
152
  end
153
+
154
+ # Leading and trailing whitespace around URLs is ignored at parse
155
+ # time. Stripping it here prevents it from being escaped by the
156
+ # libxml2 workaround below.
157
+ attr.value = attr.value.strip
158
+ end
159
+
160
+ # libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
161
+ # attempt to preserve server-side includes. This can result in XSS since
162
+ # an unescaped double quote can allow an attacker to inject a
163
+ # non-whitelisted attribute.
164
+ #
165
+ # Sanitize works around this by implementing its own escaping for
166
+ # affected attributes, some of which can exist on any element and some
167
+ # of which can only exist on `<a>` elements.
168
+ #
169
+ # The relevant libxml2 code is here:
170
+ # <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
171
+ if UNSAFE_LIBXML_ATTRS_GLOBAL.include?(attr_name) ||
172
+ (name == 'a' && UNSAFE_LIBXML_ATTRS_A.include?(attr_name))
173
+
174
+ attr.value = attr.value.gsub(UNSAFE_LIBXML_ESCAPE_REGEX, UNSAFE_LIBXML_ESCAPE_CHARS)
120
175
  end
121
176
  end
122
177
  end
@@ -1,5 +1,5 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  class Sanitize
4
- VERSION = '4.6.2'
4
+ VERSION = '4.6.3'
5
5
  end
@@ -234,7 +234,7 @@ describe 'Sanitize::Transformers::CleanElement' do
234
234
 
235
235
  it 'should not choke on valueless attributes' do
236
236
  @s.fragment('foo <a href>foo</a> bar')
237
- .must_equal 'foo <a href="" rel="nofollow">foo</a> bar'
237
+ .must_equal 'foo <a href rel="nofollow">foo</a> bar'
238
238
  end
239
239
 
240
240
  it 'should downcase attribute names' do
@@ -300,6 +300,16 @@ describe 'Sanitize::Transformers::CleanElement' do
300
300
  }).must_equal input
301
301
  end
302
302
 
303
+ it "should not allow relative URLs when relative URLs aren't whitelisted" do
304
+ input = '<a href="/foo/bar">Link</a>'
305
+
306
+ Sanitize.fragment(input,
307
+ :elements => ['a'],
308
+ :attributes => {'a' => ['href']},
309
+ :protocols => {'a' => {'href' => ['http']}}
310
+ ).must_equal '<a>Link</a>'
311
+ end
312
+
303
313
  it 'should allow relative URLs containing colons when the colon is not in the first path segment' do
304
314
  input = '<a href="/wiki/Special:Random">Random Page</a>'
305
315
 
@@ -125,4 +125,68 @@ describe 'Malicious HTML' do
125
125
  must_equal '&lt;alert("XSS");//&lt;'
126
126
  end
127
127
  end
128
+
129
+ # libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
130
+ # attempt to preserve server-side includes. This can result in XSS since an
131
+ # unescaped double quote can allow an attacker to inject a non-whitelisted
132
+ # attribute. Sanitize works around this by implementing its own escaping for
133
+ # affected attributes.
134
+ #
135
+ # The relevant libxml2 code is here:
136
+ # <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
137
+ describe 'unsafe libxml2 server-side includes in attributes' do
138
+ tag_configs = [
139
+ {
140
+ tag_name: 'a',
141
+ escaped_attrs: %w[ action href src name ],
142
+ unescaped_attrs: []
143
+ },
144
+
145
+ {
146
+ tag_name: 'div',
147
+ escaped_attrs: %w[ action href src ],
148
+ unescaped_attrs: %w[ name ]
149
+ }
150
+ ]
151
+
152
+ before do
153
+ @s = Sanitize.new({
154
+ elements: %w[ a div ],
155
+
156
+ attributes: {
157
+ all: %w[ action href src name ]
158
+ }
159
+ })
160
+ end
161
+
162
+ tag_configs.each do |tag_config|
163
+ tag_name = tag_config[:tag_name]
164
+
165
+ tag_config[:escaped_attrs].each do |attr_name|
166
+ input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
167
+
168
+ it 'should escape unsafe characters in attributes' do
169
+ @s.fragment(input).must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
170
+ end
171
+
172
+ it 'should round-trip to the same output' do
173
+ output = @s.fragment(input)
174
+ @s.fragment(output).must_equal(output)
175
+ end
176
+ end
177
+
178
+ tag_config[:unescaped_attrs].each do |attr_name|
179
+ input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
180
+
181
+ it 'should not escape characters unnecessarily' do
182
+ @s.fragment(input).must_equal(input)
183
+ end
184
+
185
+ it 'should round-trip to the same output' do
186
+ output = @s.fragment(input)
187
+ @s.fragment(output).must_equal(output)
188
+ end
189
+ end
190
+ end
191
+ end
128
192
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sanitize
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.6.2
4
+ version: 4.6.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Grove
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-03-19 00:00:00.000000000 Z
11
+ date: 2018-03-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: crass