sanitize 4.6.2 → 4.6.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/HISTORY.md +15 -0
- data/lib/sanitize/transformers/clean_element.rb +74 -19
- data/lib/sanitize/version.rb +1 -1
- data/test/test_clean_element.rb +11 -1
- data/test/test_malicious_html.rb +64 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 04fe170a57bfd67e2e2f40e19e6add8cd777a9d812f24b66a4350d0cefe9f803
|
4
|
+
data.tar.gz: fb848fbc8cf1878905378f2795c9ad012d4247a1a5491ec4735994902544840d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dde1af17f562062ea7136d8033df17ed2aeaf39fdc1d037e75118c1ed9718d6ae50f29f9bb1165b0057810fed7a8bcac303e9e0687c3f98dbe514f6cb768cae5
|
7
|
+
data.tar.gz: 408533cd205ec1570041a6c029bb639978aa1785824a2e69df9d695331f274e7cda4e024934300ba35b8792fb39f4a94bd780dc6365cc2eaade06cdc32d3299e
|
data/HISTORY.md
CHANGED
@@ -1,5 +1,20 @@
|
|
1
1
|
# Sanitize History
|
2
2
|
|
3
|
+
## 4.6.3 (2018-03-19)
|
4
|
+
|
5
|
+
* Fixed an HTML injection vulnerability that could allow XSS.
|
6
|
+
|
7
|
+
When Sanitize <= 4.6.2 is used in combination with libxml2 >= 2.9.2, a
|
8
|
+
specially crafted HTML fragment can cause libxml2 to generate improperly
|
9
|
+
escaped output, allowing non-whitelisted attributes to be used on whitelisted
|
10
|
+
elements.
|
11
|
+
|
12
|
+
Sanitize now performs additional escaping on affected attributes to prevent
|
13
|
+
this.
|
14
|
+
|
15
|
+
Many thanks to the Shopify Application Security Team for responsibly reporting
|
16
|
+
this issue.
|
17
|
+
|
3
18
|
## 4.6.2 (2018-03-19)
|
4
19
|
|
5
20
|
* Reduced string allocations to optimize memory usage. [@janklimo - #175][175]
|
@@ -18,6 +18,31 @@ class Sanitize; module Transformers; class CleanElement
|
|
18
18
|
# http://www.whatwg.org/specs/web-apps/current-work/multipage/elements.html#embedding-custom-non-visible-data-with-the-data-*-attributes
|
19
19
|
REGEX_DATA_ATTR = /\Adata-(?!xml)[a-z_][\w.\u00E0-\u00F6\u00F8-\u017F\u01DD-\u02AF-]*\z/u
|
20
20
|
|
21
|
+
# Attributes that need additional escaping on `<a>` elements due to unsafe
|
22
|
+
# libxml2 behavior.
|
23
|
+
UNSAFE_LIBXML_ATTRS_A = Set.new(%w[
|
24
|
+
name
|
25
|
+
])
|
26
|
+
|
27
|
+
# Attributes that need additional escaping on all elements due to unsafe
|
28
|
+
# libxml2 behavior.
|
29
|
+
UNSAFE_LIBXML_ATTRS_GLOBAL = Set.new(%w[
|
30
|
+
action
|
31
|
+
href
|
32
|
+
src
|
33
|
+
])
|
34
|
+
|
35
|
+
# Mapping of original characters to escape sequences for characters that
|
36
|
+
# should be escaped in attributes affected by unsafe libxml2 behavior.
|
37
|
+
UNSAFE_LIBXML_ESCAPE_CHARS = {
|
38
|
+
' ' => '%20',
|
39
|
+
'"' => '%22'
|
40
|
+
}
|
41
|
+
|
42
|
+
# Regex that matches any single character that needs to be escaped in
|
43
|
+
# attributes affected by unsafe libxml2 behavior.
|
44
|
+
UNSAFE_LIBXML_ESCAPE_REGEX = /[ "]/
|
45
|
+
|
21
46
|
def initialize(config)
|
22
47
|
@add_attributes = config[:add_attributes]
|
23
48
|
@attributes = config[:attributes].dup
|
@@ -92,31 +117,61 @@ class Sanitize; module Transformers; class CleanElement
|
|
92
117
|
node.attribute_nodes.each do |attr|
|
93
118
|
attr_name = attr.name.downcase
|
94
119
|
|
95
|
-
|
96
|
-
# The attribute is whitelisted.
|
120
|
+
unless attr_whitelist.include?(attr_name)
|
121
|
+
# The attribute isn't whitelisted.
|
122
|
+
|
123
|
+
if allow_data_attributes && attr_name.start_with?('data-')
|
124
|
+
# Arbitrary data attributes are allowed. If this is a data
|
125
|
+
# attribute, continue.
|
126
|
+
next if attr_name =~ REGEX_DATA_ATTR
|
127
|
+
end
|
128
|
+
|
129
|
+
# Either the attribute isn't a data attribute or arbitrary data
|
130
|
+
# attributes aren't allowed. Remove the attribute.
|
131
|
+
attr.unlink
|
132
|
+
next
|
133
|
+
end
|
134
|
+
|
135
|
+
# The attribute is whitelisted.
|
97
136
|
|
98
|
-
|
99
|
-
|
100
|
-
|
137
|
+
# Remove any attributes that use unacceptable protocols.
|
138
|
+
if @protocols.include?(name) && @protocols[name].include?(attr_name)
|
139
|
+
attr_protocols = @protocols[name][attr_name]
|
101
140
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
141
|
+
if attr.value =~ REGEX_PROTOCOL
|
142
|
+
unless attr_protocols.include?($1.downcase)
|
143
|
+
attr.unlink
|
144
|
+
next
|
106
145
|
end
|
107
|
-
end
|
108
|
-
else
|
109
|
-
# The attribute isn't whitelisted.
|
110
146
|
|
111
|
-
if allow_data_attributes && attr_name.start_with?('data-')
|
112
|
-
# Arbitrary data attributes are allowed. Verify that the attribute
|
113
|
-
# is a valid data attribute.
|
114
|
-
attr.unlink unless attr_name =~ REGEX_DATA_ATTR
|
115
147
|
else
|
116
|
-
|
117
|
-
|
118
|
-
|
148
|
+
unless attr_protocols.include?(:relative)
|
149
|
+
attr.unlink
|
150
|
+
next
|
151
|
+
end
|
119
152
|
end
|
153
|
+
|
154
|
+
# Leading and trailing whitespace around URLs is ignored at parse
|
155
|
+
# time. Stripping it here prevents it from being escaped by the
|
156
|
+
# libxml2 workaround below.
|
157
|
+
attr.value = attr.value.strip
|
158
|
+
end
|
159
|
+
|
160
|
+
# libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
|
161
|
+
# attempt to preserve server-side includes. This can result in XSS since
|
162
|
+
# an unescaped double quote can allow an attacker to inject a
|
163
|
+
# non-whitelisted attribute.
|
164
|
+
#
|
165
|
+
# Sanitize works around this by implementing its own escaping for
|
166
|
+
# affected attributes, some of which can exist on any element and some
|
167
|
+
# of which can only exist on `<a>` elements.
|
168
|
+
#
|
169
|
+
# The relevant libxml2 code is here:
|
170
|
+
# <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
|
171
|
+
if UNSAFE_LIBXML_ATTRS_GLOBAL.include?(attr_name) ||
|
172
|
+
(name == 'a' && UNSAFE_LIBXML_ATTRS_A.include?(attr_name))
|
173
|
+
|
174
|
+
attr.value = attr.value.gsub(UNSAFE_LIBXML_ESCAPE_REGEX, UNSAFE_LIBXML_ESCAPE_CHARS)
|
120
175
|
end
|
121
176
|
end
|
122
177
|
end
|
data/lib/sanitize/version.rb
CHANGED
data/test/test_clean_element.rb
CHANGED
@@ -234,7 +234,7 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
234
234
|
|
235
235
|
it 'should not choke on valueless attributes' do
|
236
236
|
@s.fragment('foo <a href>foo</a> bar')
|
237
|
-
.must_equal 'foo <a href
|
237
|
+
.must_equal 'foo <a href rel="nofollow">foo</a> bar'
|
238
238
|
end
|
239
239
|
|
240
240
|
it 'should downcase attribute names' do
|
@@ -300,6 +300,16 @@ describe 'Sanitize::Transformers::CleanElement' do
|
|
300
300
|
}).must_equal input
|
301
301
|
end
|
302
302
|
|
303
|
+
it "should not allow relative URLs when relative URLs aren't whitelisted" do
|
304
|
+
input = '<a href="/foo/bar">Link</a>'
|
305
|
+
|
306
|
+
Sanitize.fragment(input,
|
307
|
+
:elements => ['a'],
|
308
|
+
:attributes => {'a' => ['href']},
|
309
|
+
:protocols => {'a' => {'href' => ['http']}}
|
310
|
+
).must_equal '<a>Link</a>'
|
311
|
+
end
|
312
|
+
|
303
313
|
it 'should allow relative URLs containing colons when the colon is not in the first path segment' do
|
304
314
|
input = '<a href="/wiki/Special:Random">Random Page</a>'
|
305
315
|
|
data/test/test_malicious_html.rb
CHANGED
@@ -125,4 +125,68 @@ describe 'Malicious HTML' do
|
|
125
125
|
must_equal '&lt;alert("XSS");//&lt;'
|
126
126
|
end
|
127
127
|
end
|
128
|
+
|
129
|
+
# libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
|
130
|
+
# attempt to preserve server-side includes. This can result in XSS since an
|
131
|
+
# unescaped double quote can allow an attacker to inject a non-whitelisted
|
132
|
+
# attribute. Sanitize works around this by implementing its own escaping for
|
133
|
+
# affected attributes.
|
134
|
+
#
|
135
|
+
# The relevant libxml2 code is here:
|
136
|
+
# <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
|
137
|
+
describe 'unsafe libxml2 server-side includes in attributes' do
|
138
|
+
tag_configs = [
|
139
|
+
{
|
140
|
+
tag_name: 'a',
|
141
|
+
escaped_attrs: %w[ action href src name ],
|
142
|
+
unescaped_attrs: []
|
143
|
+
},
|
144
|
+
|
145
|
+
{
|
146
|
+
tag_name: 'div',
|
147
|
+
escaped_attrs: %w[ action href src ],
|
148
|
+
unescaped_attrs: %w[ name ]
|
149
|
+
}
|
150
|
+
]
|
151
|
+
|
152
|
+
before do
|
153
|
+
@s = Sanitize.new({
|
154
|
+
elements: %w[ a div ],
|
155
|
+
|
156
|
+
attributes: {
|
157
|
+
all: %w[ action href src name ]
|
158
|
+
}
|
159
|
+
})
|
160
|
+
end
|
161
|
+
|
162
|
+
tag_configs.each do |tag_config|
|
163
|
+
tag_name = tag_config[:tag_name]
|
164
|
+
|
165
|
+
tag_config[:escaped_attrs].each do |attr_name|
|
166
|
+
input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
|
167
|
+
|
168
|
+
it 'should escape unsafe characters in attributes' do
|
169
|
+
@s.fragment(input).must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
170
|
+
end
|
171
|
+
|
172
|
+
it 'should round-trip to the same output' do
|
173
|
+
output = @s.fragment(input)
|
174
|
+
@s.fragment(output).must_equal(output)
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
tag_config[:unescaped_attrs].each do |attr_name|
|
179
|
+
input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
|
180
|
+
|
181
|
+
it 'should not escape characters unnecessarily' do
|
182
|
+
@s.fragment(input).must_equal(input)
|
183
|
+
end
|
184
|
+
|
185
|
+
it 'should round-trip to the same output' do
|
186
|
+
output = @s.fragment(input)
|
187
|
+
@s.fragment(output).must_equal(output)
|
188
|
+
end
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|
128
192
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanitize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.6.2
|
4
|
+
version: 4.6.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Grove
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-03-
|
11
|
+
date: 2018-03-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: crass
|