sanitize 2.1.0 → 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +5 -5
- data/HISTORY.md +21 -0
- data/lib/sanitize/transformers/clean_element.rb +53 -1
- data/lib/sanitize/version.rb +1 -1
- data/test/test_sanitize.rb +79 -5
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 9e75a99c50777d57667e66bee0787e3259085c38ec616bb14f13f3c2d8673827
|
4
|
+
data.tar.gz: 6819e4c2c9730157b5b0acb4571c72dfecf02d9e5e1f0e31425cf8fdcfb8af97
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '08a334cb79442b561ce83bcacc1f4401907b47c3802d91934abf5ae3b13fe91c760df21402f30b126d489815c06eac8d055d65924b826e7e4c41017e08c96ad9'
|
7
|
+
data.tar.gz: '0858de4b6b03e1c41c079e96165d3714a4a8b8512deb85b2d7cb837d45a76f2503e1c16f1e8eb18d775a2f5c4337966c36a62a4d98a2162e4301a83338bee595'
|
data/HISTORY.md
CHANGED
@@ -1,6 +1,27 @@
|
|
1
1
|
Sanitize History
|
2
2
|
================================================================================
|
3
3
|
|
4
|
+
Version 2.1.1 (2018-09-30)
|
5
|
+
--------------------------
|
6
|
+
|
7
|
+
* [CVE-2018-3740][176]: Fixed an HTML injection vulnerability that could allow
|
8
|
+
XSS (backported from Sanitize 4.6.3). [@dometto - #188][188]
|
9
|
+
|
10
|
+
When Sanitize <= 2.1.0 is used in combination with libxml2 >= 2.9.2, a
|
11
|
+
specially crafted HTML fragment can cause libxml2 to generate improperly
|
12
|
+
escaped output, allowing non-whitelisted attributes to be used on whitelisted
|
13
|
+
elements.
|
14
|
+
|
15
|
+
Sanitize now performs additional escaping on affected attributes to prevent
|
16
|
+
this.
|
17
|
+
|
18
|
+
Many thanks to the Shopify Application Security Team for responsibly reporting
|
19
|
+
this issue.
|
20
|
+
|
21
|
+
[176]:https://github.com/rgrove/sanitize/issues/176
|
22
|
+
[188]:https://github.com/rgrove/sanitize/pull/188
|
23
|
+
|
24
|
+
|
4
25
|
Version 2.1.0 (2014-01-13)
|
5
26
|
--------------------------
|
6
27
|
|
data/lib/sanitize/transformers/clean_element.rb
CHANGED
@@ -1,6 +1,32 @@
|
|
1
1
|
class Sanitize; module Transformers
|
2
2
|
|
3
3
|
class CleanElement
|
4
|
+
|
5
|
+
# Attributes that need additional escaping on `<a>` elements due to unsafe
|
6
|
+
# libxml2 behavior.
|
7
|
+
UNSAFE_LIBXML_ATTRS_A = Set.new(%w[
|
8
|
+
name
|
9
|
+
])
|
10
|
+
|
11
|
+
# Attributes that need additional escaping on all elements due to unsafe
|
12
|
+
# libxml2 behavior.
|
13
|
+
UNSAFE_LIBXML_ATTRS_GLOBAL = Set.new(%w[
|
14
|
+
action
|
15
|
+
href
|
16
|
+
src
|
17
|
+
])
|
18
|
+
|
19
|
+
# Mapping of original characters to escape sequences for characters that
|
20
|
+
# should be escaped in attributes affected by unsafe libxml2 behavior.
|
21
|
+
UNSAFE_LIBXML_ESCAPE_CHARS = {
|
22
|
+
' ' => '%20',
|
23
|
+
'"' => '%22'
|
24
|
+
}
|
25
|
+
|
26
|
+
# Regex that matches any single character that needs to be escaped in
|
27
|
+
# attributes affected by unsafe libxml2 behavior.
|
28
|
+
UNSAFE_LIBXML_ESCAPE_REGEX = /[ "]/
|
29
|
+
|
4
30
|
def initialize(config)
|
5
31
|
@config = config
|
6
32
|
|
@@ -88,11 +114,37 @@ class Sanitize; module Transformers
|
|
88
114
|
!protocol[attr_name].include?(:relative)
|
89
115
|
end
|
90
116
|
|
91
|
-
|
117
|
+
if del
|
118
|
+
attr.unlink
|
119
|
+
else
|
120
|
+
# Leading and trailing whitespace around URLs is ignored at parse
|
121
|
+
# time. Stripping it here prevents it from being escaped by the
|
122
|
+
# libxml2 workaround below.
|
123
|
+
attr.value = attr.value.strip
|
124
|
+
end
|
92
125
|
end
|
93
126
|
end
|
94
127
|
end
|
95
128
|
|
129
|
+
# libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
|
130
|
+
# attempt to preserve server-side includes. This can result in XSS since
|
131
|
+
# an unescaped double quote can allow an attacker to inject a
|
132
|
+
# non-whitelisted attribute.
|
133
|
+
#
|
134
|
+
# Sanitize works around this by implementing its own escaping for
|
135
|
+
# affected attributes, some of which can exist on any element and some
|
136
|
+
# of which can only exist on `<a>` elements.
|
137
|
+
#
|
138
|
+
# The relevant libxml2 code is here:
|
139
|
+
# <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
|
140
|
+
node.attribute_nodes.each do |attr|
|
141
|
+
attr_name = attr.name.downcase
|
142
|
+
if UNSAFE_LIBXML_ATTRS_GLOBAL.include?(attr_name) ||
|
143
|
+
(name == 'a' && UNSAFE_LIBXML_ATTRS_A.include?(attr_name))
|
144
|
+
attr.value = attr.value.gsub(UNSAFE_LIBXML_ESCAPE_REGEX, UNSAFE_LIBXML_ESCAPE_CHARS)
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
96
148
|
# Add required attributes.
|
97
149
|
if @add_attributes.has_key?(name)
|
98
150
|
@add_attributes[name].each {|key, val| node[key] = val }
|
data/lib/sanitize/version.rb
CHANGED
data/test/test_sanitize.rb
CHANGED
@@ -55,10 +55,10 @@ strings = {
|
|
55
55
|
|
56
56
|
:malicious => {
|
57
57
|
:html => '<b>Lo<!-- comment -->rem</b> <a href="javascript:pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert("hello world");</script>',
|
58
|
-
:default => 'Lorem ipsum dolor sit amet script>alert("hello world");',
|
59
|
-
:restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet script>alert("hello world");',
|
60
|
-
:basic => '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet script>alert("hello world");',
|
61
|
-
:relaxed => '<b>Lorem</b> <a title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet script>alert("hello world");'
|
58
|
+
:default => 'Lorem ipsum dolor sit amet <script>alert("hello world");',
|
59
|
+
:restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet <script>alert("hello world");',
|
60
|
+
:basic => '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet <script>alert("hello world");',
|
61
|
+
:relaxed => '<b>Lorem</b> <a title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <script>alert("hello world");'
|
62
62
|
},
|
63
63
|
|
64
64
|
:raw_comment => {
|
@@ -181,7 +181,7 @@ tricky = {
|
|
181
181
|
:default => '',
|
182
182
|
:restricted => '',
|
183
183
|
:basic => '',
|
184
|
-
:relaxed => '<img src
|
184
|
+
:relaxed => '<img src>'
|
185
185
|
}
|
186
186
|
}
|
187
187
|
|
@@ -645,3 +645,77 @@ describe 'bugs' do
|
|
645
645
|
Sanitize.clean!('foo <style>bar').must_equal('foo bar')
|
646
646
|
end
|
647
647
|
end
|
648
|
+
|
649
|
+
describe 'Malicious HTML' do
|
650
|
+
make_my_diffs_pretty!
|
651
|
+
parallelize_me!
|
652
|
+
|
653
|
+
before do
|
654
|
+
@s = Sanitize.new(Sanitize::Config::RELAXED)
|
655
|
+
end
|
656
|
+
|
657
|
+
# libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
|
658
|
+
# attempt to preserve server-side includes. This can result in XSS since an
|
659
|
+
# unescaped double quote can allow an attacker to inject a non-whitelisted
|
660
|
+
# attribute. Sanitize works around this by implementing its own escaping for
|
661
|
+
# affected attributes.
|
662
|
+
#
|
663
|
+
# The relevant libxml2 code is here:
|
664
|
+
# <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
|
665
|
+
describe 'unsafe libxml2 server-side includes in attributes' do
|
666
|
+
tag_configs = [
|
667
|
+
{
|
668
|
+
tag_name: 'a',
|
669
|
+
escaped_attrs: %w[ action href src name ],
|
670
|
+
unescaped_attrs: []
|
671
|
+
},
|
672
|
+
|
673
|
+
{
|
674
|
+
tag_name: 'div',
|
675
|
+
escaped_attrs: %w[ action href src ],
|
676
|
+
unescaped_attrs: %w[ name ]
|
677
|
+
}
|
678
|
+
]
|
679
|
+
|
680
|
+
before do
|
681
|
+
@s = Sanitize.new({
|
682
|
+
elements: %w[ a div ],
|
683
|
+
|
684
|
+
attributes: {
|
685
|
+
all: %w[ action href src name ]
|
686
|
+
}
|
687
|
+
})
|
688
|
+
end
|
689
|
+
|
690
|
+
tag_configs.each do |tag_config|
|
691
|
+
tag_name = tag_config[:tag_name]
|
692
|
+
|
693
|
+
tag_config[:escaped_attrs].each do |attr_name|
|
694
|
+
input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
|
695
|
+
|
696
|
+
it 'should escape unsafe characters in attributes' do
|
697
|
+
@s.clean(input).must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
698
|
+
end
|
699
|
+
|
700
|
+
it 'should round-trip to the same output' do
|
701
|
+
output = @s.clean(input)
|
702
|
+
@s.clean(output).must_equal(output)
|
703
|
+
end
|
704
|
+
end
|
705
|
+
|
706
|
+
tag_config[:unescaped_attrs].each do |attr_name|
|
707
|
+
input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
|
708
|
+
|
709
|
+
it 'should not escape characters unnecessarily' do
|
710
|
+
@s.clean(input).must_equal(input)
|
711
|
+
end
|
712
|
+
|
713
|
+
it 'should round-trip to the same output' do
|
714
|
+
output = @s.clean(input)
|
715
|
+
@s.clean(output).must_equal(output)
|
716
|
+
end
|
717
|
+
end
|
718
|
+
end
|
719
|
+
end
|
720
|
+
end
|
721
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sanitize
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.0
|
4
|
+
version: 2.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Grove
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-09-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -118,7 +118,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
118
118
|
version: 1.2.0
|
119
119
|
requirements: []
|
120
120
|
rubyforge_project:
|
121
|
-
rubygems_version: 2.
|
121
|
+
rubygems_version: 2.7.3
|
122
122
|
signing_key:
|
123
123
|
specification_version: 4
|
124
124
|
summary: Whitelist-based HTML sanitizer.
|