sanitize 6.1.2 → 7.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/{HISTORY.md → CHANGELOG.md} +40 -14
- data/LICENSE +3 -1
- data/README.md +120 -238
- data/lib/sanitize/config/basic.rb +15 -15
- data/lib/sanitize/config/default.rb +45 -45
- data/lib/sanitize/config/relaxed.rb +136 -32
- data/lib/sanitize/config/restricted.rb +2 -2
- data/lib/sanitize/config.rb +12 -14
- data/lib/sanitize/css.rb +309 -303
- data/lib/sanitize/transformers/clean_cdata.rb +9 -9
- data/lib/sanitize/transformers/clean_comment.rb +9 -9
- data/lib/sanitize/transformers/clean_css.rb +59 -55
- data/lib/sanitize/transformers/clean_doctype.rb +15 -15
- data/lib/sanitize/transformers/clean_element.rb +220 -237
- data/lib/sanitize/version.rb +3 -1
- data/lib/sanitize.rb +38 -38
- data/test/common.rb +4 -3
- data/test/test_clean_comment.rb +26 -25
- data/test/test_clean_css.rb +14 -13
- data/test/test_clean_doctype.rb +21 -20
- data/test/test_clean_element.rb +258 -273
- data/test/test_config.rb +22 -21
- data/test/test_malicious_css.rb +20 -19
- data/test/test_malicious_html.rb +100 -99
- data/test/test_parser.rb +26 -25
- data/test/test_sanitize.rb +70 -69
- data/test/test_sanitize_css.rb +152 -114
- data/test/test_transformers.rb +81 -83
- metadata +14 -43
data/test/test_config.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
|
-
#
|
2
|
-
require_relative 'common'
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require_relative "common"
|
4
|
+
|
5
|
+
describe "Config" do
|
5
6
|
make_my_diffs_pretty!
|
6
7
|
parallelize_me!
|
7
8
|
|
@@ -9,22 +10,22 @@ describe 'Config' do
|
|
9
10
|
_(config).must_be :frozen?
|
10
11
|
|
11
12
|
if Hash === config
|
12
|
-
config.each_value {|v| verify_deeply_frozen(v) }
|
13
|
+
config.each_value { |v| verify_deeply_frozen(v) }
|
13
14
|
elsif Set === config || Array === config
|
14
|
-
config.each {|v| verify_deeply_frozen(v) }
|
15
|
+
config.each { |v| verify_deeply_frozen(v) }
|
15
16
|
end
|
16
17
|
end
|
17
18
|
|
18
|
-
it
|
19
|
+
it "built-in configs should be deeply frozen" do
|
19
20
|
verify_deeply_frozen Sanitize::Config::DEFAULT
|
20
21
|
verify_deeply_frozen Sanitize::Config::BASIC
|
21
22
|
verify_deeply_frozen Sanitize::Config::RELAXED
|
22
23
|
verify_deeply_frozen Sanitize::Config::RESTRICTED
|
23
24
|
end
|
24
25
|
|
25
|
-
describe
|
26
|
-
it
|
27
|
-
a = {:
|
26
|
+
describe ".freeze_config" do
|
27
|
+
it "should deeply freeze and return a configuration Hash" do
|
28
|
+
a = {one: {one_one: [0, "1", :a], one_two: false, one_three: Set.new([:a, :b, :c])}}
|
28
29
|
b = Sanitize::Config.freeze_config(a)
|
29
30
|
|
30
31
|
_(b).must_be_same_as a
|
@@ -32,11 +33,11 @@ describe 'Config' do
|
|
32
33
|
end
|
33
34
|
end
|
34
35
|
|
35
|
-
describe
|
36
|
-
it
|
36
|
+
describe ".merge" do
|
37
|
+
it "should deeply merge a configuration Hash" do
|
37
38
|
# Freeze to ensure that we get an error if either Hash is modified.
|
38
|
-
a = Sanitize::Config.freeze_config({:
|
39
|
-
b = Sanitize::Config.freeze_config({:
|
39
|
+
a = Sanitize::Config.freeze_config({one: {one_one: [0, "1", :a], one_two: false, one_three: Set.new([:a, :b, :c])}})
|
40
|
+
b = Sanitize::Config.freeze_config({one: {one_two: true, one_three: 3}, two: 2})
|
40
41
|
|
41
42
|
c = Sanitize::Config.merge(a, b)
|
42
43
|
|
@@ -44,22 +45,22 @@ describe 'Config' do
|
|
44
45
|
_(c).wont_be_same_as b
|
45
46
|
|
46
47
|
_(c).must_equal(
|
47
|
-
:
|
48
|
-
:
|
49
|
-
:
|
50
|
-
:
|
48
|
+
one: {
|
49
|
+
one_one: [0, "1", :a],
|
50
|
+
one_two: true,
|
51
|
+
one_three: 3
|
51
52
|
},
|
52
53
|
|
53
|
-
:
|
54
|
+
two: 2
|
54
55
|
)
|
55
56
|
|
56
57
|
_(c[:one]).wont_be_same_as a[:one]
|
57
58
|
_(c[:one][:one_one]).wont_be_same_as a[:one][:one_one]
|
58
59
|
end
|
59
60
|
|
60
|
-
it
|
61
|
-
_(proc { Sanitize::Config.merge(
|
62
|
-
_(proc { Sanitize::Config.merge({},
|
61
|
+
it "should raise an ArgumentError if either argument is not a Hash" do
|
62
|
+
_(proc { Sanitize::Config.merge("foo", {}) }).must_raise ArgumentError
|
63
|
+
_(proc { Sanitize::Config.merge({}, "foo") }).must_raise ArgumentError
|
63
64
|
end
|
64
65
|
end
|
65
66
|
end
|
data/test/test_malicious_css.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "common"
|
3
4
|
|
4
5
|
# Miscellaneous attempts to sneak maliciously crafted CSS past Sanitize. Some of
|
5
6
|
# these are courtesy of (or inspired by) the OWASP XSS Filter Evasion Cheat
|
@@ -7,7 +8,7 @@ require_relative 'common'
|
|
7
8
|
#
|
8
9
|
# https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
|
9
10
|
|
10
|
-
describe
|
11
|
+
describe "Malicious CSS" do
|
11
12
|
make_my_diffs_pretty!
|
12
13
|
parallelize_me!
|
13
14
|
|
@@ -15,37 +16,37 @@ describe 'Malicious CSS' do
|
|
15
16
|
@s = Sanitize::CSS.new(Sanitize::Config::RELAXED)
|
16
17
|
end
|
17
18
|
|
18
|
-
it
|
19
|
-
_(@s.properties(%[width:expr/*XSS*/ession(alert('XSS'))]))
|
20
|
-
must_equal
|
19
|
+
it "should not be possible to inject an expression by munging it with a comment" do
|
20
|
+
_(@s.properties(%[width:expr/*XSS*/ession(alert('XSS'))]))
|
21
|
+
.must_equal ""
|
21
22
|
|
22
|
-
_(@s.properties(%[width:ex/*XSS*//*/*/pression(alert("XSS"))]))
|
23
|
-
must_equal
|
23
|
+
_(@s.properties(%[width:ex/*XSS*//*/*/pression(alert("XSS"))]))
|
24
|
+
.must_equal ""
|
24
25
|
end
|
25
26
|
|
26
|
-
it
|
27
|
-
_(@s.properties(%[width:\nexpression(alert('XSS'));]))
|
28
|
-
must_equal
|
27
|
+
it "should not be possible to inject an expression by munging it with a newline" do
|
28
|
+
_(@s.properties(%[width:\nexpression(alert('XSS'));]))
|
29
|
+
.must_equal ""
|
29
30
|
end
|
30
31
|
|
31
|
-
it
|
32
|
-
_(@s.properties(%[background-image:url("javascript:alert('XSS')");]))
|
33
|
-
must_equal
|
32
|
+
it "should not allow the javascript protocol" do
|
33
|
+
_(@s.properties(%[background-image:url("javascript:alert('XSS')");]))
|
34
|
+
.must_equal ""
|
34
35
|
|
35
36
|
_(Sanitize.fragment(%[<div style="background-image: url(javascript:alert('XSS'))">],
|
36
|
-
Sanitize::Config::RELAXED)).must_equal
|
37
|
+
Sanitize::Config::RELAXED)).must_equal "<div></div>"
|
37
38
|
end
|
38
39
|
|
39
|
-
it
|
40
|
-
_(@s.properties(%[behavior: url(xss.htc);])).must_equal
|
40
|
+
it "should not allow behaviors" do
|
41
|
+
_(@s.properties(%[behavior: url(xss.htc);])).must_equal ""
|
41
42
|
end
|
42
43
|
|
43
|
-
describe
|
44
|
+
describe "sanitization bypass via CSS at-rule in HTML <style> element" do
|
44
45
|
before do
|
45
46
|
@s = Sanitize.new(Sanitize::Config::RELAXED)
|
46
47
|
end
|
47
48
|
|
48
|
-
it
|
49
|
+
it "is not possible to prematurely end a <style> element" do
|
49
50
|
assert_equal(
|
50
51
|
%[<style>@media<\\/style><iframe srcdoc='<script>alert(document.domain)<\\/script>'>{}</style>],
|
51
52
|
@s.fragment(%[<style>@media</sty/**/le><iframe srcdoc='<script>alert(document.domain)</script>'></style>])
|
data/test/test_malicious_html.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "common"
|
3
4
|
|
4
5
|
# Miscellaneous attempts to sneak maliciously crafted HTML past Sanitize. Many
|
5
6
|
# of these are courtesy of (or inspired by) the OWASP XSS Filter Evasion Cheat
|
@@ -7,7 +8,7 @@ require_relative 'common'
|
|
7
8
|
#
|
8
9
|
# https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
|
9
10
|
|
10
|
-
describe
|
11
|
+
describe "Malicious HTML" do
|
11
12
|
make_my_diffs_pretty!
|
12
13
|
parallelize_me!
|
13
14
|
|
@@ -15,114 +16,114 @@ describe 'Malicious HTML' do
|
|
15
16
|
@s = Sanitize.new(Sanitize::Config::RELAXED)
|
16
17
|
end
|
17
18
|
|
18
|
-
describe
|
19
|
-
it
|
20
|
-
_(@s.fragment(%[<!--[if gte IE 4]>\n<script>alert('XSS');</script>\n<![endif]-->]))
|
21
|
-
must_equal
|
19
|
+
describe "comments" do
|
20
|
+
it "should not allow script injection via conditional comments" do
|
21
|
+
_(@s.fragment(%[<!--[if gte IE 4]>\n<script>alert('XSS');</script>\n<![endif]-->]))
|
22
|
+
.must_equal ""
|
22
23
|
end
|
23
24
|
end
|
24
25
|
|
25
|
-
describe
|
26
|
-
it
|
27
|
-
_(@s.fragment(
|
28
|
-
must_equal
|
26
|
+
describe "interpolation (ERB, PHP, etc.)" do
|
27
|
+
it "should escape ERB-style tags" do
|
28
|
+
_(@s.fragment("<% naughty_ruby_code %>"))
|
29
|
+
.must_equal "<% naughty_ruby_code %>"
|
29
30
|
|
30
|
-
_(@s.fragment(
|
31
|
-
must_equal
|
31
|
+
_(@s.fragment("<%= naughty_ruby_code %>"))
|
32
|
+
.must_equal "<%= naughty_ruby_code %>"
|
32
33
|
end
|
33
34
|
|
34
|
-
it
|
35
|
-
_(@s.fragment(
|
36
|
-
must_equal
|
35
|
+
it "should remove PHP-style tags" do
|
36
|
+
_(@s.fragment("<? naughtyPHPCode(); ?>"))
|
37
|
+
.must_equal ""
|
37
38
|
|
38
|
-
_(@s.fragment(
|
39
|
-
must_equal
|
39
|
+
_(@s.fragment("<?= naughtyPHPCode(); ?>"))
|
40
|
+
.must_equal ""
|
40
41
|
end
|
41
42
|
end
|
42
43
|
|
43
|
-
describe
|
44
|
-
it
|
45
|
-
_(@s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>'))
|
46
|
-
must_equal "<html><head></head><body></body></html>"
|
44
|
+
describe "<body>" do
|
45
|
+
it "should not be possible to inject JS via a malformed event attribute" do
|
46
|
+
_(@s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>'))
|
47
|
+
.must_equal "<html><head></head><body></body></html>"
|
47
48
|
end
|
48
49
|
end
|
49
50
|
|
50
|
-
describe
|
51
|
-
it
|
52
|
-
_(@s.fragment(%
|
53
|
-
must_equal
|
51
|
+
describe "<iframe>" do
|
52
|
+
it "should not be possible to inject an iframe using an improperly closed tag" do
|
53
|
+
_(@s.fragment(%(<iframe src=http://ha.ckers.org/scriptlet.html <)))
|
54
|
+
.must_equal ""
|
54
55
|
end
|
55
56
|
end
|
56
57
|
|
57
|
-
describe
|
58
|
-
it
|
59
|
-
_(@s.fragment("<img src=javascript:alert('XSS')>")).must_equal
|
58
|
+
describe "<img>" do
|
59
|
+
it "should not be possible to inject JS via an unquoted <img> src attribute" do
|
60
|
+
_(@s.fragment("<img src=javascript:alert('XSS')>")).must_equal "<img>"
|
60
61
|
end
|
61
62
|
|
62
|
-
it
|
63
|
-
_(@s.fragment("<img src=`javascript:alert('XSS')`>")).must_equal
|
63
|
+
it "should not be possible to inject JS using grave accents as <img> src delimiters" do
|
64
|
+
_(@s.fragment("<img src=`javascript:alert('XSS')`>")).must_equal "<img>"
|
64
65
|
end
|
65
66
|
|
66
|
-
it
|
67
|
-
_(@s.fragment('<img """><script>alert("XSS")</script>">'))
|
68
|
-
must_equal '<img>">'
|
67
|
+
it "should not be possible to inject <script> via a malformed <img> tag" do
|
68
|
+
_(@s.fragment('<img """><script>alert("XSS")</script>">'))
|
69
|
+
.must_equal '<img>">'
|
69
70
|
end
|
70
71
|
|
71
|
-
it
|
72
|
-
_(@s.fragment(
|
73
|
-
must_equal
|
72
|
+
it "should not be possible to inject protocol-based JS" do
|
73
|
+
_(@s.fragment("<img src=javascript:alert('XSS')>"))
|
74
|
+
.must_equal "<img>"
|
74
75
|
|
75
|
-
_(@s.fragment(
|
76
|
-
must_equal
|
76
|
+
_(@s.fragment("<img src=javascript:alert('XSS')>"))
|
77
|
+
.must_equal "<img>"
|
77
78
|
|
78
|
-
_(@s.fragment(
|
79
|
-
must_equal
|
79
|
+
_(@s.fragment("<img src=javascript:alert('XSS')>"))
|
80
|
+
.must_equal "<img>"
|
80
81
|
|
81
82
|
# Encoded tab character.
|
82
|
-
_(@s.fragment(%[<img src="jav	ascript:alert('XSS');">]))
|
83
|
-
must_equal
|
83
|
+
_(@s.fragment(%[<img src="jav	ascript:alert('XSS');">]))
|
84
|
+
.must_equal "<img>"
|
84
85
|
|
85
86
|
# Encoded newline.
|
86
|
-
_(@s.fragment(%[<img src="jav
ascript:alert('XSS');">]))
|
87
|
-
must_equal
|
87
|
+
_(@s.fragment(%[<img src="jav
ascript:alert('XSS');">]))
|
88
|
+
.must_equal "<img>"
|
88
89
|
|
89
90
|
# Encoded carriage return.
|
90
|
-
_(@s.fragment(%[<img src="jav
ascript:alert('XSS');">]))
|
91
|
-
must_equal
|
91
|
+
_(@s.fragment(%[<img src="jav
ascript:alert('XSS');">]))
|
92
|
+
.must_equal "<img>"
|
92
93
|
|
93
94
|
# Null byte.
|
94
|
-
_(@s.fragment(%[<img src=java\0script:alert("XSS")>]))
|
95
|
-
must_equal
|
95
|
+
_(@s.fragment(%[<img src=java\0script:alert("XSS")>]))
|
96
|
+
.must_equal "<img>"
|
96
97
|
|
97
98
|
# Spaces plus meta char.
|
98
|
-
_(@s.fragment(%[<img src="  javascript:alert('XSS');">]))
|
99
|
-
must_equal
|
99
|
+
_(@s.fragment(%[<img src="  javascript:alert('XSS');">]))
|
100
|
+
.must_equal "<img>"
|
100
101
|
|
101
102
|
# Mixed spaces and tabs.
|
102
|
-
_(@s.fragment(%[<img src="j\na v\tascript://alert('XSS');">]))
|
103
|
-
must_equal
|
103
|
+
_(@s.fragment(%[<img src="j\na v\tascript://alert('XSS');">]))
|
104
|
+
.must_equal "<img>"
|
104
105
|
end
|
105
106
|
|
106
|
-
it
|
107
|
-
_(@s.fragment(%[<img src="jav\tascript:alert('XSS');">]))
|
108
|
-
must_equal
|
107
|
+
it "should not be possible to inject protocol-based JS via whitespace" do
|
108
|
+
_(@s.fragment(%[<img src="jav\tascript:alert('XSS');">]))
|
109
|
+
.must_equal "<img>"
|
109
110
|
end
|
110
111
|
|
111
|
-
it
|
112
|
-
_(@s.fragment(%[<img src="javascript:alert('XSS')"]))
|
113
|
-
must_equal
|
112
|
+
it "should not be possible to inject JS using a half-open <img> tag" do
|
113
|
+
_(@s.fragment(%[<img src="javascript:alert('XSS')"]))
|
114
|
+
.must_equal ""
|
114
115
|
end
|
115
116
|
end
|
116
117
|
|
117
|
-
describe
|
118
|
-
it
|
119
|
-
_(@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>]))
|
120
|
-
must_equal
|
118
|
+
describe "<script>" do
|
119
|
+
it "should not be possible to inject <script> using a malformed non-alphanumeric tag name" do
|
120
|
+
_(@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>]))
|
121
|
+
.must_equal ""
|
121
122
|
end
|
122
123
|
|
123
|
-
it
|
124
|
-
_(@s.fragment(%[<<script>alert("XSS");//<</script>]))
|
125
|
-
must_equal
|
124
|
+
it "should not be possible to inject <script> via extraneous open brackets" do
|
125
|
+
_(@s.fragment(%[<<script>alert("XSS");//<</script>]))
|
126
|
+
.must_equal "<"
|
126
127
|
end
|
127
128
|
end
|
128
129
|
|
@@ -134,29 +135,29 @@ describe 'Malicious HTML' do
|
|
134
135
|
#
|
135
136
|
# The relevant libxml2 code is here:
|
136
137
|
# <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
|
137
|
-
describe
|
138
|
+
describe "unsafe libxml2 server-side includes in attributes" do
|
138
139
|
using_unpatched_libxml2 = Nokogiri::VersionInfo.instance.libxml2_using_system?
|
139
140
|
|
140
141
|
tag_configs = [
|
141
142
|
{
|
142
|
-
tag_name:
|
143
|
-
escaped_attrs: %w[
|
143
|
+
tag_name: "a",
|
144
|
+
escaped_attrs: %w[action href src name],
|
144
145
|
unescaped_attrs: []
|
145
146
|
},
|
146
147
|
|
147
148
|
{
|
148
|
-
tag_name:
|
149
|
-
escaped_attrs: %w[
|
150
|
-
unescaped_attrs: %w[
|
149
|
+
tag_name: "div",
|
150
|
+
escaped_attrs: %w[action href src],
|
151
|
+
unescaped_attrs: %w[name]
|
151
152
|
}
|
152
153
|
]
|
153
154
|
|
154
155
|
before do
|
155
156
|
@s = Sanitize.new({
|
156
|
-
elements: %w[
|
157
|
+
elements: %w[a div],
|
157
158
|
|
158
159
|
attributes: {
|
159
|
-
all: %w[
|
160
|
+
all: %w[action href src name]
|
160
161
|
}
|
161
162
|
})
|
162
163
|
end
|
@@ -167,13 +168,13 @@ describe 'Malicious HTML' do
|
|
167
168
|
tag_config[:escaped_attrs].each do |attr_name|
|
168
169
|
input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
|
169
170
|
|
170
|
-
it
|
171
|
+
it "should escape unsafe characters in attributes" do
|
171
172
|
skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2
|
172
173
|
|
173
174
|
# This uses Nokogumbo's HTML-compliant serializer rather than
|
174
175
|
# libxml2's.
|
175
|
-
_(@s.fragment(input))
|
176
|
-
must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
176
|
+
_(@s.fragment(input))
|
177
|
+
.must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
177
178
|
|
178
179
|
# This uses the not-quite-standards-compliant libxml2 serializer via
|
179
180
|
# Nokogiri, so the output may be a little different as of Nokogiri
|
@@ -181,11 +182,11 @@ describe 'Malicious HTML' do
|
|
181
182
|
# https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
|
182
183
|
fragment = Nokogiri::HTML.fragment(input)
|
183
184
|
@s.node!(fragment)
|
184
|
-
_(fragment.to_html)
|
185
|
-
must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
185
|
+
_(fragment.to_html)
|
186
|
+
.must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
186
187
|
end
|
187
188
|
|
188
|
-
it
|
189
|
+
it "should round-trip to the same output" do
|
189
190
|
output = @s.fragment(input)
|
190
191
|
_(@s.fragment(output)).must_equal(output)
|
191
192
|
end
|
@@ -194,13 +195,13 @@ describe 'Malicious HTML' do
|
|
194
195
|
tag_config[:unescaped_attrs].each do |attr_name|
|
195
196
|
input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
|
196
197
|
|
197
|
-
it
|
198
|
+
it "should not escape characters unnecessarily" do
|
198
199
|
skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2
|
199
200
|
|
200
201
|
# This uses Nokogumbo's HTML-compliant serializer rather than
|
201
202
|
# libxml2's.
|
202
|
-
_(@s.fragment(input))
|
203
|
-
must_equal(%[<#{tag_name} #{attr_name}="examp<!--" onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
203
|
+
_(@s.fragment(input))
|
204
|
+
.must_equal(%[<#{tag_name} #{attr_name}="examp<!--" onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
204
205
|
|
205
206
|
# This uses the not-quite-standards-compliant libxml2 serializer via
|
206
207
|
# Nokogiri, so the output may be a little different as of Nokogiri
|
@@ -208,11 +209,11 @@ describe 'Malicious HTML' do
|
|
208
209
|
# https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
|
209
210
|
fragment = Nokogiri::HTML.fragment(input)
|
210
211
|
@s.node!(fragment)
|
211
|
-
_(fragment.to_html)
|
212
|
-
must_equal(%[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>])
|
212
|
+
_(fragment.to_html)
|
213
|
+
.must_equal(%[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>])
|
213
214
|
end
|
214
215
|
|
215
|
-
it
|
216
|
+
it "should round-trip to the same output" do
|
216
217
|
output = @s.fragment(input)
|
217
218
|
_(@s.fragment(output)).must_equal(output)
|
218
219
|
end
|
@@ -221,14 +222,14 @@ describe 'Malicious HTML' do
|
|
221
222
|
end
|
222
223
|
|
223
224
|
# https://github.com/rgrove/sanitize/security/advisories/GHSA-p4x4-rw2p-8j8m
|
224
|
-
describe
|
225
|
-
it
|
225
|
+
describe "foreign content bypass in relaxed config" do
|
226
|
+
it "prevents a sanitization bypass via carefully crafted foreign content" do
|
226
227
|
%w[iframe noembed noframes noscript plaintext script style xmp].each do |tag_name|
|
227
|
-
_(@s.fragment(%[<math><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]))
|
228
|
-
must_equal
|
228
|
+
_(@s.fragment(%[<math><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]))
|
229
|
+
.must_equal ""
|
229
230
|
|
230
|
-
_(@s.fragment(%[<svg><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]))
|
231
|
-
must_equal
|
231
|
+
_(@s.fragment(%[<svg><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]))
|
232
|
+
.must_equal ""
|
232
233
|
end
|
233
234
|
end
|
234
235
|
end
|
@@ -240,7 +241,7 @@ describe 'Malicious HTML' do
|
|
240
241
|
#
|
241
242
|
# Do not use the custom configs you see in these tests! If you do, you may be
|
242
243
|
# creating XSS vulnerabilities in your application.
|
243
|
-
describe
|
244
|
+
describe "foreign content bypass in unsafe custom config that allows MathML or SVG" do
|
244
245
|
unescaped_content_elements = %w[
|
245
246
|
noembed
|
246
247
|
noframes
|
@@ -305,33 +306,33 @@ describe 'Malicious HTML' do
|
|
305
306
|
removed_elements.each do |name|
|
306
307
|
it "removes `<#{name}>` elements in a MathML namespace" do
|
307
308
|
assert_equal(
|
308
|
-
|
309
|
+
"<math></math>",
|
309
310
|
@s.fragment("<math><#{name}><img src=x onerror=alert(1)></#{name}>")
|
310
311
|
)
|
311
312
|
end
|
312
313
|
|
313
314
|
it "removes `<#{name}>` elements in an SVG namespace" do
|
314
315
|
assert_equal(
|
315
|
-
|
316
|
+
"<svg></svg>",
|
316
317
|
@s.fragment("<svg><#{name}><img src=x onerror=alert(1)></#{name}>")
|
317
318
|
)
|
318
319
|
end
|
319
320
|
end
|
320
321
|
end
|
321
322
|
|
322
|
-
describe
|
323
|
+
describe "sanitization bypass by exploiting scripting-disabled <noscript> behavior" do
|
323
324
|
before do
|
324
325
|
@s = Sanitize.new(
|
325
326
|
Sanitize::Config.merge(
|
326
327
|
Sanitize::Config::RELAXED,
|
327
|
-
elements: Sanitize::Config::RELAXED[:elements] + [
|
328
|
+
elements: Sanitize::Config::RELAXED[:elements] + ["noscript"]
|
328
329
|
)
|
329
330
|
)
|
330
331
|
end
|
331
332
|
|
332
|
-
it
|
333
|
+
it "is prevented by removing `<noscript>` elements regardless of the allowlist" do
|
333
334
|
assert_equal(
|
334
|
-
|
335
|
+
"",
|
335
336
|
@s.fragment(%[<noscript><div id='</noscript><img src=x onerror=alert(1)> '>])
|
336
337
|
)
|
337
338
|
end
|
data/test/test_parser.rb
CHANGED
@@ -1,36 +1,37 @@
|
|
1
|
-
#
|
2
|
-
require_relative 'common'
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require_relative "common"
|
4
|
+
|
5
|
+
describe "Parser" do
|
5
6
|
make_my_diffs_pretty!
|
6
7
|
parallelize_me!
|
7
8
|
|
8
|
-
it
|
9
|
+
it "should translate valid entities into characters" do
|
9
10
|
_(Sanitize.fragment("'é&")).must_equal("'é&")
|
10
11
|
end
|
11
12
|
|
12
|
-
it
|
13
|
-
_(Sanitize.fragment(
|
13
|
+
it "should translate orphaned ampersands into entities" do
|
14
|
+
_(Sanitize.fragment("at&t")).must_equal("at&t")
|
14
15
|
end
|
15
16
|
|
16
|
-
it
|
17
|
-
_(Sanitize.fragment("<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>", :
|
17
|
+
it "should not add newlines after tags when serializing a fragment" do
|
18
|
+
_(Sanitize.fragment("<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>", elements: ["div", "p"]))
|
18
19
|
.must_equal "<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>"
|
19
20
|
end
|
20
21
|
|
21
|
-
it
|
22
|
-
_(Sanitize.fragment(
|
23
|
-
_(Sanitize.fragment(
|
22
|
+
it "should not have the Nokogiri 1.4.2+ unterminated script/style element bug" do
|
23
|
+
_(Sanitize.fragment("foo <script>bar")).must_equal "foo "
|
24
|
+
_(Sanitize.fragment("foo <style>bar")).must_equal "foo "
|
24
25
|
end
|
25
26
|
|
26
27
|
it 'ambiguous non-tag brackets like "1 > 2 and 2 < 1" should be parsed correctly' do
|
27
|
-
_(Sanitize.fragment(
|
28
|
-
_(Sanitize.fragment(
|
28
|
+
_(Sanitize.fragment("1 > 2 and 2 < 1")).must_equal "1 > 2 and 2 < 1"
|
29
|
+
_(Sanitize.fragment("OMG HAPPY BIRTHDAY! *<:-D")).must_equal "OMG HAPPY BIRTHDAY! *<:-D"
|
29
30
|
end
|
30
31
|
|
31
|
-
describe
|
32
|
-
it
|
33
|
-
html = %
|
32
|
+
describe "when siblings are added after a node during traversal" do
|
33
|
+
it "the added siblings should be traversed" do
|
34
|
+
html = %(
|
34
35
|
<div id="one">
|
35
36
|
<div id="one_one">
|
36
37
|
<div id="one_one_one"></div>
|
@@ -42,20 +43,20 @@ describe 'Parser' do
|
|
42
43
|
<div id="two_two"></div>
|
43
44
|
</div>
|
44
45
|
<div id="three"></div>
|
45
|
-
|
46
|
+
)
|
46
47
|
|
47
48
|
siblings = []
|
48
49
|
|
49
|
-
Sanitize.fragment(html, :
|
50
|
-
|
50
|
+
Sanitize.fragment(html, transformers: ->(env) {
|
51
|
+
name = env[:node].name
|
51
52
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
53
|
+
if name == "div"
|
54
|
+
env[:node].add_next_sibling('<b id="added_' + env[:node]["id"] + '">')
|
55
|
+
elsif name == "b"
|
56
|
+
siblings << env[:node][:id]
|
57
|
+
end
|
57
58
|
|
58
|
-
|
59
|
+
{node_allowlist: [env[:node]]}
|
59
60
|
})
|
60
61
|
|
61
62
|
# All siblings should be traversed, and in the order added.
|