sanitize 6.1.3 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/{HISTORY.md → CHANGELOG.md} +32 -14
- data/LICENSE +3 -1
- data/README.md +120 -238
- data/lib/sanitize/config/basic.rb +15 -15
- data/lib/sanitize/config/default.rb +45 -45
- data/lib/sanitize/config/relaxed.rb +136 -32
- data/lib/sanitize/config/restricted.rb +2 -2
- data/lib/sanitize/config.rb +12 -14
- data/lib/sanitize/css.rb +308 -308
- data/lib/sanitize/transformers/clean_cdata.rb +9 -9
- data/lib/sanitize/transformers/clean_comment.rb +9 -9
- data/lib/sanitize/transformers/clean_css.rb +59 -55
- data/lib/sanitize/transformers/clean_doctype.rb +15 -15
- data/lib/sanitize/transformers/clean_element.rb +220 -237
- data/lib/sanitize/version.rb +3 -1
- data/lib/sanitize.rb +38 -38
- data/test/common.rb +4 -3
- data/test/test_clean_comment.rb +26 -25
- data/test/test_clean_css.rb +14 -13
- data/test/test_clean_doctype.rb +21 -20
- data/test/test_clean_element.rb +258 -273
- data/test/test_config.rb +22 -21
- data/test/test_malicious_css.rb +20 -19
- data/test/test_malicious_html.rb +100 -99
- data/test/test_parser.rb +26 -25
- data/test/test_sanitize.rb +70 -69
- data/test/test_sanitize_css.rb +149 -114
- data/test/test_transformers.rb +81 -83
- metadata +14 -43
data/test/test_config.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
|
-
#
|
2
|
-
require_relative 'common'
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require_relative "common"
|
4
|
+
|
5
|
+
describe "Config" do
|
5
6
|
make_my_diffs_pretty!
|
6
7
|
parallelize_me!
|
7
8
|
|
@@ -9,22 +10,22 @@ describe 'Config' do
|
|
9
10
|
_(config).must_be :frozen?
|
10
11
|
|
11
12
|
if Hash === config
|
12
|
-
config.each_value {|v| verify_deeply_frozen(v) }
|
13
|
+
config.each_value { |v| verify_deeply_frozen(v) }
|
13
14
|
elsif Set === config || Array === config
|
14
|
-
config.each {|v| verify_deeply_frozen(v) }
|
15
|
+
config.each { |v| verify_deeply_frozen(v) }
|
15
16
|
end
|
16
17
|
end
|
17
18
|
|
18
|
-
it
|
19
|
+
it "built-in configs should be deeply frozen" do
|
19
20
|
verify_deeply_frozen Sanitize::Config::DEFAULT
|
20
21
|
verify_deeply_frozen Sanitize::Config::BASIC
|
21
22
|
verify_deeply_frozen Sanitize::Config::RELAXED
|
22
23
|
verify_deeply_frozen Sanitize::Config::RESTRICTED
|
23
24
|
end
|
24
25
|
|
25
|
-
describe
|
26
|
-
it
|
27
|
-
a = {:
|
26
|
+
describe ".freeze_config" do
|
27
|
+
it "should deeply freeze and return a configuration Hash" do
|
28
|
+
a = {one: {one_one: [0, "1", :a], one_two: false, one_three: Set.new([:a, :b, :c])}}
|
28
29
|
b = Sanitize::Config.freeze_config(a)
|
29
30
|
|
30
31
|
_(b).must_be_same_as a
|
@@ -32,11 +33,11 @@ describe 'Config' do
|
|
32
33
|
end
|
33
34
|
end
|
34
35
|
|
35
|
-
describe
|
36
|
-
it
|
36
|
+
describe ".merge" do
|
37
|
+
it "should deeply merge a configuration Hash" do
|
37
38
|
# Freeze to ensure that we get an error if either Hash is modified.
|
38
|
-
a = Sanitize::Config.freeze_config({:
|
39
|
-
b = Sanitize::Config.freeze_config({:
|
39
|
+
a = Sanitize::Config.freeze_config({one: {one_one: [0, "1", :a], one_two: false, one_three: Set.new([:a, :b, :c])}})
|
40
|
+
b = Sanitize::Config.freeze_config({one: {one_two: true, one_three: 3}, two: 2})
|
40
41
|
|
41
42
|
c = Sanitize::Config.merge(a, b)
|
42
43
|
|
@@ -44,22 +45,22 @@ describe 'Config' do
|
|
44
45
|
_(c).wont_be_same_as b
|
45
46
|
|
46
47
|
_(c).must_equal(
|
47
|
-
:
|
48
|
-
:
|
49
|
-
:
|
50
|
-
:
|
48
|
+
one: {
|
49
|
+
one_one: [0, "1", :a],
|
50
|
+
one_two: true,
|
51
|
+
one_three: 3
|
51
52
|
},
|
52
53
|
|
53
|
-
:
|
54
|
+
two: 2
|
54
55
|
)
|
55
56
|
|
56
57
|
_(c[:one]).wont_be_same_as a[:one]
|
57
58
|
_(c[:one][:one_one]).wont_be_same_as a[:one][:one_one]
|
58
59
|
end
|
59
60
|
|
60
|
-
it
|
61
|
-
_(proc { Sanitize::Config.merge(
|
62
|
-
_(proc { Sanitize::Config.merge({},
|
61
|
+
it "should raise an ArgumentError if either argument is not a Hash" do
|
62
|
+
_(proc { Sanitize::Config.merge("foo", {}) }).must_raise ArgumentError
|
63
|
+
_(proc { Sanitize::Config.merge({}, "foo") }).must_raise ArgumentError
|
63
64
|
end
|
64
65
|
end
|
65
66
|
end
|
data/test/test_malicious_css.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "common"
|
3
4
|
|
4
5
|
# Miscellaneous attempts to sneak maliciously crafted CSS past Sanitize. Some of
|
5
6
|
# these are courtesy of (or inspired by) the OWASP XSS Filter Evasion Cheat
|
@@ -7,7 +8,7 @@ require_relative 'common'
|
|
7
8
|
#
|
8
9
|
# https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
|
9
10
|
|
10
|
-
describe
|
11
|
+
describe "Malicious CSS" do
|
11
12
|
make_my_diffs_pretty!
|
12
13
|
parallelize_me!
|
13
14
|
|
@@ -15,37 +16,37 @@ describe 'Malicious CSS' do
|
|
15
16
|
@s = Sanitize::CSS.new(Sanitize::Config::RELAXED)
|
16
17
|
end
|
17
18
|
|
18
|
-
it
|
19
|
-
_(@s.properties(%[width:expr/*XSS*/ession(alert('XSS'))]))
|
20
|
-
must_equal
|
19
|
+
it "should not be possible to inject an expression by munging it with a comment" do
|
20
|
+
_(@s.properties(%[width:expr/*XSS*/ession(alert('XSS'))]))
|
21
|
+
.must_equal ""
|
21
22
|
|
22
|
-
_(@s.properties(%[width:ex/*XSS*//*/*/pression(alert("XSS"))]))
|
23
|
-
must_equal
|
23
|
+
_(@s.properties(%[width:ex/*XSS*//*/*/pression(alert("XSS"))]))
|
24
|
+
.must_equal ""
|
24
25
|
end
|
25
26
|
|
26
|
-
it
|
27
|
-
_(@s.properties(%[width:\nexpression(alert('XSS'));]))
|
28
|
-
must_equal
|
27
|
+
it "should not be possible to inject an expression by munging it with a newline" do
|
28
|
+
_(@s.properties(%[width:\nexpression(alert('XSS'));]))
|
29
|
+
.must_equal ""
|
29
30
|
end
|
30
31
|
|
31
|
-
it
|
32
|
-
_(@s.properties(%[background-image:url("javascript:alert('XSS')");]))
|
33
|
-
must_equal
|
32
|
+
it "should not allow the javascript protocol" do
|
33
|
+
_(@s.properties(%[background-image:url("javascript:alert('XSS')");]))
|
34
|
+
.must_equal ""
|
34
35
|
|
35
36
|
_(Sanitize.fragment(%[<div style="background-image: url(javascript:alert('XSS'))">],
|
36
|
-
Sanitize::Config::RELAXED)).must_equal
|
37
|
+
Sanitize::Config::RELAXED)).must_equal "<div></div>"
|
37
38
|
end
|
38
39
|
|
39
|
-
it
|
40
|
-
_(@s.properties(%[behavior: url(xss.htc);])).must_equal
|
40
|
+
it "should not allow behaviors" do
|
41
|
+
_(@s.properties(%[behavior: url(xss.htc);])).must_equal ""
|
41
42
|
end
|
42
43
|
|
43
|
-
describe
|
44
|
+
describe "sanitization bypass via CSS at-rule in HTML <style> element" do
|
44
45
|
before do
|
45
46
|
@s = Sanitize.new(Sanitize::Config::RELAXED)
|
46
47
|
end
|
47
48
|
|
48
|
-
it
|
49
|
+
it "is not possible to prematurely end a <style> element" do
|
49
50
|
assert_equal(
|
50
51
|
%[<style>@media<\\/style><iframe srcdoc='<script>alert(document.domain)<\\/script>'>{}</style>],
|
51
52
|
@s.fragment(%[<style>@media</sty/**/le><iframe srcdoc='<script>alert(document.domain)</script>'></style>])
|
data/test/test_malicious_html.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "common"
|
3
4
|
|
4
5
|
# Miscellaneous attempts to sneak maliciously crafted HTML past Sanitize. Many
|
5
6
|
# of these are courtesy of (or inspired by) the OWASP XSS Filter Evasion Cheat
|
@@ -7,7 +8,7 @@ require_relative 'common'
|
|
7
8
|
#
|
8
9
|
# https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
|
9
10
|
|
10
|
-
describe
|
11
|
+
describe "Malicious HTML" do
|
11
12
|
make_my_diffs_pretty!
|
12
13
|
parallelize_me!
|
13
14
|
|
@@ -15,114 +16,114 @@ describe 'Malicious HTML' do
|
|
15
16
|
@s = Sanitize.new(Sanitize::Config::RELAXED)
|
16
17
|
end
|
17
18
|
|
18
|
-
describe
|
19
|
-
it
|
20
|
-
_(@s.fragment(%[<!--[if gte IE 4]>\n<script>alert('XSS');</script>\n<![endif]-->]))
|
21
|
-
must_equal
|
19
|
+
describe "comments" do
|
20
|
+
it "should not allow script injection via conditional comments" do
|
21
|
+
_(@s.fragment(%[<!--[if gte IE 4]>\n<script>alert('XSS');</script>\n<![endif]-->]))
|
22
|
+
.must_equal ""
|
22
23
|
end
|
23
24
|
end
|
24
25
|
|
25
|
-
describe
|
26
|
-
it
|
27
|
-
_(@s.fragment(
|
28
|
-
must_equal
|
26
|
+
describe "interpolation (ERB, PHP, etc.)" do
|
27
|
+
it "should escape ERB-style tags" do
|
28
|
+
_(@s.fragment("<% naughty_ruby_code %>"))
|
29
|
+
.must_equal "&lt;% naughty_ruby_code %&gt;"
|
29
30
|
|
30
|
-
_(@s.fragment(
|
31
|
-
must_equal
|
31
|
+
_(@s.fragment("<%= naughty_ruby_code %>"))
|
32
|
+
.must_equal "&lt;%= naughty_ruby_code %&gt;"
|
32
33
|
end
|
33
34
|
|
34
|
-
it
|
35
|
-
_(@s.fragment(
|
36
|
-
must_equal
|
35
|
+
it "should remove PHP-style tags" do
|
36
|
+
_(@s.fragment("<? naughtyPHPCode(); ?>"))
|
37
|
+
.must_equal ""
|
37
38
|
|
38
|
-
_(@s.fragment(
|
39
|
-
must_equal
|
39
|
+
_(@s.fragment("<?= naughtyPHPCode(); ?>"))
|
40
|
+
.must_equal ""
|
40
41
|
end
|
41
42
|
end
|
42
43
|
|
43
|
-
describe
|
44
|
-
it
|
45
|
-
_(@s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>'))
|
46
|
-
must_equal "<html><head></head><body></body></html>"
|
44
|
+
describe "<body>" do
|
45
|
+
it "should not be possible to inject JS via a malformed event attribute" do
|
46
|
+
_(@s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>'))
|
47
|
+
.must_equal "<html><head></head><body></body></html>"
|
47
48
|
end
|
48
49
|
end
|
49
50
|
|
50
|
-
describe
|
51
|
-
it
|
52
|
-
_(@s.fragment(%
|
53
|
-
must_equal
|
51
|
+
describe "<iframe>" do
|
52
|
+
it "should not be possible to inject an iframe using an improperly closed tag" do
|
53
|
+
_(@s.fragment(%(<iframe src=http://ha.ckers.org/scriptlet.html <)))
|
54
|
+
.must_equal ""
|
54
55
|
end
|
55
56
|
end
|
56
57
|
|
57
|
-
describe
|
58
|
-
it
|
59
|
-
_(@s.fragment("<img src=javascript:alert('XSS')>")).must_equal
|
58
|
+
describe "<img>" do
|
59
|
+
it "should not be possible to inject JS via an unquoted <img> src attribute" do
|
60
|
+
_(@s.fragment("<img src=javascript:alert('XSS')>")).must_equal "<img>"
|
60
61
|
end
|
61
62
|
|
62
|
-
it
|
63
|
-
_(@s.fragment("<img src=`javascript:alert('XSS')`>")).must_equal
|
63
|
+
it "should not be possible to inject JS using grave accents as <img> src delimiters" do
|
64
|
+
_(@s.fragment("<img src=`javascript:alert('XSS')`>")).must_equal "<img>"
|
64
65
|
end
|
65
66
|
|
66
|
-
it
|
67
|
-
_(@s.fragment('<img """><script>alert("XSS")</script>">'))
|
68
|
-
must_equal '<img>">'
|
67
|
+
it "should not be possible to inject <script> via a malformed <img> tag" do
|
68
|
+
_(@s.fragment('<img """><script>alert("XSS")</script>">'))
|
69
|
+
.must_equal '<img>">'
|
69
70
|
end
|
70
71
|
|
71
|
-
it
|
72
|
-
_(@s.fragment(
|
73
|
-
must_equal
|
72
|
+
it "should not be possible to inject protocol-based JS" do
|
73
|
+
_(@s.fragment("<img src=javascript:alert('XSS')>"))
|
74
|
+
.must_equal "<img>"
|
74
75
|
|
75
|
-
_(@s.fragment(
|
76
|
-
must_equal
|
76
|
+
_(@s.fragment("<img src=javascript:alert('XSS')>"))
|
77
|
+
.must_equal "<img>"
|
77
78
|
|
78
|
-
_(@s.fragment(
|
79
|
-
must_equal
|
79
|
+
_(@s.fragment("<img src=javascript:alert('XSS')>"))
|
80
|
+
.must_equal "<img>"
|
80
81
|
|
81
82
|
# Encoded tab character.
|
82
|
-
_(@s.fragment(%[<img src="jav	ascript:alert('XSS');">]))
|
83
|
-
must_equal
|
83
|
+
_(@s.fragment(%[<img src="jav&#x09;ascript:alert('XSS');">]))
|
84
|
+
.must_equal "<img>"
|
84
85
|
|
85
86
|
# Encoded newline.
|
86
|
-
_(@s.fragment(%[<img src="jav
ascript:alert('XSS');">]))
|
87
|
-
must_equal
|
87
|
+
_(@s.fragment(%[<img src="jav&#x0A;ascript:alert('XSS');">]))
|
88
|
+
.must_equal "<img>"
|
88
89
|
|
89
90
|
# Encoded carriage return.
|
90
|
-
_(@s.fragment(%[<img src="jav
ascript:alert('XSS');">]))
|
91
|
-
must_equal
|
91
|
+
_(@s.fragment(%[<img src="jav&#x0D;ascript:alert('XSS');">]))
|
92
|
+
.must_equal "<img>"
|
92
93
|
|
93
94
|
# Null byte.
|
94
|
-
_(@s.fragment(%[<img src=java\0script:alert("XSS")>]))
|
95
|
-
must_equal
|
95
|
+
_(@s.fragment(%[<img src=java\0script:alert("XSS")>]))
|
96
|
+
.must_equal "<img>"
|
96
97
|
|
97
98
|
# Spaces plus meta char.
|
98
|
-
_(@s.fragment(%[<img src="  javascript:alert('XSS');">]))
|
99
|
-
must_equal
|
99
|
+
_(@s.fragment(%[<img src="  javascript:alert('XSS');">]))
|
100
|
+
.must_equal "<img>"
|
100
101
|
|
101
102
|
# Mixed spaces and tabs.
|
102
|
-
_(@s.fragment(%[<img src="j\na v\tascript://alert('XSS');">]))
|
103
|
-
must_equal
|
103
|
+
_(@s.fragment(%[<img src="j\na v\tascript://alert('XSS');">]))
|
104
|
+
.must_equal "<img>"
|
104
105
|
end
|
105
106
|
|
106
|
-
it
|
107
|
-
_(@s.fragment(%[<img src="jav\tascript:alert('XSS');">]))
|
108
|
-
must_equal
|
107
|
+
it "should not be possible to inject protocol-based JS via whitespace" do
|
108
|
+
_(@s.fragment(%[<img src="jav\tascript:alert('XSS');">]))
|
109
|
+
.must_equal "<img>"
|
109
110
|
end
|
110
111
|
|
111
|
-
it
|
112
|
-
_(@s.fragment(%[<img src="javascript:alert('XSS')"]))
|
113
|
-
must_equal
|
112
|
+
it "should not be possible to inject JS using a half-open <img> tag" do
|
113
|
+
_(@s.fragment(%[<img src="javascript:alert('XSS')"]))
|
114
|
+
.must_equal ""
|
114
115
|
end
|
115
116
|
end
|
116
117
|
|
117
|
-
describe
|
118
|
-
it
|
119
|
-
_(@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>]))
|
120
|
-
must_equal
|
118
|
+
describe "<script>" do
|
119
|
+
it "should not be possible to inject <script> using a malformed non-alphanumeric tag name" do
|
120
|
+
_(@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>]))
|
121
|
+
.must_equal ""
|
121
122
|
end
|
122
123
|
|
123
|
-
it
|
124
|
-
_(@s.fragment(%[<<script>alert("XSS");//<</script>]))
|
125
|
-
must_equal
|
124
|
+
it "should not be possible to inject <script> via extraneous open brackets" do
|
125
|
+
_(@s.fragment(%[<<script>alert("XSS");//<</script>]))
|
126
|
+
.must_equal "&lt;"
|
126
127
|
end
|
127
128
|
end
|
128
129
|
|
@@ -134,29 +135,29 @@ describe 'Malicious HTML' do
|
|
134
135
|
#
|
135
136
|
# The relevant libxml2 code is here:
|
136
137
|
# <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
|
137
|
-
describe
|
138
|
+
describe "unsafe libxml2 server-side includes in attributes" do
|
138
139
|
using_unpatched_libxml2 = Nokogiri::VersionInfo.instance.libxml2_using_system?
|
139
140
|
|
140
141
|
tag_configs = [
|
141
142
|
{
|
142
|
-
tag_name:
|
143
|
-
escaped_attrs: %w[
|
143
|
+
tag_name: "a",
|
144
|
+
escaped_attrs: %w[action href src name],
|
144
145
|
unescaped_attrs: []
|
145
146
|
},
|
146
147
|
|
147
148
|
{
|
148
|
-
tag_name:
|
149
|
-
escaped_attrs: %w[
|
150
|
-
unescaped_attrs: %w[
|
149
|
+
tag_name: "div",
|
150
|
+
escaped_attrs: %w[action href src],
|
151
|
+
unescaped_attrs: %w[name]
|
151
152
|
}
|
152
153
|
]
|
153
154
|
|
154
155
|
before do
|
155
156
|
@s = Sanitize.new({
|
156
|
-
elements: %w[
|
157
|
+
elements: %w[a div],
|
157
158
|
|
158
159
|
attributes: {
|
159
|
-
all: %w[
|
160
|
+
all: %w[action href src name]
|
160
161
|
}
|
161
162
|
})
|
162
163
|
end
|
@@ -167,13 +168,13 @@ describe 'Malicious HTML' do
|
|
167
168
|
tag_config[:escaped_attrs].each do |attr_name|
|
168
169
|
input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
|
169
170
|
|
170
|
-
it
|
171
|
+
it "should escape unsafe characters in attributes" do
|
171
172
|
skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2
|
172
173
|
|
173
174
|
# This uses Nokogumbo's HTML-compliant serializer rather than
|
174
175
|
# libxml2's.
|
175
|
-
_(@s.fragment(input))
|
176
|
-
must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
176
|
+
_(@s.fragment(input))
|
177
|
+
.must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
177
178
|
|
178
179
|
# This uses the not-quite-standards-compliant libxml2 serializer via
|
179
180
|
# Nokogiri, so the output may be a little different as of Nokogiri
|
@@ -181,11 +182,11 @@ describe 'Malicious HTML' do
|
|
181
182
|
# https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
|
182
183
|
fragment = Nokogiri::HTML.fragment(input)
|
183
184
|
@s.node!(fragment)
|
184
|
-
_(fragment.to_html)
|
185
|
-
must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
185
|
+
_(fragment.to_html)
|
186
|
+
.must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
186
187
|
end
|
187
188
|
|
188
|
-
it
|
189
|
+
it "should round-trip to the same output" do
|
189
190
|
output = @s.fragment(input)
|
190
191
|
_(@s.fragment(output)).must_equal(output)
|
191
192
|
end
|
@@ -194,13 +195,13 @@ describe 'Malicious HTML' do
|
|
194
195
|
tag_config[:unescaped_attrs].each do |attr_name|
|
195
196
|
input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
|
196
197
|
|
197
|
-
it
|
198
|
+
it "should not escape characters unnecessarily" do
|
198
199
|
skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2
|
199
200
|
|
200
201
|
# This uses Nokogumbo's HTML-compliant serializer rather than
|
201
202
|
# libxml2's.
|
202
|
-
_(@s.fragment(input))
|
203
|
-
must_equal(%[<#{tag_name} #{attr_name}="examp<!--" onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
203
|
+
_(@s.fragment(input))
|
204
|
+
.must_equal(%[<#{tag_name} #{attr_name}="examp<!--" onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
204
205
|
|
205
206
|
# This uses the not-quite-standards-compliant libxml2 serializer via
|
206
207
|
# Nokogiri, so the output may be a little different as of Nokogiri
|
@@ -208,11 +209,11 @@ describe 'Malicious HTML' do
|
|
208
209
|
# https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
|
209
210
|
fragment = Nokogiri::HTML.fragment(input)
|
210
211
|
@s.node!(fragment)
|
211
|
-
_(fragment.to_html)
|
212
|
-
must_equal(%[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>])
|
212
|
+
_(fragment.to_html)
|
213
|
+
.must_equal(%[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>])
|
213
214
|
end
|
214
215
|
|
215
|
-
it
|
216
|
+
it "should round-trip to the same output" do
|
216
217
|
output = @s.fragment(input)
|
217
218
|
_(@s.fragment(output)).must_equal(output)
|
218
219
|
end
|
@@ -221,14 +222,14 @@ describe 'Malicious HTML' do
|
|
221
222
|
end
|
222
223
|
|
223
224
|
# https://github.com/rgrove/sanitize/security/advisories/GHSA-p4x4-rw2p-8j8m
|
224
|
-
describe
|
225
|
-
it
|
225
|
+
describe "foreign content bypass in relaxed config" do
|
226
|
+
it "prevents a sanitization bypass via carefully crafted foreign content" do
|
226
227
|
%w[iframe noembed noframes noscript plaintext script style xmp].each do |tag_name|
|
227
|
-
_(@s.fragment(%[<math><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]))
|
228
|
-
must_equal
|
228
|
+
_(@s.fragment(%[<math><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]))
|
229
|
+
.must_equal ""
|
229
230
|
|
230
|
-
_(@s.fragment(%[<svg><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]))
|
231
|
-
must_equal
|
231
|
+
_(@s.fragment(%[<svg><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]))
|
232
|
+
.must_equal ""
|
232
233
|
end
|
233
234
|
end
|
234
235
|
end
|
@@ -240,7 +241,7 @@ describe 'Malicious HTML' do
|
|
240
241
|
#
|
241
242
|
# Do not use the custom configs you see in these tests! If you do, you may be
|
242
243
|
# creating XSS vulnerabilities in your application.
|
243
|
-
describe
|
244
|
+
describe "foreign content bypass in unsafe custom config that allows MathML or SVG" do
|
244
245
|
unescaped_content_elements = %w[
|
245
246
|
noembed
|
246
247
|
noframes
|
@@ -305,33 +306,33 @@ describe 'Malicious HTML' do
|
|
305
306
|
removed_elements.each do |name|
|
306
307
|
it "removes `<#{name}>` elements in a MathML namespace" do
|
307
308
|
assert_equal(
|
308
|
-
|
309
|
+
"<math></math>",
|
309
310
|
@s.fragment("<math><#{name}><img src=x onerror=alert(1)></#{name}>")
|
310
311
|
)
|
311
312
|
end
|
312
313
|
|
313
314
|
it "removes `<#{name}>` elements in an SVG namespace" do
|
314
315
|
assert_equal(
|
315
|
-
|
316
|
+
"<svg></svg>",
|
316
317
|
@s.fragment("<svg><#{name}><img src=x onerror=alert(1)></#{name}>")
|
317
318
|
)
|
318
319
|
end
|
319
320
|
end
|
320
321
|
end
|
321
322
|
|
322
|
-
describe
|
323
|
+
describe "sanitization bypass by exploiting scripting-disabled <noscript> behavior" do
|
323
324
|
before do
|
324
325
|
@s = Sanitize.new(
|
325
326
|
Sanitize::Config.merge(
|
326
327
|
Sanitize::Config::RELAXED,
|
327
|
-
elements: Sanitize::Config::RELAXED[:elements] + [
|
328
|
+
elements: Sanitize::Config::RELAXED[:elements] + ["noscript"]
|
328
329
|
)
|
329
330
|
)
|
330
331
|
end
|
331
332
|
|
332
|
-
it
|
333
|
+
it "is prevented by removing `<noscript>` elements regardless of the allowlist" do
|
333
334
|
assert_equal(
|
334
|
-
|
335
|
+
"",
|
335
336
|
@s.fragment(%[<noscript><div id='</noscript><img src=x onerror=alert(1)> '>])
|
336
337
|
)
|
337
338
|
end
|
data/test/test_parser.rb
CHANGED
@@ -1,36 +1,37 @@
|
|
1
|
-
#
|
2
|
-
require_relative 'common'
|
1
|
+
# frozen_string_literal: true
|
3
2
|
|
4
|
-
|
3
|
+
require_relative "common"
|
4
|
+
|
5
|
+
describe "Parser" do
|
5
6
|
make_my_diffs_pretty!
|
6
7
|
parallelize_me!
|
7
8
|
|
8
|
-
it
|
9
|
+
it "should translate valid entities into characters" do
|
9
10
|
_(Sanitize.fragment("&apos;&eacute;&amp;")).must_equal("'é&amp;")
|
10
11
|
end
|
11
12
|
|
12
|
-
it
|
13
|
-
_(Sanitize.fragment(
|
13
|
+
it "should translate orphaned ampersands into entities" do
|
14
|
+
_(Sanitize.fragment("at&t")).must_equal("at&amp;t")
|
14
15
|
end
|
15
16
|
|
16
|
-
it
|
17
|
-
_(Sanitize.fragment("<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>", :
|
17
|
+
it "should not add newlines after tags when serializing a fragment" do
|
18
|
+
_(Sanitize.fragment("<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>", elements: ["div", "p"]))
|
18
19
|
.must_equal "<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>"
|
19
20
|
end
|
20
21
|
|
21
|
-
it
|
22
|
-
_(Sanitize.fragment(
|
23
|
-
_(Sanitize.fragment(
|
22
|
+
it "should not have the Nokogiri 1.4.2+ unterminated script/style element bug" do
|
23
|
+
_(Sanitize.fragment("foo <script>bar")).must_equal "foo "
|
24
|
+
_(Sanitize.fragment("foo <style>bar")).must_equal "foo "
|
24
25
|
end
|
25
26
|
|
26
27
|
it 'ambiguous non-tag brackets like "1 > 2 and 2 < 1" should be parsed correctly' do
|
27
|
-
_(Sanitize.fragment(
|
28
|
-
_(Sanitize.fragment(
|
28
|
+
_(Sanitize.fragment("1 > 2 and 2 < 1")).must_equal "1 &gt; 2 and 2 &lt; 1"
|
29
|
+
_(Sanitize.fragment("OMG HAPPY BIRTHDAY! *<:-D")).must_equal "OMG HAPPY BIRTHDAY! *&lt;:-D"
|
29
30
|
end
|
30
31
|
|
31
|
-
describe
|
32
|
-
it
|
33
|
-
html = %
|
32
|
+
describe "when siblings are added after a node during traversal" do
|
33
|
+
it "the added siblings should be traversed" do
|
34
|
+
html = %(
|
34
35
|
<div id="one">
|
35
36
|
<div id="one_one">
|
36
37
|
<div id="one_one_one"></div>
|
@@ -42,20 +43,20 @@ describe 'Parser' do
|
|
42
43
|
<div id="two_two"></div>
|
43
44
|
</div>
|
44
45
|
<div id="three"></div>
|
45
|
-
|
46
|
+
)
|
46
47
|
|
47
48
|
siblings = []
|
48
49
|
|
49
|
-
Sanitize.fragment(html, :
|
50
|
-
|
50
|
+
Sanitize.fragment(html, transformers: ->(env) {
|
51
|
+
name = env[:node].name
|
51
52
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
53
|
+
if name == "div"
|
54
|
+
env[:node].add_next_sibling('<b id="added_' + env[:node]["id"] + '">')
|
55
|
+
elsif name == "b"
|
56
|
+
siblings << env[:node][:id]
|
57
|
+
end
|
57
58
|
|
58
|
-
|
59
|
+
{node_allowlist: [env[:node]]}
|
59
60
|
})
|
60
61
|
|
61
62
|
# All siblings should be traversed, and in the order added.
|