rails-html-sanitizer 1.4.3 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,667 +1,1087 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "minitest/autorun"
2
4
  require "rails-html-sanitizer"
3
- require "rails/dom/testing/assertions/dom_assertions"
4
5
 
5
- puts Nokogiri::VERSION_INFO
6
+ puts "nokogiri version info: #{Nokogiri::VERSION_INFO}"
7
+ puts "html5 support: #{Rails::HTML::Sanitizer.html5_support?}"
8
+
9
+ #
10
+ # NOTE that many of these tests contain multiple acceptable results.
11
+ #
12
+ # In some cases, this is because of how the HTML4 parser's recovery behavior changed in libxml2
13
+ # 2.9.14 and 2.10.0. For more details, see:
14
+ #
15
+ # - https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5
16
+ # - https://gitlab.gnome.org/GNOME/libxml2/-/issues/380
17
+ #
18
+ # In other cases, multiple acceptable results are provided because Nokogiri's vendored libxml2 is
19
+ # patched to entity-escape server-side includes (aka "SSI", aka `<!-- #directive param=value -->`).
20
+ #
21
+ # In many other cases, it's because the parser used by Nokogiri on JRuby (xerces+nekohtml) parses
22
+ # slightly differently than libxml2 in edge cases.
23
+ #
24
+ module SanitizerTests
25
+ def self.loofah_html5_support?
26
+ Loofah.respond_to?(:html5_support?) && Loofah.html5_support?
27
+ end
28
+
29
+ class BaseSanitizerTest < Minitest::Test
30
+ class XpathRemovalTestSanitizer < Rails::HTML::Sanitizer
31
+ def sanitize(html, options = {})
32
+ fragment = Loofah.fragment(html)
33
+ remove_xpaths(fragment, options[:xpaths]).to_s
34
+ end
35
+ end
6
36
 
7
- class SanitizersTest < Minitest::Test
8
- include Rails::Dom::Testing::Assertions::DomAssertions
37
+ def test_sanitizer_sanitize_raises_not_implemented_error
38
+ assert_raises NotImplementedError do
39
+ Rails::HTML::Sanitizer.new.sanitize("asdf")
40
+ end
41
+ end
9
42
 
10
- def test_sanitizer_sanitize_raises_not_implemented_error
11
- assert_raises NotImplementedError do
12
- Rails::Html::Sanitizer.new.sanitize('')
43
+ def test_remove_xpaths_removes_an_xpath
44
+ html = %(<h1>hello <script>code!</script></h1>)
45
+ assert_equal %(<h1>hello </h1>), xpath_sanitize(html, xpaths: %w(.//script))
13
46
  end
14
- end
15
47
 
16
- def test_sanitize_nested_script
17
- assert_equal '&lt;script&gt;alert("XSS");&lt;/script&gt;', safe_list_sanitize('<script><script></script>alert("XSS");<script><</script>/</script><script>script></script>', tags: %w(em))
18
- end
48
+ def test_remove_xpaths_removes_all_occurrences_of_xpath
49
+ html = %(<section><header><script>code!</script></header><p>hello <script>code!</script></p></section>)
50
+ assert_equal %(<section><header></header><p>hello </p></section>), xpath_sanitize(html, xpaths: %w(.//script))
51
+ end
19
52
 
20
- def test_sanitize_nested_script_in_style
21
- assert_equal '&lt;script&gt;alert("XSS");&lt;/script&gt;', safe_list_sanitize('<style><script></style>alert("XSS");<style><</style>/</style><style>script></style>', tags: %w(em))
22
- end
53
+ def test_remove_xpaths_called_with_faulty_xpath
54
+ assert_raises Nokogiri::XML::XPath::SyntaxError do
55
+ xpath_sanitize("<h1>hello<h1>", xpaths: %w(..faulty_xpath))
56
+ end
57
+ end
23
58
 
24
- class XpathRemovalTestSanitizer < Rails::Html::Sanitizer
25
- def sanitize(html, options = {})
26
- fragment = Loofah.fragment(html)
27
- remove_xpaths(fragment, options[:xpaths]).to_s
59
+ def test_remove_xpaths_called_with_xpath_string
60
+ assert_equal "", xpath_sanitize("<a></a>", xpaths: ".//a")
28
61
  end
29
- end
30
62
 
31
- def test_remove_xpaths_removes_an_xpath
32
- html = %(<h1>hello <script>code!</script></h1>)
33
- assert_equal %(<h1>hello </h1>), xpath_sanitize(html, xpaths: %w(.//script))
34
- end
63
+ def test_remove_xpaths_called_with_enumerable_xpaths
64
+ assert_equal "", xpath_sanitize("<a><span></span></a>", xpaths: %w(.//a .//span))
65
+ end
35
66
 
36
- def test_remove_xpaths_removes_all_occurrences_of_xpath
37
- html = %(<section><header><script>code!</script></header><p>hello <script>code!</script></p></section>)
38
- assert_equal %(<section><header></header><p>hello </p></section>), xpath_sanitize(html, xpaths: %w(.//script))
67
+ protected
68
+ def xpath_sanitize(input, options = {})
69
+ XpathRemovalTestSanitizer.new.sanitize(input, options)
70
+ end
39
71
  end
40
72
 
41
- def test_remove_xpaths_called_with_faulty_xpath
42
- assert_raises Nokogiri::XML::XPath::SyntaxError do
43
- xpath_sanitize('<h1>hello<h1>', xpaths: %w(..faulty_xpath))
73
+ module ModuleUnderTest
74
+ def module_under_test
75
+ self.class.instance_variable_get(:@module_under_test)
44
76
  end
45
77
  end
46
78
 
47
- def test_remove_xpaths_called_with_xpath_string
48
- assert_equal '', xpath_sanitize('<a></a>', xpaths: './/a')
49
- end
79
+ module FullSanitizerTest
80
+ include ModuleUnderTest
50
81
 
51
- def test_remove_xpaths_called_with_enumerable_xpaths
52
- assert_equal '', xpath_sanitize('<a><span></span></a>', xpaths: %w(.//a .//span))
53
- end
82
+ def test_strip_tags_with_quote
83
+ input = '<" <img src="trollface.gif" onload="alert(1)"> hi'
84
+ result = full_sanitize(input)
85
+ acceptable_results = [
86
+ # libxml2 >= 2.9.14 and xerces+neko
87
+ %{&lt;" hi},
88
+ # other libxml2
89
+ %{ hi},
90
+ ]
54
91
 
55
- def test_strip_tags_with_quote
56
- input = '<" <img src="trollface.gif" onload="alert(1)"> hi'
57
- expected = libxml_2_9_14_recovery? ? %{&lt;" hi} : %{ hi}
58
- assert_equal(expected, full_sanitize(input))
59
- end
92
+ assert_includes(acceptable_results, result)
93
+ end
60
94
 
61
- def test_strip_invalid_html
62
- assert_equal "&lt;&lt;", full_sanitize("<<<bad html")
63
- end
95
+ def test_strip_invalid_html
96
+ assert_equal "&lt;&lt;", full_sanitize("<<<bad html")
97
+ end
64
98
 
65
- def test_strip_nested_tags
66
- expected = "Wei&lt;a onclick='alert(document.cookie);'/&gt;rdos"
67
- input = "Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"
68
- assert_equal expected, full_sanitize(input)
69
- end
99
+ def test_strip_nested_tags
100
+ expected = "Wei&lt;a onclick='alert(document.cookie);'/&gt;rdos"
101
+ input = "Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"
102
+ assert_equal expected, full_sanitize(input)
103
+ end
70
104
 
71
- def test_strip_tags_multiline
72
- expected = %{This is a test.\n\n\n\nIt no longer contains any HTML.\n}
73
- input = %{<title>This is <b>a <a href="" target="_blank">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n}
105
+ def test_strip_tags_multiline
106
+ expected = %{This is a test.\n\n\n\nIt no longer contains any HTML.\n}
107
+ input = %{<h1>This is <b>a <a href="" target="_blank">test</a></b>.</h1>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n}
74
108
 
75
- assert_equal expected, full_sanitize(input)
76
- end
109
+ assert_equal expected, full_sanitize(input)
110
+ end
77
111
 
78
- def test_remove_unclosed_tags
79
- input = "This is <-- not\n a comment here."
80
- expected = libxml_2_9_14_recovery? ? %{This is &lt;-- not\n a comment here.} : %{This is }
81
- assert_equal(expected, full_sanitize(input))
82
- end
112
+ def test_remove_unclosed_tags
113
+ input = "This is <-- not\n a comment here."
114
+ result = full_sanitize(input)
115
+ acceptable_results = [
116
+ # libxml2 >= 2.9.14 and xerces+neko
117
+ %{This is &lt;-- not\n a comment here.},
118
+ # other libxml2
119
+ %{This is },
120
+ ]
121
+
122
+ assert_includes(acceptable_results, result)
123
+ end
83
124
 
84
- def test_strip_cdata
85
- input = "This has a <![CDATA[<section>]]> here."
86
- expected = libxml_2_9_14_recovery? ? %{This has a &lt;![CDATA[]]&gt; here.} : %{This has a ]]&gt; here.}
87
- assert_equal(expected, full_sanitize(input))
88
- end
125
+ def test_strip_cdata
126
+ input = "This has a <![CDATA[<section>]]> here."
127
+ result = full_sanitize(input)
128
+ acceptable_results = [
129
+ # libxml2 = 2.9.14
130
+ %{This has a &lt;![CDATA[]]&gt; here.},
131
+ # other libxml2
132
+ %{This has a ]]&gt; here.},
133
+ # xerces+neko
134
+ %{This has a here.},
135
+ ]
136
+
137
+ assert_includes(acceptable_results, result)
138
+ end
89
139
 
90
- def test_strip_unclosed_cdata
91
- input = "This has an unclosed <![CDATA[<section>]] here..."
92
- expected = libxml_2_9_14_recovery? ? %{This has an unclosed &lt;![CDATA[]] here...} : %{This has an unclosed ]] here...}
93
- assert_equal(expected, full_sanitize(input))
94
- end
140
+ def test_strip_blank_string
141
+ assert_nil full_sanitize(nil)
142
+ assert_equal "", full_sanitize("")
143
+ assert_equal " ", full_sanitize(" ")
144
+ end
95
145
 
96
- def test_strip_blank_string
97
- assert_nil full_sanitize(nil)
98
- assert_equal "", full_sanitize("")
99
- assert_equal " ", full_sanitize(" ")
100
- end
146
+ def test_strip_tags_with_plaintext
147
+ assert_equal "Don't touch me", full_sanitize("Don't touch me")
148
+ end
101
149
 
102
- def test_strip_tags_with_plaintext
103
- assert_equal "Don't touch me", full_sanitize("Don't touch me")
104
- end
150
+ def test_strip_tags_with_tags
151
+ assert_equal "This is a test.", full_sanitize("<p>This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</p>")
152
+ end
105
153
 
106
- def test_strip_tags_with_tags
107
- assert_equal "This is a test.", full_sanitize("<p>This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</p>")
108
- end
154
+ def test_escape_tags_with_many_open_quotes
155
+ assert_equal "&lt;&lt;", full_sanitize("<<<bad html>")
156
+ end
109
157
 
110
- def test_escape_tags_with_many_open_quotes
111
- assert_equal "&lt;&lt;", full_sanitize("<<<bad html>")
112
- end
158
+ def test_strip_tags_with_sentence
159
+ assert_equal "This is a test.", full_sanitize("This is a test.")
160
+ end
113
161
 
114
- def test_strip_tags_with_sentence
115
- assert_equal "This is a test.", full_sanitize("This is a test.")
116
- end
162
+ def test_strip_tags_with_comment
163
+ assert_equal "This has a here.", full_sanitize("This has a <!-- comment --> here.")
164
+ end
117
165
 
118
- def test_strip_tags_with_comment
119
- assert_equal "This has a here.", full_sanitize("This has a <!-- comment --> here.")
120
- end
166
+ def test_strip_tags_with_frozen_string
167
+ assert_equal "Frozen string with no tags", full_sanitize("Frozen string with no tags")
168
+ end
121
169
 
122
- def test_strip_tags_with_frozen_string
123
- assert_equal "Frozen string with no tags", full_sanitize("Frozen string with no tags".freeze)
124
- end
170
+ def test_full_sanitize_respect_html_escaping_of_the_given_string
171
+ assert_equal 'test\r\nstring', full_sanitize('test\r\nstring')
172
+ assert_equal "&amp;", full_sanitize("&")
173
+ assert_equal "&amp;", full_sanitize("&amp;")
174
+ assert_equal "&amp;amp;", full_sanitize("&amp;amp;")
175
+ assert_equal "omg &lt;script&gt;BOM&lt;/script&gt;", full_sanitize("omg &lt;script&gt;BOM&lt;/script&gt;")
176
+ end
125
177
 
126
- def test_full_sanitize_respect_html_escaping_of_the_given_string
127
- assert_equal 'test\r\nstring', full_sanitize('test\r\nstring')
128
- assert_equal '&amp;', full_sanitize('&')
129
- assert_equal '&amp;', full_sanitize('&amp;')
130
- assert_equal '&amp;amp;', full_sanitize('&amp;amp;')
131
- assert_equal 'omg &lt;script&gt;BOM&lt;/script&gt;', full_sanitize('omg &lt;script&gt;BOM&lt;/script&gt;')
132
- end
178
+ def test_sanitize_ascii_8bit_string
179
+ full_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
180
+ assert_equal "hello", sanitized
181
+ assert_equal Encoding::UTF_8, sanitized.encoding
182
+ end
183
+ end
133
184
 
134
- def test_strip_links_with_tags_in_tags
135
- expected = "&lt;a href='hello'&gt;all <b>day</b> long&lt;/a&gt;"
136
- input = "<<a>a href='hello'>all <b>day</b> long<</A>/a>"
137
- assert_equal expected, link_sanitize(input)
185
+ protected
186
+ def full_sanitize(input, options = {})
187
+ module_under_test::FullSanitizer.new.sanitize(input, options)
188
+ end
138
189
  end
139
190
 
140
- def test_strip_links_with_unclosed_tags
141
- assert_equal "", link_sanitize("<a<a")
191
+ class HTML4FullSanitizerTest < Minitest::Test
192
+ @module_under_test = Rails::HTML4
193
+ include FullSanitizerTest
142
194
  end
143
195
 
144
- def test_strip_links_with_plaintext
145
- assert_equal "Don't touch me", link_sanitize("Don't touch me")
146
- end
196
+ class HTML5FullSanitizerTest < Minitest::Test
197
+ @module_under_test = Rails::HTML5
198
+ include FullSanitizerTest
199
+ end if loofah_html5_support?
147
200
 
148
- def test_strip_links_with_line_feed_and_uppercase_tag
149
- assert_equal "on my mind\nall day long", link_sanitize("<a href='almost'>on my mind</a>\n<A href='almost'>all day long</A>")
150
- end
201
+ module LinkSanitizerTest
202
+ include ModuleUnderTest
151
203
 
152
- def test_strip_links_leaves_nonlink_tags
153
- assert_equal "My mind\nall <b>day</b> long", link_sanitize("<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>")
154
- end
204
+ def test_strip_links_with_tags_in_tags
205
+ expected = "&lt;a href='hello'&gt;all <b>day</b> long&lt;/a&gt;"
206
+ input = "<<a>a href='hello'>all <b>day</b> long<</A>/a>"
207
+ assert_equal expected, link_sanitize(input)
208
+ end
155
209
 
156
- def test_strip_links_with_links
157
- assert_equal "0wn3d", link_sanitize("<a href='http://www.rubyonrails.com/'><a href='http://www.rubyonrails.com/' onlclick='steal()'>0wn3d</a></a>")
158
- end
210
+ def test_strip_links_with_unclosed_tags
211
+ assert_equal "", link_sanitize("<a<a")
212
+ end
159
213
 
160
- def test_strip_links_with_linkception
161
- assert_equal "Magic", link_sanitize("<a href='http://www.rubyonrails.com/'>Mag<a href='http://www.ruby-lang.org/'>ic")
162
- end
214
+ def test_strip_links_with_plaintext
215
+ assert_equal "Don't touch me", link_sanitize("Don't touch me")
216
+ end
163
217
 
164
- def test_sanitize_form
165
- assert_sanitized "<form action=\"/foo/bar\" method=\"post\"><input></form>", ''
166
- end
218
+ def test_strip_links_with_line_feed_and_uppercase_tag
219
+ assert_equal "on my mind\nall day long", link_sanitize("<a href='almost'>on my mind</a>\n<A href='almost'>all day long</A>")
220
+ end
167
221
 
168
- def test_sanitize_plaintext
169
- assert_sanitized "<plaintext><span>foo</span></plaintext>", "<span>foo</span>"
170
- end
222
+ def test_strip_links_leaves_nonlink_tags
223
+ assert_equal "My mind\nall <b>day</b> long", link_sanitize("<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>")
224
+ end
171
225
 
172
- def test_sanitize_script
173
- assert_sanitized "a b c<script language=\"Javascript\">blah blah blah</script>d e f", "a b cblah blah blahd e f"
174
- end
226
+ def test_strip_links_with_links
227
+ assert_equal "0wn3d", link_sanitize("<a href='http://www.rubyonrails.com/'><a href='http://www.rubyonrails.com/' onlclick='steal()'>0wn3d</a></a>")
228
+ end
175
229
 
176
- def test_sanitize_js_handlers
177
- raw = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>}
178
- assert_sanitized raw, %{onthis="do that" <a href="#" name="foo">hello</a>}
179
- end
230
+ def test_strip_links_with_linkception
231
+ assert_equal "Magic", link_sanitize("<a href='http://www.rubyonrails.com/'>Mag<a href='http://www.ruby-lang.org/'>ic")
232
+ end
180
233
 
181
- def test_sanitize_javascript_href
182
- raw = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>}
183
- assert_sanitized raw, %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>}
234
+ def test_sanitize_ascii_8bit_string
235
+ link_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
236
+ assert_equal "<div>hello</div>", sanitized
237
+ assert_equal Encoding::UTF_8, sanitized.encoding
238
+ end
239
+ end
240
+
241
+ protected
242
+ def link_sanitize(input, options = {})
243
+ module_under_test::LinkSanitizer.new.sanitize(input, options)
244
+ end
184
245
  end
185
246
 
186
- def test_sanitize_image_src
187
- raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>}
188
- assert_sanitized raw, %{src="javascript:bang" <img width="5">foo</img>, <span>bar</span>}
247
+ class HTML4LinkSanitizerTest < Minitest::Test
248
+ @module_under_test = Rails::HTML4
249
+ include LinkSanitizerTest
189
250
  end
190
251
 
191
- tags = Loofah::HTML5::SafeList::ALLOWED_ELEMENTS - %w(script form)
192
- tags.each do |tag_name|
193
- define_method "test_should_allow_#{tag_name}_tag" do
194
- scope_allowed_tags(tags) do
195
- assert_sanitized "start <#{tag_name} title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</#{tag_name}> end", %(start <#{tag_name} title="1">foo bar baz</#{tag_name}> end)
252
+ class HTML5LinkSanitizerTest < Minitest::Test
253
+ @module_under_test = Rails::HTML5
254
+ include LinkSanitizerTest
255
+ end if loofah_html5_support?
256
+
257
+ module SafeListSanitizerTest
258
+ include ModuleUnderTest
259
+
260
+ def test_sanitize_nested_script
261
+ assert_equal '&lt;script&gt;alert("XSS");&lt;/script&gt;', safe_list_sanitize('<script><script></script>alert("XSS");<script><</script>/</script><script>script></script>', tags: %w(em))
262
+ end
263
+
264
+ def test_sanitize_nested_script_in_style
265
+ input = '<style><script></style>alert("XSS");<style><</style>/</style><style>script></style>'
266
+ result = safe_list_sanitize(input, tags: %w(em))
267
+ acceptable_results = [
268
+ # libxml2
269
+ %{&lt;script&gt;alert("XSS");&lt;/script&gt;},
270
+ # xerces+neko. unavoidable double-escaping, see loofah/docs/2022-10-decision-on-cdata-nodes.md
271
+ %{&amp;lt;script&amp;gt;alert(\"XSS\");&amp;lt;&amp;lt;/style&amp;gt;/script&amp;gt;},
272
+ ]
273
+
274
+ assert_includes(acceptable_results, result)
275
+ end
276
+
277
+ def test_strip_unclosed_cdata
278
+ input = "This has an unclosed <![CDATA[<section>]] here..."
279
+
280
+ result = safe_list_sanitize(input)
281
+
282
+ acceptable_results = [
283
+ # libxml2 = 2.9.14
284
+ %{This has an unclosed &lt;![CDATA[]] here...},
285
+ # other libxml2
286
+ %{This has an unclosed ]] here...},
287
+ # xerces+neko
288
+ %{This has an unclosed }
289
+ ]
290
+
291
+ assert_includes(acceptable_results, result)
292
+ end
293
+
294
+ def test_sanitize_form
295
+ assert_sanitized "<form action=\"/foo/bar\" method=\"post\"><input></form>", ""
296
+ end
297
+
298
+ def test_sanitize_plaintext
299
+ # note that the `plaintext` tag has been deprecated since HTML 2
300
+ # https://developer.mozilla.org/en-US/docs/Web/HTML/Element/plaintext
301
+ input = "<plaintext><span>foo</span></plaintext>"
302
+ result = safe_list_sanitize(input)
303
+ acceptable_results = [
304
+ # libxml2
305
+ "<span>foo</span>",
306
+ # xerces+nekohtml-unit
307
+ "&lt;span&gt;foo&lt;/span&gt;&lt;/plaintext&gt;",
308
+ # xerces+cyberneko
309
+ "&lt;span&gt;foo&lt;/span&gt;"
310
+ ]
311
+
312
+ assert_includes(acceptable_results, result)
313
+ end
314
+
315
+ def test_sanitize_script
316
+ assert_sanitized "a b c<script language=\"Javascript\">blah blah blah</script>d e f", "a b cblah blah blahd e f"
317
+ end
318
+
319
+ def test_sanitize_js_handlers
320
+ raw = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>}
321
+ assert_sanitized raw, %{onthis="do that" <a href="#" name="foo">hello</a>}
322
+ end
323
+
324
+ def test_sanitize_javascript_href
325
+ raw = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>}
326
+ assert_sanitized raw, %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>}
327
+ end
328
+
329
+ def test_sanitize_image_src
330
+ raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>}
331
+ assert_sanitized raw, %{src="javascript:bang" <img width="5">foo, <span>bar</span>}
332
+ end
333
+
334
+ def test_should_allow_anchors
335
+ assert_sanitized %(<a href="foo" onclick="bar"><script>baz</script></a>), %(<a href=\"foo\">baz</a>)
336
+ end
337
+
338
+ def test_video_poster_sanitization
339
+ scope_allowed_tags(%w(video)) do
340
+ scope_allowed_attributes %w(src poster) do
341
+ expected = if RUBY_PLATFORM == "java"
342
+ # xerces+nekohtml alphabetizes the attributes! FML.
343
+ %(<video poster="posterimage.jpg" src="videofile.ogg"></video>)
344
+ else
345
+ %(<video src="videofile.ogg" poster="posterimage.jpg"></video>)
346
+ end
347
+ assert_sanitized(
348
+ %(<video src="videofile.ogg" autoplay poster="posterimage.jpg"></video>),
349
+ expected,
350
+ )
351
+ assert_sanitized(
352
+ %(<video src="videofile.ogg" poster=javascript:alert(1)></video>),
353
+ %(<video src="videofile.ogg"></video>),
354
+ )
355
+ end
196
356
  end
197
357
  end
198
- end
199
358
 
200
- def test_should_allow_anchors
201
- assert_sanitized %(<a href="foo" onclick="bar"><script>baz</script></a>), %(<a href=\"foo\">baz</a>)
202
- end
359
+ # RFC 3986, sec 4.2
360
+ def test_allow_colons_in_path_component
361
+ assert_sanitized "<a href=\"./this:that\">foo</a>"
362
+ end
203
363
 
204
- def test_video_poster_sanitization
205
- scope_allowed_tags(%w(video)) do
206
- scope_allowed_attributes %w(src poster) do
207
- assert_sanitized %(<video src="videofile.ogg" autoplay poster="posterimage.jpg"></video>), %(<video src="videofile.ogg" poster="posterimage.jpg"></video>)
208
- assert_sanitized %(<video src="videofile.ogg" poster=javascript:alert(1)></video>), %(<video src="videofile.ogg"></video>)
364
+ %w(src width height alt).each do |img_attr|
365
+ define_method "test_should_allow_image_#{img_attr}_attribute" do
366
+ assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo">)
209
367
  end
210
368
  end
211
- end
212
369
 
213
- # RFC 3986, sec 4.2
214
- def test_allow_colons_in_path_component
215
- assert_sanitized "<a href=\"./this:that\">foo</a>"
216
- end
370
+ def test_lang_and_xml_lang
371
+ # https://html.spec.whatwg.org/multipage/dom.html#the-lang-and-xml:lang-attributes
372
+ #
373
+ # 3.2.6.2 The lang and xml:lang attributes
374
+ #
375
+ # ... Authors must not use the lang attribute in the XML namespace on HTML elements in HTML
376
+ # documents. To ease migration to and from XML, authors may specify an attribute in no namespace
377
+ # with no prefix and with the literal localname "xml:lang" on HTML elements in HTML documents,
378
+ # but such attributes must only be specified if a lang attribute in no namespace is also
379
+ # specified, and both attributes must have the same value when compared in an ASCII
380
+ # case-insensitive manner.
381
+ input = expected = "<div lang=\"en\" xml:lang=\"en\">foo</div>"
382
+ assert_sanitized(input, expected)
383
+ end
217
384
 
218
- %w(src width height alt).each do |img_attr|
219
- define_method "test_should_allow_image_#{img_attr}_attribute" do
220
- assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo" />)
385
+ def test_should_handle_non_html
386
+ assert_sanitized "abc"
221
387
  end
222
- end
223
388
 
224
- def test_should_handle_non_html
225
- assert_sanitized 'abc'
226
- end
389
+ def test_should_handle_blank_text
390
+ assert_nil(safe_list_sanitize(nil))
391
+ assert_equal("", safe_list_sanitize(""))
392
+ assert_equal(" ", safe_list_sanitize(" "))
393
+ end
227
394
 
228
- def test_should_handle_blank_text
229
- [nil, '', ' '].each { |blank| assert_sanitized blank }
230
- end
395
+ def test_setting_allowed_tags_affects_sanitization
396
+ scope_allowed_tags %w(u) do |sanitizer|
397
+ assert_equal "<u></u>", sanitizer.sanitize("<a><u></u></a>")
398
+ end
399
+ end
231
400
 
232
- def test_setting_allowed_tags_affects_sanitization
233
- scope_allowed_tags %w(u) do |sanitizer|
234
- assert_equal '<u></u>', sanitizer.sanitize('<a><u></u></a>')
401
+ def test_setting_allowed_attributes_affects_sanitization
402
+ scope_allowed_attributes %w(foo) do |sanitizer|
403
+ input = '<a foo="hello" bar="world"></a>'
404
+ assert_equal '<a foo="hello"></a>', sanitizer.sanitize(input)
405
+ end
235
406
  end
236
- end
237
407
 
238
- def test_setting_allowed_attributes_affects_sanitization
239
- scope_allowed_attributes %w(foo) do |sanitizer|
240
- input = '<a foo="hello" bar="world"></a>'
241
- assert_equal '<a foo="hello"></a>', sanitizer.sanitize(input)
408
+ def test_custom_tags_overrides_allowed_tags
409
+ scope_allowed_tags %(u) do |sanitizer|
410
+ input = "<a><u></u></a>"
411
+ assert_equal "<a></a>", sanitizer.sanitize(input, tags: %w(a))
412
+ end
242
413
  end
243
- end
244
414
 
245
- def test_custom_tags_overrides_allowed_tags
246
- scope_allowed_tags %(u) do |sanitizer|
247
- input = '<a><u></u></a>'
248
- assert_equal '<a></a>', sanitizer.sanitize(input, tags: %w(a))
415
+ def test_custom_attributes_overrides_allowed_attributes
416
+ scope_allowed_attributes %(foo) do |sanitizer|
417
+ input = '<a foo="hello" bar="world"></a>'
418
+ assert_equal '<a bar="world"></a>', sanitizer.sanitize(input, attributes: %w(bar))
419
+ end
249
420
  end
250
- end
251
421
 
252
- def test_custom_attributes_overrides_allowed_attributes
253
- scope_allowed_attributes %(foo) do |sanitizer|
254
- input = '<a foo="hello" bar="world"></a>'
255
- assert_equal '<a bar="world"></a>', sanitizer.sanitize(input, attributes: %w(bar))
422
+ def test_should_allow_prune
423
+ sanitizer = module_under_test::SafeListSanitizer.new(prune: true)
424
+ text = "<u>leave me <b>now</b></u>"
425
+ assert_equal "<u>leave me </u>", sanitizer.sanitize(text, tags: %w(u))
256
426
  end
257
- end
258
427
 
259
- def test_should_allow_custom_tags
260
- text = "<u>foo</u>"
261
- assert_equal text, safe_list_sanitize(text, tags: %w(u))
262
- end
428
+ def test_should_allow_custom_tags
429
+ text = "<u>foo</u>"
430
+ assert_equal text, safe_list_sanitize(text, tags: %w(u))
431
+ end
263
432
 
264
- def test_should_allow_only_custom_tags
265
- text = "<u>foo</u> with <i>bar</i>"
266
- assert_equal "<u>foo</u> with bar", safe_list_sanitize(text, tags: %w(u))
267
- end
433
+ def test_should_allow_only_custom_tags
434
+ text = "<u>foo</u> with <i>bar</i>"
435
+ assert_equal "<u>foo</u> with bar", safe_list_sanitize(text, tags: %w(u))
436
+ end
268
437
 
269
- def test_should_allow_custom_tags_with_attributes
270
- text = %(<blockquote cite="http://example.com/">foo</blockquote>)
271
- assert_equal text, safe_list_sanitize(text)
272
- end
438
+ def test_should_allow_custom_tags_with_attributes
439
+ text = %(<blockquote cite="http://example.com/">foo</blockquote>)
440
+ assert_equal text, safe_list_sanitize(text)
441
+ end
273
442
 
274
- def test_should_allow_custom_tags_with_custom_attributes
275
- text = %(<blockquote foo="bar">Lorem ipsum</blockquote>)
276
- assert_equal text, safe_list_sanitize(text, attributes: ['foo'])
277
- end
443
+ def test_should_allow_custom_tags_with_custom_attributes
444
+ text = %(<blockquote foo="bar">Lorem ipsum</blockquote>)
445
+ assert_equal text, safe_list_sanitize(text, attributes: ["foo"])
446
+ end
278
447
 
279
- def test_scrub_style_if_style_attribute_option_is_passed
280
- input = '<p style="color: #000; background-image: url(http://www.ragingplatypus.com/i/cam-full.jpg);"></p>'
281
- actual = safe_list_sanitize(input, attributes: %w(style))
282
- assert_includes(['<p style="color: #000;"></p>', '<p style="color:#000;"></p>'], actual)
283
- end
448
+ def test_scrub_style_if_style_attribute_option_is_passed
449
+ input = '<p style="color: #000; background-image: url(http://www.ragingplatypus.com/i/cam-full.jpg);"></p>'
450
+ actual = safe_list_sanitize(input, attributes: %w(style))
284
451
 
285
- def test_should_raise_argument_error_if_tags_is_not_enumerable
286
- assert_raises ArgumentError do
287
- safe_list_sanitize('<a>some html</a>', tags: 'foo')
452
+ assert_includes(['<p style="color: #000;"></p>', '<p style="color:#000;"></p>'], actual)
288
453
  end
289
- end
290
454
 
291
- def test_should_raise_argument_error_if_attributes_is_not_enumerable
292
- assert_raises ArgumentError do
293
- safe_list_sanitize('<a>some html</a>', attributes: 'foo')
455
+ def test_should_raise_argument_error_if_tags_is_not_enumerable
456
+ assert_raises ArgumentError do
457
+ safe_list_sanitize("<a>some html</a>", tags: "foo")
458
+ end
294
459
  end
295
- end
296
460
 
297
- def test_should_not_accept_non_loofah_inheriting_scrubber
298
- scrubber = Object.new
299
- def scrubber.scrub(node); node.name = 'h1'; end
461
+ def test_should_raise_argument_error_if_attributes_is_not_enumerable
462
+ assert_raises ArgumentError do
463
+ safe_list_sanitize("<a>some html</a>", attributes: "foo")
464
+ end
465
+ end
300
466
 
301
- assert_raises Loofah::ScrubberNotFound do
302
- safe_list_sanitize('<a>some html</a>', scrubber: scrubber)
467
+ def test_should_not_accept_non_loofah_inheriting_scrubber
468
+ scrubber = Object.new
469
+ def scrubber.scrub(node); node.name = "h1"; end
470
+
471
+ assert_raises Loofah::ScrubberNotFound do
472
+ safe_list_sanitize("<a>some html</a>", scrubber: scrubber)
473
+ end
303
474
  end
304
- end
305
475
 
306
- def test_should_accept_loofah_inheriting_scrubber
307
- scrubber = Loofah::Scrubber.new
308
- def scrubber.scrub(node); node.name = 'h1'; end
476
+ def test_should_accept_loofah_inheriting_scrubber
477
+ scrubber = Loofah::Scrubber.new
478
+ def scrubber.scrub(node); node.replace("<h1>#{node.inner_html}</h1>"); end
309
479
 
310
- html = "<script>hello!</script>"
311
- assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber)
312
- end
480
+ html = "<script>hello!</script>"
481
+ assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber)
482
+ end
313
483
 
314
- def test_should_accept_loofah_scrubber_that_wraps_a_block
315
- scrubber = Loofah::Scrubber.new { |node| node.name = 'h1' }
316
- html = "<script>hello!</script>"
317
- assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber)
318
- end
484
+ def test_should_accept_loofah_scrubber_that_wraps_a_block
485
+ scrubber = Loofah::Scrubber.new { |node| node.replace("<h1>#{node.inner_html}</h1>") }
486
+ html = "<script>hello!</script>"
487
+ assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber)
488
+ end
319
489
 
320
- def test_custom_scrubber_takes_precedence_over_other_options
321
- scrubber = Loofah::Scrubber.new { |node| node.name = 'h1' }
322
- html = "<script>hello!</script>"
323
- assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber, tags: ['foo'])
324
- end
490
+ def test_custom_scrubber_takes_precedence_over_other_options
491
+ scrubber = Loofah::Scrubber.new { |node| node.replace("<h1>#{node.inner_html}</h1>") }
492
+ html = "<script>hello!</script>"
493
+ assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber, tags: ["foo"])
494
+ end
325
495
 
326
- [%w(img src), %w(a href)].each do |(tag, attr)|
327
- define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do
328
- assert_sanitized %(<#{tag} #{attr}="javascript:bang" title="1">boo</#{tag}>), %(<#{tag} title="1">boo</#{tag}>)
496
+ def test_should_strip_src_attribute_in_img_with_bad_protocols
497
+ assert_sanitized %(<img src="javascript:bang" title="1">), %(<img title="1">)
329
498
  end
330
- end
331
499
 
332
- def test_should_block_script_tag
333
- assert_sanitized %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), ""
334
- end
500
+ def test_should_strip_href_attribute_in_a_with_bad_protocols
501
+ assert_sanitized %(<a href="javascript:bang" title="1">boo</a>), %(<a title="1">boo</a>)
502
+ end
335
503
 
336
- def test_should_not_fall_for_xss_image_hack_with_uppercase_tags
337
- assert_sanitized %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">), %(<img>alert("XSS")"&gt;)
338
- end
504
+ def test_should_block_script_tag
505
+ assert_sanitized %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), ""
506
+ end
339
507
 
340
- [%(<IMG SRC="javascript:alert('XSS');">),
341
- %(<IMG SRC=javascript:alert('XSS')>),
342
- %(<IMG SRC=JaVaScRiPt:alert('XSS')>),
343
- %(<IMG SRC=javascript:alert(&quot;XSS&quot;)>),
344
- %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>),
345
- %(<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>),
346
- %(<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>),
347
- %(<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>),
348
- %(<IMG SRC="jav\tascript:alert('XSS');">),
349
- %(<IMG SRC="jav&#x09;ascript:alert('XSS');">),
350
- %(<IMG SRC="jav&#x0A;ascript:alert('XSS');">),
351
- %(<IMG SRC="jav&#x0D;ascript:alert('XSS');">),
352
- %(<IMG SRC=" &#14; javascript:alert('XSS');">),
353
- %(<IMG SRC="javascript&#x3a;alert('XSS');">),
354
- %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)].each do |img_hack|
355
- define_method "test_should_not_fall_for_xss_image_hack_#{img_hack}" do
356
- assert_sanitized img_hack, "<img>"
508
+ def test_should_not_fall_for_xss_image_hack_with_uppercase_tags
509
+ assert_sanitized %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">), %(<img>alert("XSS")"&gt;)
357
510
  end
358
- end
359
511
 
360
- def test_should_sanitize_tag_broken_up_by_null
361
- assert_sanitized %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>), ""
362
- end
512
+ [%(<IMG SRC="javascript:alert('XSS');">),
513
+ %(<IMG SRC=javascript:alert('XSS')>),
514
+ %(<IMG SRC=JaVaScRiPt:alert('XSS')>),
515
+ %(<IMG SRC=javascript:alert(&quot;XSS&quot;)>),
516
+ %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>),
517
+ %(<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>),
518
+ %(<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>),
519
+ %(<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>),
520
+ %(<IMG SRC="jav\tascript:alert('XSS');">),
521
+ %(<IMG SRC="jav&#x09;ascript:alert('XSS');">),
522
+ %(<IMG SRC="jav&#x0A;ascript:alert('XSS');">),
523
+ %(<IMG SRC="jav&#x0D;ascript:alert('XSS');">),
524
+ %(<IMG SRC=" &#14; javascript:alert('XSS');">),
525
+ %(<IMG SRC="javascript&#x3a;alert('XSS');">),
526
+ %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)].each do |img_hack|
527
+ define_method "test_should_not_fall_for_xss_image_hack_#{img_hack}" do
528
+ assert_sanitized img_hack, "<img>"
529
+ end
530
+ end
363
531
 
364
- def test_should_sanitize_invalid_script_tag
365
- assert_sanitized %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), ""
366
- end
532
+ def test_should_sanitize_tag_broken_up_by_null
533
+ input = %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>)
534
+ result = safe_list_sanitize(input)
535
+ acceptable_results = [
536
+ # libxml2
537
+ "",
538
+ # xerces+neko
539
+ 'alert("XSS")',
540
+ ]
541
+
542
+ assert_includes(acceptable_results, result)
543
+ end
367
544
 
368
- def test_should_sanitize_script_tag_with_multiple_open_brackets
369
- assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "&lt;alert(\"XSS\");//&lt;"
370
- assert_sanitized %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a), ""
371
- end
545
+ def test_should_sanitize_invalid_script_tag
546
+ assert_sanitized %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), ""
547
+ end
372
548
 
373
- def test_should_sanitize_unclosed_script
374
- assert_sanitized %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), ""
375
- end
549
+ def test_should_sanitize_script_tag_with_multiple_open_brackets
550
+ assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "&lt;alert(\"XSS\");//&lt;"
551
+ end
376
552
 
377
- def test_should_sanitize_half_open_scripts
378
- assert_sanitized %(<IMG SRC="javascript:alert('XSS')"), "<img>"
379
- end
553
+ def test_should_sanitize_script_tag_with_multiple_open_brackets_2
554
+ input = %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a)
555
+ result = safe_list_sanitize(input)
556
+ acceptable_results = [
557
+ # libxml2
558
+ "",
559
+ # xerces+neko
560
+ "&lt;a",
561
+ ]
562
+
563
+ assert_includes(acceptable_results, result)
564
+ end
380
565
 
381
- def test_should_not_fall_for_ridiculous_hack
382
- img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)
383
- assert_sanitized img_hack, "<img>"
384
- end
566
+ def test_should_sanitize_unclosed_script
567
+ assert_sanitized %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), ""
568
+ end
385
569
 
386
- def test_should_sanitize_attributes
387
- assert_sanitized %(<SPAN title="'><script>alert()</script>">blah</SPAN>), %(<span title="#{CGI.escapeHTML "'><script>alert()</script>"}">blah</span>)
388
- end
570
+ def test_should_sanitize_half_open_scripts
571
+ input = %(<IMG SRC="javascript:alert('XSS')")
572
+ result = safe_list_sanitize(input)
573
+ acceptable_results = [
574
+ # libxml2
575
+ "<img>",
576
+ # libgumbo
577
+ "",
578
+ ]
579
+
580
+ assert_includes(acceptable_results, result)
581
+ end
389
582
 
390
- def test_should_sanitize_illegal_style_properties
391
- raw = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;)
392
- expected = %(display:block;width:100%;height:100%;background-color:black;background-x:center;background-y:center;)
393
- assert_equal expected, sanitize_css(raw)
394
- end
583
+ def test_should_not_fall_for_ridiculous_hack
584
+ img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)
585
+ assert_sanitized img_hack, "<img>"
586
+ end
395
587
 
396
- def test_should_sanitize_with_trailing_space
397
- raw = "display:block; "
398
- expected = "display:block;"
399
- assert_equal expected, sanitize_css(raw)
400
- end
588
+ def test_should_sanitize_attributes
589
+ input = %(<SPAN title="'><script>alert()</script>">blah</SPAN>)
590
+ result = safe_list_sanitize(input)
591
+ acceptable_results = [
592
+ # libxml2
593
+ %(<span title="'&gt;&lt;script&gt;alert()&lt;/script&gt;">blah</span>),
594
+ # libgumbo
595
+ # this looks scary, but it's fine. for a more detailed analysis check out:
596
+ # https://github.com/discourse/discourse/pull/21522#issuecomment-1545697968
597
+ %(<span title="'><script>alert()</script>">blah</span>)
598
+ ]
599
+
600
+ assert_includes(acceptable_results, result)
601
+ end
401
602
 
402
- def test_should_sanitize_xul_style_attributes
403
- raw = %(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss'))
404
- assert_equal '', sanitize_css(raw)
405
- end
603
+ def test_should_sanitize_invalid_tag_names
604
+ assert_sanitized(%(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f), "a b cd e f")
605
+ end
406
606
 
407
- def test_should_sanitize_invalid_tag_names
408
- assert_sanitized(%(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f), "a b cd e f")
409
- end
607
+ def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags
608
+ assert_sanitized('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>', "<a>foo</a>")
609
+ end
410
610
 
411
- def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags
412
- assert_sanitized('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>', "<a>foo</a>")
413
- end
611
+ def test_should_sanitize_invalid_tag_names_in_single_tags
612
+ input = %(<img/src="http://ha.ckers.org/xss.js"/>)
613
+ result = safe_list_sanitize(input)
614
+ acceptable_results = [
615
+ # libxml2
616
+ "<img>",
617
+ # libgumbo
618
+ %(<img src="http://ha.ckers.org/xss.js">),
619
+ ]
620
+
621
+ assert_includes(acceptable_results, result)
622
+ end
414
623
 
415
- def test_should_sanitize_invalid_tag_names_in_single_tags
416
- assert_sanitized('<img/src="http://ha.ckers.org/xss.js"/>', "<img />")
417
- end
624
+ def test_should_sanitize_img_dynsrc_lowsrc
625
+ assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img>")
626
+ end
418
627
 
419
- def test_should_sanitize_img_dynsrc_lowsrc
420
- assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img />")
421
- end
628
+ def test_should_sanitize_img_vbscript
629
+ assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), "<img>"
630
+ end
422
631
 
423
- def test_should_sanitize_div_background_image_unicode_encoded
424
- [
425
- convert_to_css_hex("url(javascript:alert(1))", false),
426
- convert_to_css_hex("url(javascript:alert(1))", true),
427
- convert_to_css_hex("url(https://example.com)", false),
428
- convert_to_css_hex("url(https://example.com)", true),
429
- ].each do |propval|
430
- raw = "background-image:" + propval
431
- assert_empty(sanitize_css(raw))
632
+ def test_should_sanitize_cdata_section
633
+ input = "<![CDATA[<span>section</span>]]>"
634
+ result = safe_list_sanitize(input)
635
+ acceptable_results = [
636
+ # libxml2 = 2.9.14
637
+ %{&lt;![CDATA[<span>section</span>]]&gt;},
638
+ # other libxml2
639
+ %{section]]&gt;},
640
+ # xerces+neko
641
+ "",
642
+ ]
643
+
644
+ assert_includes(acceptable_results, result)
645
+ end
646
+
647
+ def test_should_sanitize_unterminated_cdata_section
648
+ input = "<![CDATA[<span>neverending..."
649
+ result = safe_list_sanitize(input)
650
+
651
+ acceptable_results = [
652
+ # libxml2 = 2.9.14
653
+ %{&lt;![CDATA[<span>neverending...</span>},
654
+ # other libxml2
655
+ %{neverending...},
656
+ # xerces+neko
657
+ ""
658
+ ]
659
+
660
+ assert_includes(acceptable_results, result)
661
+ end
662
+
663
+ def test_should_not_mangle_urls_with_ampersand
664
+ assert_sanitized %{<a href=\"http://www.domain.com?var1=1&amp;var2=2\">my link</a>}
665
+ end
666
+
667
+ def test_should_sanitize_neverending_attribute
668
+ # note that assert_dom_equal chokes in this case! so avoid using assert_sanitized
669
+ assert_equal("<span class=\"\\\"></span>", safe_list_sanitize("<span class=\"\\\">"))
432
670
  end
433
- end
434
671
 
435
- def test_should_allow_div_background_image_unicode_encoded_safe_functions
436
672
  [
437
- convert_to_css_hex("rgb(255,0,0)", false),
438
- convert_to_css_hex("rgb(255,0,0)", true),
439
- ].each do |propval|
440
- raw = "background-image:" + propval
441
- assert_includes(sanitize_css(raw), "background-image")
673
+ %(<a href="javascript&#x3a;alert('XSS');">),
674
+ %(<a href="javascript&#x003a;alert('XSS');">),
675
+ %(<a href="javascript&#x3A;alert('XSS');">),
676
+ %(<a href="javascript&#x003A;alert('XSS');">)
677
+ ].each_with_index do |enc_hack, i|
678
+ define_method "test_x03a_handling_#{i + 1}" do
679
+ assert_sanitized enc_hack, "<a></a>"
680
+ end
442
681
  end
443
- end
444
682
 
445
- def test_should_sanitize_div_style_expression
446
- raw = %(width: expression(alert('XSS'));)
447
- assert_equal '', sanitize_css(raw)
448
- end
683
+ def test_x03a_legitimate
684
+ assert_sanitized %(<a href="http&#x3a;//legit">asdf</a>), %(<a href="http://legit">asdf</a>)
685
+ assert_sanitized %(<a href="http&#x3A;//legit">asdf</a>), %(<a href="http://legit">asdf</a>)
686
+ end
449
687
 
450
- def test_should_sanitize_across_newlines
451
- raw = %(\nwidth:\nexpression(alert('XSS'));\n)
452
- assert_equal '', sanitize_css(raw)
453
- end
688
+ def test_sanitize_ascii_8bit_string
689
+ safe_list_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
690
+ assert_equal "<div><a>hello</a></div>", sanitized
691
+ assert_equal Encoding::UTF_8, sanitized.encoding
692
+ end
693
+ end
454
694
 
455
- def test_should_sanitize_img_vbscript
456
- assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), '<img />'
457
- end
695
+ def test_sanitize_data_attributes
696
+ assert_sanitized %(<a href="/blah" data-method="post">foo</a>), %(<a href="/blah">foo</a>)
697
+ assert_sanitized %(<a data-remote="true" data-type="script" data-method="get" data-cross-domain="true" href="attack.js">Launch the missiles</a>), %(<a href="attack.js">Launch the missiles</a>)
698
+ end
458
699
 
459
- def test_should_sanitize_cdata_section
460
- input = "<![CDATA[<span>section</span>]]>"
461
- expected = libxml_2_9_14_recovery? ? %{&lt;![CDATA[<span>section</span>]]&gt;} : %{section]]&gt;}
462
- assert_sanitized(input, expected)
463
- end
700
+ def test_allow_data_attribute_if_requested
701
+ text = %(<a data-foo="foo">foo</a>)
702
+ assert_equal %(<a data-foo="foo">foo</a>), safe_list_sanitize(text, attributes: ["data-foo"])
703
+ end
464
704
 
465
- def test_should_sanitize_unterminated_cdata_section
466
- input = "<![CDATA[<span>neverending..."
467
- expected = libxml_2_9_14_recovery? ? %{&lt;![CDATA[<span>neverending...</span>} : %{neverending...}
468
- assert_sanitized(input, expected)
469
- end
705
+ # https://developer.mozilla.org/en-US/docs/Glossary/Void_element
706
+ VOID_ELEMENTS = %w[area base br col embed hr img input keygen link meta param source track wbr]
707
+
708
+ %w(strong em b i p code pre tt samp kbd var sub
709
+ sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr
710
+ acronym a img blockquote del ins time).each do |tag_name|
711
+ define_method "test_default_safelist_should_allow_#{tag_name}" do
712
+ if VOID_ELEMENTS.include?(tag_name)
713
+ assert_sanitized("<#{tag_name}>")
714
+ else
715
+ assert_sanitized("<#{tag_name}>foo</#{tag_name}>")
716
+ end
717
+ end
718
+ end
470
719
 
471
- def test_should_not_mangle_urls_with_ampersand
472
- assert_sanitized %{<a href=\"http://www.domain.com?var1=1&amp;var2=2\">my link</a>}
473
- end
720
+ def test_datetime_attribute
721
+ assert_sanitized("<time datetime=\"2023-01-01\">Today</time>")
722
+ end
474
723
 
475
- def test_should_sanitize_neverending_attribute
476
- assert_sanitized "<span class=\"\\", "<span class=\"\\\">"
477
- end
724
+ def test_abbr_attribute
725
+ scope_allowed_tags(%w(table tr th td)) do
726
+ assert_sanitized(%(<table><tr><td abbr="UK">United Kingdom</td></tr></table>))
727
+ end
728
+ end
729
+
730
+ def test_uri_escaping_of_href_attr_in_a_tag_in_safe_list_sanitizer
731
+ skip if RUBY_VERSION < "2.3"
732
+
733
+ html = %{<a href='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
734
+
735
+ text = safe_list_sanitize(html)
736
+
737
+ acceptable_results = [
738
+ # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch)
739
+ %{<a href="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
740
+ # system libxml2
741
+ %{<a href="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
742
+ # xerces+neko
743
+ %{<a href="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>}
744
+ ]
478
745
 
479
- [
480
- %(<a href="javascript&#x3a;alert('XSS');">),
481
- %(<a href="javascript&#x003a;alert('XSS');">),
482
- %(<a href="javascript&#x3A;alert('XSS');">),
483
- %(<a href="javascript&#x003A;alert('XSS');">)
484
- ].each_with_index do |enc_hack, i|
485
- define_method "test_x03a_handling_#{i+1}" do
486
- assert_sanitized enc_hack, "<a>"
746
+ assert_includes(acceptable_results, text)
487
747
  end
488
- end
489
748
 
490
- def test_x03a_legitimate
491
- assert_sanitized %(<a href="http&#x3a;//legit">), %(<a href="http://legit">)
492
- assert_sanitized %(<a href="http&#x3A;//legit">), %(<a href="http://legit">)
493
- end
749
+ def test_uri_escaping_of_src_attr_in_a_tag_in_safe_list_sanitizer
750
+ skip if RUBY_VERSION < "2.3"
751
+
752
+ html = %{<a src='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
753
+
754
+ text = safe_list_sanitize(html)
755
+
756
+ acceptable_results = [
757
+ # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch)
758
+ %{<a src="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
759
+ # system libxml2
760
+ %{<a src="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
761
+ # xerces+neko
762
+ %{<a src="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>}
763
+ ]
494
764
 
495
- def test_sanitize_ascii_8bit_string
496
- safe_list_sanitize('<a>hello</a>'.encode('ASCII-8BIT')).tap do |sanitized|
497
- assert_equal '<a>hello</a>', sanitized
498
- assert_equal Encoding::UTF_8, sanitized.encoding
765
+ assert_includes(acceptable_results, text)
499
766
  end
500
- end
501
767
 
502
- def test_sanitize_data_attributes
503
- assert_sanitized %(<a href="/blah" data-method="post">foo</a>), %(<a href="/blah">foo</a>)
504
- assert_sanitized %(<a data-remote="true" data-type="script" data-method="get" data-cross-domain="true" href="attack.js">Launch the missiles</a>), %(<a href="attack.js">Launch the missiles</a>)
505
- end
768
+ def test_uri_escaping_of_name_attr_in_a_tag_in_safe_list_sanitizer
769
+ skip if RUBY_VERSION < "2.3"
506
770
 
507
- def test_allow_data_attribute_if_requested
508
- text = %(<a data-foo="foo">foo</a>)
509
- assert_equal %(<a data-foo="foo">foo</a>), safe_list_sanitize(text, attributes: ['data-foo'])
510
- end
771
+ html = %{<a name='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
511
772
 
512
- def test_uri_escaping_of_href_attr_in_a_tag_in_safe_list_sanitizer
513
- skip if RUBY_VERSION < "2.3"
773
+ text = safe_list_sanitize(html)
514
774
 
515
- html = %{<a href='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
775
+ acceptable_results = [
776
+ # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch)
777
+ %{<a name="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
778
+ # system libxml2
779
+ %{<a name="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
780
+ # xerces+neko
781
+ %{<a name="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>}
782
+ ]
516
783
 
517
- text = safe_list_sanitize(html)
784
+ assert_includes(acceptable_results, text)
785
+ end
518
786
 
519
- acceptable_results = [
520
- # nokogiri w/vendored+patched libxml2
521
- %{<a href="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
522
- # nokogiri w/ system libxml2
523
- %{<a href="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
524
- ]
525
- assert_includes(acceptable_results, text)
526
- end
787
+ def test_uri_escaping_of_name_action_in_a_tag_in_safe_list_sanitizer
788
+ skip if RUBY_VERSION < "2.3"
527
789
 
528
- def test_uri_escaping_of_src_attr_in_a_tag_in_safe_list_sanitizer
529
- skip if RUBY_VERSION < "2.3"
790
+ html = %{<a action='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
530
791
 
531
- html = %{<a src='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
792
+ text = safe_list_sanitize(html, attributes: ["action"])
532
793
 
533
- text = safe_list_sanitize(html)
794
+ acceptable_results = [
795
+ # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch)
796
+ %{<a action="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
797
+ # system libxml2
798
+ %{<a action="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
799
+ # xerces+neko
800
+ %{<a action="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>},
801
+ ]
534
802
 
535
- acceptable_results = [
536
- # nokogiri w/vendored+patched libxml2
537
- %{<a src="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
538
- # nokogiri w/system libxml2
539
- %{<a src="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
540
- ]
541
- assert_includes(acceptable_results, text)
542
- end
803
+ assert_includes(acceptable_results, text)
804
+ end
543
805
 
544
- def test_uri_escaping_of_name_attr_in_a_tag_in_safe_list_sanitizer
545
- skip if RUBY_VERSION < "2.3"
806
+ def test_exclude_node_type_processing_instructions
807
+ input = "<div>text</div><?div content><b>text</b>"
808
+ result = safe_list_sanitize(input)
809
+ acceptable_results = [
810
+ # jruby cyberneko (nokogiri < 1.14.0)
811
+ "<div>text</div>",
812
+ # everything else
813
+ "<div>text</div><b>text</b>",
814
+ ]
815
+
816
+ assert_includes(acceptable_results, result)
817
+ end
546
818
 
547
- html = %{<a name='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
819
+ def test_exclude_node_type_comment
820
+ assert_equal("<div>text</div><b>text</b>", safe_list_sanitize("<div>text</div><!-- comment --><b>text</b>"))
821
+ end
548
822
 
549
- text = safe_list_sanitize(html)
823
+ %w[text/plain text/css image/png image/gif image/jpeg].each do |mediatype|
824
+ define_method "test_mediatype_#{mediatype}_allowed" do
825
+ input = %Q(<img src="data:#{mediatype};base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
826
+ expected = input
827
+ actual = safe_list_sanitize(input)
828
+ assert_equal(expected, actual)
829
+
830
+ input = %Q(<img src="DATA:#{mediatype};base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
831
+ expected = input
832
+ actual = safe_list_sanitize(input)
833
+ assert_equal(expected, actual)
834
+ end
835
+ end
550
836
 
551
- acceptable_results = [
552
- # nokogiri w/vendored+patched libxml2
553
- %{<a name="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
554
- # nokogiri w/system libxml2
555
- %{<a name="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
556
- ]
557
- assert_includes(acceptable_results, text)
558
- end
837
+ def test_mediatype_text_html_disallowed
838
+ input = '<img src="data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">'
839
+ expected = "<img>"
840
+ actual = safe_list_sanitize(input)
841
+ assert_equal(expected, actual)
559
842
 
560
- def test_uri_escaping_of_name_action_in_a_tag_in_safe_list_sanitizer
561
- skip if RUBY_VERSION < "2.3"
843
+ input = '<img src="DATA:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">'
844
+ expected = "<img>"
845
+ actual = safe_list_sanitize(input)
846
+ assert_equal(expected, actual)
847
+ end
562
848
 
563
- html = %{<a action='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
849
+ def test_mediatype_image_svg_xml_disallowed
850
+ input = '<img src="data:image/svg+xml;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">'
851
+ expected = "<img>"
852
+ actual = safe_list_sanitize(input)
853
+ assert_equal(expected, actual)
564
854
 
565
- text = safe_list_sanitize(html, attributes: ['action'])
855
+ input = '<img src="DATA:image/svg+xml;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">'
856
+ expected = "<img>"
857
+ actual = safe_list_sanitize(input)
858
+ assert_equal(expected, actual)
859
+ end
566
860
 
567
- acceptable_results = [
568
- # nokogiri w/vendored+patched libxml2
569
- %{<a action="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
570
- # nokogiri w/system libxml2
571
- %{<a action="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
572
- ]
573
- assert_includes(acceptable_results, text)
574
- end
861
+ def test_mediatype_other_disallowed
862
+ input = '<a href="data:foo;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">foo</a>'
863
+ expected = "<a>foo</a>"
864
+ actual = safe_list_sanitize(input)
865
+ assert_equal(expected, actual)
575
866
 
576
- def test_exclude_node_type_processing_instructions
577
- assert_equal("<div>text</div><b>text</b>", safe_list_sanitize("<div>text</div><?div content><b>text</b>"))
578
- end
867
+ input = '<a href="DATA:foo;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">foo</a>'
868
+ expected = "<a>foo</a>"
869
+ actual = safe_list_sanitize(input)
870
+ assert_equal(expected, actual)
871
+ end
579
872
 
580
- def test_exclude_node_type_comment
581
- assert_equal("<div>text</div><b>text</b>", safe_list_sanitize("<div>text</div><!-- comment --><b>text</b>"))
582
- end
873
+ def test_scrubbing_svg_attr_values_that_allow_ref
874
+ input = '<div fill="yellow url(http://bad.com/) #fff">hey</div>'
875
+ expected = '<div fill="yellow #fff">hey</div>'
876
+ actual = scope_allowed_attributes %w(fill) do
877
+ safe_list_sanitize(input)
878
+ end
583
879
 
584
- def test_disallow_the_dangerous_safelist_combination_of_select_and_style
585
- input = "<select><style><script>alert(1)</script></style></select>"
586
- tags = ["select", "style"]
587
- warning = /WARNING: Rails::Html::SafeListSanitizer: removing 'style' from safelist/
588
- sanitized = nil
589
- invocation = Proc.new { sanitized = safe_list_sanitize(input, tags: tags) }
590
-
591
- if html5_mode?
592
- # if Loofah is using an HTML5 parser,
593
- # then "style" should be removed by the parser as an invalid child of "select"
594
- assert_silent(&invocation)
595
- else
596
- # if Loofah is using an HTML4 parser,
597
- # then SafeListSanitizer should remove "style" from the safelist
598
- assert_output(nil, warning, &invocation)
599
- end
600
- refute_includes(sanitized, "style")
601
- end
880
+ assert_equal(expected, actual)
881
+ end
602
882
 
603
- protected
883
+ def test_style_with_css_payload
884
+ input, tags = "<style>div > span { background: \"red\"; }</style>", ["style"]
885
+ actual = safe_list_sanitize(input, tags: tags)
886
+ acceptable_results = [
887
+ # libxml2
888
+ "<style>div &gt; span { background: \"red\"; }</style>",
889
+ # libgumbo
890
+ "<style>div > span { background: \"red\"; }</style>",
891
+ ]
892
+
893
+ assert_includes(acceptable_results, actual)
894
+ end
604
895
 
605
- def xpath_sanitize(input, options = {})
606
- XpathRemovalTestSanitizer.new.sanitize(input, options)
607
- end
896
+ def test_combination_of_select_and_style_with_css_payload
897
+ input, tags = "<select><style>div > span { background: \"red\"; }</style></select>", ["select", "style"]
898
+ actual = safe_list_sanitize(input, tags: tags)
899
+ acceptable_results = [
900
+ # libxml2
901
+ "<select><style>div &gt; span { background: \"red\"; }</style></select>",
902
+ # libgumbo
903
+ "<select>div &gt; span { background: \"red\"; }</select>",
904
+ ]
905
+
906
+ assert_includes(acceptable_results, actual)
907
+ end
608
908
 
609
- def full_sanitize(input, options = {})
610
- Rails::Html::FullSanitizer.new.sanitize(input, options)
611
- end
909
+ def test_combination_of_select_and_style_with_script_payload
910
+ input, tags = "<select><style><script>alert(1)</script></style></select>", ["select", "style"]
911
+ actual = safe_list_sanitize(input, tags: tags)
912
+ acceptable_results = [
913
+ # libxml2
914
+ "<select><style>&lt;script&gt;alert(1)&lt;/script&gt;</style></select>",
915
+ # libgumbo
916
+ "<select>alert(1)</select>",
917
+ ]
918
+
919
+ assert_includes(acceptable_results, actual)
920
+ end
612
921
 
613
- def link_sanitize(input, options = {})
614
- Rails::Html::LinkSanitizer.new.sanitize(input, options)
615
- end
922
+ def test_combination_of_svg_and_style_with_script_payload
923
+ input, tags = "<svg><style><script>alert(1)</script></style></svg>", ["svg", "style"]
924
+ actual = safe_list_sanitize(input, tags: tags)
925
+ acceptable_results = [
926
+ # libxml2
927
+ "<svg><style>&lt;script&gt;alert(1)&lt;/script&gt;</style></svg>",
928
+ # libgumbo
929
+ "<svg><style>alert(1)</style></svg>"
930
+ ]
931
+
932
+ assert_includes(acceptable_results, actual)
933
+ end
616
934
 
617
- def safe_list_sanitize(input, options = {})
618
- Rails::Html::SafeListSanitizer.new.sanitize(input, options)
619
- end
935
+ def test_combination_of_math_and_style_with_img_payload
936
+ input, tags = "<math><style><img src=x onerror=alert(1)></style></math>", ["math", "style"]
937
+ actual = safe_list_sanitize(input, tags: tags)
938
+ acceptable_results = [
939
+ # libxml2
940
+ "<math><style>&lt;img src=x onerror=alert(1)&gt;</style></math>",
941
+ # libgumbo
942
+ "<math><style></style></math>",
943
+ ]
944
+
945
+ assert_includes(acceptable_results, actual)
946
+ end
620
947
 
621
- def assert_sanitized(input, expected = nil)
622
- if input
623
- assert_dom_equal expected || input, safe_list_sanitize(input)
624
- else
625
- assert_nil safe_list_sanitize(input)
948
+ def test_combination_of_math_and_style_with_img_payload_2
949
+ input, tags = "<math><style><img src=x onerror=alert(1)></style></math>", ["math", "style", "img"]
950
+ actual = safe_list_sanitize(input, tags: tags)
951
+ acceptable_results = [
952
+ # libxml2
953
+ "<math><style>&lt;img src=x onerror=alert(1)&gt;</style></math>",
954
+ # libgumbo
955
+ "<math><style></style></math><img src=\"x\">",
956
+ ]
957
+
958
+ assert_includes(acceptable_results, actual)
626
959
  end
627
- end
628
960
 
629
- def sanitize_css(input)
630
- Rails::Html::SafeListSanitizer.new.sanitize_css(input)
631
- end
961
+ def test_combination_of_svg_and_style_with_img_payload
962
+ input, tags = "<svg><style><img src=x onerror=alert(1)></style></svg>", ["svg", "style"]
963
+ actual = safe_list_sanitize(input, tags: tags)
964
+ acceptable_results = [
965
+ # libxml2
966
+ "<svg><style>&lt;img src=x onerror=alert(1)&gt;</style></svg>",
967
+ # libgumbo
968
+ "<svg><style></style></svg>",
969
+ ]
970
+
971
+ assert_includes(acceptable_results, actual)
972
+ end
632
973
 
633
- def scope_allowed_tags(tags)
634
- old_tags = Rails::Html::SafeListSanitizer.allowed_tags
635
- Rails::Html::SafeListSanitizer.allowed_tags = tags
636
- yield Rails::Html::SafeListSanitizer.new
637
- ensure
638
- Rails::Html::SafeListSanitizer.allowed_tags = old_tags
639
- end
974
+ def test_combination_of_svg_and_style_with_img_payload_2
975
+ input, tags = "<svg><style><img src=x onerror=alert(1)></style></svg>", ["svg", "style", "img"]
976
+ actual = safe_list_sanitize(input, tags: tags)
977
+ acceptable_results = [
978
+ # libxml2
979
+ "<svg><style>&lt;img src=x onerror=alert(1)&gt;</style></svg>",
980
+ # libgumbo
981
+ "<svg><style></style></svg><img src=\"x\">",
982
+ ]
983
+
984
+ assert_includes(acceptable_results, actual)
985
+ end
640
986
 
641
- def scope_allowed_attributes(attributes)
642
- old_attributes = Rails::Html::SafeListSanitizer.allowed_attributes
643
- Rails::Html::SafeListSanitizer.allowed_attributes = attributes
644
- yield Rails::Html::SafeListSanitizer.new
645
- ensure
646
- Rails::Html::SafeListSanitizer.allowed_attributes = old_attributes
647
- end
987
+ def test_should_sanitize_illegal_style_properties
988
+ raw = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;)
989
+ expected = %(display:block;width:100%;height:100%;background-color:black;background-x:center;background-y:center;)
990
+ assert_equal expected, sanitize_css(raw)
991
+ end
648
992
 
649
- # note that this is used for testing CSS hex encoding: \\[0-9a-f]{1,6}
650
- def convert_to_css_hex(string, escape_parens=false)
651
- string.chars.map do |c|
652
- if !escape_parens && (c == "(" || c == ")")
653
- c
654
- else
655
- format('\00%02X', c.ord)
993
+ def test_should_sanitize_with_trailing_space
994
+ raw = "display:block; "
995
+ expected = "display:block;"
996
+ assert_equal expected, sanitize_css(raw)
997
+ end
998
+
999
+ def test_should_sanitize_xul_style_attributes
1000
+ raw = %(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss'))
1001
+ assert_equal "", sanitize_css(raw)
1002
+ end
1003
+
1004
+ def test_should_sanitize_div_background_image_unicode_encoded
1005
+ [
1006
+ convert_to_css_hex("url(javascript:alert(1))", false),
1007
+ convert_to_css_hex("url(javascript:alert(1))", true),
1008
+ convert_to_css_hex("url(https://example.com)", false),
1009
+ convert_to_css_hex("url(https://example.com)", true),
1010
+ ].each do |propval|
1011
+ raw = "background-image:" + propval
1012
+ assert_empty(sanitize_css(raw))
1013
+ end
1014
+ end
1015
+
1016
# A hex-escaped rgb(...) value (with and without the parentheses escaped) must
# leave the background-image property present in the sanitized output.
def test_should_allow_div_background_image_unicode_encoded_safe_functions
  encoded_values = [
    convert_to_css_hex("rgb(255,0,0)", false),
    convert_to_css_hex("rgb(255,0,0)", true),
  ]

  encoded_values.each do |encoded|
    assert_includes(sanitize_css("background-image:#{encoded}"), "background-image")
  end
end
1026
+
1027
# A width declaration whose value is expression(...) must sanitize to "".
def test_should_sanitize_div_style_expression
  expression_css = %(width: expression(alert('XSS'));)

  assert_equal "", sanitize_css(expression_css)
end
1031
+
1032
# Same expression(...) payload, but with newlines before the property, after
# the colon and after the semicolon; the result must still be "".
def test_should_sanitize_across_newlines
  multiline_css = %(\nwidth:\nexpression(alert('XSS'));\n)

  assert_equal "", sanitize_css(multiline_css)
end
1036
+
1037
+ protected
1038
# Sanitizes +input+ with a freshly built SafeListSanitizer taken from the
# module under test, forwarding +options+ unchanged.
def safe_list_sanitize(input, options = {})
  sanitizer = module_under_test::SafeListSanitizer.new
  sanitizer.sanitize(input, options)
end
1041
+
1042
# Asserts that sanitizing +input+ yields +expected+. When +expected+ is
# omitted (or falsy), the input is expected to pass through unchanged.
def assert_sanitized(input, expected = nil)
  expected ||= input
  assert_equal(expected, safe_list_sanitize(input))
end
657
- end.join
658
- end
659
1045
 
660
- def libxml_2_9_14_recovery?
661
- Nokogiri.method(:uses_libxml?).arity == -1 && Nokogiri.uses_libxml?(">= 2.9.14")
1046
# Temporarily replaces the class-level allowed_tags of the SafeListSanitizer
# under test with +tags+, yields a new sanitizer instance, and restores the
# previous value afterwards, even if the block raises.
def scope_allowed_tags(tags)
  sanitizer_class = module_under_test::SafeListSanitizer
  old_tags = sanitizer_class.allowed_tags

  # The ensure starts only after the previous value has been captured, so a
  # failure while reading it can never write nil back as the allowed list.
  begin
    sanitizer_class.allowed_tags = tags
    yield sanitizer_class.new
  ensure
    sanitizer_class.allowed_tags = old_tags
  end
end
1053
+
1054
# Temporarily replaces the class-level allowed_attributes of the
# SafeListSanitizer under test with +attributes+, yields a new sanitizer
# instance, and restores the previous value afterwards, even if the block
# raises.
def scope_allowed_attributes(attributes)
  sanitizer_class = module_under_test::SafeListSanitizer
  old_attributes = sanitizer_class.allowed_attributes

  # The ensure starts only after the previous value has been captured, so a
  # failure while reading it can never write nil back as the allowed list.
  begin
    sanitizer_class.allowed_attributes = attributes
    yield sanitizer_class.new
  ensure
    sanitizer_class.allowed_attributes = old_attributes
  end
end
1061
+
1062
# Runs +input+ through sanitize_css on a freshly built SafeListSanitizer from
# the module under test.
def sanitize_css(input)
  sanitizer = module_under_test::SafeListSanitizer.new
  sanitizer.sanitize_css(input)
end
1065
+
1066
# Encodes every character of +string+ as a CSS hex escape.
# Note that this is used for testing CSS hex encoding: \\[0-9a-f]{1,6}
#
# string        - the text to encode.
# escape_parens - when false (the default), "(" and ")" are passed through
#                 literally; when true they are hex-escaped like any other
#                 character.
#
# Returns the encoded String.
def convert_to_css_hex(string, escape_parens = false)
  string.each_char.map do |char|
    next char if !escape_parens && "()".include?(char)

    codepoint = char.ord
    if codepoint > 0xFFFF
      # A CSS escape holds at most six hex digits, so the "\00" prefix used
      # below would overflow it for code points beyond U+FFFF.
      format('\%06X', codepoint)
    else
      format('\00%02X', codepoint)
    end
  end.join
end
662
1076
  end
663
1077
 
664
- def html5_mode?
665
- ::Loofah.respond_to?(:html5_mode?) && ::Loofah.html5_mode?
1078
# Minitest case that mixes in the SafeListSanitizerTest module with
# Rails::HTML4 recorded as the module under test.
class HTML4SafeListSanitizerTest < Minitest::Test
  # NOTE(review): presumably read back by module_under_test, which is defined
  # outside this excerpt; confirm.
  @module_under_test = Rails::HTML4

  include SafeListSanitizerTest
end
1082
+
1083
# Minitest case that mixes in the SafeListSanitizerTest module with
# Rails::HTML5 recorded as the module under test. The class is only defined
# when loofah_html5_support? returns a truthy value.
class HTML5SafeListSanitizerTest < Minitest::Test
  # NOTE(review): presumably read back by module_under_test, which is defined
  # outside this excerpt; confirm.
  @module_under_test = Rails::HTML5

  include SafeListSanitizerTest
end if loofah_html5_support?
667
1087
  end