rails-html-sanitizer 1.4.3 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rails-html-sanitizer might be problematic. Click here for more details.

@@ -1,667 +1,1087 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "minitest/autorun"
2
4
  require "rails-html-sanitizer"
3
- require "rails/dom/testing/assertions/dom_assertions"
4
5
 
5
- puts Nokogiri::VERSION_INFO
6
+ puts "nokogiri version info: #{Nokogiri::VERSION_INFO}"
7
+ puts "html5 support: #{Rails::HTML::Sanitizer.html5_support?}"
8
+
9
+ #
10
+ # NOTE that many of these tests contain multiple acceptable results.
11
+ #
12
+ # In some cases, this is because of how the HTML4 parser's recovery behavior changed in libxml2
13
+ # 2.9.14 and 2.10.0. For more details, see:
14
+ #
15
+ # - https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5
16
+ # - https://gitlab.gnome.org/GNOME/libxml2/-/issues/380
17
+ #
18
+ # In other cases, multiple acceptable results are provided because Nokogiri's vendored libxml2 is
19
+ # patched to entity-escape server-side includes (aka "SSI", aka `<!-- #directive param=value -->`).
20
+ #
21
+ # In many other cases, it's because the parser used by Nokogiri on JRuby (xerces+nekohtml) parses
22
+ # slightly differently than libxml2 in edge cases.
23
+ #
24
+ module SanitizerTests
25
+ def self.loofah_html5_support?
26
+ Loofah.respond_to?(:html5_support?) && Loofah.html5_support?
27
+ end
28
+
29
+ class BaseSanitizerTest < Minitest::Test
30
+ class XpathRemovalTestSanitizer < Rails::HTML::Sanitizer
31
+ def sanitize(html, options = {})
32
+ fragment = Loofah.fragment(html)
33
+ remove_xpaths(fragment, options[:xpaths]).to_s
34
+ end
35
+ end
6
36
 
7
- class SanitizersTest < Minitest::Test
8
- include Rails::Dom::Testing::Assertions::DomAssertions
37
+ def test_sanitizer_sanitize_raises_not_implemented_error
38
+ assert_raises NotImplementedError do
39
+ Rails::HTML::Sanitizer.new.sanitize("asdf")
40
+ end
41
+ end
9
42
 
10
- def test_sanitizer_sanitize_raises_not_implemented_error
11
- assert_raises NotImplementedError do
12
- Rails::Html::Sanitizer.new.sanitize('')
43
+ def test_remove_xpaths_removes_an_xpath
44
+ html = %(<h1>hello <script>code!</script></h1>)
45
+ assert_equal %(<h1>hello </h1>), xpath_sanitize(html, xpaths: %w(.//script))
13
46
  end
14
- end
15
47
 
16
- def test_sanitize_nested_script
17
- assert_equal '&lt;script&gt;alert("XSS");&lt;/script&gt;', safe_list_sanitize('<script><script></script>alert("XSS");<script><</script>/</script><script>script></script>', tags: %w(em))
18
- end
48
+ def test_remove_xpaths_removes_all_occurrences_of_xpath
49
+ html = %(<section><header><script>code!</script></header><p>hello <script>code!</script></p></section>)
50
+ assert_equal %(<section><header></header><p>hello </p></section>), xpath_sanitize(html, xpaths: %w(.//script))
51
+ end
19
52
 
20
- def test_sanitize_nested_script_in_style
21
- assert_equal '&lt;script&gt;alert("XSS");&lt;/script&gt;', safe_list_sanitize('<style><script></style>alert("XSS");<style><</style>/</style><style>script></style>', tags: %w(em))
22
- end
53
+ def test_remove_xpaths_called_with_faulty_xpath
54
+ assert_raises Nokogiri::XML::XPath::SyntaxError do
55
+ xpath_sanitize("<h1>hello<h1>", xpaths: %w(..faulty_xpath))
56
+ end
57
+ end
23
58
 
24
- class XpathRemovalTestSanitizer < Rails::Html::Sanitizer
25
- def sanitize(html, options = {})
26
- fragment = Loofah.fragment(html)
27
- remove_xpaths(fragment, options[:xpaths]).to_s
59
+ def test_remove_xpaths_called_with_xpath_string
60
+ assert_equal "", xpath_sanitize("<a></a>", xpaths: ".//a")
28
61
  end
29
- end
30
62
 
31
- def test_remove_xpaths_removes_an_xpath
32
- html = %(<h1>hello <script>code!</script></h1>)
33
- assert_equal %(<h1>hello </h1>), xpath_sanitize(html, xpaths: %w(.//script))
34
- end
63
+ def test_remove_xpaths_called_with_enumerable_xpaths
64
+ assert_equal "", xpath_sanitize("<a><span></span></a>", xpaths: %w(.//a .//span))
65
+ end
35
66
 
36
- def test_remove_xpaths_removes_all_occurrences_of_xpath
37
- html = %(<section><header><script>code!</script></header><p>hello <script>code!</script></p></section>)
38
- assert_equal %(<section><header></header><p>hello </p></section>), xpath_sanitize(html, xpaths: %w(.//script))
67
+ protected
68
+ def xpath_sanitize(input, options = {})
69
+ XpathRemovalTestSanitizer.new.sanitize(input, options)
70
+ end
39
71
  end
40
72
 
41
- def test_remove_xpaths_called_with_faulty_xpath
42
- assert_raises Nokogiri::XML::XPath::SyntaxError do
43
- xpath_sanitize('<h1>hello<h1>', xpaths: %w(..faulty_xpath))
73
+ module ModuleUnderTest
74
+ def module_under_test
75
+ self.class.instance_variable_get(:@module_under_test)
44
76
  end
45
77
  end
46
78
 
47
- def test_remove_xpaths_called_with_xpath_string
48
- assert_equal '', xpath_sanitize('<a></a>', xpaths: './/a')
49
- end
79
+ module FullSanitizerTest
80
+ include ModuleUnderTest
50
81
 
51
- def test_remove_xpaths_called_with_enumerable_xpaths
52
- assert_equal '', xpath_sanitize('<a><span></span></a>', xpaths: %w(.//a .//span))
53
- end
82
+ def test_strip_tags_with_quote
83
+ input = '<" <img src="trollface.gif" onload="alert(1)"> hi'
84
+ result = full_sanitize(input)
85
+ acceptable_results = [
86
+ # libxml2 >= 2.9.14 and xerces+neko
87
+ %{&lt;" hi},
88
+ # other libxml2
89
+ %{ hi},
90
+ ]
54
91
 
55
- def test_strip_tags_with_quote
56
- input = '<" <img src="trollface.gif" onload="alert(1)"> hi'
57
- expected = libxml_2_9_14_recovery? ? %{&lt;" hi} : %{ hi}
58
- assert_equal(expected, full_sanitize(input))
59
- end
92
+ assert_includes(acceptable_results, result)
93
+ end
60
94
 
61
- def test_strip_invalid_html
62
- assert_equal "&lt;&lt;", full_sanitize("<<<bad html")
63
- end
95
+ def test_strip_invalid_html
96
+ assert_equal "&lt;&lt;", full_sanitize("<<<bad html")
97
+ end
64
98
 
65
- def test_strip_nested_tags
66
- expected = "Wei&lt;a onclick='alert(document.cookie);'/&gt;rdos"
67
- input = "Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"
68
- assert_equal expected, full_sanitize(input)
69
- end
99
+ def test_strip_nested_tags
100
+ expected = "Wei&lt;a onclick='alert(document.cookie);'/&gt;rdos"
101
+ input = "Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"
102
+ assert_equal expected, full_sanitize(input)
103
+ end
70
104
 
71
- def test_strip_tags_multiline
72
- expected = %{This is a test.\n\n\n\nIt no longer contains any HTML.\n}
73
- input = %{<title>This is <b>a <a href="" target="_blank">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n}
105
+ def test_strip_tags_multiline
106
+ expected = %{This is a test.\n\n\n\nIt no longer contains any HTML.\n}
107
+ input = %{<h1>This is <b>a <a href="" target="_blank">test</a></b>.</h1>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n}
74
108
 
75
- assert_equal expected, full_sanitize(input)
76
- end
109
+ assert_equal expected, full_sanitize(input)
110
+ end
77
111
 
78
- def test_remove_unclosed_tags
79
- input = "This is <-- not\n a comment here."
80
- expected = libxml_2_9_14_recovery? ? %{This is &lt;-- not\n a comment here.} : %{This is }
81
- assert_equal(expected, full_sanitize(input))
82
- end
112
+ def test_remove_unclosed_tags
113
+ input = "This is <-- not\n a comment here."
114
+ result = full_sanitize(input)
115
+ acceptable_results = [
116
+ # libxml2 >= 2.9.14 and xerces+neko
117
+ %{This is &lt;-- not\n a comment here.},
118
+ # other libxml2
119
+ %{This is },
120
+ ]
121
+
122
+ assert_includes(acceptable_results, result)
123
+ end
83
124
 
84
- def test_strip_cdata
85
- input = "This has a <![CDATA[<section>]]> here."
86
- expected = libxml_2_9_14_recovery? ? %{This has a &lt;![CDATA[]]&gt; here.} : %{This has a ]]&gt; here.}
87
- assert_equal(expected, full_sanitize(input))
88
- end
125
+ def test_strip_cdata
126
+ input = "This has a <![CDATA[<section>]]> here."
127
+ result = full_sanitize(input)
128
+ acceptable_results = [
129
+ # libxml2 = 2.9.14
130
+ %{This has a &lt;![CDATA[]]&gt; here.},
131
+ # other libxml2
132
+ %{This has a ]]&gt; here.},
133
+ # xerces+neko
134
+ %{This has a here.},
135
+ ]
136
+
137
+ assert_includes(acceptable_results, result)
138
+ end
89
139
 
90
- def test_strip_unclosed_cdata
91
- input = "This has an unclosed <![CDATA[<section>]] here..."
92
- expected = libxml_2_9_14_recovery? ? %{This has an unclosed &lt;![CDATA[]] here...} : %{This has an unclosed ]] here...}
93
- assert_equal(expected, full_sanitize(input))
94
- end
140
+ def test_strip_blank_string
141
+ assert_nil full_sanitize(nil)
142
+ assert_equal "", full_sanitize("")
143
+ assert_equal " ", full_sanitize(" ")
144
+ end
95
145
 
96
- def test_strip_blank_string
97
- assert_nil full_sanitize(nil)
98
- assert_equal "", full_sanitize("")
99
- assert_equal " ", full_sanitize(" ")
100
- end
146
+ def test_strip_tags_with_plaintext
147
+ assert_equal "Don't touch me", full_sanitize("Don't touch me")
148
+ end
101
149
 
102
- def test_strip_tags_with_plaintext
103
- assert_equal "Don't touch me", full_sanitize("Don't touch me")
104
- end
150
+ def test_strip_tags_with_tags
151
+ assert_equal "This is a test.", full_sanitize("<p>This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</p>")
152
+ end
105
153
 
106
- def test_strip_tags_with_tags
107
- assert_equal "This is a test.", full_sanitize("<p>This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</p>")
108
- end
154
+ def test_escape_tags_with_many_open_quotes
155
+ assert_equal "&lt;&lt;", full_sanitize("<<<bad html>")
156
+ end
109
157
 
110
- def test_escape_tags_with_many_open_quotes
111
- assert_equal "&lt;&lt;", full_sanitize("<<<bad html>")
112
- end
158
+ def test_strip_tags_with_sentence
159
+ assert_equal "This is a test.", full_sanitize("This is a test.")
160
+ end
113
161
 
114
- def test_strip_tags_with_sentence
115
- assert_equal "This is a test.", full_sanitize("This is a test.")
116
- end
162
+ def test_strip_tags_with_comment
163
+ assert_equal "This has a here.", full_sanitize("This has a <!-- comment --> here.")
164
+ end
117
165
 
118
- def test_strip_tags_with_comment
119
- assert_equal "This has a here.", full_sanitize("This has a <!-- comment --> here.")
120
- end
166
+ def test_strip_tags_with_frozen_string
167
+ assert_equal "Frozen string with no tags", full_sanitize("Frozen string with no tags")
168
+ end
121
169
 
122
- def test_strip_tags_with_frozen_string
123
- assert_equal "Frozen string with no tags", full_sanitize("Frozen string with no tags".freeze)
124
- end
170
+ def test_full_sanitize_respect_html_escaping_of_the_given_string
171
+ assert_equal 'test\r\nstring', full_sanitize('test\r\nstring')
172
+ assert_equal "&amp;", full_sanitize("&")
173
+ assert_equal "&amp;", full_sanitize("&amp;")
174
+ assert_equal "&amp;amp;", full_sanitize("&amp;amp;")
175
+ assert_equal "omg &lt;script&gt;BOM&lt;/script&gt;", full_sanitize("omg &lt;script&gt;BOM&lt;/script&gt;")
176
+ end
125
177
 
126
- def test_full_sanitize_respect_html_escaping_of_the_given_string
127
- assert_equal 'test\r\nstring', full_sanitize('test\r\nstring')
128
- assert_equal '&amp;', full_sanitize('&')
129
- assert_equal '&amp;', full_sanitize('&amp;')
130
- assert_equal '&amp;amp;', full_sanitize('&amp;amp;')
131
- assert_equal 'omg &lt;script&gt;BOM&lt;/script&gt;', full_sanitize('omg &lt;script&gt;BOM&lt;/script&gt;')
132
- end
178
+ def test_sanitize_ascii_8bit_string
179
+ full_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
180
+ assert_equal "hello", sanitized
181
+ assert_equal Encoding::UTF_8, sanitized.encoding
182
+ end
183
+ end
133
184
 
134
- def test_strip_links_with_tags_in_tags
135
- expected = "&lt;a href='hello'&gt;all <b>day</b> long&lt;/a&gt;"
136
- input = "<<a>a href='hello'>all <b>day</b> long<</A>/a>"
137
- assert_equal expected, link_sanitize(input)
185
+ protected
186
+ def full_sanitize(input, options = {})
187
+ module_under_test::FullSanitizer.new.sanitize(input, options)
188
+ end
138
189
  end
139
190
 
140
- def test_strip_links_with_unclosed_tags
141
- assert_equal "", link_sanitize("<a<a")
191
+ class HTML4FullSanitizerTest < Minitest::Test
192
+ @module_under_test = Rails::HTML4
193
+ include FullSanitizerTest
142
194
  end
143
195
 
144
- def test_strip_links_with_plaintext
145
- assert_equal "Don't touch me", link_sanitize("Don't touch me")
146
- end
196
+ class HTML5FullSanitizerTest < Minitest::Test
197
+ @module_under_test = Rails::HTML5
198
+ include FullSanitizerTest
199
+ end if loofah_html5_support?
147
200
 
148
- def test_strip_links_with_line_feed_and_uppercase_tag
149
- assert_equal "on my mind\nall day long", link_sanitize("<a href='almost'>on my mind</a>\n<A href='almost'>all day long</A>")
150
- end
201
+ module LinkSanitizerTest
202
+ include ModuleUnderTest
151
203
 
152
- def test_strip_links_leaves_nonlink_tags
153
- assert_equal "My mind\nall <b>day</b> long", link_sanitize("<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>")
154
- end
204
+ def test_strip_links_with_tags_in_tags
205
+ expected = "&lt;a href='hello'&gt;all <b>day</b> long&lt;/a&gt;"
206
+ input = "<<a>a href='hello'>all <b>day</b> long<</A>/a>"
207
+ assert_equal expected, link_sanitize(input)
208
+ end
155
209
 
156
- def test_strip_links_with_links
157
- assert_equal "0wn3d", link_sanitize("<a href='http://www.rubyonrails.com/'><a href='http://www.rubyonrails.com/' onlclick='steal()'>0wn3d</a></a>")
158
- end
210
+ def test_strip_links_with_unclosed_tags
211
+ assert_equal "", link_sanitize("<a<a")
212
+ end
159
213
 
160
- def test_strip_links_with_linkception
161
- assert_equal "Magic", link_sanitize("<a href='http://www.rubyonrails.com/'>Mag<a href='http://www.ruby-lang.org/'>ic")
162
- end
214
+ def test_strip_links_with_plaintext
215
+ assert_equal "Don't touch me", link_sanitize("Don't touch me")
216
+ end
163
217
 
164
- def test_sanitize_form
165
- assert_sanitized "<form action=\"/foo/bar\" method=\"post\"><input></form>", ''
166
- end
218
+ def test_strip_links_with_line_feed_and_uppercase_tag
219
+ assert_equal "on my mind\nall day long", link_sanitize("<a href='almost'>on my mind</a>\n<A href='almost'>all day long</A>")
220
+ end
167
221
 
168
- def test_sanitize_plaintext
169
- assert_sanitized "<plaintext><span>foo</span></plaintext>", "<span>foo</span>"
170
- end
222
+ def test_strip_links_leaves_nonlink_tags
223
+ assert_equal "My mind\nall <b>day</b> long", link_sanitize("<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>")
224
+ end
171
225
 
172
- def test_sanitize_script
173
- assert_sanitized "a b c<script language=\"Javascript\">blah blah blah</script>d e f", "a b cblah blah blahd e f"
174
- end
226
+ def test_strip_links_with_links
227
+ assert_equal "0wn3d", link_sanitize("<a href='http://www.rubyonrails.com/'><a href='http://www.rubyonrails.com/' onlclick='steal()'>0wn3d</a></a>")
228
+ end
175
229
 
176
- def test_sanitize_js_handlers
177
- raw = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>}
178
- assert_sanitized raw, %{onthis="do that" <a href="#" name="foo">hello</a>}
179
- end
230
+ def test_strip_links_with_linkception
231
+ assert_equal "Magic", link_sanitize("<a href='http://www.rubyonrails.com/'>Mag<a href='http://www.ruby-lang.org/'>ic")
232
+ end
180
233
 
181
- def test_sanitize_javascript_href
182
- raw = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>}
183
- assert_sanitized raw, %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>}
234
+ def test_sanitize_ascii_8bit_string
235
+ link_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
236
+ assert_equal "<div>hello</div>", sanitized
237
+ assert_equal Encoding::UTF_8, sanitized.encoding
238
+ end
239
+ end
240
+
241
+ protected
242
+ def link_sanitize(input, options = {})
243
+ module_under_test::LinkSanitizer.new.sanitize(input, options)
244
+ end
184
245
  end
185
246
 
186
- def test_sanitize_image_src
187
- raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>}
188
- assert_sanitized raw, %{src="javascript:bang" <img width="5">foo</img>, <span>bar</span>}
247
+ class HTML4LinkSanitizerTest < Minitest::Test
248
+ @module_under_test = Rails::HTML4
249
+ include LinkSanitizerTest
189
250
  end
190
251
 
191
- tags = Loofah::HTML5::SafeList::ALLOWED_ELEMENTS - %w(script form)
192
- tags.each do |tag_name|
193
- define_method "test_should_allow_#{tag_name}_tag" do
194
- scope_allowed_tags(tags) do
195
- assert_sanitized "start <#{tag_name} title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</#{tag_name}> end", %(start <#{tag_name} title="1">foo bar baz</#{tag_name}> end)
252
+ class HTML5LinkSanitizerTest < Minitest::Test
253
+ @module_under_test = Rails::HTML5
254
+ include LinkSanitizerTest
255
+ end if loofah_html5_support?
256
+
257
+ module SafeListSanitizerTest
258
+ include ModuleUnderTest
259
+
260
+ def test_sanitize_nested_script
261
+ assert_equal '&lt;script&gt;alert("XSS");&lt;/script&gt;', safe_list_sanitize('<script><script></script>alert("XSS");<script><</script>/</script><script>script></script>', tags: %w(em))
262
+ end
263
+
264
+ def test_sanitize_nested_script_in_style
265
+ input = '<style><script></style>alert("XSS");<style><</style>/</style><style>script></style>'
266
+ result = safe_list_sanitize(input, tags: %w(em))
267
+ acceptable_results = [
268
+ # libxml2
269
+ %{&lt;script&gt;alert("XSS");&lt;/script&gt;},
270
+ # xerces+neko. unavoidable double-escaping, see loofah/docs/2022-10-decision-on-cdata-nodes.md
271
+ %{&amp;lt;script&amp;gt;alert(\"XSS\");&amp;lt;&amp;lt;/style&amp;gt;/script&amp;gt;},
272
+ ]
273
+
274
+ assert_includes(acceptable_results, result)
275
+ end
276
+
277
+ def test_strip_unclosed_cdata
278
+ input = "This has an unclosed <![CDATA[<section>]] here..."
279
+
280
+ result = safe_list_sanitize(input)
281
+
282
+ acceptable_results = [
283
+ # libxml2 = 2.9.14
284
+ %{This has an unclosed &lt;![CDATA[]] here...},
285
+ # other libxml2
286
+ %{This has an unclosed ]] here...},
287
+ # xerces+neko
288
+ %{This has an unclosed }
289
+ ]
290
+
291
+ assert_includes(acceptable_results, result)
292
+ end
293
+
294
+ def test_sanitize_form
295
+ assert_sanitized "<form action=\"/foo/bar\" method=\"post\"><input></form>", ""
296
+ end
297
+
298
+ def test_sanitize_plaintext
299
+ # note that the `plaintext` tag has been deprecated since HTML 2
300
+ # https://developer.mozilla.org/en-US/docs/Web/HTML/Element/plaintext
301
+ input = "<plaintext><span>foo</span></plaintext>"
302
+ result = safe_list_sanitize(input)
303
+ acceptable_results = [
304
+ # libxml2
305
+ "<span>foo</span>",
306
+ # xerces+nekohtml-unit
307
+ "&lt;span&gt;foo&lt;/span&gt;&lt;/plaintext&gt;",
308
+ # xerces+cyberneko
309
+ "&lt;span&gt;foo&lt;/span&gt;"
310
+ ]
311
+
312
+ assert_includes(acceptable_results, result)
313
+ end
314
+
315
+ def test_sanitize_script
316
+ assert_sanitized "a b c<script language=\"Javascript\">blah blah blah</script>d e f", "a b cblah blah blahd e f"
317
+ end
318
+
319
+ def test_sanitize_js_handlers
320
+ raw = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>}
321
+ assert_sanitized raw, %{onthis="do that" <a href="#" name="foo">hello</a>}
322
+ end
323
+
324
+ def test_sanitize_javascript_href
325
+ raw = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>}
326
+ assert_sanitized raw, %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>}
327
+ end
328
+
329
+ def test_sanitize_image_src
330
+ raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>}
331
+ assert_sanitized raw, %{src="javascript:bang" <img width="5">foo, <span>bar</span>}
332
+ end
333
+
334
+ def test_should_allow_anchors
335
+ assert_sanitized %(<a href="foo" onclick="bar"><script>baz</script></a>), %(<a href=\"foo\">baz</a>)
336
+ end
337
+
338
+ def test_video_poster_sanitization
339
+ scope_allowed_tags(%w(video)) do
340
+ scope_allowed_attributes %w(src poster) do
341
+ expected = if RUBY_PLATFORM == "java"
342
+ # xerces+nekohtml alphabetizes the attributes! FML.
343
+ %(<video poster="posterimage.jpg" src="videofile.ogg"></video>)
344
+ else
345
+ %(<video src="videofile.ogg" poster="posterimage.jpg"></video>)
346
+ end
347
+ assert_sanitized(
348
+ %(<video src="videofile.ogg" autoplay poster="posterimage.jpg"></video>),
349
+ expected,
350
+ )
351
+ assert_sanitized(
352
+ %(<video src="videofile.ogg" poster=javascript:alert(1)></video>),
353
+ %(<video src="videofile.ogg"></video>),
354
+ )
355
+ end
196
356
  end
197
357
  end
198
- end
199
358
 
200
- def test_should_allow_anchors
201
- assert_sanitized %(<a href="foo" onclick="bar"><script>baz</script></a>), %(<a href=\"foo\">baz</a>)
202
- end
359
+ # RFC 3986, sec 4.2
360
+ def test_allow_colons_in_path_component
361
+ assert_sanitized "<a href=\"./this:that\">foo</a>"
362
+ end
203
363
 
204
- def test_video_poster_sanitization
205
- scope_allowed_tags(%w(video)) do
206
- scope_allowed_attributes %w(src poster) do
207
- assert_sanitized %(<video src="videofile.ogg" autoplay poster="posterimage.jpg"></video>), %(<video src="videofile.ogg" poster="posterimage.jpg"></video>)
208
- assert_sanitized %(<video src="videofile.ogg" poster=javascript:alert(1)></video>), %(<video src="videofile.ogg"></video>)
364
+ %w(src width height alt).each do |img_attr|
365
+ define_method "test_should_allow_image_#{img_attr}_attribute" do
366
+ assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo">)
209
367
  end
210
368
  end
211
- end
212
369
 
213
- # RFC 3986, sec 4.2
214
- def test_allow_colons_in_path_component
215
- assert_sanitized "<a href=\"./this:that\">foo</a>"
216
- end
370
+ def test_lang_and_xml_lang
371
+ # https://html.spec.whatwg.org/multipage/dom.html#the-lang-and-xml:lang-attributes
372
+ #
373
+ # 3.2.6.2 The lang and xml:lang attributes
374
+ #
375
+ # ... Authors must not use the lang attribute in the XML namespace on HTML elements in HTML
376
+ # documents. To ease migration to and from XML, authors may specify an attribute in no namespace
377
+ # with no prefix and with the literal localname "xml:lang" on HTML elements in HTML documents,
378
+ # but such attributes must only be specified if a lang attribute in no namespace is also
379
+ # specified, and both attributes must have the same value when compared in an ASCII
380
+ # case-insensitive manner.
381
+ input = expected = "<div lang=\"en\" xml:lang=\"en\">foo</div>"
382
+ assert_sanitized(input, expected)
383
+ end
217
384
 
218
- %w(src width height alt).each do |img_attr|
219
- define_method "test_should_allow_image_#{img_attr}_attribute" do
220
- assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo" />)
385
+ def test_should_handle_non_html
386
+ assert_sanitized "abc"
221
387
  end
222
- end
223
388
 
224
- def test_should_handle_non_html
225
- assert_sanitized 'abc'
226
- end
389
+ def test_should_handle_blank_text
390
+ assert_nil(safe_list_sanitize(nil))
391
+ assert_equal("", safe_list_sanitize(""))
392
+ assert_equal(" ", safe_list_sanitize(" "))
393
+ end
227
394
 
228
- def test_should_handle_blank_text
229
- [nil, '', ' '].each { |blank| assert_sanitized blank }
230
- end
395
+ def test_setting_allowed_tags_affects_sanitization
396
+ scope_allowed_tags %w(u) do |sanitizer|
397
+ assert_equal "<u></u>", sanitizer.sanitize("<a><u></u></a>")
398
+ end
399
+ end
231
400
 
232
- def test_setting_allowed_tags_affects_sanitization
233
- scope_allowed_tags %w(u) do |sanitizer|
234
- assert_equal '<u></u>', sanitizer.sanitize('<a><u></u></a>')
401
+ def test_setting_allowed_attributes_affects_sanitization
402
+ scope_allowed_attributes %w(foo) do |sanitizer|
403
+ input = '<a foo="hello" bar="world"></a>'
404
+ assert_equal '<a foo="hello"></a>', sanitizer.sanitize(input)
405
+ end
235
406
  end
236
- end
237
407
 
238
- def test_setting_allowed_attributes_affects_sanitization
239
- scope_allowed_attributes %w(foo) do |sanitizer|
240
- input = '<a foo="hello" bar="world"></a>'
241
- assert_equal '<a foo="hello"></a>', sanitizer.sanitize(input)
408
+ def test_custom_tags_overrides_allowed_tags
409
+ scope_allowed_tags %(u) do |sanitizer|
410
+ input = "<a><u></u></a>"
411
+ assert_equal "<a></a>", sanitizer.sanitize(input, tags: %w(a))
412
+ end
242
413
  end
243
- end
244
414
 
245
- def test_custom_tags_overrides_allowed_tags
246
- scope_allowed_tags %(u) do |sanitizer|
247
- input = '<a><u></u></a>'
248
- assert_equal '<a></a>', sanitizer.sanitize(input, tags: %w(a))
415
+ def test_custom_attributes_overrides_allowed_attributes
416
+ scope_allowed_attributes %(foo) do |sanitizer|
417
+ input = '<a foo="hello" bar="world"></a>'
418
+ assert_equal '<a bar="world"></a>', sanitizer.sanitize(input, attributes: %w(bar))
419
+ end
249
420
  end
250
- end
251
421
 
252
- def test_custom_attributes_overrides_allowed_attributes
253
- scope_allowed_attributes %(foo) do |sanitizer|
254
- input = '<a foo="hello" bar="world"></a>'
255
- assert_equal '<a bar="world"></a>', sanitizer.sanitize(input, attributes: %w(bar))
422
+ def test_should_allow_prune
423
+ sanitizer = module_under_test::SafeListSanitizer.new(prune: true)
424
+ text = "<u>leave me <b>now</b></u>"
425
+ assert_equal "<u>leave me </u>", sanitizer.sanitize(text, tags: %w(u))
256
426
  end
257
- end
258
427
 
259
- def test_should_allow_custom_tags
260
- text = "<u>foo</u>"
261
- assert_equal text, safe_list_sanitize(text, tags: %w(u))
262
- end
428
+ def test_should_allow_custom_tags
429
+ text = "<u>foo</u>"
430
+ assert_equal text, safe_list_sanitize(text, tags: %w(u))
431
+ end
263
432
 
264
- def test_should_allow_only_custom_tags
265
- text = "<u>foo</u> with <i>bar</i>"
266
- assert_equal "<u>foo</u> with bar", safe_list_sanitize(text, tags: %w(u))
267
- end
433
+ def test_should_allow_only_custom_tags
434
+ text = "<u>foo</u> with <i>bar</i>"
435
+ assert_equal "<u>foo</u> with bar", safe_list_sanitize(text, tags: %w(u))
436
+ end
268
437
 
269
- def test_should_allow_custom_tags_with_attributes
270
- text = %(<blockquote cite="http://example.com/">foo</blockquote>)
271
- assert_equal text, safe_list_sanitize(text)
272
- end
438
+ def test_should_allow_custom_tags_with_attributes
439
+ text = %(<blockquote cite="http://example.com/">foo</blockquote>)
440
+ assert_equal text, safe_list_sanitize(text)
441
+ end
273
442
 
274
- def test_should_allow_custom_tags_with_custom_attributes
275
- text = %(<blockquote foo="bar">Lorem ipsum</blockquote>)
276
- assert_equal text, safe_list_sanitize(text, attributes: ['foo'])
277
- end
443
+ def test_should_allow_custom_tags_with_custom_attributes
444
+ text = %(<blockquote foo="bar">Lorem ipsum</blockquote>)
445
+ assert_equal text, safe_list_sanitize(text, attributes: ["foo"])
446
+ end
278
447
 
279
- def test_scrub_style_if_style_attribute_option_is_passed
280
- input = '<p style="color: #000; background-image: url(http://www.ragingplatypus.com/i/cam-full.jpg);"></p>'
281
- actual = safe_list_sanitize(input, attributes: %w(style))
282
- assert_includes(['<p style="color: #000;"></p>', '<p style="color:#000;"></p>'], actual)
283
- end
448
+ def test_scrub_style_if_style_attribute_option_is_passed
449
+ input = '<p style="color: #000; background-image: url(http://www.ragingplatypus.com/i/cam-full.jpg);"></p>'
450
+ actual = safe_list_sanitize(input, attributes: %w(style))
284
451
 
285
- def test_should_raise_argument_error_if_tags_is_not_enumerable
286
- assert_raises ArgumentError do
287
- safe_list_sanitize('<a>some html</a>', tags: 'foo')
452
+ assert_includes(['<p style="color: #000;"></p>', '<p style="color:#000;"></p>'], actual)
288
453
  end
289
- end
290
454
 
291
- def test_should_raise_argument_error_if_attributes_is_not_enumerable
292
- assert_raises ArgumentError do
293
- safe_list_sanitize('<a>some html</a>', attributes: 'foo')
455
+ def test_should_raise_argument_error_if_tags_is_not_enumerable
456
+ assert_raises ArgumentError do
457
+ safe_list_sanitize("<a>some html</a>", tags: "foo")
458
+ end
294
459
  end
295
- end
296
460
 
297
- def test_should_not_accept_non_loofah_inheriting_scrubber
298
- scrubber = Object.new
299
- def scrubber.scrub(node); node.name = 'h1'; end
461
+ def test_should_raise_argument_error_if_attributes_is_not_enumerable
462
+ assert_raises ArgumentError do
463
+ safe_list_sanitize("<a>some html</a>", attributes: "foo")
464
+ end
465
+ end
300
466
 
301
- assert_raises Loofah::ScrubberNotFound do
302
- safe_list_sanitize('<a>some html</a>', scrubber: scrubber)
467
+ def test_should_not_accept_non_loofah_inheriting_scrubber
468
+ scrubber = Object.new
469
+ def scrubber.scrub(node); node.name = "h1"; end
470
+
471
+ assert_raises Loofah::ScrubberNotFound do
472
+ safe_list_sanitize("<a>some html</a>", scrubber: scrubber)
473
+ end
303
474
  end
304
- end
305
475
 
306
- def test_should_accept_loofah_inheriting_scrubber
307
- scrubber = Loofah::Scrubber.new
308
- def scrubber.scrub(node); node.name = 'h1'; end
476
+ def test_should_accept_loofah_inheriting_scrubber
477
+ scrubber = Loofah::Scrubber.new
478
+ def scrubber.scrub(node); node.replace("<h1>#{node.inner_html}</h1>"); end
309
479
 
310
- html = "<script>hello!</script>"
311
- assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber)
312
- end
480
+ html = "<script>hello!</script>"
481
+ assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber)
482
+ end
313
483
 
314
- def test_should_accept_loofah_scrubber_that_wraps_a_block
315
- scrubber = Loofah::Scrubber.new { |node| node.name = 'h1' }
316
- html = "<script>hello!</script>"
317
- assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber)
318
- end
484
+ def test_should_accept_loofah_scrubber_that_wraps_a_block
485
+ scrubber = Loofah::Scrubber.new { |node| node.replace("<h1>#{node.inner_html}</h1>") }
486
+ html = "<script>hello!</script>"
487
+ assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber)
488
+ end
319
489
 
320
- def test_custom_scrubber_takes_precedence_over_other_options
321
- scrubber = Loofah::Scrubber.new { |node| node.name = 'h1' }
322
- html = "<script>hello!</script>"
323
- assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber, tags: ['foo'])
324
- end
490
+ def test_custom_scrubber_takes_precedence_over_other_options
491
+ scrubber = Loofah::Scrubber.new { |node| node.replace("<h1>#{node.inner_html}</h1>") }
492
+ html = "<script>hello!</script>"
493
+ assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber, tags: ["foo"])
494
+ end
325
495
 
326
- [%w(img src), %w(a href)].each do |(tag, attr)|
327
- define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do
328
- assert_sanitized %(<#{tag} #{attr}="javascript:bang" title="1">boo</#{tag}>), %(<#{tag} title="1">boo</#{tag}>)
496
+ def test_should_strip_src_attribute_in_img_with_bad_protocols
497
+ assert_sanitized %(<img src="javascript:bang" title="1">), %(<img title="1">)
329
498
  end
330
- end
331
499
 
332
- def test_should_block_script_tag
333
- assert_sanitized %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), ""
334
- end
500
+ def test_should_strip_href_attribute_in_a_with_bad_protocols
501
+ assert_sanitized %(<a href="javascript:bang" title="1">boo</a>), %(<a title="1">boo</a>)
502
+ end
335
503
 
336
- def test_should_not_fall_for_xss_image_hack_with_uppercase_tags
337
- assert_sanitized %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">), %(<img>alert("XSS")"&gt;)
338
- end
504
+ def test_should_block_script_tag
505
+ assert_sanitized %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), ""
506
+ end
339
507
 
340
- [%(<IMG SRC="javascript:alert('XSS');">),
341
- %(<IMG SRC=javascript:alert('XSS')>),
342
- %(<IMG SRC=JaVaScRiPt:alert('XSS')>),
343
- %(<IMG SRC=javascript:alert(&quot;XSS&quot;)>),
344
- %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>),
345
- %(<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>),
346
- %(<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>),
347
- %(<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>),
348
- %(<IMG SRC="jav\tascript:alert('XSS');">),
349
- %(<IMG SRC="jav&#x09;ascript:alert('XSS');">),
350
- %(<IMG SRC="jav&#x0A;ascript:alert('XSS');">),
351
- %(<IMG SRC="jav&#x0D;ascript:alert('XSS');">),
352
- %(<IMG SRC=" &#14; javascript:alert('XSS');">),
353
- %(<IMG SRC="javascript&#x3a;alert('XSS');">),
354
- %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)].each do |img_hack|
355
- define_method "test_should_not_fall_for_xss_image_hack_#{img_hack}" do
356
- assert_sanitized img_hack, "<img>"
508
+ def test_should_not_fall_for_xss_image_hack_with_uppercase_tags
509
+ assert_sanitized %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">), %(<img>alert("XSS")"&gt;)
357
510
  end
358
- end
359
511
 
360
- def test_should_sanitize_tag_broken_up_by_null
361
- assert_sanitized %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>), ""
362
- end
512
+ [%(<IMG SRC="javascript:alert('XSS');">),
513
+ %(<IMG SRC=javascript:alert('XSS')>),
514
+ %(<IMG SRC=JaVaScRiPt:alert('XSS')>),
515
+ %(<IMG SRC=javascript:alert(&quot;XSS&quot;)>),
516
+ %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>),
517
+ %(<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>),
518
+ %(<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>),
519
+ %(<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>),
520
+ %(<IMG SRC="jav\tascript:alert('XSS');">),
521
+ %(<IMG SRC="jav&#x09;ascript:alert('XSS');">),
522
+ %(<IMG SRC="jav&#x0A;ascript:alert('XSS');">),
523
+ %(<IMG SRC="jav&#x0D;ascript:alert('XSS');">),
524
+ %(<IMG SRC=" &#14; javascript:alert('XSS');">),
525
+ %(<IMG SRC="javascript&#x3a;alert('XSS');">),
526
+ %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)].each do |img_hack|
527
+ define_method "test_should_not_fall_for_xss_image_hack_#{img_hack}" do
528
+ assert_sanitized img_hack, "<img>"
529
+ end
530
+ end
363
531
 
364
- def test_should_sanitize_invalid_script_tag
365
- assert_sanitized %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), ""
366
- end
532
+ def test_should_sanitize_tag_broken_up_by_null
533
+ input = %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>)
534
+ result = safe_list_sanitize(input)
535
+ acceptable_results = [
536
+ # libxml2
537
+ "",
538
+ # xerces+neko
539
+ 'alert("XSS")',
540
+ ]
541
+
542
+ assert_includes(acceptable_results, result)
543
+ end
367
544
 
368
- def test_should_sanitize_script_tag_with_multiple_open_brackets
369
- assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "&lt;alert(\"XSS\");//&lt;"
370
- assert_sanitized %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a), ""
371
- end
545
+ def test_should_sanitize_invalid_script_tag
546
+ assert_sanitized %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), ""
547
+ end
372
548
 
373
- def test_should_sanitize_unclosed_script
374
- assert_sanitized %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), ""
375
- end
549
+ def test_should_sanitize_script_tag_with_multiple_open_brackets
550
+ assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "&lt;alert(\"XSS\");//&lt;"
551
+ end
376
552
 
377
- def test_should_sanitize_half_open_scripts
378
- assert_sanitized %(<IMG SRC="javascript:alert('XSS')"), "<img>"
379
- end
553
+ def test_should_sanitize_script_tag_with_multiple_open_brackets_2
554
+ input = %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a)
555
+ result = safe_list_sanitize(input)
556
+ acceptable_results = [
557
+ # libxml2
558
+ "",
559
+ # xerces+neko
560
+ "&lt;a",
561
+ ]
562
+
563
+ assert_includes(acceptable_results, result)
564
+ end
380
565
 
381
- def test_should_not_fall_for_ridiculous_hack
382
- img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)
383
- assert_sanitized img_hack, "<img>"
384
- end
566
+ def test_should_sanitize_unclosed_script
567
+ assert_sanitized %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), ""
568
+ end
385
569
 
386
- def test_should_sanitize_attributes
387
- assert_sanitized %(<SPAN title="'><script>alert()</script>">blah</SPAN>), %(<span title="#{CGI.escapeHTML "'><script>alert()</script>"}">blah</span>)
388
- end
570
+ def test_should_sanitize_half_open_scripts
571
+ input = %(<IMG SRC="javascript:alert('XSS')")
572
+ result = safe_list_sanitize(input)
573
+ acceptable_results = [
574
+ # libxml2
575
+ "<img>",
576
+ # libgumbo
577
+ "",
578
+ ]
579
+
580
+ assert_includes(acceptable_results, result)
581
+ end
389
582
 
390
- def test_should_sanitize_illegal_style_properties
391
- raw = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;)
392
- expected = %(display:block;width:100%;height:100%;background-color:black;background-x:center;background-y:center;)
393
- assert_equal expected, sanitize_css(raw)
394
- end
583
+ def test_should_not_fall_for_ridiculous_hack
584
+ img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)
585
+ assert_sanitized img_hack, "<img>"
586
+ end
395
587
 
396
- def test_should_sanitize_with_trailing_space
397
- raw = "display:block; "
398
- expected = "display:block;"
399
- assert_equal expected, sanitize_css(raw)
400
- end
588
+ def test_should_sanitize_attributes
589
+ input = %(<SPAN title="'><script>alert()</script>">blah</SPAN>)
590
+ result = safe_list_sanitize(input)
591
+ acceptable_results = [
592
+ # libxml2
593
+ %(<span title="'&gt;&lt;script&gt;alert()&lt;/script&gt;">blah</span>),
594
+ # libgumbo
595
+ # this looks scary, but it's fine. for a more detailed analysis check out:
596
+ # https://github.com/discourse/discourse/pull/21522#issuecomment-1545697968
597
+ %(<span title="'><script>alert()</script>">blah</span>)
598
+ ]
599
+
600
+ assert_includes(acceptable_results, result)
601
+ end
401
602
 
402
- def test_should_sanitize_xul_style_attributes
403
- raw = %(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss'))
404
- assert_equal '', sanitize_css(raw)
405
- end
603
+ def test_should_sanitize_invalid_tag_names
604
+ assert_sanitized(%(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f), "a b cd e f")
605
+ end
406
606
 
407
- def test_should_sanitize_invalid_tag_names
408
- assert_sanitized(%(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f), "a b cd e f")
409
- end
607
+ def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags
608
+ assert_sanitized('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>', "<a>foo</a>")
609
+ end
410
610
 
411
- def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags
412
- assert_sanitized('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>', "<a>foo</a>")
413
- end
611
+ def test_should_sanitize_invalid_tag_names_in_single_tags
612
+ input = %(<img/src="http://ha.ckers.org/xss.js"/>)
613
+ result = safe_list_sanitize(input)
614
+ acceptable_results = [
615
+ # libxml2
616
+ "<img>",
617
+ # libgumbo
618
+ %(<img src="http://ha.ckers.org/xss.js">),
619
+ ]
620
+
621
+ assert_includes(acceptable_results, result)
622
+ end
414
623
 
415
- def test_should_sanitize_invalid_tag_names_in_single_tags
416
- assert_sanitized('<img/src="http://ha.ckers.org/xss.js"/>', "<img />")
417
- end
624
+ def test_should_sanitize_img_dynsrc_lowsrc
625
+ assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img>")
626
+ end
418
627
 
419
- def test_should_sanitize_img_dynsrc_lowsrc
420
- assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img />")
421
- end
628
+ def test_should_sanitize_img_vbscript
629
+ assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), "<img>"
630
+ end
422
631
 
423
- def test_should_sanitize_div_background_image_unicode_encoded
424
- [
425
- convert_to_css_hex("url(javascript:alert(1))", false),
426
- convert_to_css_hex("url(javascript:alert(1))", true),
427
- convert_to_css_hex("url(https://example.com)", false),
428
- convert_to_css_hex("url(https://example.com)", true),
429
- ].each do |propval|
430
- raw = "background-image:" + propval
431
- assert_empty(sanitize_css(raw))
632
+ def test_should_sanitize_cdata_section
633
+ input = "<![CDATA[<span>section</span>]]>"
634
+ result = safe_list_sanitize(input)
635
+ acceptable_results = [
636
+ # libxml2 = 2.9.14
637
+ %{&lt;![CDATA[<span>section</span>]]&gt;},
638
+ # other libxml2
639
+ %{section]]&gt;},
640
+ # xerces+neko
641
+ "",
642
+ ]
643
+
644
+ assert_includes(acceptable_results, result)
645
+ end
646
+
647
+ def test_should_sanitize_unterminated_cdata_section
648
+ input = "<![CDATA[<span>neverending..."
649
+ result = safe_list_sanitize(input)
650
+
651
+ acceptable_results = [
652
+ # libxml2 = 2.9.14
653
+ %{&lt;![CDATA[<span>neverending...</span>},
654
+ # other libxml2
655
+ %{neverending...},
656
+ # xerces+neko
657
+ ""
658
+ ]
659
+
660
+ assert_includes(acceptable_results, result)
661
+ end
662
+
663
+ def test_should_not_mangle_urls_with_ampersand
664
+ assert_sanitized %{<a href=\"http://www.domain.com?var1=1&amp;var2=2\">my link</a>}
665
+ end
666
+
667
+ def test_should_sanitize_neverending_attribute
668
+ # note that assert_dom_equal chokes in this case! so avoid using assert_sanitized
669
+ assert_equal("<span class=\"\\\"></span>", safe_list_sanitize("<span class=\"\\\">"))
432
670
  end
433
- end
434
671
 
435
- def test_should_allow_div_background_image_unicode_encoded_safe_functions
436
672
  [
437
- convert_to_css_hex("rgb(255,0,0)", false),
438
- convert_to_css_hex("rgb(255,0,0)", true),
439
- ].each do |propval|
440
- raw = "background-image:" + propval
441
- assert_includes(sanitize_css(raw), "background-image")
673
+ %(<a href="javascript&#x3a;alert('XSS');">),
674
+ %(<a href="javascript&#x003a;alert('XSS');">),
675
+ %(<a href="javascript&#x3A;alert('XSS');">),
676
+ %(<a href="javascript&#x003A;alert('XSS');">)
677
+ ].each_with_index do |enc_hack, i|
678
+ define_method "test_x03a_handling_#{i + 1}" do
679
+ assert_sanitized enc_hack, "<a></a>"
680
+ end
442
681
  end
443
- end
444
682
 
445
- def test_should_sanitize_div_style_expression
446
- raw = %(width: expression(alert('XSS'));)
447
- assert_equal '', sanitize_css(raw)
448
- end
683
+ def test_x03a_legitimate
684
+ assert_sanitized %(<a href="http&#x3a;//legit">asdf</a>), %(<a href="http://legit">asdf</a>)
685
+ assert_sanitized %(<a href="http&#x3A;//legit">asdf</a>), %(<a href="http://legit">asdf</a>)
686
+ end
449
687
 
450
- def test_should_sanitize_across_newlines
451
- raw = %(\nwidth:\nexpression(alert('XSS'));\n)
452
- assert_equal '', sanitize_css(raw)
453
- end
688
+ def test_sanitize_ascii_8bit_string
689
+ safe_list_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
690
+ assert_equal "<div><a>hello</a></div>", sanitized
691
+ assert_equal Encoding::UTF_8, sanitized.encoding
692
+ end
693
+ end
454
694
 
455
- def test_should_sanitize_img_vbscript
456
- assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), '<img />'
457
- end
695
+ def test_sanitize_data_attributes
696
+ assert_sanitized %(<a href="/blah" data-method="post">foo</a>), %(<a href="/blah">foo</a>)
697
+ assert_sanitized %(<a data-remote="true" data-type="script" data-method="get" data-cross-domain="true" href="attack.js">Launch the missiles</a>), %(<a href="attack.js">Launch the missiles</a>)
698
+ end
458
699
 
459
- def test_should_sanitize_cdata_section
460
- input = "<![CDATA[<span>section</span>]]>"
461
- expected = libxml_2_9_14_recovery? ? %{&lt;![CDATA[<span>section</span>]]&gt;} : %{section]]&gt;}
462
- assert_sanitized(input, expected)
463
- end
700
+ def test_allow_data_attribute_if_requested
701
+ text = %(<a data-foo="foo">foo</a>)
702
+ assert_equal %(<a data-foo="foo">foo</a>), safe_list_sanitize(text, attributes: ["data-foo"])
703
+ end
464
704
 
465
- def test_should_sanitize_unterminated_cdata_section
466
- input = "<![CDATA[<span>neverending..."
467
- expected = libxml_2_9_14_recovery? ? %{&lt;![CDATA[<span>neverending...</span>} : %{neverending...}
468
- assert_sanitized(input, expected)
469
- end
705
+ # https://developer.mozilla.org/en-US/docs/Glossary/Void_element
706
+ VOID_ELEMENTS = %w[area base br col embed hr img input keygen link meta param source track wbr]
707
+
708
+ %w(strong em b i p code pre tt samp kbd var sub
709
+ sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr
710
+ acronym a img blockquote del ins time).each do |tag_name|
711
+ define_method "test_default_safelist_should_allow_#{tag_name}" do
712
+ if VOID_ELEMENTS.include?(tag_name)
713
+ assert_sanitized("<#{tag_name}>")
714
+ else
715
+ assert_sanitized("<#{tag_name}>foo</#{tag_name}>")
716
+ end
717
+ end
718
+ end
470
719
 
471
- def test_should_not_mangle_urls_with_ampersand
472
- assert_sanitized %{<a href=\"http://www.domain.com?var1=1&amp;var2=2\">my link</a>}
473
- end
720
+ def test_datetime_attribute
721
+ assert_sanitized("<time datetime=\"2023-01-01\">Today</time>")
722
+ end
474
723
 
475
- def test_should_sanitize_neverending_attribute
476
- assert_sanitized "<span class=\"\\", "<span class=\"\\\">"
477
- end
724
+ def test_abbr_attribute
725
+ scope_allowed_tags(%w(table tr th td)) do
726
+ assert_sanitized(%(<table><tr><td abbr="UK">United Kingdom</td></tr></table>))
727
+ end
728
+ end
729
+
730
+ def test_uri_escaping_of_href_attr_in_a_tag_in_safe_list_sanitizer
731
+ skip if RUBY_VERSION < "2.3"
732
+
733
+ html = %{<a href='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
734
+
735
+ text = safe_list_sanitize(html)
736
+
737
+ acceptable_results = [
738
+ # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch)
739
+ %{<a href="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
740
+ # system libxml2
741
+ %{<a href="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
742
+ # xerces+neko
743
+ %{<a href="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>}
744
+ ]
478
745
 
479
- [
480
- %(<a href="javascript&#x3a;alert('XSS');">),
481
- %(<a href="javascript&#x003a;alert('XSS');">),
482
- %(<a href="javascript&#x3A;alert('XSS');">),
483
- %(<a href="javascript&#x003A;alert('XSS');">)
484
- ].each_with_index do |enc_hack, i|
485
- define_method "test_x03a_handling_#{i+1}" do
486
- assert_sanitized enc_hack, "<a>"
746
+ assert_includes(acceptable_results, text)
487
747
  end
488
- end
489
748
 
490
- def test_x03a_legitimate
491
- assert_sanitized %(<a href="http&#x3a;//legit">), %(<a href="http://legit">)
492
- assert_sanitized %(<a href="http&#x3A;//legit">), %(<a href="http://legit">)
493
- end
749
+ def test_uri_escaping_of_src_attr_in_a_tag_in_safe_list_sanitizer
750
+ skip if RUBY_VERSION < "2.3"
751
+
752
+ html = %{<a src='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
753
+
754
+ text = safe_list_sanitize(html)
755
+
756
+ acceptable_results = [
757
+ # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch)
758
+ %{<a src="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
759
+ # system libxml2
760
+ %{<a src="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
761
+ # xerces+neko
762
+ %{<a src="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>}
763
+ ]
494
764
 
495
- def test_sanitize_ascii_8bit_string
496
- safe_list_sanitize('<a>hello</a>'.encode('ASCII-8BIT')).tap do |sanitized|
497
- assert_equal '<a>hello</a>', sanitized
498
- assert_equal Encoding::UTF_8, sanitized.encoding
765
+ assert_includes(acceptable_results, text)
499
766
  end
500
- end
501
767
 
502
- def test_sanitize_data_attributes
503
- assert_sanitized %(<a href="/blah" data-method="post">foo</a>), %(<a href="/blah">foo</a>)
504
- assert_sanitized %(<a data-remote="true" data-type="script" data-method="get" data-cross-domain="true" href="attack.js">Launch the missiles</a>), %(<a href="attack.js">Launch the missiles</a>)
505
- end
768
+ def test_uri_escaping_of_name_attr_in_a_tag_in_safe_list_sanitizer
769
+ skip if RUBY_VERSION < "2.3"
506
770
 
507
- def test_allow_data_attribute_if_requested
508
- text = %(<a data-foo="foo">foo</a>)
509
- assert_equal %(<a data-foo="foo">foo</a>), safe_list_sanitize(text, attributes: ['data-foo'])
510
- end
771
+ html = %{<a name='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
511
772
 
512
- def test_uri_escaping_of_href_attr_in_a_tag_in_safe_list_sanitizer
513
- skip if RUBY_VERSION < "2.3"
773
+ text = safe_list_sanitize(html)
514
774
 
515
- html = %{<a href='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
775
+ acceptable_results = [
776
+ # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch)
777
+ %{<a name="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
778
+ # system libxml2
779
+ %{<a name="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
780
+ # xerces+neko
781
+ %{<a name="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>}
782
+ ]
516
783
 
517
- text = safe_list_sanitize(html)
784
+ assert_includes(acceptable_results, text)
785
+ end
518
786
 
519
- acceptable_results = [
520
- # nokogiri w/vendored+patched libxml2
521
- %{<a href="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
522
- # nokogiri w/ system libxml2
523
- %{<a href="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
524
- ]
525
- assert_includes(acceptable_results, text)
526
- end
787
+ def test_uri_escaping_of_name_action_in_a_tag_in_safe_list_sanitizer
788
+ skip if RUBY_VERSION < "2.3"
527
789
 
528
- def test_uri_escaping_of_src_attr_in_a_tag_in_safe_list_sanitizer
529
- skip if RUBY_VERSION < "2.3"
790
+ html = %{<a action='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
530
791
 
531
- html = %{<a src='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
792
+ text = safe_list_sanitize(html, attributes: ["action"])
532
793
 
533
- text = safe_list_sanitize(html)
794
+ acceptable_results = [
795
+ # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch)
796
+ %{<a action="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
797
+ # system libxml2
798
+ %{<a action="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
799
+ # xerces+neko
800
+ %{<a action="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>},
801
+ ]
534
802
 
535
- acceptable_results = [
536
- # nokogiri w/vendored+patched libxml2
537
- %{<a src="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
538
- # nokogiri w/system libxml2
539
- %{<a src="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
540
- ]
541
- assert_includes(acceptable_results, text)
542
- end
803
+ assert_includes(acceptable_results, text)
804
+ end
543
805
 
544
- def test_uri_escaping_of_name_attr_in_a_tag_in_safe_list_sanitizer
545
- skip if RUBY_VERSION < "2.3"
806
+ def test_exclude_node_type_processing_instructions
807
+ input = "<div>text</div><?div content><b>text</b>"
808
+ result = safe_list_sanitize(input)
809
+ acceptable_results = [
810
+ # jruby cyberneko (nokogiri < 1.14.0)
811
+ "<div>text</div>",
812
+ # everything else
813
+ "<div>text</div><b>text</b>",
814
+ ]
815
+
816
+ assert_includes(acceptable_results, result)
817
+ end
546
818
 
547
- html = %{<a name='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
819
+ def test_exclude_node_type_comment
820
+ assert_equal("<div>text</div><b>text</b>", safe_list_sanitize("<div>text</div><!-- comment --><b>text</b>"))
821
+ end
548
822
 
549
- text = safe_list_sanitize(html)
823
+ %w[text/plain text/css image/png image/gif image/jpeg].each do |mediatype|
824
+ define_method "test_mediatype_#{mediatype}_allowed" do
825
+ input = %Q(<img src="data:#{mediatype};base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
826
+ expected = input
827
+ actual = safe_list_sanitize(input)
828
+ assert_equal(expected, actual)
829
+
830
+ input = %Q(<img src="DATA:#{mediatype};base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
831
+ expected = input
832
+ actual = safe_list_sanitize(input)
833
+ assert_equal(expected, actual)
834
+ end
835
+ end
550
836
 
551
- acceptable_results = [
552
- # nokogiri w/vendored+patched libxml2
553
- %{<a name="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
554
- # nokogiri w/system libxml2
555
- %{<a name="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
556
- ]
557
- assert_includes(acceptable_results, text)
558
- end
837
+ def test_mediatype_text_html_disallowed
838
+ input = '<img src="data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">'
839
+ expected = "<img>"
840
+ actual = safe_list_sanitize(input)
841
+ assert_equal(expected, actual)
559
842
 
560
- def test_uri_escaping_of_name_action_in_a_tag_in_safe_list_sanitizer
561
- skip if RUBY_VERSION < "2.3"
843
+ input = '<img src="DATA:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">'
844
+ expected = "<img>"
845
+ actual = safe_list_sanitize(input)
846
+ assert_equal(expected, actual)
847
+ end
562
848
 
563
- html = %{<a action='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
849
+ def test_mediatype_image_svg_xml_disallowed
850
+ input = '<img src="data:image/svg+xml;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">'
851
+ expected = "<img>"
852
+ actual = safe_list_sanitize(input)
853
+ assert_equal(expected, actual)
564
854
 
565
- text = safe_list_sanitize(html, attributes: ['action'])
855
+ input = '<img src="DATA:image/svg+xml;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">'
856
+ expected = "<img>"
857
+ actual = safe_list_sanitize(input)
858
+ assert_equal(expected, actual)
859
+ end
566
860
 
567
- acceptable_results = [
568
- # nokogiri w/vendored+patched libxml2
569
- %{<a action="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
570
- # nokogiri w/system libxml2
571
- %{<a action="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
572
- ]
573
- assert_includes(acceptable_results, text)
574
- end
861
+ def test_mediatype_other_disallowed
862
+ input = '<a href="data:foo;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">foo</a>'
863
+ expected = "<a>foo</a>"
864
+ actual = safe_list_sanitize(input)
865
+ assert_equal(expected, actual)
575
866
 
576
- def test_exclude_node_type_processing_instructions
577
- assert_equal("<div>text</div><b>text</b>", safe_list_sanitize("<div>text</div><?div content><b>text</b>"))
578
- end
867
+ input = '<a href="DATA:foo;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">foo</a>'
868
+ expected = "<a>foo</a>"
869
+ actual = safe_list_sanitize(input)
870
+ assert_equal(expected, actual)
871
+ end
579
872
 
580
- def test_exclude_node_type_comment
581
- assert_equal("<div>text</div><b>text</b>", safe_list_sanitize("<div>text</div><!-- comment --><b>text</b>"))
582
- end
873
+ def test_scrubbing_svg_attr_values_that_allow_ref
874
+ input = '<div fill="yellow url(http://bad.com/) #fff">hey</div>'
875
+ expected = '<div fill="yellow #fff">hey</div>'
876
+ actual = scope_allowed_attributes %w(fill) do
877
+ safe_list_sanitize(input)
878
+ end
583
879
 
584
- def test_disallow_the_dangerous_safelist_combination_of_select_and_style
585
- input = "<select><style><script>alert(1)</script></style></select>"
586
- tags = ["select", "style"]
587
- warning = /WARNING: Rails::Html::SafeListSanitizer: removing 'style' from safelist/
588
- sanitized = nil
589
- invocation = Proc.new { sanitized = safe_list_sanitize(input, tags: tags) }
590
-
591
- if html5_mode?
592
- # if Loofah is using an HTML5 parser,
593
- # then "style" should be removed by the parser as an invalid child of "select"
594
- assert_silent(&invocation)
595
- else
596
- # if Loofah is using an HTML4 parser,
597
- # then SafeListSanitizer should remove "style" from the safelist
598
- assert_output(nil, warning, &invocation)
599
- end
600
- refute_includes(sanitized, "style")
601
- end
880
+ assert_equal(expected, actual)
881
+ end
602
882
 
603
- protected
883
+ def test_style_with_css_payload
884
+ input, tags = "<style>div > span { background: \"red\"; }</style>", ["style"]
885
+ actual = safe_list_sanitize(input, tags: tags)
886
+ acceptable_results = [
887
+ # libxml2
888
+ "<style>div &gt; span { background: \"red\"; }</style>",
889
+ # libgumbo
890
+ "<style>div > span { background: \"red\"; }</style>",
891
+ ]
892
+
893
+ assert_includes(acceptable_results, actual)
894
+ end
604
895
 
605
- def xpath_sanitize(input, options = {})
606
- XpathRemovalTestSanitizer.new.sanitize(input, options)
607
- end
896
+ def test_combination_of_select_and_style_with_css_payload
897
+ input, tags = "<select><style>div > span { background: \"red\"; }</style></select>", ["select", "style"]
898
+ actual = safe_list_sanitize(input, tags: tags)
899
+ acceptable_results = [
900
+ # libxml2
901
+ "<select><style>div &gt; span { background: \"red\"; }</style></select>",
902
+ # libgumbo
903
+ "<select>div &gt; span { background: \"red\"; }</select>",
904
+ ]
905
+
906
+ assert_includes(acceptable_results, actual)
907
+ end
608
908
 
609
- def full_sanitize(input, options = {})
610
- Rails::Html::FullSanitizer.new.sanitize(input, options)
611
- end
909
+ def test_combination_of_select_and_style_with_script_payload
910
+ input, tags = "<select><style><script>alert(1)</script></style></select>", ["select", "style"]
911
+ actual = safe_list_sanitize(input, tags: tags)
912
+ acceptable_results = [
913
+ # libxml2
914
+ "<select><style>&lt;script&gt;alert(1)&lt;/script&gt;</style></select>",
915
+ # libgumbo
916
+ "<select>alert(1)</select>",
917
+ ]
918
+
919
+ assert_includes(acceptable_results, actual)
920
+ end
612
921
 
613
- def link_sanitize(input, options = {})
614
- Rails::Html::LinkSanitizer.new.sanitize(input, options)
615
- end
922
+ def test_combination_of_svg_and_style_with_script_payload
923
+ input, tags = "<svg><style><script>alert(1)</script></style></svg>", ["svg", "style"]
924
+ actual = safe_list_sanitize(input, tags: tags)
925
+ acceptable_results = [
926
+ # libxml2
927
+ "<svg><style>&lt;script&gt;alert(1)&lt;/script&gt;</style></svg>",
928
+ # libgumbo
929
+ "<svg><style>alert(1)</style></svg>"
930
+ ]
931
+
932
+ assert_includes(acceptable_results, actual)
933
+ end
616
934
 
617
- def safe_list_sanitize(input, options = {})
618
- Rails::Html::SafeListSanitizer.new.sanitize(input, options)
619
- end
935
+ def test_combination_of_math_and_style_with_img_payload
936
+ input, tags = "<math><style><img src=x onerror=alert(1)></style></math>", ["math", "style"]
937
+ actual = safe_list_sanitize(input, tags: tags)
938
+ acceptable_results = [
939
+ # libxml2
940
+ "<math><style>&lt;img src=x onerror=alert(1)&gt;</style></math>",
941
+ # libgumbo
942
+ "<math><style></style></math>",
943
+ ]
944
+
945
+ assert_includes(acceptable_results, actual)
946
+ end
620
947
 
621
- def assert_sanitized(input, expected = nil)
622
- if input
623
- assert_dom_equal expected || input, safe_list_sanitize(input)
624
- else
625
- assert_nil safe_list_sanitize(input)
948
+ def test_combination_of_math_and_style_with_img_payload_2
949
+ input, tags = "<math><style><img src=x onerror=alert(1)></style></math>", ["math", "style", "img"]
950
+ actual = safe_list_sanitize(input, tags: tags)
951
+ acceptable_results = [
952
+ # libxml2
953
+ "<math><style>&lt;img src=x onerror=alert(1)&gt;</style></math>",
954
+ # libgumbo
955
+ "<math><style></style></math><img src=\"x\">",
956
+ ]
957
+
958
+ assert_includes(acceptable_results, actual)
626
959
  end
627
- end
628
960
 
629
- def sanitize_css(input)
630
- Rails::Html::SafeListSanitizer.new.sanitize_css(input)
631
- end
961
+ def test_combination_of_svg_and_style_with_img_payload
962
+ input, tags = "<svg><style><img src=x onerror=alert(1)></style></svg>", ["svg", "style"]
963
+ actual = safe_list_sanitize(input, tags: tags)
964
+ acceptable_results = [
965
+ # libxml2
966
+ "<svg><style>&lt;img src=x onerror=alert(1)&gt;</style></svg>",
967
+ # libgumbo
968
+ "<svg><style></style></svg>",
969
+ ]
970
+
971
+ assert_includes(acceptable_results, actual)
972
+ end
632
973
 
633
- def scope_allowed_tags(tags)
634
- old_tags = Rails::Html::SafeListSanitizer.allowed_tags
635
- Rails::Html::SafeListSanitizer.allowed_tags = tags
636
- yield Rails::Html::SafeListSanitizer.new
637
- ensure
638
- Rails::Html::SafeListSanitizer.allowed_tags = old_tags
639
- end
974
+ def test_combination_of_svg_and_style_with_img_payload_2
975
+ input, tags = "<svg><style><img src=x onerror=alert(1)></style></svg>", ["svg", "style", "img"]
976
+ actual = safe_list_sanitize(input, tags: tags)
977
+ acceptable_results = [
978
+ # libxml2
979
+ "<svg><style>&lt;img src=x onerror=alert(1)&gt;</style></svg>",
980
+ # libgumbo
981
+ "<svg><style></style></svg><img src=\"x\">",
982
+ ]
983
+
984
+ assert_includes(acceptable_results, actual)
985
+ end
640
986
 
641
- def scope_allowed_attributes(attributes)
642
- old_attributes = Rails::Html::SafeListSanitizer.allowed_attributes
643
- Rails::Html::SafeListSanitizer.allowed_attributes = attributes
644
- yield Rails::Html::SafeListSanitizer.new
645
- ensure
646
- Rails::Html::SafeListSanitizer.allowed_attributes = old_attributes
647
- end
987
+ def test_should_sanitize_illegal_style_properties
988
+ raw = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;)
989
+ expected = %(display:block;width:100%;height:100%;background-color:black;background-x:center;background-y:center;)
990
+ assert_equal expected, sanitize_css(raw)
991
+ end
648
992
 
649
- # note that this is used for testing CSS hex encoding: \\[0-9a-f]{1,6}
650
- def convert_to_css_hex(string, escape_parens=false)
651
- string.chars.map do |c|
652
- if !escape_parens && (c == "(" || c == ")")
653
- c
654
- else
655
- format('\00%02X', c.ord)
993
+ def test_should_sanitize_with_trailing_space
994
+ raw = "display:block; "
995
+ expected = "display:block;"
996
+ assert_equal expected, sanitize_css(raw)
997
+ end
998
+
999
+ def test_should_sanitize_xul_style_attributes
1000
+ raw = %(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss'))
1001
+ assert_equal "", sanitize_css(raw)
1002
+ end
1003
+
1004
+ def test_should_sanitize_div_background_image_unicode_encoded
1005
+ [
1006
+ convert_to_css_hex("url(javascript:alert(1))", false),
1007
+ convert_to_css_hex("url(javascript:alert(1))", true),
1008
+ convert_to_css_hex("url(https://example.com)", false),
1009
+ convert_to_css_hex("url(https://example.com)", true),
1010
+ ].each do |propval|
1011
+ raw = "background-image:" + propval
1012
+ assert_empty(sanitize_css(raw))
1013
+ end
1014
+ end
1015
+
1016
+ def test_should_allow_div_background_image_unicode_encoded_safe_functions
1017
+ [
1018
+ convert_to_css_hex("rgb(255,0,0)", false),
1019
+ convert_to_css_hex("rgb(255,0,0)", true),
1020
+ ].each do |propval|
1021
+ raw = "background-image:" + propval
1022
+
1023
+ assert_includes(sanitize_css(raw), "background-image")
1024
+ end
1025
+ end
1026
+
1027
+ def test_should_sanitize_div_style_expression
1028
+ raw = %(width: expression(alert('XSS'));)
1029
+ assert_equal "", sanitize_css(raw)
1030
+ end
1031
+
1032
+ def test_should_sanitize_across_newlines
1033
+ raw = %(\nwidth:\nexpression(alert('XSS'));\n)
1034
+ assert_equal "", sanitize_css(raw)
1035
+ end
1036
+
1037
+ protected
1038
+ def safe_list_sanitize(input, options = {})
1039
+ module_under_test::SafeListSanitizer.new.sanitize(input, options)
1040
+ end
1041
+
1042
+ def assert_sanitized(input, expected = nil)
1043
+ assert_equal((expected || input), safe_list_sanitize(input))
656
1044
  end
657
- end.join
658
- end
659
1045
 
660
- def libxml_2_9_14_recovery?
661
- Nokogiri.method(:uses_libxml?).arity == -1 && Nokogiri.uses_libxml?(">= 2.9.14")
1046
+ def scope_allowed_tags(tags)
1047
+ old_tags = module_under_test::SafeListSanitizer.allowed_tags
1048
+ module_under_test::SafeListSanitizer.allowed_tags = tags
1049
+ yield module_under_test::SafeListSanitizer.new
1050
+ ensure
1051
+ module_under_test::SafeListSanitizer.allowed_tags = old_tags
1052
+ end
1053
+
1054
+ def scope_allowed_attributes(attributes)
1055
+ old_attributes = module_under_test::SafeListSanitizer.allowed_attributes
1056
+ module_under_test::SafeListSanitizer.allowed_attributes = attributes
1057
+ yield module_under_test::SafeListSanitizer.new
1058
+ ensure
1059
+ module_under_test::SafeListSanitizer.allowed_attributes = old_attributes
1060
+ end
1061
+
1062
+ def sanitize_css(input)
1063
+ module_under_test::SafeListSanitizer.new.sanitize_css(input)
1064
+ end
1065
+
1066
+ # note that this is used for testing CSS hex encoding: \\[0-9a-f]{1,6}
1067
+ def convert_to_css_hex(string, escape_parens = false)
1068
+ string.chars.map do |c|
1069
+ if !escape_parens && (c == "(" || c == ")")
1070
+ c
1071
+ else
1072
+ format('\00%02X', c.ord)
1073
+ end
1074
+ end.join
1075
+ end
662
1076
  end
663
1077
 
664
- def html5_mode?
665
- ::Loofah.respond_to?(:html5_mode?) && ::Loofah.html5_mode?
1078
+ class HTML4SafeListSanitizerTest < Minitest::Test
1079
+ @module_under_test = Rails::HTML4
1080
+ include SafeListSanitizerTest
666
1081
  end
1082
+
1083
+ class HTML5SafeListSanitizerTest < Minitest::Test
1084
+ @module_under_test = Rails::HTML5
1085
+ include SafeListSanitizerTest
1086
+ end if loofah_html5_support?
667
1087
  end