rails-html-sanitizer 1.5.0 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,777 +1,1288 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "minitest/autorun"
2
4
  require "rails-html-sanitizer"
3
- require "rails/dom/testing/assertions/dom_assertions"
4
5
 
5
- puts Nokogiri::VERSION_INFO
6
+ puts "nokogiri version info: #{Nokogiri::VERSION_INFO}"
7
+ puts "html5 support: #{Rails::HTML::Sanitizer.html5_support?}"
8
+
9
+ #
10
+ # NOTE that many of these tests contain multiple acceptable results.
11
+ #
12
+ # In some cases, this is because of how the HTML4 parser's recovery behavior changed in libxml2
13
+ # 2.9.14 and 2.10.0. For more details, see:
14
+ #
15
+ # - https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5
16
+ # - https://gitlab.gnome.org/GNOME/libxml2/-/issues/380
17
+ #
18
+ # In other cases, multiple acceptable results are provided because Nokogiri's vendored libxml2 is
19
+ # patched to entity-escape server-side includes (aka "SSI", aka `<!-- #directive param=value -->`).
20
+ #
21
+ # In many other cases, it's because the parser used by Nokogiri on JRuby (xerces+nekohtml) parses
22
+ # slightly differently than libxml2 in edge cases.
23
+ #
24
+ module SanitizerTests
25
+ def self.loofah_html5_support?
26
+ Loofah.respond_to?(:html5_support?) && Loofah.html5_support?
27
+ end
28
+
29
+ class BaseSanitizerTest < Minitest::Test
30
+ class XpathRemovalTestSanitizer < Rails::HTML::Sanitizer
31
+ def sanitize(html, options = {})
32
+ fragment = Loofah.fragment(html)
33
+ remove_xpaths(fragment, options[:xpaths]).to_s
34
+ end
35
+ end
6
36
 
7
- class SanitizersTest < Minitest::Test
8
- include Rails::Dom::Testing::Assertions::DomAssertions
37
+ def test_sanitizer_sanitize_raises_not_implemented_error
38
+ assert_raises NotImplementedError do
39
+ Rails::HTML::Sanitizer.new.sanitize("asdf")
40
+ end
41
+ end
9
42
 
10
- def test_sanitizer_sanitize_raises_not_implemented_error
11
- assert_raises NotImplementedError do
12
- Rails::Html::Sanitizer.new.sanitize('')
43
+ def test_remove_xpaths_removes_an_xpath
44
+ html = %(<h1>hello <script>code!</script></h1>)
45
+ assert_equal %(<h1>hello </h1>), xpath_sanitize(html, xpaths: %w(.//script))
13
46
  end
14
- end
15
47
 
16
- def test_sanitize_nested_script
17
- assert_equal '&lt;script&gt;alert("XSS");&lt;/script&gt;', safe_list_sanitize('<script><script></script>alert("XSS");<script><</script>/</script><script>script></script>', tags: %w(em))
18
- end
48
+ def test_remove_xpaths_removes_all_occurrences_of_xpath
49
+ html = %(<section><header><script>code!</script></header><p>hello <script>code!</script></p></section>)
50
+ assert_equal %(<section><header></header><p>hello </p></section>), xpath_sanitize(html, xpaths: %w(.//script))
51
+ end
19
52
 
20
- def test_sanitize_nested_script_in_style
21
- assert_equal '&lt;script&gt;alert("XSS");&lt;/script&gt;', safe_list_sanitize('<style><script></style>alert("XSS");<style><</style>/</style><style>script></style>', tags: %w(em))
22
- end
53
+ def test_remove_xpaths_called_with_faulty_xpath
54
+ assert_raises Nokogiri::XML::XPath::SyntaxError do
55
+ xpath_sanitize("<h1>hello<h1>", xpaths: %w(..faulty_xpath))
56
+ end
57
+ end
23
58
 
24
- class XpathRemovalTestSanitizer < Rails::Html::Sanitizer
25
- def sanitize(html, options = {})
26
- fragment = Loofah.fragment(html)
27
- remove_xpaths(fragment, options[:xpaths]).to_s
59
+ def test_remove_xpaths_called_with_xpath_string
60
+ assert_equal "", xpath_sanitize("<a></a>", xpaths: ".//a")
28
61
  end
29
- end
30
62
 
31
- def test_remove_xpaths_removes_an_xpath
32
- html = %(<h1>hello <script>code!</script></h1>)
33
- assert_equal %(<h1>hello </h1>), xpath_sanitize(html, xpaths: %w(.//script))
34
- end
63
+ def test_remove_xpaths_called_with_enumerable_xpaths
64
+ assert_equal "", xpath_sanitize("<a><span></span></a>", xpaths: %w(.//a .//span))
65
+ end
35
66
 
36
- def test_remove_xpaths_removes_all_occurrences_of_xpath
37
- html = %(<section><header><script>code!</script></header><p>hello <script>code!</script></p></section>)
38
- assert_equal %(<section><header></header><p>hello </p></section>), xpath_sanitize(html, xpaths: %w(.//script))
67
+ protected
68
+ def xpath_sanitize(input, options = {})
69
+ XpathRemovalTestSanitizer.new.sanitize(input, options)
70
+ end
39
71
  end
40
72
 
41
- def test_remove_xpaths_called_with_faulty_xpath
42
- assert_raises Nokogiri::XML::XPath::SyntaxError do
43
- xpath_sanitize('<h1>hello<h1>', xpaths: %w(..faulty_xpath))
73
+ module ModuleUnderTest
74
+ def module_under_test
75
+ self.class.instance_variable_get(:@module_under_test)
44
76
  end
45
77
  end
46
78
 
47
- def test_remove_xpaths_called_with_xpath_string
48
- assert_equal '', xpath_sanitize('<a></a>', xpaths: './/a')
49
- end
79
+ module FullSanitizerTest
80
+ include ModuleUnderTest
50
81
 
51
- def test_remove_xpaths_called_with_enumerable_xpaths
52
- assert_equal '', xpath_sanitize('<a><span></span></a>', xpaths: %w(.//a .//span))
53
- end
82
+ def test_strip_tags_with_quote
83
+ input = '<" <img src="trollface.gif" onload="alert(1)"> hi'
84
+ result = full_sanitize(input)
85
+ acceptable_results = [
86
+ # libxml2 >= 2.9.14 and xerces+neko
87
+ %{&lt;" hi},
88
+ # other libxml2
89
+ %{ hi},
90
+ ]
54
91
 
55
- def test_strip_tags_with_quote
56
- input = '<" <img src="trollface.gif" onload="alert(1)"> hi'
57
- expected = libxml_2_9_14_recovery_lt? ? %{&lt;" hi} : %{ hi}
58
- assert_equal(expected, full_sanitize(input))
59
- end
92
+ assert_includes(acceptable_results, result)
93
+ end
60
94
 
61
- def test_strip_invalid_html
62
- assert_equal "&lt;&lt;", full_sanitize("<<<bad html")
63
- end
95
+ def test_strip_invalid_html
96
+ assert_equal "&lt;&lt;", full_sanitize("<<<bad html")
97
+ end
64
98
 
65
- def test_strip_nested_tags
66
- expected = "Wei&lt;a onclick='alert(document.cookie);'/&gt;rdos"
67
- input = "Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"
68
- assert_equal expected, full_sanitize(input)
69
- end
99
+ def test_strip_nested_tags
100
+ expected = "Wei&lt;a onclick='alert(document.cookie);'/&gt;rdos"
101
+ input = "Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"
102
+ assert_equal expected, full_sanitize(input)
103
+ end
70
104
 
71
- def test_strip_tags_multiline
72
- expected = %{This is a test.\n\n\n\nIt no longer contains any HTML.\n}
73
- input = %{<title>This is <b>a <a href="" target="_blank">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n}
105
+ def test_strip_tags_multiline
106
+ expected = %{This is a test.\n\n\n\nIt no longer contains any HTML.\n}
107
+ input = %{<h1>This is <b>a <a href="" target="_blank">test</a></b>.</h1>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n}
74
108
 
75
- assert_equal expected, full_sanitize(input)
76
- end
109
+ assert_equal expected, full_sanitize(input)
110
+ end
77
111
 
78
- def test_remove_unclosed_tags
79
- input = "This is <-- not\n a comment here."
80
- expected = libxml_2_9_14_recovery_lt? ? %{This is &lt;-- not\n a comment here.} : %{This is }
81
- assert_equal(expected, full_sanitize(input))
82
- end
112
+ def test_remove_unclosed_tags
113
+ input = "This is <-- not\n a comment here."
114
+ result = full_sanitize(input)
115
+ acceptable_results = [
116
+ # libxml2 >= 2.9.14 and xerces+neko
117
+ %{This is &lt;-- not\n a comment here.},
118
+ # other libxml2
119
+ %{This is },
120
+ ]
121
+
122
+ assert_includes(acceptable_results, result)
123
+ end
83
124
 
84
- def test_strip_cdata
85
- input = "This has a <![CDATA[<section>]]> here."
86
- expected = libxml_2_9_14_recovery_lt_bang? ? %{This has a &lt;![CDATA[]]&gt; here.} : %{This has a ]]&gt; here.}
87
- assert_equal(expected, full_sanitize(input))
88
- end
125
+ def test_strip_cdata
126
+ input = "This has a <![CDATA[<section>]]> here."
127
+ result = full_sanitize(input)
128
+ acceptable_results = [
129
+ # libxml2 = 2.9.14
130
+ %{This has a &lt;![CDATA[]]&gt; here.},
131
+ # other libxml2
132
+ %{This has a ]]&gt; here.},
133
+ # xerces+neko
134
+ %{This has a here.},
135
+ ]
136
+
137
+ assert_includes(acceptable_results, result)
138
+ end
89
139
 
90
- def test_strip_unclosed_cdata
91
- input = "This has an unclosed <![CDATA[<section>]] here..."
92
- expected = libxml_2_9_14_recovery_lt_bang? ? %{This has an unclosed &lt;![CDATA[]] here...} : %{This has an unclosed ]] here...}
93
- assert_equal(expected, full_sanitize(input))
94
- end
140
+ def test_strip_blank_string
141
+ assert_nil full_sanitize(nil)
142
+ assert_equal "", full_sanitize("")
143
+ assert_equal " ", full_sanitize(" ")
144
+ end
95
145
 
96
- def test_strip_blank_string
97
- assert_nil full_sanitize(nil)
98
- assert_equal "", full_sanitize("")
99
- assert_equal " ", full_sanitize(" ")
100
- end
146
+ def test_strip_tags_with_plaintext
147
+ assert_equal "Don't touch me", full_sanitize("Don't touch me")
148
+ end
101
149
 
102
- def test_strip_tags_with_plaintext
103
- assert_equal "Don't touch me", full_sanitize("Don't touch me")
104
- end
150
+ def test_strip_tags_with_tags
151
+ assert_equal "This is a test.", full_sanitize("<p>This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</p>")
152
+ end
105
153
 
106
- def test_strip_tags_with_tags
107
- assert_equal "This is a test.", full_sanitize("<p>This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</p>")
108
- end
154
+ def test_escape_tags_with_many_open_quotes
155
+ assert_equal "&lt;&lt;", full_sanitize("<<<bad html>")
156
+ end
109
157
 
110
- def test_escape_tags_with_many_open_quotes
111
- assert_equal "&lt;&lt;", full_sanitize("<<<bad html>")
112
- end
158
+ def test_strip_tags_with_sentence
159
+ assert_equal "This is a test.", full_sanitize("This is a test.")
160
+ end
113
161
 
114
- def test_strip_tags_with_sentence
115
- assert_equal "This is a test.", full_sanitize("This is a test.")
116
- end
162
+ def test_strip_tags_with_comment
163
+ assert_equal "This has a here.", full_sanitize("This has a <!-- comment --> here.")
164
+ end
117
165
 
118
- def test_strip_tags_with_comment
119
- assert_equal "This has a here.", full_sanitize("This has a <!-- comment --> here.")
120
- end
166
+ def test_strip_tags_with_frozen_string
167
+ assert_equal "Frozen string with no tags", full_sanitize("Frozen string with no tags")
168
+ end
121
169
 
122
- def test_strip_tags_with_frozen_string
123
- assert_equal "Frozen string with no tags", full_sanitize("Frozen string with no tags".freeze)
124
- end
170
+ def test_full_sanitize_respect_html_escaping_of_the_given_string
171
+ assert_equal 'test\r\nstring', full_sanitize('test\r\nstring')
172
+ assert_equal "&amp;", full_sanitize("&")
173
+ assert_equal "&amp;", full_sanitize("&amp;")
174
+ assert_equal "&amp;amp;", full_sanitize("&amp;amp;")
175
+ assert_equal "omg &lt;script&gt;BOM&lt;/script&gt;", full_sanitize("omg &lt;script&gt;BOM&lt;/script&gt;")
176
+ end
125
177
 
126
- def test_full_sanitize_respect_html_escaping_of_the_given_string
127
- assert_equal 'test\r\nstring', full_sanitize('test\r\nstring')
128
- assert_equal '&amp;', full_sanitize('&')
129
- assert_equal '&amp;', full_sanitize('&amp;')
130
- assert_equal '&amp;amp;', full_sanitize('&amp;amp;')
131
- assert_equal 'omg &lt;script&gt;BOM&lt;/script&gt;', full_sanitize('omg &lt;script&gt;BOM&lt;/script&gt;')
132
- end
178
+ def test_sanitize_ascii_8bit_string
179
+ full_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
180
+ assert_equal "hello", sanitized
181
+ assert_equal Encoding::UTF_8, sanitized.encoding
182
+ end
183
+ end
133
184
 
134
- def test_strip_links_with_tags_in_tags
135
- expected = "&lt;a href='hello'&gt;all <b>day</b> long&lt;/a&gt;"
136
- input = "<<a>a href='hello'>all <b>day</b> long<</A>/a>"
137
- assert_equal expected, link_sanitize(input)
185
+ protected
186
+ def full_sanitize(input, options = {})
187
+ module_under_test::FullSanitizer.new.sanitize(input, options)
188
+ end
138
189
  end
139
190
 
140
- def test_strip_links_with_unclosed_tags
141
- assert_equal "", link_sanitize("<a<a")
191
+ class HTML4FullSanitizerTest < Minitest::Test
192
+ @module_under_test = Rails::HTML4
193
+ include FullSanitizerTest
142
194
  end
143
195
 
144
- def test_strip_links_with_plaintext
145
- assert_equal "Don't touch me", link_sanitize("Don't touch me")
146
- end
196
+ class HTML5FullSanitizerTest < Minitest::Test
197
+ @module_under_test = Rails::HTML5
198
+ include FullSanitizerTest
199
+ end if loofah_html5_support?
147
200
 
148
- def test_strip_links_with_line_feed_and_uppercase_tag
149
- assert_equal "on my mind\nall day long", link_sanitize("<a href='almost'>on my mind</a>\n<A href='almost'>all day long</A>")
150
- end
201
+ module LinkSanitizerTest
202
+ include ModuleUnderTest
151
203
 
152
- def test_strip_links_leaves_nonlink_tags
153
- assert_equal "My mind\nall <b>day</b> long", link_sanitize("<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>")
154
- end
204
+ def test_strip_links_with_tags_in_tags
205
+ expected = "&lt;a href='hello'&gt;all <b>day</b> long&lt;/a&gt;"
206
+ input = "<<a>a href='hello'>all <b>day</b> long<</A>/a>"
207
+ assert_equal expected, link_sanitize(input)
208
+ end
155
209
 
156
- def test_strip_links_with_links
157
- assert_equal "0wn3d", link_sanitize("<a href='http://www.rubyonrails.com/'><a href='http://www.rubyonrails.com/' onlclick='steal()'>0wn3d</a></a>")
158
- end
210
+ def test_strip_links_with_unclosed_tags
211
+ assert_equal "", link_sanitize("<a<a")
212
+ end
159
213
 
160
- def test_strip_links_with_linkception
161
- assert_equal "Magic", link_sanitize("<a href='http://www.rubyonrails.com/'>Mag<a href='http://www.ruby-lang.org/'>ic")
162
- end
214
+ def test_strip_links_with_plaintext
215
+ assert_equal "Don't touch me", link_sanitize("Don't touch me")
216
+ end
163
217
 
164
- def test_sanitize_form
165
- assert_sanitized "<form action=\"/foo/bar\" method=\"post\"><input></form>", ''
166
- end
218
+ def test_strip_links_with_line_feed_and_uppercase_tag
219
+ assert_equal "on my mind\nall day long", link_sanitize("<a href='almost'>on my mind</a>\n<A href='almost'>all day long</A>")
220
+ end
167
221
 
168
- def test_sanitize_plaintext
169
- assert_sanitized "<plaintext><span>foo</span></plaintext>", "<span>foo</span>"
170
- end
222
+ def test_strip_links_leaves_nonlink_tags
223
+ assert_equal "My mind\nall <b>day</b> long", link_sanitize("<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>")
224
+ end
171
225
 
172
- def test_sanitize_script
173
- assert_sanitized "a b c<script language=\"Javascript\">blah blah blah</script>d e f", "a b cblah blah blahd e f"
174
- end
226
+ def test_strip_links_with_links
227
+ assert_equal "0wn3d", link_sanitize("<a href='http://www.rubyonrails.com/'><a href='http://www.rubyonrails.com/' onlclick='steal()'>0wn3d</a></a>")
228
+ end
175
229
 
176
- def test_sanitize_js_handlers
177
- raw = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>}
178
- assert_sanitized raw, %{onthis="do that" <a href="#" name="foo">hello</a>}
179
- end
230
+ def test_strip_links_with_linkception
231
+ assert_equal "Magic", link_sanitize("<a href='http://www.rubyonrails.com/'>Mag<a href='http://www.ruby-lang.org/'>ic")
232
+ end
180
233
 
181
- def test_sanitize_javascript_href
182
- raw = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>}
183
- assert_sanitized raw, %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>}
234
+ def test_sanitize_ascii_8bit_string
235
+ link_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
236
+ assert_equal "<div>hello</div>", sanitized
237
+ assert_equal Encoding::UTF_8, sanitized.encoding
238
+ end
239
+ end
240
+
241
+ protected
242
+ def link_sanitize(input, options = {})
243
+ module_under_test::LinkSanitizer.new.sanitize(input, options)
244
+ end
184
245
  end
185
246
 
186
- def test_sanitize_image_src
187
- raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>}
188
- assert_sanitized raw, %{src="javascript:bang" <img width="5">foo</img>, <span>bar</span>}
247
+ class HTML4LinkSanitizerTest < Minitest::Test
248
+ @module_under_test = Rails::HTML4
249
+ include LinkSanitizerTest
189
250
  end
190
251
 
191
- tags = Loofah::HTML5::SafeList::ALLOWED_ELEMENTS - %w(script form)
192
- tags.each do |tag_name|
193
- define_method "test_should_allow_#{tag_name}_tag" do
194
- scope_allowed_tags(tags) do
195
- assert_sanitized "start <#{tag_name} title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</#{tag_name}> end", %(start <#{tag_name} title="1">foo bar baz</#{tag_name}> end)
252
+ class HTML5LinkSanitizerTest < Minitest::Test
253
+ @module_under_test = Rails::HTML5
254
+ include LinkSanitizerTest
255
+ end if loofah_html5_support?
256
+
257
+ module SafeListSanitizerTest
258
+ include ModuleUnderTest
259
+
260
+ def test_sanitize_nested_script
261
+ assert_equal '&lt;script&gt;alert("XSS");&lt;/script&gt;', safe_list_sanitize('<script><script></script>alert("XSS");<script><</script>/</script><script>script></script>', tags: %w(em))
262
+ end
263
+
264
+ def test_sanitize_nested_script_in_style
265
+ input = '<style><script></style>alert("XSS");<style><</style>/</style><style>script></style>'
266
+ result = safe_list_sanitize(input, tags: %w(em))
267
+ acceptable_results = [
268
+ # libxml2
269
+ %{&lt;script&gt;alert("XSS");&lt;/script&gt;},
270
+ # xerces+neko. unavoidable double-escaping, see loofah/docs/2022-10-decision-on-cdata-nodes.md
271
+ %{&amp;lt;script&amp;gt;alert(\"XSS\");&amp;lt;&amp;lt;/style&amp;gt;/script&amp;gt;},
272
+ ]
273
+
274
+ assert_includes(acceptable_results, result)
275
+ end
276
+
277
+ def test_strip_unclosed_cdata
278
+ input = "This has an unclosed <![CDATA[<section>]] here..."
279
+
280
+ result = safe_list_sanitize(input)
281
+
282
+ acceptable_results = [
283
+ # libxml2 = 2.9.14
284
+ %{This has an unclosed &lt;![CDATA[]] here...},
285
+ # other libxml2
286
+ %{This has an unclosed ]] here...},
287
+ # xerces+neko
288
+ %{This has an unclosed }
289
+ ]
290
+
291
+ assert_includes(acceptable_results, result)
292
+ end
293
+
294
+ def test_sanitize_form
295
+ assert_sanitized "<form action=\"/foo/bar\" method=\"post\"><input></form>", ""
296
+ end
297
+
298
+ def test_sanitize_plaintext
299
+ # note that the `plaintext` tag has been deprecated since HTML 2
300
+ # https://developer.mozilla.org/en-US/docs/Web/HTML/Element/plaintext
301
+ input = "<plaintext><span>foo</span></plaintext>"
302
+ result = safe_list_sanitize(input)
303
+ acceptable_results = [
304
+ # libxml2
305
+ "<span>foo</span>",
306
+ # xerces+nekohtml-unit
307
+ "&lt;span&gt;foo&lt;/span&gt;&lt;/plaintext&gt;",
308
+ # xerces+cyberneko
309
+ "&lt;span&gt;foo&lt;/span&gt;"
310
+ ]
311
+
312
+ assert_includes(acceptable_results, result)
313
+ end
314
+
315
+ def test_sanitize_script
316
+ assert_sanitized "a b c<script language=\"Javascript\">blah blah blah</script>d e f", "a b cblah blah blahd e f"
317
+ end
318
+
319
+ def test_sanitize_js_handlers
320
+ raw = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>}
321
+ assert_sanitized raw, %{onthis="do that" <a href="#" name="foo">hello</a>}
322
+ end
323
+
324
+ def test_sanitize_javascript_href
325
+ raw = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>}
326
+ assert_sanitized raw, %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>}
327
+ end
328
+
329
+ def test_sanitize_image_src
330
+ raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>}
331
+ assert_sanitized raw, %{src="javascript:bang" <img width="5">foo, <span>bar</span>}
332
+ end
333
+
334
+ def test_should_allow_anchors
335
+ assert_sanitized %(<a href="foo" onclick="bar"><script>baz</script></a>), %(<a href=\"foo\">baz</a>)
336
+ end
337
+
338
+ def test_video_poster_sanitization
339
+ scope_allowed_tags(%w(video)) do
340
+ scope_allowed_attributes %w(src poster) do
341
+ expected = if RUBY_PLATFORM == "java"
342
+ # xerces+nekohtml alphabetizes the attributes! FML.
343
+ %(<video poster="posterimage.jpg" src="videofile.ogg"></video>)
344
+ else
345
+ %(<video src="videofile.ogg" poster="posterimage.jpg"></video>)
346
+ end
347
+ assert_sanitized(
348
+ %(<video src="videofile.ogg" autoplay poster="posterimage.jpg"></video>),
349
+ expected,
350
+ )
351
+ assert_sanitized(
352
+ %(<video src="videofile.ogg" poster=javascript:alert(1)></video>),
353
+ %(<video src="videofile.ogg"></video>),
354
+ )
355
+ end
196
356
  end
197
357
  end
198
- end
199
358
 
200
- def test_should_allow_anchors
201
- assert_sanitized %(<a href="foo" onclick="bar"><script>baz</script></a>), %(<a href=\"foo\">baz</a>)
202
- end
359
+ # RFC 3986, sec 4.2
360
+ def test_allow_colons_in_path_component
361
+ assert_sanitized "<a href=\"./this:that\">foo</a>"
362
+ end
203
363
 
204
- def test_video_poster_sanitization
205
- scope_allowed_tags(%w(video)) do
206
- scope_allowed_attributes %w(src poster) do
207
- assert_sanitized %(<video src="videofile.ogg" autoplay poster="posterimage.jpg"></video>), %(<video src="videofile.ogg" poster="posterimage.jpg"></video>)
208
- assert_sanitized %(<video src="videofile.ogg" poster=javascript:alert(1)></video>), %(<video src="videofile.ogg"></video>)
364
+ %w(src width height alt).each do |img_attr|
365
+ define_method "test_should_allow_image_#{img_attr}_attribute" do
366
+ assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo">)
209
367
  end
210
368
  end
211
- end
212
369
 
213
- # RFC 3986, sec 4.2
214
- def test_allow_colons_in_path_component
215
- assert_sanitized "<a href=\"./this:that\">foo</a>"
216
- end
370
+ def test_lang_and_xml_lang
371
+ # https://html.spec.whatwg.org/multipage/dom.html#the-lang-and-xml:lang-attributes
372
+ #
373
+ # 3.2.6.2 The lang and xml:lang attributes
374
+ #
375
+ # ... Authors must not use the lang attribute in the XML namespace on HTML elements in HTML
376
+ # documents. To ease migration to and from XML, authors may specify an attribute in no namespace
377
+ # with no prefix and with the literal localname "xml:lang" on HTML elements in HTML documents,
378
+ # but such attributes must only be specified if a lang attribute in no namespace is also
379
+ # specified, and both attributes must have the same value when compared in an ASCII
380
+ # case-insensitive manner.
381
+ input = expected = "<div lang=\"en\" xml:lang=\"en\">foo</div>"
382
+ assert_sanitized(input, expected)
383
+ end
217
384
 
218
- %w(src width height alt).each do |img_attr|
219
- define_method "test_should_allow_image_#{img_attr}_attribute" do
220
- assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo" />)
385
+ def test_should_handle_non_html
386
+ assert_sanitized "abc"
221
387
  end
222
- end
223
388
 
224
- def test_should_handle_non_html
225
- assert_sanitized 'abc'
226
- end
389
+ def test_should_handle_blank_text
390
+ assert_nil(safe_list_sanitize(nil))
391
+ assert_equal("", safe_list_sanitize(""))
392
+ assert_equal(" ", safe_list_sanitize(" "))
393
+ end
227
394
 
228
- def test_should_handle_blank_text
229
- [nil, '', ' '].each { |blank| assert_sanitized blank }
230
- end
395
+ def test_setting_allowed_tags_affects_sanitization
396
+ scope_allowed_tags %w(u) do |sanitizer|
397
+ assert_equal "<u></u>", sanitizer.sanitize("<a><u></u></a>")
398
+ end
399
+ end
231
400
 
232
- def test_setting_allowed_tags_affects_sanitization
233
- scope_allowed_tags %w(u) do |sanitizer|
234
- assert_equal '<u></u>', sanitizer.sanitize('<a><u></u></a>')
401
+ def test_setting_allowed_attributes_affects_sanitization
402
+ scope_allowed_attributes %w(foo) do |sanitizer|
403
+ input = '<a foo="hello" bar="world"></a>'
404
+ assert_equal '<a foo="hello"></a>', sanitizer.sanitize(input)
405
+ end
235
406
  end
236
- end
237
407
 
238
- def test_setting_allowed_attributes_affects_sanitization
239
- scope_allowed_attributes %w(foo) do |sanitizer|
240
- input = '<a foo="hello" bar="world"></a>'
241
- assert_equal '<a foo="hello"></a>', sanitizer.sanitize(input)
408
+ def test_custom_tags_overrides_allowed_tags
409
+ scope_allowed_tags %(u) do |sanitizer|
410
+ input = "<a><u></u></a>"
411
+ assert_equal "<a></a>", sanitizer.sanitize(input, tags: %w(a))
412
+ end
242
413
  end
243
- end
244
414
 
245
- def test_custom_tags_overrides_allowed_tags
246
- scope_allowed_tags %(u) do |sanitizer|
247
- input = '<a><u></u></a>'
248
- assert_equal '<a></a>', sanitizer.sanitize(input, tags: %w(a))
415
+ def test_custom_attributes_overrides_allowed_attributes
416
+ scope_allowed_attributes %(foo) do |sanitizer|
417
+ input = '<a foo="hello" bar="world"></a>'
418
+ assert_equal '<a bar="world"></a>', sanitizer.sanitize(input, attributes: %w(bar))
419
+ end
249
420
  end
250
- end
251
421
 
252
- def test_custom_attributes_overrides_allowed_attributes
253
- scope_allowed_attributes %(foo) do |sanitizer|
254
- input = '<a foo="hello" bar="world"></a>'
255
- assert_equal '<a bar="world"></a>', sanitizer.sanitize(input, attributes: %w(bar))
422
+ def test_should_allow_prune
423
+ sanitizer = module_under_test::SafeListSanitizer.new(prune: true)
424
+ text = "<u>leave me <b>now</b></u>"
425
+ assert_equal "<u>leave me </u>", sanitizer.sanitize(text, tags: %w(u))
256
426
  end
257
- end
258
427
 
259
- def test_should_allow_prune
260
- sanitizer = Rails::Html::SafeListSanitizer.new(prune: true)
261
- text = '<u>leave me <b>now</b></u>'
262
- assert_equal "<u>leave me </u>", sanitizer.sanitize(text, tags: %w(u))
263
- end
428
+ def test_should_allow_custom_tags
429
+ text = "<u>foo</u>"
430
+ assert_equal text, safe_list_sanitize(text, tags: %w(u))
431
+ end
264
432
 
265
- def test_should_allow_custom_tags
266
- text = "<u>foo</u>"
267
- assert_equal text, safe_list_sanitize(text, tags: %w(u))
268
- end
433
+ def test_should_allow_only_custom_tags
434
+ text = "<u>foo</u> with <i>bar</i>"
435
+ assert_equal "<u>foo</u> with bar", safe_list_sanitize(text, tags: %w(u))
436
+ end
269
437
 
270
- def test_should_allow_only_custom_tags
271
- text = "<u>foo</u> with <i>bar</i>"
272
- assert_equal "<u>foo</u> with bar", safe_list_sanitize(text, tags: %w(u))
273
- end
438
+ def test_should_allow_custom_tags_with_attributes
439
+ text = %(<blockquote cite="http://example.com/">foo</blockquote>)
440
+ assert_equal text, safe_list_sanitize(text)
441
+ end
274
442
 
275
- def test_should_allow_custom_tags_with_attributes
276
- text = %(<blockquote cite="http://example.com/">foo</blockquote>)
277
- assert_equal text, safe_list_sanitize(text)
278
- end
443
+ def test_should_allow_custom_tags_with_custom_attributes
444
+ text = %(<blockquote foo="bar">Lorem ipsum</blockquote>)
445
+ assert_equal text, safe_list_sanitize(text, attributes: ["foo"])
446
+ end
279
447
 
280
- def test_should_allow_custom_tags_with_custom_attributes
281
- text = %(<blockquote foo="bar">Lorem ipsum</blockquote>)
282
- assert_equal text, safe_list_sanitize(text, attributes: ['foo'])
283
- end
448
+ def test_scrub_style_if_style_attribute_option_is_passed
449
+ input = '<p style="color: #000; background-image: url(http://www.ragingplatypus.com/i/cam-full.jpg);"></p>'
450
+ actual = safe_list_sanitize(input, attributes: %w(style))
284
451
 
285
- def test_scrub_style_if_style_attribute_option_is_passed
286
- input = '<p style="color: #000; background-image: url(http://www.ragingplatypus.com/i/cam-full.jpg);"></p>'
287
- actual = safe_list_sanitize(input, attributes: %w(style))
288
- assert_includes(['<p style="color: #000;"></p>', '<p style="color:#000;"></p>'], actual)
289
- end
452
+ assert_includes(['<p style="color: #000;"></p>', '<p style="color:#000;"></p>'], actual)
453
+ end
290
454
 
291
- def test_should_raise_argument_error_if_tags_is_not_enumerable
292
- assert_raises ArgumentError do
293
- safe_list_sanitize('<a>some html</a>', tags: 'foo')
455
+ def test_should_raise_argument_error_if_tags_is_not_enumerable
456
+ assert_raises ArgumentError do
457
+ safe_list_sanitize("<a>some html</a>", tags: "foo")
458
+ end
294
459
  end
295
- end
296
460
 
297
- def test_should_raise_argument_error_if_attributes_is_not_enumerable
298
- assert_raises ArgumentError do
299
- safe_list_sanitize('<a>some html</a>', attributes: 'foo')
461
+ def test_should_raise_argument_error_if_attributes_is_not_enumerable
462
+ assert_raises ArgumentError do
463
+ safe_list_sanitize("<a>some html</a>", attributes: "foo")
464
+ end
300
465
  end
301
- end
302
466
 
303
- def test_should_not_accept_non_loofah_inheriting_scrubber
304
- scrubber = Object.new
305
- def scrubber.scrub(node); node.name = 'h1'; end
467
+ def test_should_not_accept_non_loofah_inheriting_scrubber
468
+ scrubber = Object.new
469
+ def scrubber.scrub(node); node.name = "h1"; end
306
470
 
307
- assert_raises Loofah::ScrubberNotFound do
308
- safe_list_sanitize('<a>some html</a>', scrubber: scrubber)
471
+ assert_raises Loofah::ScrubberNotFound do
472
+ safe_list_sanitize("<a>some html</a>", scrubber: scrubber)
473
+ end
309
474
  end
310
- end
311
475
 
312
- def test_should_accept_loofah_inheriting_scrubber
313
- scrubber = Loofah::Scrubber.new
314
- def scrubber.scrub(node); node.name = 'h1'; end
476
+ def test_should_accept_loofah_inheriting_scrubber
477
+ scrubber = Loofah::Scrubber.new
478
+ def scrubber.scrub(node); node.replace("<h1>#{node.inner_html}</h1>"); end
315
479
 
316
- html = "<script>hello!</script>"
317
- assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber)
318
- end
480
+ html = "<script>hello!</script>"
481
+ assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber)
482
+ end
319
483
 
320
- def test_should_accept_loofah_scrubber_that_wraps_a_block
321
- scrubber = Loofah::Scrubber.new { |node| node.name = 'h1' }
322
- html = "<script>hello!</script>"
323
- assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber)
324
- end
484
+ def test_should_accept_loofah_scrubber_that_wraps_a_block
485
+ scrubber = Loofah::Scrubber.new { |node| node.replace("<h1>#{node.inner_html}</h1>") }
486
+ html = "<script>hello!</script>"
487
+ assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber)
488
+ end
325
489
 
326
- def test_custom_scrubber_takes_precedence_over_other_options
327
- scrubber = Loofah::Scrubber.new { |node| node.name = 'h1' }
328
- html = "<script>hello!</script>"
329
- assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber, tags: ['foo'])
330
- end
490
+ def test_custom_scrubber_takes_precedence_over_other_options
491
+ scrubber = Loofah::Scrubber.new { |node| node.replace("<h1>#{node.inner_html}</h1>") }
492
+ html = "<script>hello!</script>"
493
+ assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber, tags: ["foo"])
494
+ end
331
495
 
332
- [%w(img src), %w(a href)].each do |(tag, attr)|
333
- define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do
334
- assert_sanitized %(<#{tag} #{attr}="javascript:bang" title="1">boo</#{tag}>), %(<#{tag} title="1">boo</#{tag}>)
496
+ def test_should_strip_src_attribute_in_img_with_bad_protocols
497
+ assert_sanitized %(<img src="javascript:bang" title="1">), %(<img title="1">)
335
498
  end
336
- end
337
499
 
338
- def test_should_block_script_tag
339
- assert_sanitized %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), ""
340
- end
500
+ def test_should_strip_href_attribute_in_a_with_bad_protocols
501
+ assert_sanitized %(<a href="javascript:bang" title="1">boo</a>), %(<a title="1">boo</a>)
502
+ end
341
503
 
342
- def test_should_not_fall_for_xss_image_hack_with_uppercase_tags
343
- assert_sanitized %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">), %(<img>alert("XSS")"&gt;)
344
- end
504
+ def test_should_block_script_tag
505
+ assert_sanitized %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), ""
506
+ end
345
507
 
346
- [%(<IMG SRC="javascript:alert('XSS');">),
347
- %(<IMG SRC=javascript:alert('XSS')>),
348
- %(<IMG SRC=JaVaScRiPt:alert('XSS')>),
349
- %(<IMG SRC=javascript:alert(&quot;XSS&quot;)>),
350
- %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>),
351
- %(<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>),
352
- %(<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>),
353
- %(<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>),
354
- %(<IMG SRC="jav\tascript:alert('XSS');">),
355
- %(<IMG SRC="jav&#x09;ascript:alert('XSS');">),
356
- %(<IMG SRC="jav&#x0A;ascript:alert('XSS');">),
357
- %(<IMG SRC="jav&#x0D;ascript:alert('XSS');">),
358
- %(<IMG SRC=" &#14; javascript:alert('XSS');">),
359
- %(<IMG SRC="javascript&#x3a;alert('XSS');">),
360
- %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)].each do |img_hack|
361
- define_method "test_should_not_fall_for_xss_image_hack_#{img_hack}" do
362
- assert_sanitized img_hack, "<img>"
508
+ def test_should_not_fall_for_xss_image_hack_with_uppercase_tags
509
+ assert_sanitized %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">), %(<img>alert("XSS")"&gt;)
363
510
  end
364
- end
365
511
 
366
- def test_should_sanitize_tag_broken_up_by_null
367
- assert_sanitized %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>), ""
368
- end
512
+ [%(<IMG SRC="javascript:alert('XSS');">),
513
+ %(<IMG SRC=javascript:alert('XSS')>),
514
+ %(<IMG SRC=JaVaScRiPt:alert('XSS')>),
515
+ %(<IMG SRC=javascript:alert(&quot;XSS&quot;)>),
516
+ %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>),
517
+ %(<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>),
518
+ %(<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>),
519
+ %(<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>),
520
+ %(<IMG SRC="jav\tascript:alert('XSS');">),
521
+ %(<IMG SRC="jav&#x09;ascript:alert('XSS');">),
522
+ %(<IMG SRC="jav&#x0A;ascript:alert('XSS');">),
523
+ %(<IMG SRC="jav&#x0D;ascript:alert('XSS');">),
524
+ %(<IMG SRC=" &#14; javascript:alert('XSS');">),
525
+ %(<IMG SRC="javascript&#x3a;alert('XSS');">),
526
+ %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)].each do |img_hack|
527
+ define_method "test_should_not_fall_for_xss_image_hack_#{img_hack}" do
528
+ assert_sanitized img_hack, "<img>"
529
+ end
530
+ end
369
531
 
370
- def test_should_sanitize_invalid_script_tag
371
- assert_sanitized %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), ""
372
- end
532
+ def test_should_sanitize_tag_broken_up_by_null
533
+ input = %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>)
534
+ result = safe_list_sanitize(input)
535
+ acceptable_results = [
536
+ # libxml2
537
+ "",
538
+ # xerces+neko
539
+ 'alert("XSS")',
540
+ ]
541
+
542
+ assert_includes(acceptable_results, result)
543
+ end
373
544
 
374
- def test_should_sanitize_script_tag_with_multiple_open_brackets
375
- assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "&lt;alert(\"XSS\");//&lt;"
376
- assert_sanitized %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a), ""
377
- end
545
+ def test_should_sanitize_invalid_script_tag
546
+ assert_sanitized %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), ""
547
+ end
378
548
 
379
- def test_should_sanitize_unclosed_script
380
- assert_sanitized %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), ""
381
- end
549
+ def test_should_sanitize_script_tag_with_multiple_open_brackets
550
+ assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "&lt;alert(\"XSS\");//&lt;"
551
+ end
382
552
 
383
- def test_should_sanitize_half_open_scripts
384
- assert_sanitized %(<IMG SRC="javascript:alert('XSS')"), "<img>"
385
- end
553
+ def test_should_sanitize_script_tag_with_multiple_open_brackets_2
554
+ input = %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a)
555
+ result = safe_list_sanitize(input)
556
+ acceptable_results = [
557
+ # libxml2
558
+ "",
559
+ # xerces+neko
560
+ "&lt;a",
561
+ ]
562
+
563
+ assert_includes(acceptable_results, result)
564
+ end
386
565
 
387
- def test_should_not_fall_for_ridiculous_hack
388
- img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)
389
- assert_sanitized img_hack, "<img>"
390
- end
566
+ def test_should_sanitize_unclosed_script
567
+ assert_sanitized %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), ""
568
+ end
391
569
 
392
- def test_should_sanitize_attributes
393
- assert_sanitized %(<SPAN title="'><script>alert()</script>">blah</SPAN>), %(<span title="#{CGI.escapeHTML "'><script>alert()</script>"}">blah</span>)
394
- end
570
+ def test_should_sanitize_half_open_scripts
571
+ input = %(<IMG SRC="javascript:alert('XSS')")
572
+ result = safe_list_sanitize(input)
573
+ acceptable_results = [
574
+ # libxml2
575
+ "<img>",
576
+ # libgumbo
577
+ "",
578
+ ]
579
+
580
+ assert_includes(acceptable_results, result)
581
+ end
395
582
 
396
- def test_should_sanitize_illegal_style_properties
397
- raw = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;)
398
- expected = %(display:block;width:100%;height:100%;background-color:black;background-x:center;background-y:center;)
399
- assert_equal expected, sanitize_css(raw)
400
- end
583
+ def test_should_not_fall_for_ridiculous_hack
584
+ img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)
585
+ assert_sanitized img_hack, "<img>"
586
+ end
401
587
 
402
- def test_should_sanitize_with_trailing_space
403
- raw = "display:block; "
404
- expected = "display:block;"
405
- assert_equal expected, sanitize_css(raw)
406
- end
588
+ def test_should_sanitize_attributes
589
+ input = %(<SPAN title="'><script>alert()</script>">blah</SPAN>)
590
+ result = safe_list_sanitize(input)
591
+ acceptable_results = [
592
+ # libxml2
593
+ %(<span title="'&gt;&lt;script&gt;alert()&lt;/script&gt;">blah</span>),
594
+ # libgumbo
595
+ # this looks scary, but it's fine. for a more detailed analysis check out:
596
+ # https://github.com/discourse/discourse/pull/21522#issuecomment-1545697968
597
+ %(<span title="'><script>alert()</script>">blah</span>)
598
+ ]
599
+
600
+ assert_includes(acceptable_results, result)
601
+ end
407
602
 
408
- def test_should_sanitize_xul_style_attributes
409
- raw = %(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss'))
410
- assert_equal '', sanitize_css(raw)
411
- end
603
+ def test_should_sanitize_invalid_tag_names
604
+ assert_sanitized(%(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f), "a b cd e f")
605
+ end
412
606
 
413
- def test_should_sanitize_invalid_tag_names
414
- assert_sanitized(%(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f), "a b cd e f")
415
- end
607
+ def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags
608
+ assert_sanitized('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>', "<a>foo</a>")
609
+ end
416
610
 
417
- def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags
418
- assert_sanitized('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>', "<a>foo</a>")
419
- end
611
+ def test_should_sanitize_invalid_tag_names_in_single_tags
612
+ input = %(<img/src="http://ha.ckers.org/xss.js"/>)
613
+ result = safe_list_sanitize(input)
614
+ acceptable_results = [
615
+ # libxml2
616
+ "<img>",
617
+ # libgumbo
618
+ %(<img src="http://ha.ckers.org/xss.js">),
619
+ ]
620
+
621
+ assert_includes(acceptable_results, result)
622
+ end
420
623
 
421
- def test_should_sanitize_invalid_tag_names_in_single_tags
422
- assert_sanitized('<img/src="http://ha.ckers.org/xss.js"/>', "<img />")
423
- end
624
+ def test_should_sanitize_img_dynsrc_lowsrc
625
+ assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img>")
626
+ end
424
627
 
425
- def test_should_sanitize_img_dynsrc_lowsrc
426
- assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img />")
427
- end
628
+ def test_should_sanitize_img_vbscript
629
+ assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), "<img>"
630
+ end
428
631
 
429
- def test_should_sanitize_div_background_image_unicode_encoded
430
- [
431
- convert_to_css_hex("url(javascript:alert(1))", false),
432
- convert_to_css_hex("url(javascript:alert(1))", true),
433
- convert_to_css_hex("url(https://example.com)", false),
434
- convert_to_css_hex("url(https://example.com)", true),
435
- ].each do |propval|
436
- raw = "background-image:" + propval
437
- assert_empty(sanitize_css(raw))
632
+ def test_should_sanitize_cdata_section
633
+ input = "<![CDATA[<span>section</span>]]>"
634
+ result = safe_list_sanitize(input)
635
+ acceptable_results = [
636
+ # libxml2 = 2.9.14
637
+ %{&lt;![CDATA[<span>section</span>]]&gt;},
638
+ # other libxml2
639
+ %{section]]&gt;},
640
+ # xerces+neko
641
+ "",
642
+ ]
643
+
644
+ assert_includes(acceptable_results, result)
438
645
  end
439
- end
440
646
 
441
- def test_should_allow_div_background_image_unicode_encoded_safe_functions
442
- [
443
- convert_to_css_hex("rgb(255,0,0)", false),
444
- convert_to_css_hex("rgb(255,0,0)", true),
445
- ].each do |propval|
446
- raw = "background-image:" + propval
447
- assert_includes(sanitize_css(raw), "background-image")
647
+ def test_should_sanitize_unterminated_cdata_section
648
+ input = "<![CDATA[<span>neverending..."
649
+ result = safe_list_sanitize(input)
650
+
651
+ acceptable_results = [
652
+ # libxml2 = 2.9.14
653
+ %{&lt;![CDATA[<span>neverending...</span>},
654
+ # other libxml2
655
+ %{neverending...},
656
+ # xerces+neko
657
+ ""
658
+ ]
659
+
660
+ assert_includes(acceptable_results, result)
448
661
  end
449
- end
450
662
 
451
- def test_should_sanitize_div_style_expression
452
- raw = %(width: expression(alert('XSS'));)
453
- assert_equal '', sanitize_css(raw)
454
- end
663
+ def test_should_not_mangle_urls_with_ampersand
664
+ assert_sanitized %{<a href=\"http://www.domain.com?var1=1&amp;var2=2\">my link</a>}
665
+ end
455
666
 
456
- def test_should_sanitize_across_newlines
457
- raw = %(\nwidth:\nexpression(alert('XSS'));\n)
458
- assert_equal '', sanitize_css(raw)
459
- end
667
+ def test_should_sanitize_neverending_attribute
668
+ # note that assert_dom_equal chokes in this case! so avoid using assert_sanitized
669
+ assert_equal("<span class=\"\\\"></span>", safe_list_sanitize("<span class=\"\\\">"))
670
+ end
460
671
 
461
- def test_should_sanitize_img_vbscript
462
- assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), '<img />'
463
- end
672
+ [
673
+ %(<a href="javascript&#x3a;alert('XSS');">),
674
+ %(<a href="javascript&#x003a;alert('XSS');">),
675
+ %(<a href="javascript&#x3A;alert('XSS');">),
676
+ %(<a href="javascript&#x003A;alert('XSS');">)
677
+ ].each_with_index do |enc_hack, i|
678
+ define_method "test_x03a_handling_#{i + 1}" do
679
+ assert_sanitized enc_hack, "<a></a>"
680
+ end
681
+ end
464
682
 
465
- def test_should_sanitize_cdata_section
466
- input = "<![CDATA[<span>section</span>]]>"
467
- expected = libxml_2_9_14_recovery_lt_bang? ? %{&lt;![CDATA[<span>section</span>]]&gt;} : %{section]]&gt;}
468
- assert_sanitized(input, expected)
469
- end
683
+ def test_x03a_legitimate
684
+ assert_sanitized %(<a href="http&#x3a;//legit">asdf</a>), %(<a href="http://legit">asdf</a>)
685
+ assert_sanitized %(<a href="http&#x3A;//legit">asdf</a>), %(<a href="http://legit">asdf</a>)
686
+ end
470
687
 
471
- def test_should_sanitize_unterminated_cdata_section
472
- input = "<![CDATA[<span>neverending..."
473
- expected = libxml_2_9_14_recovery_lt_bang? ? %{&lt;![CDATA[<span>neverending...</span>} : %{neverending...}
474
- assert_sanitized(input, expected)
475
- end
688
+ def test_sanitize_ascii_8bit_string
689
+ safe_list_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
690
+ assert_equal "<div><a>hello</a></div>", sanitized
691
+ assert_equal Encoding::UTF_8, sanitized.encoding
692
+ end
693
+ end
476
694
 
477
- def test_should_not_mangle_urls_with_ampersand
478
- assert_sanitized %{<a href=\"http://www.domain.com?var1=1&amp;var2=2\">my link</a>}
479
- end
695
+ def test_sanitize_data_attributes
696
+ assert_sanitized %(<a href="/blah" data-method="post">foo</a>), %(<a href="/blah">foo</a>)
697
+ assert_sanitized %(<a data-remote="true" data-type="script" data-method="get" data-cross-domain="true" href="attack.js">Launch the missiles</a>), %(<a href="attack.js">Launch the missiles</a>)
698
+ end
480
699
 
481
- def test_should_sanitize_neverending_attribute
482
- assert_sanitized "<span class=\"\\", "<span class=\"\\\">"
483
- end
700
+ def test_allow_data_attribute_if_requested
701
+ text = %(<a data-foo="foo">foo</a>)
702
+ assert_equal %(<a data-foo="foo">foo</a>), safe_list_sanitize(text, attributes: ["data-foo"])
703
+ end
484
704
 
485
- [
486
- %(<a href="javascript&#x3a;alert('XSS');">),
487
- %(<a href="javascript&#x003a;alert('XSS');">),
488
- %(<a href="javascript&#x3A;alert('XSS');">),
489
- %(<a href="javascript&#x003A;alert('XSS');">)
490
- ].each_with_index do |enc_hack, i|
491
- define_method "test_x03a_handling_#{i+1}" do
492
- assert_sanitized enc_hack, "<a>"
705
+ # https://developer.mozilla.org/en-US/docs/Glossary/Void_element
706
+ VOID_ELEMENTS = %w[area base br col embed hr img input keygen link meta param source track wbr]
707
+
708
+ %w(strong em b i p code pre tt samp kbd var sub
709
+ sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr
710
+ acronym a img blockquote del ins time).each do |tag_name|
711
+ define_method "test_default_safelist_should_allow_#{tag_name}" do
712
+ if VOID_ELEMENTS.include?(tag_name)
713
+ assert_sanitized("<#{tag_name}>")
714
+ else
715
+ assert_sanitized("<#{tag_name}>foo</#{tag_name}>")
716
+ end
717
+ end
493
718
  end
494
- end
495
719
 
496
- def test_x03a_legitimate
497
- assert_sanitized %(<a href="http&#x3a;//legit">), %(<a href="http://legit">)
498
- assert_sanitized %(<a href="http&#x3A;//legit">), %(<a href="http://legit">)
499
- end
720
+ def test_datetime_attribute
721
+ assert_sanitized("<time datetime=\"2023-01-01\">Today</time>")
722
+ end
500
723
 
501
- def test_sanitize_ascii_8bit_string
502
- safe_list_sanitize('<a>hello</a>'.encode('ASCII-8BIT')).tap do |sanitized|
503
- assert_equal '<a>hello</a>', sanitized
504
- assert_equal Encoding::UTF_8, sanitized.encoding
724
+ def test_abbr_attribute
725
+ scope_allowed_tags(%w(table tr th td)) do
726
+ assert_sanitized(%(<table><tr><td abbr="UK">United Kingdom</td></tr></table>))
727
+ end
505
728
  end
506
- end
507
729
 
508
- def test_sanitize_data_attributes
509
- assert_sanitized %(<a href="/blah" data-method="post">foo</a>), %(<a href="/blah">foo</a>)
510
- assert_sanitized %(<a data-remote="true" data-type="script" data-method="get" data-cross-domain="true" href="attack.js">Launch the missiles</a>), %(<a href="attack.js">Launch the missiles</a>)
511
- end
730
+ def test_uri_escaping_of_href_attr_in_a_tag_in_safe_list_sanitizer
731
+ html = %{<a href='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
512
732
 
513
- def test_allow_data_attribute_if_requested
514
- text = %(<a data-foo="foo">foo</a>)
515
- assert_equal %(<a data-foo="foo">foo</a>), safe_list_sanitize(text, attributes: ['data-foo'])
516
- end
733
+ text = safe_list_sanitize(html)
517
734
 
518
- def test_uri_escaping_of_href_attr_in_a_tag_in_safe_list_sanitizer
519
- skip if RUBY_VERSION < "2.3"
735
+ acceptable_results = [
736
+ # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch)
737
+ %{<a href="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
738
+ # system libxml2
739
+ %{<a href="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
740
+ # xerces+neko
741
+ %{<a href="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>}
742
+ ]
520
743
 
521
- html = %{<a href='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
744
+ assert_includes(acceptable_results, text)
745
+ end
522
746
 
523
- text = safe_list_sanitize(html)
747
+ def test_uri_escaping_of_src_attr_in_a_tag_in_safe_list_sanitizer
748
+ html = %{<a src='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
524
749
 
525
- acceptable_results = [
526
- # nokogiri w/vendored+patched libxml2
527
- %{<a href="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
528
- # nokogiri w/ system libxml2
529
- %{<a href="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
530
- ]
531
- assert_includes(acceptable_results, text)
532
- end
750
+ text = safe_list_sanitize(html)
533
751
 
534
- def test_uri_escaping_of_src_attr_in_a_tag_in_safe_list_sanitizer
535
- skip if RUBY_VERSION < "2.3"
752
+ acceptable_results = [
753
+ # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch)
754
+ %{<a src="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
755
+ # system libxml2
756
+ %{<a src="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
757
+ # xerces+neko
758
+ %{<a src="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>}
759
+ ]
536
760
 
537
- html = %{<a src='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
761
+ assert_includes(acceptable_results, text)
762
+ end
538
763
 
539
- text = safe_list_sanitize(html)
764
+ def test_uri_escaping_of_name_attr_in_a_tag_in_safe_list_sanitizer
765
+ html = %{<a name='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
540
766
 
541
- acceptable_results = [
542
- # nokogiri w/vendored+patched libxml2
543
- %{<a src="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
544
- # nokogiri w/system libxml2
545
- %{<a src="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
546
- ]
547
- assert_includes(acceptable_results, text)
548
- end
767
+ text = safe_list_sanitize(html)
549
768
 
550
- def test_uri_escaping_of_name_attr_in_a_tag_in_safe_list_sanitizer
551
- skip if RUBY_VERSION < "2.3"
769
+ acceptable_results = [
770
+ # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch)
771
+ %{<a name="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
772
+ # system libxml2
773
+ %{<a name="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
774
+ # xerces+neko
775
+ %{<a name="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>}
776
+ ]
552
777
 
553
- html = %{<a name='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
778
+ assert_includes(acceptable_results, text)
779
+ end
554
780
 
555
- text = safe_list_sanitize(html)
781
+ def test_uri_escaping_of_name_action_in_a_tag_in_safe_list_sanitizer
782
+ html = %{<a action='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
556
783
 
557
- acceptable_results = [
558
- # nokogiri w/vendored+patched libxml2
559
- %{<a name="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
560
- # nokogiri w/system libxml2
561
- %{<a name="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
562
- ]
563
- assert_includes(acceptable_results, text)
564
- end
784
+ text = safe_list_sanitize(html, attributes: ["action"])
565
785
 
566
- def test_uri_escaping_of_name_action_in_a_tag_in_safe_list_sanitizer
567
- skip if RUBY_VERSION < "2.3"
786
+ acceptable_results = [
787
+ # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch)
788
+ %{<a action="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
789
+ # system libxml2
790
+ %{<a action="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
791
+ # xerces+neko
792
+ %{<a action="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>},
793
+ ]
568
794
 
569
- html = %{<a action='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
795
+ assert_includes(acceptable_results, text)
796
+ end
570
797
 
571
- text = safe_list_sanitize(html, attributes: ['action'])
798
+ def test_exclude_node_type_processing_instructions
799
+ input = "<div>text</div><?div content><b>text</b>"
800
+ result = safe_list_sanitize(input)
801
+ acceptable_results = [
802
+ # jruby cyberneko (nokogiri < 1.14.0)
803
+ "<div>text</div>",
804
+ # everything else
805
+ "<div>text</div><b>text</b>",
806
+ ]
807
+
808
+ assert_includes(acceptable_results, result)
809
+ end
572
810
 
573
- acceptable_results = [
574
- # nokogiri w/vendored+patched libxml2
575
- %{<a action="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
576
- # nokogiri w/system libxml2
577
- %{<a action="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
578
- ]
579
- assert_includes(acceptable_results, text)
580
- end
811
+ def test_exclude_node_type_comment
812
+ assert_equal("<div>text</div><b>text</b>", safe_list_sanitize("<div>text</div><!-- comment --><b>text</b>"))
813
+ end
581
814
 
582
- def test_exclude_node_type_processing_instructions
583
- assert_equal("<div>text</div><b>text</b>", safe_list_sanitize("<div>text</div><?div content><b>text</b>"))
584
- end
815
+ %w[text/plain text/css image/png image/gif image/jpeg].each do |mediatype|
816
+ define_method "test_mediatype_#{mediatype}_allowed" do
817
+ input = %Q(<img src="data:#{mediatype};base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
818
+ expected = input
819
+ actual = safe_list_sanitize(input)
820
+ assert_equal(expected, actual)
821
+
822
+ input = %Q(<img src="DATA:#{mediatype};base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
823
+ expected = input
824
+ actual = safe_list_sanitize(input)
825
+ assert_equal(expected, actual)
826
+ end
827
+ end
585
828
 
586
- def test_exclude_node_type_comment
587
- assert_equal("<div>text</div><b>text</b>", safe_list_sanitize("<div>text</div><!-- comment --><b>text</b>"))
588
- end
829
+ def test_mediatype_text_html_disallowed
830
+ input = '<img src="data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">'
831
+ expected = "<img>"
832
+ actual = safe_list_sanitize(input)
833
+ assert_equal(expected, actual)
589
834
 
590
- %w[text/plain text/css image/png image/gif image/jpeg].each do |mediatype|
591
- define_method "test_mediatype_#{mediatype}_allowed" do
592
- input = %Q(<img src="data:#{mediatype};base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
593
- expected = input
835
+ input = '<img src="DATA:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">'
836
+ expected = "<img>"
594
837
  actual = safe_list_sanitize(input)
595
838
  assert_equal(expected, actual)
839
+ end
596
840
 
597
- input = %Q(<img src="DATA:#{mediatype};base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
598
- expected = input
841
+ def test_mediatype_image_svg_xml_disallowed
842
+ input = '<img src="">'
843
+ expected = "<img>"
844
+ actual = safe_list_sanitize(input)
845
+ assert_equal(expected, actual)
846
+
847
+ input = '<img src="DATA:image/svg+xml;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">'
848
+ expected = "<img>"
599
849
  actual = safe_list_sanitize(input)
600
850
  assert_equal(expected, actual)
601
851
  end
602
- end
603
852
 
604
- def test_mediatype_text_html_disallowed
605
- input = %q(<img src="data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
606
- expected = %q(<img>)
607
- actual = safe_list_sanitize(input)
608
- assert_equal(expected, actual)
853
+ def test_mediatype_other_disallowed
854
+ input = '<a href="data:foo;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">foo</a>'
855
+ expected = "<a>foo</a>"
856
+ actual = safe_list_sanitize(input)
857
+ assert_equal(expected, actual)
609
858
 
610
- input = %q(<img src="DATA:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
611
- expected = %q(<img>)
612
- actual = safe_list_sanitize(input)
613
- assert_equal(expected, actual)
614
- end
859
+ input = '<a href="DATA:foo;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">foo</a>'
860
+ expected = "<a>foo</a>"
861
+ actual = safe_list_sanitize(input)
862
+ assert_equal(expected, actual)
863
+ end
615
864
 
616
- def test_mediatype_image_svg_xml_disallowed
617
- input = %q(<img src="">)
618
- expected = %q(<img>)
619
- actual = safe_list_sanitize(input)
620
- assert_equal(expected, actual)
865
+ def test_scrubbing_svg_attr_values_that_allow_ref
866
+ input = '<div fill="yellow url(http://bad.com/) #fff">hey</div>'
867
+ expected = '<div fill="yellow #fff">hey</div>'
868
+ actual = scope_allowed_attributes %w(fill) do
869
+ safe_list_sanitize(input)
870
+ end
621
871
 
622
- input = %q(<img src="DATA:image/svg+xml;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
623
- expected = %q(<img>)
624
- actual = safe_list_sanitize(input)
625
- assert_equal(expected, actual)
626
- end
872
+ assert_equal(expected, actual)
873
+ end
627
874
 
628
- def test_mediatype_other_disallowed
629
- input = %q(<a href="data:foo;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">foo</a>)
630
- expected = %q(<a>foo</a>)
631
- actual = safe_list_sanitize(input)
632
- assert_equal(expected, actual)
875
+ def test_style_with_css_payload
876
+ input, tags = "<style>div > span { background: \"red\"; }</style>", ["style"]
877
+ actual = safe_list_sanitize(input, tags: tags)
878
+ acceptable_results = [
879
+ # libxml2
880
+ "<style>div &gt; span { background: \"red\"; }</style>",
881
+ # libgumbo
882
+ "<style>div > span { background: \"red\"; }</style>",
883
+ ]
884
+
885
+ assert_includes(acceptable_results, actual)
886
+ end
633
887
 
634
- input = %q(<a href="DATA:foo;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">foo</a>)
635
- expected = %q(<a>foo</a>)
636
- actual = safe_list_sanitize(input)
637
- assert_equal(expected, actual)
638
- end
888
+ def test_combination_of_select_and_style_with_css_payload
889
+ input, tags = "<select><style>div > span { background: \"red\"; }</style></select>", ["select", "style"]
890
+ actual = safe_list_sanitize(input, tags: tags)
891
+ acceptable_results = [
892
+ # libxml2
893
+ "<select><style>div &gt; span { background: \"red\"; }</style></select>",
894
+ # libgumbo
895
+ "<select>div &gt; span { background: \"red\"; }</select>",
896
+ ]
897
+
898
+ assert_includes(acceptable_results, actual)
899
+ end
639
900
 
640
- def test_scrubbing_svg_attr_values_that_allow_ref
641
- input = %Q(<div fill="yellow url(http://bad.com/) #fff">hey</div>)
642
- expected = %Q(<div fill="yellow #fff">hey</div>)
643
- actual = scope_allowed_attributes %w(fill) do
644
- safe_list_sanitize(input)
901
+ def test_combination_of_select_and_style_with_script_payload
902
+ input, tags = "<select><style><script>alert(1)</script></style></select>", ["select", "style"]
903
+ actual = safe_list_sanitize(input, tags: tags)
904
+ acceptable_results = [
905
+ # libxml2
906
+ "<select><style>&lt;script&gt;alert(1)&lt;/script&gt;</style></select>",
907
+ # libgumbo
908
+ "<select>alert(1)</select>",
909
+ ]
910
+
911
+ assert_includes(acceptable_results, actual)
645
912
  end
646
913
 
647
- assert_equal(expected, actual)
648
- end
914
+ def test_combination_of_svg_and_style_with_script_payload
915
+ input, tags = "<svg><style><script>alert(1)</script></style></svg>", ["svg", "style"]
916
+ actual = safe_list_sanitize(input, tags: tags)
917
+ acceptable_results = [
918
+ # libxml2
919
+ "<svg><style>&lt;script&gt;alert(1)&lt;/script&gt;</style></svg>",
920
+ # libgumbo
921
+ "<svg><style></style></svg>",
922
+ ]
923
+
924
+ assert_includes(acceptable_results, actual)
925
+ end
649
926
 
650
- def test_style_with_css_payload
651
- input, tags = "<style>div > span { background: \"red\"; }</style>", ["style"]
652
- expected = "<style>div &gt; span { background: \"red\"; }</style>"
653
- actual = safe_list_sanitize(input, tags: tags)
927
+ def test_combination_of_math_and_style_with_img_payload
928
+ input, tags = "<math><style><img src=x onerror=alert(1)></style></math>", ["math", "style"]
929
+ actual = safe_list_sanitize(input, tags: tags)
930
+ acceptable_results = [
931
+ # libxml2
932
+ "<math><style>&lt;img src=x onerror=alert(1)&gt;</style></math>",
933
+ # libgumbo
934
+ "<math><style></style></math>",
935
+ ]
936
+
937
+ assert_includes(acceptable_results, actual)
938
+ end
654
939
 
655
- assert_equal(expected, actual)
656
- end
940
+ def test_combination_of_math_and_style_with_img_payload_2
941
+ input, tags = "<math><style><img src=x onerror=alert(1)></style></math>", ["math", "style", "img"]
942
+ actual = safe_list_sanitize(input, tags: tags)
943
+ acceptable_results = [
944
+ # libxml2
945
+ "<math><style>&lt;img src=x onerror=alert(1)&gt;</style></math>",
946
+ # libgumbo
947
+ "<math><style></style></math><img src=\"x\">",
948
+ ]
949
+
950
+ assert_includes(acceptable_results, actual)
951
+ end
657
952
 
658
- def test_combination_of_select_and_style_with_css_payload
659
- input, tags = "<select><style>div > span { background: \"red\"; }</style></select>", ["select", "style"]
660
- expected = "<select><style>div &gt; span { background: \"red\"; }</style></select>"
661
- actual = safe_list_sanitize(input, tags: tags)
953
+ def test_combination_of_svg_and_style_with_img_payload
954
+ input, tags = "<svg><style><img src=x onerror=alert(1)></style></svg>", ["svg", "style"]
955
+ actual = safe_list_sanitize(input, tags: tags)
956
+ acceptable_results = [
957
+ # libxml2
958
+ "<svg><style>&lt;img src=x onerror=alert(1)&gt;</style></svg>",
959
+ # libgumbo
960
+ "<svg><style></style></svg>",
961
+ ]
962
+
963
+ assert_includes(acceptable_results, actual)
964
+ end
662
965
 
663
- assert_equal(expected, actual)
664
- end
966
+ def test_combination_of_svg_and_style_with_img_payload_2
967
+ input, tags = "<svg><style><img src=x onerror=alert(1)></style></svg>", ["svg", "style", "img"]
968
+ actual = safe_list_sanitize(input, tags: tags)
969
+ acceptable_results = [
970
+ # libxml2
971
+ "<svg><style>&lt;img src=x onerror=alert(1)&gt;</style></svg>",
972
+ # libgumbo
973
+ "<svg><style></style></svg><img src=\"x\">",
974
+ ]
975
+
976
+ assert_includes(acceptable_results, actual)
977
+ end
665
978
 
666
- def test_combination_of_select_and_style_with_script_payload
667
- input, tags = "<select><style><script>alert(1)</script></style></select>", ["select", "style"]
668
- expected = "<select><style>&lt;script&gt;alert(1)&lt;/script&gt;</style></select>"
669
- actual = safe_list_sanitize(input, tags: tags)
979
+ def test_combination_of_svg_and_style_with_escaped_img_payload
980
+ # https://hackerone.com/reports/2503220
981
+ input, tags = "<svg><style>&lt;img src onerror=alert(1)>", ["svg", "style"]
982
+ actual = safe_list_sanitize(input, tags: tags)
983
+ acceptable_results = [
984
+ # libxml2
985
+ "<svg><style>&amp;lt;img src onerror=alert(1)&gt;</style></svg>",
986
+ # libgumbo
987
+ "<svg><style>&lt;img src onerror=alert(1)&gt;</style></svg>",
988
+ ]
989
+
990
+ assert_includes(acceptable_results, actual)
991
+ end
670
992
 
671
- assert_equal(expected, actual)
672
- end
993
+ def test_combination_of_math_and_style_with_escaped_img_payload
994
+ # https://hackerone.com/reports/2503220
995
+ input, tags = "<math><style>&lt;img src onerror=alert(1)>", ["math", "style"]
996
+ actual = safe_list_sanitize(input, tags: tags)
997
+ acceptable_results = [
998
+ # libxml2
999
+ "<math><style>&amp;lt;img src onerror=alert(1)&gt;</style></math>",
1000
+ # libgumbo
1001
+ "<math><style>&lt;img src onerror=alert(1)&gt;</style></math>",
1002
+ ]
1003
+
1004
+ assert_includes(acceptable_results, actual)
1005
+ end
1006
+
1007
+ def test_combination_of_style_and_disallowed_svg_with_script_payload
1008
+ # https://hackerone.com/reports/2519936
1009
+ input, tags = "<svg><style><style class='</style><script>alert(1)</script>'>", ["style"]
1010
+ actual = safe_list_sanitize(input, tags: tags)
1011
+ acceptable_results = [
1012
+ # libxml2
1013
+ "<style>&lt;style class='</style>alert(1)'&gt;",
1014
+ # libgumbo
1015
+ "",
1016
+ ]
1017
+
1018
+ assert_includes(acceptable_results, actual)
1019
+ end
673
1020
 
674
- def test_combination_of_svg_and_style_with_script_payload
675
- input, tags = "<svg><style><script>alert(1)</script></style></svg>", ["svg", "style"]
676
- expected = "<svg><style>&lt;script&gt;alert(1)&lt;/script&gt;</style></svg>"
677
- actual = safe_list_sanitize(input, tags: tags)
1021
+ def test_combination_of_style_and_disallowed_math_with_script_payload
1022
+ # https://hackerone.com/reports/2519936
1023
+ input, tags = "<math><style><style class='</style><script>alert(1)</script>'>", ["style"]
1024
+ actual = safe_list_sanitize(input, tags: tags)
1025
+ acceptable_results = [
1026
+ # libxml2
1027
+ "<style>&lt;style class='</style>alert(1)'&gt;",
1028
+ # libgumbo
1029
+ "",
1030
+ ]
1031
+
1032
+ assert_includes(acceptable_results, actual)
1033
+ end
678
1034
 
679
- assert_equal(expected, actual)
680
- end
1035
+ def test_math_with_disallowed_mtext_and_img_payload
1036
+ # https://hackerone.com/reports/2519941
1037
+ input, tags = "<math><mtext><table><mglyph><style><img src=: onerror=alert(1)>", ["math", "style"]
1038
+ actual = safe_list_sanitize(input, tags: tags)
1039
+ acceptable_results = [
1040
+ # libxml2
1041
+ "<math><style>&lt;img src=: onerror=alert(1)&gt;</style></math>",
1042
+ # libgumbo
1043
+ "<math></math>",
1044
+ ]
1045
+
1046
+ assert_includes(acceptable_results, actual)
1047
+ end
681
1048
 
682
- def test_combination_of_math_and_style_with_img_payload
683
- input, tags = "<math><style><img src=x onerror=alert(1)></style></math>", ["math", "style"]
684
- expected = "<math><style>&lt;img src=x onerror=alert(1)&gt;</style></math>"
685
- actual = safe_list_sanitize(input, tags: tags)
1049
+ def test_should_sanitize_illegal_style_properties
1050
+ raw = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;)
1051
+ expected = %(display:block;width:100%;height:100%;background-color:black;background-x:center;background-y:center;)
1052
+ assert_equal expected, sanitize_css(raw)
1053
+ end
686
1054
 
687
- assert_equal(expected, actual)
1055
+ def test_should_sanitize_with_trailing_space
1056
+ raw = "display:block; "
1057
+ expected = "display:block;"
1058
+ assert_equal expected, sanitize_css(raw)
1059
+ end
688
1060
 
689
- input, tags = "<math><style><img src=x onerror=alert(1)></style></math>", ["math", "style", "img"]
690
- expected = "<math><style>&lt;img src=x onerror=alert(1)&gt;</style></math>"
691
- actual = safe_list_sanitize(input, tags: tags)
1061
+ def test_should_sanitize_xul_style_attributes
1062
+ raw = %(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss'))
1063
+ assert_equal "", sanitize_css(raw)
1064
+ end
692
1065
 
693
- assert_equal(expected, actual)
694
- end
1066
+ def test_should_sanitize_div_background_image_unicode_encoded
1067
+ [
1068
+ convert_to_css_hex("url(javascript:alert(1))", false),
1069
+ convert_to_css_hex("url(javascript:alert(1))", true),
1070
+ convert_to_css_hex("url(https://example.com)", false),
1071
+ convert_to_css_hex("url(https://example.com)", true),
1072
+ ].each do |propval|
1073
+ raw = "background-image:" + propval
1074
+ assert_empty(sanitize_css(raw))
1075
+ end
1076
+ end
695
1077
 
696
- def test_combination_of_svg_and_style_with_img_payload
697
- input, tags = "<svg><style><img src=x onerror=alert(1)></style></svg>", ["svg", "style"]
698
- expected = "<svg><style>&lt;img src=x onerror=alert(1)&gt;</style></svg>"
699
- actual = safe_list_sanitize(input, tags: tags)
1078
+ def test_should_allow_div_background_image_unicode_encoded_safe_functions
1079
+ [
1080
+ convert_to_css_hex("rgb(255,0,0)", false),
1081
+ convert_to_css_hex("rgb(255,0,0)", true),
1082
+ ].each do |propval|
1083
+ raw = "background-image:" + propval
700
1084
 
701
- assert_equal(expected, actual)
1085
+ assert_includes(sanitize_css(raw), "background-image")
1086
+ end
1087
+ end
702
1088
 
703
- input, tags = "<svg><style><img src=x onerror=alert(1)></style></svg>", ["svg", "style", "img"]
704
- expected = "<svg><style>&lt;img src=x onerror=alert(1)&gt;</style></svg>"
705
- actual = safe_list_sanitize(input, tags: tags)
1089
+ def test_should_sanitize_div_style_expression
1090
+ raw = %(width: expression(alert('XSS'));)
1091
+ assert_equal "", sanitize_css(raw)
1092
+ end
706
1093
 
707
- assert_equal(expected, actual)
708
- end
1094
+ def test_should_sanitize_across_newlines
1095
+ raw = %(\nwidth:\nexpression(alert('XSS'));\n)
1096
+ assert_equal "", sanitize_css(raw)
1097
+ end
709
1098
 
710
- protected
1099
+ def test_should_prune_mglyph
1100
+ # https://hackerone.com/reports/2519936
1101
+ input = "<math><mtext><table><mglyph><style><img src=: onerror=alert(1)>"
1102
+ tags = %w(math mtext table mglyph style).freeze
711
1103
 
712
- def xpath_sanitize(input, options = {})
713
- XpathRemovalTestSanitizer.new.sanitize(input, options)
714
- end
1104
+ actual = nil
1105
+ assert_output(nil, /WARNING: 'mglyph' tags cannot be allowed by the PermitScrubber/) do
1106
+ actual = safe_list_sanitize(input, tags: tags)
1107
+ end
715
1108
 
716
- def full_sanitize(input, options = {})
717
- Rails::Html::FullSanitizer.new.sanitize(input, options)
718
- end
1109
+ acceptable_results = [
1110
+ # libxml2
1111
+ "<math><mtext><table><style>&lt;img src=: onerror=alert(1)&gt;</style></table></mtext></math>",
1112
+ # libgumbo
1113
+ "<math><mtext><style><img src=: onerror=alert(1)></style><table></table></mtext></math>",
1114
+ ]
719
1115
 
720
- def link_sanitize(input, options = {})
721
- Rails::Html::LinkSanitizer.new.sanitize(input, options)
722
- end
1116
+ assert_includes(acceptable_results, actual)
1117
+ end
723
1118
 
724
- def safe_list_sanitize(input, options = {})
725
- Rails::Html::SafeListSanitizer.new.sanitize(input, options)
726
- end
1119
+ def test_should_prune_malignmark
1120
+ # https://hackerone.com/reports/2519936
1121
+ input = "<math><mtext><table><malignmark><style><img src=: onerror=alert(1)>"
1122
+ tags = %w(math mtext table malignmark style).freeze
1123
+
1124
+ actual = nil
1125
+ assert_output(nil, /WARNING: 'malignmark' tags cannot be allowed by the PermitScrubber/) do
1126
+ actual = safe_list_sanitize(input, tags: tags)
1127
+ end
727
1128
 
728
- def assert_sanitized(input, expected = nil)
729
- if input
730
- assert_dom_equal expected || input, safe_list_sanitize(input)
731
- else
732
- assert_nil safe_list_sanitize(input)
1129
+ acceptable_results = [
1130
+ # libxml2
1131
+ "<math><mtext><table><style>&lt;img src=: onerror=alert(1)&gt;</style></table></mtext></math>",
1132
+ # libgumbo
1133
+ "<math><mtext><style><img src=: onerror=alert(1)></style><table></table></mtext></math>",
1134
+ ]
1135
+
1136
+ assert_includes(acceptable_results, actual)
733
1137
  end
734
- end
735
1138
 
736
- def sanitize_css(input)
737
- Rails::Html::SafeListSanitizer.new.sanitize_css(input)
738
- end
1139
+ def test_should_prune_noscript
1140
+ # https://hackerone.com/reports/2509647
1141
+ input = "<div><noscript><p id='</noscript><script>alert(1)</script>'></noscript>"
1142
+ tags = ["p", "div", "noscript"].freeze
739
1143
 
740
- def scope_allowed_tags(tags)
741
- old_tags = Rails::Html::SafeListSanitizer.allowed_tags
742
- Rails::Html::SafeListSanitizer.allowed_tags = tags
743
- yield Rails::Html::SafeListSanitizer.new
744
- ensure
745
- Rails::Html::SafeListSanitizer.allowed_tags = old_tags
746
- end
1144
+ actual = nil
1145
+ assert_output(nil, /WARNING: 'noscript' tags cannot be allowed by the PermitScrubber/) do
1146
+ actual = safe_list_sanitize(input, tags: tags, attributes: %w(id))
1147
+ end
747
1148
 
748
- def scope_allowed_attributes(attributes)
749
- old_attributes = Rails::Html::SafeListSanitizer.allowed_attributes
750
- Rails::Html::SafeListSanitizer.allowed_attributes = attributes
751
- yield Rails::Html::SafeListSanitizer.new
752
- ensure
753
- Rails::Html::SafeListSanitizer.allowed_attributes = old_attributes
754
- end
1149
+ acceptable_results = [
1150
+ # libxml2
1151
+ "<div><p id=\"&lt;/noscript&gt;&lt;script&gt;alert(1)&lt;/script&gt;\"></p></div>",
1152
+ # libgumbo
1153
+ "<div><p id=\"</noscript><script>alert(1)</script>\"></p></div>",
1154
+ ]
1155
+
1156
+ assert_includes(acceptable_results, actual)
1157
+ end
755
1158
 
756
- # note that this is used for testing CSS hex encoding: \\[0-9a-f]{1,6}
757
- def convert_to_css_hex(string, escape_parens=false)
758
- string.chars.map do |c|
759
- if !escape_parens && (c == "(" || c == ")")
760
- c
761
- else
762
- format('\00%02X', c.ord)
1159
+ protected
1160
+ def safe_list_sanitize(input, options = {})
1161
+ module_under_test::SafeListSanitizer.new.sanitize(input, options)
1162
+ end
1163
+
1164
+ def assert_sanitized(input, expected = nil)
1165
+ assert_equal((expected || input), safe_list_sanitize(input))
1166
+ end
1167
+
1168
+ def scope_allowed_tags(tags)
1169
+ old_tags = module_under_test::SafeListSanitizer.allowed_tags
1170
+ module_under_test::SafeListSanitizer.allowed_tags = tags
1171
+ yield module_under_test::SafeListSanitizer.new
1172
+ ensure
1173
+ module_under_test::SafeListSanitizer.allowed_tags = old_tags
1174
+ end
1175
+
1176
+ def scope_allowed_attributes(attributes)
1177
+ old_attributes = module_under_test::SafeListSanitizer.allowed_attributes
1178
+ module_under_test::SafeListSanitizer.allowed_attributes = attributes
1179
+ yield module_under_test::SafeListSanitizer.new
1180
+ ensure
1181
+ module_under_test::SafeListSanitizer.allowed_attributes = old_attributes
1182
+ end
1183
+
1184
+ def sanitize_css(input)
1185
+ module_under_test::SafeListSanitizer.new.sanitize_css(input)
763
1186
  end
764
- end.join
765
- end
766
1187
 
767
- def libxml_2_9_14_recovery_lt?
768
- # changed in 2.9.14, see https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5
769
- Nokogiri.method(:uses_libxml?).arity == -1 && Nokogiri.uses_libxml?(">= 2.9.14")
1188
+ # note that this is used for testing CSS hex encoding: \\[0-9a-f]{1,6}
1189
+ def convert_to_css_hex(string, escape_parens = false)
1190
+ string.chars.map do |c|
1191
+ if !escape_parens && (c == "(" || c == ")")
1192
+ c
1193
+ else
1194
+ format('\00%02X', c.ord)
1195
+ end
1196
+ end.join
1197
+ end
770
1198
  end
771
1199
 
772
- def libxml_2_9_14_recovery_lt_bang?
773
- # changed in 2.9.14, see https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5
774
- # then reverted in 2.10.0, see https://gitlab.gnome.org/GNOME/libxml2/-/issues/380
775
- Nokogiri.method(:uses_libxml?).arity == -1 && Nokogiri.uses_libxml?("= 2.9.14")
1200
+ class HTML4SafeListSanitizerTest < Minitest::Test
1201
+ @module_under_test = Rails::HTML4
1202
+ include SafeListSanitizerTest
776
1203
  end
1204
+
1205
+ class HTML5SafeListSanitizerTest < Minitest::Test
1206
+ @module_under_test = Rails::HTML5
1207
+ include SafeListSanitizerTest
1208
+
1209
+ def test_should_not_be_vulnerable_to_nokogiri_foreign_style_serialization_bug
1210
+ # https://hackerone.com/reports/2503220
1211
+ input = "<svg><style>&lt;img src onerror=alert(1)>"
1212
+ result = Rails::HTML5::SafeListSanitizer.new.sanitize(input, tags: ["svg", "style"])
1213
+ browser = Nokogiri::HTML5::Document.parse(result)
1214
+ xss = browser.at_xpath("//img/@onerror")
1215
+
1216
+ assert_nil(xss)
1217
+ end
1218
+
1219
+ def test_should_not_be_vulnerable_to_ns_confusion_2519936
1220
+ # https://hackerone.com/reports/2519936
1221
+ input = "<math><style><style class='</style><script>alert(1)</script>'>"
1222
+ result = Rails::HTML5::SafeListSanitizer.new.sanitize(input, tags: ["style"])
1223
+ browser = Nokogiri::HTML5::Document.parse(result)
1224
+ xss = browser.at_xpath("//script")
1225
+
1226
+ assert_nil(xss)
1227
+ end
1228
+
1229
+ def test_should_not_be_vulnerable_to_ns_confusion_2519941
1230
+ # https://hackerone.com/reports/2519941
1231
+ input = "<math><mtext><table><mglyph><style><img src=: onerror=alert(1)>"
1232
+ result = Rails::HTML5::SafeListSanitizer.new.sanitize(input, tags: %w(math style))
1233
+ browser = Nokogiri::HTML5::Document.parse(result)
1234
+ xss = browser.at_xpath("//img/@onerror")
1235
+
1236
+ assert_nil(xss)
1237
+ end
1238
+
1239
+ def test_should_not_be_vulnerable_to_mglyph_namespace_confusion
1240
+ # https://hackerone.com/reports/2519936
1241
+ input = "<math><mtext><table><mglyph><style><img src=: onerror=alert(1)>"
1242
+ tags = %w(math mtext table mglyph style)
1243
+
1244
+ result = nil
1245
+ assert_output(nil, /WARNING/) do
1246
+ result = safe_list_sanitize(input, tags: tags)
1247
+ end
1248
+
1249
+ browser = Nokogiri::HTML5::Document.parse(result)
1250
+ xss = browser.at_xpath("//img/@onerror")
1251
+
1252
+ assert_nil(xss)
1253
+ end
1254
+
1255
+ def test_should_not_be_vulnerable_to_malignmark_namespace_confusion
1256
+ # https://hackerone.com/reports/2519936
1257
+ input = "<math><mtext><table><malignmark><style><img src=: onerror=alert(1)>"
1258
+ tags = %w(math mtext table malignmark style)
1259
+
1260
+ result = nil
1261
+ assert_output(nil, /WARNING/) do
1262
+ result = safe_list_sanitize(input, tags: tags)
1263
+ end
1264
+
1265
+ browser = Nokogiri::HTML5::Document.parse(result)
1266
+ xss = browser.at_xpath("//img/@onerror")
1267
+
1268
+ assert_nil(xss)
1269
+ end
1270
+
1271
+ def test_should_not_be_vulnerable_to_noscript_attacks
1272
+ # https://hackerone.com/reports/2509647
1273
+ skip("browser assertion requires parse_noscript_content_as_text") unless Nokogiri::VERSION >= "1.17"
1274
+
1275
+ input = '<noscript><p id="</noscript><script>alert(1)</script>"></noscript>'
1276
+
1277
+ result = nil
1278
+ assert_output(nil, /WARNING/) do
1279
+ result = Rails::HTML5::SafeListSanitizer.new.sanitize(input, tags: %w(p div noscript), attributes: %w(id class style))
1280
+ end
1281
+
1282
+ browser = Nokogiri::HTML5::Document.parse(result, parse_noscript_content_as_text: true)
1283
+ xss = browser.at_xpath("//script")
1284
+
1285
+ assert_nil(xss)
1286
+ end
1287
+ end if loofah_html5_support?
777
1288
  end