rails-html-sanitizer 1.5.0 → 1.6.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,777 +1,1087 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "minitest/autorun"
2
4
  require "rails-html-sanitizer"
3
- require "rails/dom/testing/assertions/dom_assertions"
4
5
 
5
- puts Nokogiri::VERSION_INFO
6
+ puts "nokogiri version info: #{Nokogiri::VERSION_INFO}"
7
+ puts "html5 support: #{Rails::HTML::Sanitizer.html5_support?}"
8
+
9
+ #
10
+ # NOTE that many of these tests contain multiple acceptable results.
11
+ #
12
+ # In some cases, this is because of how the HTML4 parser's recovery behavior changed in libxml2
13
+ # 2.9.14 and 2.10.0. For more details, see:
14
+ #
15
+ # - https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5
16
+ # - https://gitlab.gnome.org/GNOME/libxml2/-/issues/380
17
+ #
18
+ # In other cases, multiple acceptable results are provided because Nokogiri's vendored libxml2 is
19
+ # patched to entity-escape server-side includes (aka "SSI", aka `<!-- #directive param=value -->`).
20
+ #
21
+ # In many other cases, it's because the parser used by Nokogiri on JRuby (xerces+nekohtml) parses
22
+ # slightly differently than libxml2 in edge cases.
23
+ #
24
+ module SanitizerTests
25
+ def self.loofah_html5_support?
26
+ Loofah.respond_to?(:html5_support?) && Loofah.html5_support?
27
+ end
28
+
29
+ class BaseSanitizerTest < Minitest::Test
30
+ class XpathRemovalTestSanitizer < Rails::HTML::Sanitizer
31
+ def sanitize(html, options = {})
32
+ fragment = Loofah.fragment(html)
33
+ remove_xpaths(fragment, options[:xpaths]).to_s
34
+ end
35
+ end
6
36
 
7
- class SanitizersTest < Minitest::Test
8
- include Rails::Dom::Testing::Assertions::DomAssertions
37
+ def test_sanitizer_sanitize_raises_not_implemented_error
38
+ assert_raises NotImplementedError do
39
+ Rails::HTML::Sanitizer.new.sanitize("asdf")
40
+ end
41
+ end
9
42
 
10
- def test_sanitizer_sanitize_raises_not_implemented_error
11
- assert_raises NotImplementedError do
12
- Rails::Html::Sanitizer.new.sanitize('')
43
+ def test_remove_xpaths_removes_an_xpath
44
+ html = %(<h1>hello <script>code!</script></h1>)
45
+ assert_equal %(<h1>hello </h1>), xpath_sanitize(html, xpaths: %w(.//script))
13
46
  end
14
- end
15
47
 
16
- def test_sanitize_nested_script
17
- assert_equal '&lt;script&gt;alert("XSS");&lt;/script&gt;', safe_list_sanitize('<script><script></script>alert("XSS");<script><</script>/</script><script>script></script>', tags: %w(em))
18
- end
48
+ def test_remove_xpaths_removes_all_occurrences_of_xpath
49
+ html = %(<section><header><script>code!</script></header><p>hello <script>code!</script></p></section>)
50
+ assert_equal %(<section><header></header><p>hello </p></section>), xpath_sanitize(html, xpaths: %w(.//script))
51
+ end
19
52
 
20
- def test_sanitize_nested_script_in_style
21
- assert_equal '&lt;script&gt;alert("XSS");&lt;/script&gt;', safe_list_sanitize('<style><script></style>alert("XSS");<style><</style>/</style><style>script></style>', tags: %w(em))
22
- end
53
+ def test_remove_xpaths_called_with_faulty_xpath
54
+ assert_raises Nokogiri::XML::XPath::SyntaxError do
55
+ xpath_sanitize("<h1>hello<h1>", xpaths: %w(..faulty_xpath))
56
+ end
57
+ end
23
58
 
24
- class XpathRemovalTestSanitizer < Rails::Html::Sanitizer
25
- def sanitize(html, options = {})
26
- fragment = Loofah.fragment(html)
27
- remove_xpaths(fragment, options[:xpaths]).to_s
59
+ def test_remove_xpaths_called_with_xpath_string
60
+ assert_equal "", xpath_sanitize("<a></a>", xpaths: ".//a")
28
61
  end
29
- end
30
62
 
31
- def test_remove_xpaths_removes_an_xpath
32
- html = %(<h1>hello <script>code!</script></h1>)
33
- assert_equal %(<h1>hello </h1>), xpath_sanitize(html, xpaths: %w(.//script))
34
- end
63
+ def test_remove_xpaths_called_with_enumerable_xpaths
64
+ assert_equal "", xpath_sanitize("<a><span></span></a>", xpaths: %w(.//a .//span))
65
+ end
35
66
 
36
- def test_remove_xpaths_removes_all_occurrences_of_xpath
37
- html = %(<section><header><script>code!</script></header><p>hello <script>code!</script></p></section>)
38
- assert_equal %(<section><header></header><p>hello </p></section>), xpath_sanitize(html, xpaths: %w(.//script))
67
+ protected
68
+ def xpath_sanitize(input, options = {})
69
+ XpathRemovalTestSanitizer.new.sanitize(input, options)
70
+ end
39
71
  end
40
72
 
41
- def test_remove_xpaths_called_with_faulty_xpath
42
- assert_raises Nokogiri::XML::XPath::SyntaxError do
43
- xpath_sanitize('<h1>hello<h1>', xpaths: %w(..faulty_xpath))
73
+ module ModuleUnderTest
74
+ def module_under_test
75
+ self.class.instance_variable_get(:@module_under_test)
44
76
  end
45
77
  end
46
78
 
47
- def test_remove_xpaths_called_with_xpath_string
48
- assert_equal '', xpath_sanitize('<a></a>', xpaths: './/a')
49
- end
79
+ module FullSanitizerTest
80
+ include ModuleUnderTest
50
81
 
51
- def test_remove_xpaths_called_with_enumerable_xpaths
52
- assert_equal '', xpath_sanitize('<a><span></span></a>', xpaths: %w(.//a .//span))
53
- end
82
+ def test_strip_tags_with_quote
83
+ input = '<" <img src="trollface.gif" onload="alert(1)"> hi'
84
+ result = full_sanitize(input)
85
+ acceptable_results = [
86
+ # libxml2 >= 2.9.14 and xerces+neko
87
+ %{&lt;" hi},
88
+ # other libxml2
89
+ %{ hi},
90
+ ]
54
91
 
55
- def test_strip_tags_with_quote
56
- input = '<" <img src="trollface.gif" onload="alert(1)"> hi'
57
- expected = libxml_2_9_14_recovery_lt? ? %{&lt;" hi} : %{ hi}
58
- assert_equal(expected, full_sanitize(input))
59
- end
92
+ assert_includes(acceptable_results, result)
93
+ end
60
94
 
61
- def test_strip_invalid_html
62
- assert_equal "&lt;&lt;", full_sanitize("<<<bad html")
63
- end
95
+ def test_strip_invalid_html
96
+ assert_equal "&lt;&lt;", full_sanitize("<<<bad html")
97
+ end
64
98
 
65
- def test_strip_nested_tags
66
- expected = "Wei&lt;a onclick='alert(document.cookie);'/&gt;rdos"
67
- input = "Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"
68
- assert_equal expected, full_sanitize(input)
69
- end
99
+ def test_strip_nested_tags
100
+ expected = "Wei&lt;a onclick='alert(document.cookie);'/&gt;rdos"
101
+ input = "Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"
102
+ assert_equal expected, full_sanitize(input)
103
+ end
70
104
 
71
- def test_strip_tags_multiline
72
- expected = %{This is a test.\n\n\n\nIt no longer contains any HTML.\n}
73
- input = %{<title>This is <b>a <a href="" target="_blank">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n}
105
+ def test_strip_tags_multiline
106
+ expected = %{This is a test.\n\n\n\nIt no longer contains any HTML.\n}
107
+ input = %{<h1>This is <b>a <a href="" target="_blank">test</a></b>.</h1>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n}
74
108
 
75
- assert_equal expected, full_sanitize(input)
76
- end
109
+ assert_equal expected, full_sanitize(input)
110
+ end
77
111
 
78
- def test_remove_unclosed_tags
79
- input = "This is <-- not\n a comment here."
80
- expected = libxml_2_9_14_recovery_lt? ? %{This is &lt;-- not\n a comment here.} : %{This is }
81
- assert_equal(expected, full_sanitize(input))
82
- end
112
+ def test_remove_unclosed_tags
113
+ input = "This is <-- not\n a comment here."
114
+ result = full_sanitize(input)
115
+ acceptable_results = [
116
+ # libxml2 >= 2.9.14 and xerces+neko
117
+ %{This is &lt;-- not\n a comment here.},
118
+ # other libxml2
119
+ %{This is },
120
+ ]
121
+
122
+ assert_includes(acceptable_results, result)
123
+ end
83
124
 
84
- def test_strip_cdata
85
- input = "This has a <![CDATA[<section>]]> here."
86
- expected = libxml_2_9_14_recovery_lt_bang? ? %{This has a &lt;![CDATA[]]&gt; here.} : %{This has a ]]&gt; here.}
87
- assert_equal(expected, full_sanitize(input))
88
- end
125
+ def test_strip_cdata
126
+ input = "This has a <![CDATA[<section>]]> here."
127
+ result = full_sanitize(input)
128
+ acceptable_results = [
129
+ # libxml2 = 2.9.14
130
+ %{This has a &lt;![CDATA[]]&gt; here.},
131
+ # other libxml2
132
+ %{This has a ]]&gt; here.},
133
+ # xerces+neko
134
+ %{This has a here.},
135
+ ]
136
+
137
+ assert_includes(acceptable_results, result)
138
+ end
89
139
 
90
- def test_strip_unclosed_cdata
91
- input = "This has an unclosed <![CDATA[<section>]] here..."
92
- expected = libxml_2_9_14_recovery_lt_bang? ? %{This has an unclosed &lt;![CDATA[]] here...} : %{This has an unclosed ]] here...}
93
- assert_equal(expected, full_sanitize(input))
94
- end
140
+ def test_strip_blank_string
141
+ assert_nil full_sanitize(nil)
142
+ assert_equal "", full_sanitize("")
143
+ assert_equal " ", full_sanitize(" ")
144
+ end
95
145
 
96
- def test_strip_blank_string
97
- assert_nil full_sanitize(nil)
98
- assert_equal "", full_sanitize("")
99
- assert_equal " ", full_sanitize(" ")
100
- end
146
+ def test_strip_tags_with_plaintext
147
+ assert_equal "Don't touch me", full_sanitize("Don't touch me")
148
+ end
101
149
 
102
- def test_strip_tags_with_plaintext
103
- assert_equal "Don't touch me", full_sanitize("Don't touch me")
104
- end
150
+ def test_strip_tags_with_tags
151
+ assert_equal "This is a test.", full_sanitize("<p>This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</p>")
152
+ end
105
153
 
106
- def test_strip_tags_with_tags
107
- assert_equal "This is a test.", full_sanitize("<p>This <u>is<u> a <a href='test.html'><strong>test</strong></a>.</p>")
108
- end
154
+ def test_escape_tags_with_many_open_quotes
155
+ assert_equal "&lt;&lt;", full_sanitize("<<<bad html>")
156
+ end
109
157
 
110
- def test_escape_tags_with_many_open_quotes
111
- assert_equal "&lt;&lt;", full_sanitize("<<<bad html>")
112
- end
158
+ def test_strip_tags_with_sentence
159
+ assert_equal "This is a test.", full_sanitize("This is a test.")
160
+ end
113
161
 
114
- def test_strip_tags_with_sentence
115
- assert_equal "This is a test.", full_sanitize("This is a test.")
116
- end
162
+ def test_strip_tags_with_comment
163
+ assert_equal "This has a here.", full_sanitize("This has a <!-- comment --> here.")
164
+ end
117
165
 
118
- def test_strip_tags_with_comment
119
- assert_equal "This has a here.", full_sanitize("This has a <!-- comment --> here.")
120
- end
166
+ def test_strip_tags_with_frozen_string
167
+ assert_equal "Frozen string with no tags", full_sanitize("Frozen string with no tags")
168
+ end
121
169
 
122
- def test_strip_tags_with_frozen_string
123
- assert_equal "Frozen string with no tags", full_sanitize("Frozen string with no tags".freeze)
124
- end
170
+ def test_full_sanitize_respect_html_escaping_of_the_given_string
171
+ assert_equal 'test\r\nstring', full_sanitize('test\r\nstring')
172
+ assert_equal "&amp;", full_sanitize("&")
173
+ assert_equal "&amp;", full_sanitize("&amp;")
174
+ assert_equal "&amp;amp;", full_sanitize("&amp;amp;")
175
+ assert_equal "omg &lt;script&gt;BOM&lt;/script&gt;", full_sanitize("omg &lt;script&gt;BOM&lt;/script&gt;")
176
+ end
125
177
 
126
- def test_full_sanitize_respect_html_escaping_of_the_given_string
127
- assert_equal 'test\r\nstring', full_sanitize('test\r\nstring')
128
- assert_equal '&amp;', full_sanitize('&')
129
- assert_equal '&amp;', full_sanitize('&amp;')
130
- assert_equal '&amp;amp;', full_sanitize('&amp;amp;')
131
- assert_equal 'omg &lt;script&gt;BOM&lt;/script&gt;', full_sanitize('omg &lt;script&gt;BOM&lt;/script&gt;')
132
- end
178
+ def test_sanitize_ascii_8bit_string
179
+ full_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
180
+ assert_equal "hello", sanitized
181
+ assert_equal Encoding::UTF_8, sanitized.encoding
182
+ end
183
+ end
133
184
 
134
- def test_strip_links_with_tags_in_tags
135
- expected = "&lt;a href='hello'&gt;all <b>day</b> long&lt;/a&gt;"
136
- input = "<<a>a href='hello'>all <b>day</b> long<</A>/a>"
137
- assert_equal expected, link_sanitize(input)
185
+ protected
186
+ def full_sanitize(input, options = {})
187
+ module_under_test::FullSanitizer.new.sanitize(input, options)
188
+ end
138
189
  end
139
190
 
140
- def test_strip_links_with_unclosed_tags
141
- assert_equal "", link_sanitize("<a<a")
191
+ class HTML4FullSanitizerTest < Minitest::Test
192
+ @module_under_test = Rails::HTML4
193
+ include FullSanitizerTest
142
194
  end
143
195
 
144
- def test_strip_links_with_plaintext
145
- assert_equal "Don't touch me", link_sanitize("Don't touch me")
146
- end
196
+ class HTML5FullSanitizerTest < Minitest::Test
197
+ @module_under_test = Rails::HTML5
198
+ include FullSanitizerTest
199
+ end if loofah_html5_support?
147
200
 
148
- def test_strip_links_with_line_feed_and_uppercase_tag
149
- assert_equal "on my mind\nall day long", link_sanitize("<a href='almost'>on my mind</a>\n<A href='almost'>all day long</A>")
150
- end
201
+ module LinkSanitizerTest
202
+ include ModuleUnderTest
151
203
 
152
- def test_strip_links_leaves_nonlink_tags
153
- assert_equal "My mind\nall <b>day</b> long", link_sanitize("<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>")
154
- end
204
+ def test_strip_links_with_tags_in_tags
205
+ expected = "&lt;a href='hello'&gt;all <b>day</b> long&lt;/a&gt;"
206
+ input = "<<a>a href='hello'>all <b>day</b> long<</A>/a>"
207
+ assert_equal expected, link_sanitize(input)
208
+ end
155
209
 
156
- def test_strip_links_with_links
157
- assert_equal "0wn3d", link_sanitize("<a href='http://www.rubyonrails.com/'><a href='http://www.rubyonrails.com/' onlclick='steal()'>0wn3d</a></a>")
158
- end
210
+ def test_strip_links_with_unclosed_tags
211
+ assert_equal "", link_sanitize("<a<a")
212
+ end
159
213
 
160
- def test_strip_links_with_linkception
161
- assert_equal "Magic", link_sanitize("<a href='http://www.rubyonrails.com/'>Mag<a href='http://www.ruby-lang.org/'>ic")
162
- end
214
+ def test_strip_links_with_plaintext
215
+ assert_equal "Don't touch me", link_sanitize("Don't touch me")
216
+ end
163
217
 
164
- def test_sanitize_form
165
- assert_sanitized "<form action=\"/foo/bar\" method=\"post\"><input></form>", ''
166
- end
218
+ def test_strip_links_with_line_feed_and_uppercase_tag
219
+ assert_equal "on my mind\nall day long", link_sanitize("<a href='almost'>on my mind</a>\n<A href='almost'>all day long</A>")
220
+ end
167
221
 
168
- def test_sanitize_plaintext
169
- assert_sanitized "<plaintext><span>foo</span></plaintext>", "<span>foo</span>"
170
- end
222
+ def test_strip_links_leaves_nonlink_tags
223
+ assert_equal "My mind\nall <b>day</b> long", link_sanitize("<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>")
224
+ end
171
225
 
172
- def test_sanitize_script
173
- assert_sanitized "a b c<script language=\"Javascript\">blah blah blah</script>d e f", "a b cblah blah blahd e f"
174
- end
226
+ def test_strip_links_with_links
227
+ assert_equal "0wn3d", link_sanitize("<a href='http://www.rubyonrails.com/'><a href='http://www.rubyonrails.com/' onlclick='steal()'>0wn3d</a></a>")
228
+ end
175
229
 
176
- def test_sanitize_js_handlers
177
- raw = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>}
178
- assert_sanitized raw, %{onthis="do that" <a href="#" name="foo">hello</a>}
179
- end
230
+ def test_strip_links_with_linkception
231
+ assert_equal "Magic", link_sanitize("<a href='http://www.rubyonrails.com/'>Mag<a href='http://www.ruby-lang.org/'>ic")
232
+ end
233
+
234
+ def test_sanitize_ascii_8bit_string
235
+ link_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
236
+ assert_equal "<div>hello</div>", sanitized
237
+ assert_equal Encoding::UTF_8, sanitized.encoding
238
+ end
239
+ end
180
240
 
181
- def test_sanitize_javascript_href
182
- raw = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>}
183
- assert_sanitized raw, %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>}
241
+ protected
242
+ def link_sanitize(input, options = {})
243
+ module_under_test::LinkSanitizer.new.sanitize(input, options)
244
+ end
184
245
  end
185
246
 
186
- def test_sanitize_image_src
187
- raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>}
188
- assert_sanitized raw, %{src="javascript:bang" <img width="5">foo</img>, <span>bar</span>}
247
+ class HTML4LinkSanitizerTest < Minitest::Test
248
+ @module_under_test = Rails::HTML4
249
+ include LinkSanitizerTest
189
250
  end
190
251
 
191
- tags = Loofah::HTML5::SafeList::ALLOWED_ELEMENTS - %w(script form)
192
- tags.each do |tag_name|
193
- define_method "test_should_allow_#{tag_name}_tag" do
194
- scope_allowed_tags(tags) do
195
- assert_sanitized "start <#{tag_name} title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</#{tag_name}> end", %(start <#{tag_name} title="1">foo bar baz</#{tag_name}> end)
196
- end
252
+ class HTML5LinkSanitizerTest < Minitest::Test
253
+ @module_under_test = Rails::HTML5
254
+ include LinkSanitizerTest
255
+ end if loofah_html5_support?
256
+
257
+ module SafeListSanitizerTest
258
+ include ModuleUnderTest
259
+
260
+ def test_sanitize_nested_script
261
+ assert_equal '&lt;script&gt;alert("XSS");&lt;/script&gt;', safe_list_sanitize('<script><script></script>alert("XSS");<script><</script>/</script><script>script></script>', tags: %w(em))
197
262
  end
198
- end
199
263
 
200
- def test_should_allow_anchors
201
- assert_sanitized %(<a href="foo" onclick="bar"><script>baz</script></a>), %(<a href=\"foo\">baz</a>)
202
- end
264
+ def test_sanitize_nested_script_in_style
265
+ input = '<style><script></style>alert("XSS");<style><</style>/</style><style>script></style>'
266
+ result = safe_list_sanitize(input, tags: %w(em))
267
+ acceptable_results = [
268
+ # libxml2
269
+ %{&lt;script&gt;alert("XSS");&lt;/script&gt;},
270
+ # xerces+neko. unavoidable double-escaping, see loofah/docs/2022-10-decision-on-cdata-nodes.md
271
+ %{&amp;lt;script&amp;gt;alert(\"XSS\");&amp;lt;&amp;lt;/style&amp;gt;/script&amp;gt;},
272
+ ]
273
+
274
+ assert_includes(acceptable_results, result)
275
+ end
203
276
 
204
- def test_video_poster_sanitization
205
- scope_allowed_tags(%w(video)) do
206
- scope_allowed_attributes %w(src poster) do
207
- assert_sanitized %(<video src="videofile.ogg" autoplay poster="posterimage.jpg"></video>), %(<video src="videofile.ogg" poster="posterimage.jpg"></video>)
208
- assert_sanitized %(<video src="videofile.ogg" poster=javascript:alert(1)></video>), %(<video src="videofile.ogg"></video>)
209
- end
277
+ def test_strip_unclosed_cdata
278
+ input = "This has an unclosed <![CDATA[<section>]] here..."
279
+
280
+ result = safe_list_sanitize(input)
281
+
282
+ acceptable_results = [
283
+ # libxml2 = 2.9.14
284
+ %{This has an unclosed &lt;![CDATA[]] here...},
285
+ # other libxml2
286
+ %{This has an unclosed ]] here...},
287
+ # xerces+neko
288
+ %{This has an unclosed }
289
+ ]
290
+
291
+ assert_includes(acceptable_results, result)
210
292
  end
211
- end
212
293
 
213
- # RFC 3986, sec 4.2
214
- def test_allow_colons_in_path_component
215
- assert_sanitized "<a href=\"./this:that\">foo</a>"
216
- end
294
+ def test_sanitize_form
295
+ assert_sanitized "<form action=\"/foo/bar\" method=\"post\"><input></form>", ""
296
+ end
217
297
 
218
- %w(src width height alt).each do |img_attr|
219
- define_method "test_should_allow_image_#{img_attr}_attribute" do
220
- assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo" />)
298
+ def test_sanitize_plaintext
299
+ # note that the `plaintext` tag has been deprecated since HTML 2
300
+ # https://developer.mozilla.org/en-US/docs/Web/HTML/Element/plaintext
301
+ input = "<plaintext><span>foo</span></plaintext>"
302
+ result = safe_list_sanitize(input)
303
+ acceptable_results = [
304
+ # libxml2
305
+ "<span>foo</span>",
306
+ # xerces+nekohtml-unit
307
+ "&lt;span&gt;foo&lt;/span&gt;&lt;/plaintext&gt;",
308
+ # xerces+cyberneko
309
+ "&lt;span&gt;foo&lt;/span&gt;"
310
+ ]
311
+
312
+ assert_includes(acceptable_results, result)
221
313
  end
222
- end
223
314
 
224
- def test_should_handle_non_html
225
- assert_sanitized 'abc'
226
- end
315
+ def test_sanitize_script
316
+ assert_sanitized "a b c<script language=\"Javascript\">blah blah blah</script>d e f", "a b cblah blah blahd e f"
317
+ end
227
318
 
228
- def test_should_handle_blank_text
229
- [nil, '', ' '].each { |blank| assert_sanitized blank }
230
- end
319
+ def test_sanitize_js_handlers
320
+ raw = %{onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>}
321
+ assert_sanitized raw, %{onthis="do that" <a href="#" name="foo">hello</a>}
322
+ end
231
323
 
232
- def test_setting_allowed_tags_affects_sanitization
233
- scope_allowed_tags %w(u) do |sanitizer|
234
- assert_equal '<u></u>', sanitizer.sanitize('<a><u></u></a>')
324
+ def test_sanitize_javascript_href
325
+ raw = %{href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>}
326
+ assert_sanitized raw, %{href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>}
235
327
  end
236
- end
237
328
 
238
- def test_setting_allowed_attributes_affects_sanitization
239
- scope_allowed_attributes %w(foo) do |sanitizer|
240
- input = '<a foo="hello" bar="world"></a>'
241
- assert_equal '<a foo="hello"></a>', sanitizer.sanitize(input)
329
+ def test_sanitize_image_src
330
+ raw = %{src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>}
331
+ assert_sanitized raw, %{src="javascript:bang" <img width="5">foo, <span>bar</span>}
242
332
  end
243
- end
244
333
 
245
- def test_custom_tags_overrides_allowed_tags
246
- scope_allowed_tags %(u) do |sanitizer|
247
- input = '<a><u></u></a>'
248
- assert_equal '<a></a>', sanitizer.sanitize(input, tags: %w(a))
334
+ def test_should_allow_anchors
335
+ assert_sanitized %(<a href="foo" onclick="bar"><script>baz</script></a>), %(<a href=\"foo\">baz</a>)
249
336
  end
250
- end
251
337
 
252
- def test_custom_attributes_overrides_allowed_attributes
253
- scope_allowed_attributes %(foo) do |sanitizer|
254
- input = '<a foo="hello" bar="world"></a>'
255
- assert_equal '<a bar="world"></a>', sanitizer.sanitize(input, attributes: %w(bar))
338
+ def test_video_poster_sanitization
339
+ scope_allowed_tags(%w(video)) do
340
+ scope_allowed_attributes %w(src poster) do
341
+ expected = if RUBY_PLATFORM == "java"
342
+ # xerces+nekohtml alphabetizes the attributes! FML.
343
+ %(<video poster="posterimage.jpg" src="videofile.ogg"></video>)
344
+ else
345
+ %(<video src="videofile.ogg" poster="posterimage.jpg"></video>)
346
+ end
347
+ assert_sanitized(
348
+ %(<video src="videofile.ogg" autoplay poster="posterimage.jpg"></video>),
349
+ expected,
350
+ )
351
+ assert_sanitized(
352
+ %(<video src="videofile.ogg" poster=javascript:alert(1)></video>),
353
+ %(<video src="videofile.ogg"></video>),
354
+ )
355
+ end
356
+ end
256
357
  end
257
- end
258
358
 
259
- def test_should_allow_prune
260
- sanitizer = Rails::Html::SafeListSanitizer.new(prune: true)
261
- text = '<u>leave me <b>now</b></u>'
262
- assert_equal "<u>leave me </u>", sanitizer.sanitize(text, tags: %w(u))
263
- end
359
+ # RFC 3986, sec 4.2
360
+ def test_allow_colons_in_path_component
361
+ assert_sanitized "<a href=\"./this:that\">foo</a>"
362
+ end
264
363
 
265
- def test_should_allow_custom_tags
266
- text = "<u>foo</u>"
267
- assert_equal text, safe_list_sanitize(text, tags: %w(u))
268
- end
364
+ %w(src width height alt).each do |img_attr|
365
+ define_method "test_should_allow_image_#{img_attr}_attribute" do
366
+ assert_sanitized %(<img #{img_attr}="foo" onclick="bar" />), %(<img #{img_attr}="foo">)
367
+ end
368
+ end
269
369
 
270
- def test_should_allow_only_custom_tags
271
- text = "<u>foo</u> with <i>bar</i>"
272
- assert_equal "<u>foo</u> with bar", safe_list_sanitize(text, tags: %w(u))
273
- end
370
+ def test_lang_and_xml_lang
371
+ # https://html.spec.whatwg.org/multipage/dom.html#the-lang-and-xml:lang-attributes
372
+ #
373
+ # 3.2.6.2 The lang and xml:lang attributes
374
+ #
375
+ # ... Authors must not use the lang attribute in the XML namespace on HTML elements in HTML
376
+ # documents. To ease migration to and from XML, authors may specify an attribute in no namespace
377
+ # with no prefix and with the literal localname "xml:lang" on HTML elements in HTML documents,
378
+ # but such attributes must only be specified if a lang attribute in no namespace is also
379
+ # specified, and both attributes must have the same value when compared in an ASCII
380
+ # case-insensitive manner.
381
+ input = expected = "<div lang=\"en\" xml:lang=\"en\">foo</div>"
382
+ assert_sanitized(input, expected)
383
+ end
274
384
 
275
- def test_should_allow_custom_tags_with_attributes
276
- text = %(<blockquote cite="http://example.com/">foo</blockquote>)
277
- assert_equal text, safe_list_sanitize(text)
278
- end
385
+ def test_should_handle_non_html
386
+ assert_sanitized "abc"
387
+ end
279
388
 
280
- def test_should_allow_custom_tags_with_custom_attributes
281
- text = %(<blockquote foo="bar">Lorem ipsum</blockquote>)
282
- assert_equal text, safe_list_sanitize(text, attributes: ['foo'])
283
- end
389
+ def test_should_handle_blank_text
390
+ assert_nil(safe_list_sanitize(nil))
391
+ assert_equal("", safe_list_sanitize(""))
392
+ assert_equal(" ", safe_list_sanitize(" "))
393
+ end
284
394
 
285
- def test_scrub_style_if_style_attribute_option_is_passed
286
- input = '<p style="color: #000; background-image: url(http://www.ragingplatypus.com/i/cam-full.jpg);"></p>'
287
- actual = safe_list_sanitize(input, attributes: %w(style))
288
- assert_includes(['<p style="color: #000;"></p>', '<p style="color:#000;"></p>'], actual)
289
- end
395
+ def test_setting_allowed_tags_affects_sanitization
396
+ scope_allowed_tags %w(u) do |sanitizer|
397
+ assert_equal "<u></u>", sanitizer.sanitize("<a><u></u></a>")
398
+ end
399
+ end
290
400
 
291
- def test_should_raise_argument_error_if_tags_is_not_enumerable
292
- assert_raises ArgumentError do
293
- safe_list_sanitize('<a>some html</a>', tags: 'foo')
401
+ def test_setting_allowed_attributes_affects_sanitization
402
+ scope_allowed_attributes %w(foo) do |sanitizer|
403
+ input = '<a foo="hello" bar="world"></a>'
404
+ assert_equal '<a foo="hello"></a>', sanitizer.sanitize(input)
405
+ end
294
406
  end
295
- end
296
407
 
297
- def test_should_raise_argument_error_if_attributes_is_not_enumerable
298
- assert_raises ArgumentError do
299
- safe_list_sanitize('<a>some html</a>', attributes: 'foo')
408
+ def test_custom_tags_overrides_allowed_tags
409
+ scope_allowed_tags %(u) do |sanitizer|
410
+ input = "<a><u></u></a>"
411
+ assert_equal "<a></a>", sanitizer.sanitize(input, tags: %w(a))
412
+ end
300
413
  end
301
- end
302
414
 
303
- def test_should_not_accept_non_loofah_inheriting_scrubber
304
- scrubber = Object.new
305
- def scrubber.scrub(node); node.name = 'h1'; end
415
+ def test_custom_attributes_overrides_allowed_attributes
416
+ scope_allowed_attributes %(foo) do |sanitizer|
417
+ input = '<a foo="hello" bar="world"></a>'
418
+ assert_equal '<a bar="world"></a>', sanitizer.sanitize(input, attributes: %w(bar))
419
+ end
420
+ end
306
421
 
307
- assert_raises Loofah::ScrubberNotFound do
308
- safe_list_sanitize('<a>some html</a>', scrubber: scrubber)
422
+ def test_should_allow_prune
423
+ sanitizer = module_under_test::SafeListSanitizer.new(prune: true)
424
+ text = "<u>leave me <b>now</b></u>"
425
+ assert_equal "<u>leave me </u>", sanitizer.sanitize(text, tags: %w(u))
426
+ end
427
+
428
+ def test_should_allow_custom_tags
429
+ text = "<u>foo</u>"
430
+ assert_equal text, safe_list_sanitize(text, tags: %w(u))
309
431
  end
310
- end
311
432
 
312
- def test_should_accept_loofah_inheriting_scrubber
313
- scrubber = Loofah::Scrubber.new
314
- def scrubber.scrub(node); node.name = 'h1'; end
433
+ def test_should_allow_only_custom_tags
434
+ text = "<u>foo</u> with <i>bar</i>"
435
+ assert_equal "<u>foo</u> with bar", safe_list_sanitize(text, tags: %w(u))
436
+ end
315
437
 
316
- html = "<script>hello!</script>"
317
- assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber)
318
- end
438
+ def test_should_allow_custom_tags_with_attributes
439
+ text = %(<blockquote cite="http://example.com/">foo</blockquote>)
440
+ assert_equal text, safe_list_sanitize(text)
441
+ end
319
442
 
320
- def test_should_accept_loofah_scrubber_that_wraps_a_block
321
- scrubber = Loofah::Scrubber.new { |node| node.name = 'h1' }
322
- html = "<script>hello!</script>"
323
- assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber)
324
- end
443
+ def test_should_allow_custom_tags_with_custom_attributes
444
+ text = %(<blockquote foo="bar">Lorem ipsum</blockquote>)
445
+ assert_equal text, safe_list_sanitize(text, attributes: ["foo"])
446
+ end
325
447
 
326
- def test_custom_scrubber_takes_precedence_over_other_options
327
- scrubber = Loofah::Scrubber.new { |node| node.name = 'h1' }
328
- html = "<script>hello!</script>"
329
- assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber, tags: ['foo'])
330
- end
448
+ def test_scrub_style_if_style_attribute_option_is_passed
449
+ input = '<p style="color: #000; background-image: url(http://www.ragingplatypus.com/i/cam-full.jpg);"></p>'
450
+ actual = safe_list_sanitize(input, attributes: %w(style))
331
451
 
332
- [%w(img src), %w(a href)].each do |(tag, attr)|
333
- define_method "test_should_strip_#{attr}_attribute_in_#{tag}_with_bad_protocols" do
334
- assert_sanitized %(<#{tag} #{attr}="javascript:bang" title="1">boo</#{tag}>), %(<#{tag} title="1">boo</#{tag}>)
452
+ assert_includes(['<p style="color: #000;"></p>', '<p style="color:#000;"></p>'], actual)
335
453
  end
336
- end
337
454
 
338
- def test_should_block_script_tag
339
- assert_sanitized %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), ""
340
- end
455
+ def test_should_raise_argument_error_if_tags_is_not_enumerable
456
+ assert_raises ArgumentError do
457
+ safe_list_sanitize("<a>some html</a>", tags: "foo")
458
+ end
459
+ end
341
460
 
342
- def test_should_not_fall_for_xss_image_hack_with_uppercase_tags
343
- assert_sanitized %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">), %(<img>alert("XSS")"&gt;)
344
- end
461
+ def test_should_raise_argument_error_if_attributes_is_not_enumerable
462
+ assert_raises ArgumentError do
463
+ safe_list_sanitize("<a>some html</a>", attributes: "foo")
464
+ end
465
+ end
345
466
 
346
- [%(<IMG SRC="javascript:alert('XSS');">),
347
- %(<IMG SRC=javascript:alert('XSS')>),
348
- %(<IMG SRC=JaVaScRiPt:alert('XSS')>),
349
- %(<IMG SRC=javascript:alert(&quot;XSS&quot;)>),
350
- %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>),
351
- %(<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>),
352
- %(<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>),
353
- %(<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>),
354
- %(<IMG SRC="jav\tascript:alert('XSS');">),
355
- %(<IMG SRC="jav&#x09;ascript:alert('XSS');">),
356
- %(<IMG SRC="jav&#x0A;ascript:alert('XSS');">),
357
- %(<IMG SRC="jav&#x0D;ascript:alert('XSS');">),
358
- %(<IMG SRC=" &#14; javascript:alert('XSS');">),
359
- %(<IMG SRC="javascript&#x3a;alert('XSS');">),
360
- %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)].each do |img_hack|
361
- define_method "test_should_not_fall_for_xss_image_hack_#{img_hack}" do
362
- assert_sanitized img_hack, "<img>"
467
+ def test_should_not_accept_non_loofah_inheriting_scrubber
468
+ scrubber = Object.new
469
+ def scrubber.scrub(node); node.name = "h1"; end
470
+
471
+ assert_raises Loofah::ScrubberNotFound do
472
+ safe_list_sanitize("<a>some html</a>", scrubber: scrubber)
473
+ end
363
474
  end
364
- end
365
475
 
366
- def test_should_sanitize_tag_broken_up_by_null
367
- assert_sanitized %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>), ""
368
- end
476
+ def test_should_accept_loofah_inheriting_scrubber
477
+ scrubber = Loofah::Scrubber.new
478
+ def scrubber.scrub(node); node.replace("<h1>#{node.inner_html}</h1>"); end
369
479
 
370
- def test_should_sanitize_invalid_script_tag
371
- assert_sanitized %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), ""
372
- end
480
+ html = "<script>hello!</script>"
481
+ assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber)
482
+ end
373
483
 
374
- def test_should_sanitize_script_tag_with_multiple_open_brackets
375
- assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "&lt;alert(\"XSS\");//&lt;"
376
- assert_sanitized %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a), ""
377
- end
484
+ def test_should_accept_loofah_scrubber_that_wraps_a_block
485
+ scrubber = Loofah::Scrubber.new { |node| node.replace("<h1>#{node.inner_html}</h1>") }
486
+ html = "<script>hello!</script>"
487
+ assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber)
488
+ end
378
489
 
379
- def test_should_sanitize_unclosed_script
380
- assert_sanitized %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), ""
381
- end
490
+ def test_custom_scrubber_takes_precedence_over_other_options
491
+ scrubber = Loofah::Scrubber.new { |node| node.replace("<h1>#{node.inner_html}</h1>") }
492
+ html = "<script>hello!</script>"
493
+ assert_equal "<h1>hello!</h1>", safe_list_sanitize(html, scrubber: scrubber, tags: ["foo"])
494
+ end
382
495
 
383
- def test_should_sanitize_half_open_scripts
384
- assert_sanitized %(<IMG SRC="javascript:alert('XSS')"), "<img>"
385
- end
496
+ def test_should_strip_src_attribute_in_img_with_bad_protocols
497
+ assert_sanitized %(<img src="javascript:bang" title="1">), %(<img title="1">)
498
+ end
386
499
 
387
- def test_should_not_fall_for_ridiculous_hack
388
- img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)
389
- assert_sanitized img_hack, "<img>"
390
- end
500
+ def test_should_strip_href_attribute_in_a_with_bad_protocols
501
+ assert_sanitized %(<a href="javascript:bang" title="1">boo</a>), %(<a title="1">boo</a>)
502
+ end
391
503
 
392
- def test_should_sanitize_attributes
393
- assert_sanitized %(<SPAN title="'><script>alert()</script>">blah</SPAN>), %(<span title="#{CGI.escapeHTML "'><script>alert()</script>"}">blah</span>)
394
- end
504
+ def test_should_block_script_tag
505
+ assert_sanitized %(<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>), ""
506
+ end
395
507
 
396
- def test_should_sanitize_illegal_style_properties
397
- raw = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;)
398
- expected = %(display:block;width:100%;height:100%;background-color:black;background-x:center;background-y:center;)
399
- assert_equal expected, sanitize_css(raw)
400
- end
508
+ def test_should_not_fall_for_xss_image_hack_with_uppercase_tags
509
+ assert_sanitized %(<IMG """><SCRIPT>alert("XSS")</SCRIPT>">), %(<img>alert("XSS")"&gt;)
510
+ end
401
511
 
402
- def test_should_sanitize_with_trailing_space
403
- raw = "display:block; "
404
- expected = "display:block;"
405
- assert_equal expected, sanitize_css(raw)
406
- end
512
+ [%(<IMG SRC="javascript:alert('XSS');">),
513
+ %(<IMG SRC=javascript:alert('XSS')>),
514
+ %(<IMG SRC=JaVaScRiPt:alert('XSS')>),
515
+ %(<IMG SRC=javascript:alert(&quot;XSS&quot;)>),
516
+ %(<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>),
517
+ %(<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>),
518
+ %(<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>),
519
+ %(<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>),
520
+ %(<IMG SRC="jav\tascript:alert('XSS');">),
521
+ %(<IMG SRC="jav&#x09;ascript:alert('XSS');">),
522
+ %(<IMG SRC="jav&#x0A;ascript:alert('XSS');">),
523
+ %(<IMG SRC="jav&#x0D;ascript:alert('XSS');">),
524
+ %(<IMG SRC=" &#14; javascript:alert('XSS');">),
525
+ %(<IMG SRC="javascript&#x3a;alert('XSS');">),
526
+ %(<IMG SRC=`javascript:alert("RSnake says, 'XSS'")`>)].each do |img_hack|
527
+ define_method "test_should_not_fall_for_xss_image_hack_#{img_hack}" do
528
+ assert_sanitized img_hack, "<img>"
529
+ end
530
+ end
407
531
 
408
- def test_should_sanitize_xul_style_attributes
409
- raw = %(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss'))
410
- assert_equal '', sanitize_css(raw)
411
- end
532
+ def test_should_sanitize_tag_broken_up_by_null
533
+ input = %(<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>)
534
+ result = safe_list_sanitize(input)
535
+ acceptable_results = [
536
+ # libxml2
537
+ "",
538
+ # xerces+neko
539
+ 'alert("XSS")',
540
+ ]
541
+
542
+ assert_includes(acceptable_results, result)
543
+ end
412
544
 
413
- def test_should_sanitize_invalid_tag_names
414
- assert_sanitized(%(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f), "a b cd e f")
415
- end
545
+ def test_should_sanitize_invalid_script_tag
546
+ assert_sanitized %(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>), ""
547
+ end
416
548
 
417
- def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags
418
- assert_sanitized('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>', "<a>foo</a>")
419
- end
549
+ def test_should_sanitize_script_tag_with_multiple_open_brackets
550
+ assert_sanitized %(<<SCRIPT>alert("XSS");//<</SCRIPT>), "&lt;alert(\"XSS\");//&lt;"
551
+ end
420
552
 
421
- def test_should_sanitize_invalid_tag_names_in_single_tags
422
- assert_sanitized('<img/src="http://ha.ckers.org/xss.js"/>', "<img />")
423
- end
553
+ def test_should_sanitize_script_tag_with_multiple_open_brackets_2
554
+ input = %(<iframe src=http://ha.ckers.org/scriptlet.html\n<a)
555
+ result = safe_list_sanitize(input)
556
+ acceptable_results = [
557
+ # libxml2
558
+ "",
559
+ # xerces+neko
560
+ "&lt;a",
561
+ ]
562
+
563
+ assert_includes(acceptable_results, result)
564
+ end
424
565
 
425
- def test_should_sanitize_img_dynsrc_lowsrc
426
- assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img />")
427
- end
566
+ def test_should_sanitize_unclosed_script
567
+ assert_sanitized %(<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>), ""
568
+ end
428
569
 
429
- def test_should_sanitize_div_background_image_unicode_encoded
430
- [
431
- convert_to_css_hex("url(javascript:alert(1))", false),
432
- convert_to_css_hex("url(javascript:alert(1))", true),
433
- convert_to_css_hex("url(https://example.com)", false),
434
- convert_to_css_hex("url(https://example.com)", true),
435
- ].each do |propval|
436
- raw = "background-image:" + propval
437
- assert_empty(sanitize_css(raw))
570
+ def test_should_sanitize_half_open_scripts
571
+ input = %(<IMG SRC="javascript:alert('XSS')")
572
+ result = safe_list_sanitize(input)
573
+ acceptable_results = [
574
+ # libxml2
575
+ "<img>",
576
+ # libgumbo
577
+ "",
578
+ ]
579
+
580
+ assert_includes(acceptable_results, result)
438
581
  end
439
- end
440
582
 
441
- def test_should_allow_div_background_image_unicode_encoded_safe_functions
442
- [
443
- convert_to_css_hex("rgb(255,0,0)", false),
444
- convert_to_css_hex("rgb(255,0,0)", true),
445
- ].each do |propval|
446
- raw = "background-image:" + propval
447
- assert_includes(sanitize_css(raw), "background-image")
583
+ def test_should_not_fall_for_ridiculous_hack
584
+ img_hack = %(<IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)
585
+ assert_sanitized img_hack, "<img>"
448
586
  end
449
- end
450
587
 
451
- def test_should_sanitize_div_style_expression
452
- raw = %(width: expression(alert('XSS'));)
453
- assert_equal '', sanitize_css(raw)
454
- end
588
+ def test_should_sanitize_attributes
589
+ input = %(<SPAN title="'><script>alert()</script>">blah</SPAN>)
590
+ result = safe_list_sanitize(input)
591
+ acceptable_results = [
592
+ # libxml2
593
+ %(<span title="'&gt;&lt;script&gt;alert()&lt;/script&gt;">blah</span>),
594
+ # libgumbo
595
+ # this looks scary, but it's fine. for a more detailed analysis check out:
596
+ # https://github.com/discourse/discourse/pull/21522#issuecomment-1545697968
597
+ %(<span title="'><script>alert()</script>">blah</span>)
598
+ ]
599
+
600
+ assert_includes(acceptable_results, result)
601
+ end
455
602
 
456
- def test_should_sanitize_across_newlines
457
- raw = %(\nwidth:\nexpression(alert('XSS'));\n)
458
- assert_equal '', sanitize_css(raw)
459
- end
603
+ def test_should_sanitize_invalid_tag_names
604
+ assert_sanitized(%(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f), "a b cd e f")
605
+ end
460
606
 
461
- def test_should_sanitize_img_vbscript
462
- assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), '<img />'
463
- end
607
+ def test_should_sanitize_non_alpha_and_non_digit_characters_in_tags
608
+ assert_sanitized('<a onclick!#$%&()*~+-_.,:;?@[/|\]^`=alert("XSS")>foo</a>', "<a>foo</a>")
609
+ end
464
610
 
465
- def test_should_sanitize_cdata_section
466
- input = "<![CDATA[<span>section</span>]]>"
467
- expected = libxml_2_9_14_recovery_lt_bang? ? %{&lt;![CDATA[<span>section</span>]]&gt;} : %{section]]&gt;}
468
- assert_sanitized(input, expected)
469
- end
611
+ def test_should_sanitize_invalid_tag_names_in_single_tags
612
+ input = %(<img/src="http://ha.ckers.org/xss.js"/>)
613
+ result = safe_list_sanitize(input)
614
+ acceptable_results = [
615
+ # libxml2
616
+ "<img>",
617
+ # libgumbo
618
+ %(<img src="http://ha.ckers.org/xss.js">),
619
+ ]
620
+
621
+ assert_includes(acceptable_results, result)
622
+ end
470
623
 
471
- def test_should_sanitize_unterminated_cdata_section
472
- input = "<![CDATA[<span>neverending..."
473
- expected = libxml_2_9_14_recovery_lt_bang? ? %{&lt;![CDATA[<span>neverending...</span>} : %{neverending...}
474
- assert_sanitized(input, expected)
475
- end
624
+ def test_should_sanitize_img_dynsrc_lowsrc
625
+ assert_sanitized(%(<img lowsrc="javascript:alert('XSS')" />), "<img>")
626
+ end
476
627
 
477
- def test_should_not_mangle_urls_with_ampersand
478
- assert_sanitized %{<a href=\"http://www.domain.com?var1=1&amp;var2=2\">my link</a>}
479
- end
628
+ def test_should_sanitize_img_vbscript
629
+ assert_sanitized %(<img src='vbscript:msgbox("XSS")' />), "<img>"
630
+ end
480
631
 
481
- def test_should_sanitize_neverending_attribute
482
- assert_sanitized "<span class=\"\\", "<span class=\"\\\">"
483
- end
632
+ def test_should_sanitize_cdata_section
633
+ input = "<![CDATA[<span>section</span>]]>"
634
+ result = safe_list_sanitize(input)
635
+ acceptable_results = [
636
+ # libxml2 = 2.9.14
637
+ %{&lt;![CDATA[<span>section</span>]]&gt;},
638
+ # other libxml2
639
+ %{section]]&gt;},
640
+ # xerces+neko
641
+ "",
642
+ ]
643
+
644
+ assert_includes(acceptable_results, result)
645
+ end
646
+
647
+ def test_should_sanitize_unterminated_cdata_section
648
+ input = "<![CDATA[<span>neverending..."
649
+ result = safe_list_sanitize(input)
484
650
 
485
- [
486
- %(<a href="javascript&#x3a;alert('XSS');">),
487
- %(<a href="javascript&#x003a;alert('XSS');">),
488
- %(<a href="javascript&#x3A;alert('XSS');">),
489
- %(<a href="javascript&#x003A;alert('XSS');">)
490
- ].each_with_index do |enc_hack, i|
491
- define_method "test_x03a_handling_#{i+1}" do
492
- assert_sanitized enc_hack, "<a>"
651
+ acceptable_results = [
652
+ # libxml2 = 2.9.14
653
+ %{&lt;![CDATA[<span>neverending...</span>},
654
+ # other libxml2
655
+ %{neverending...},
656
+ # xerces+neko
657
+ ""
658
+ ]
659
+
660
+ assert_includes(acceptable_results, result)
493
661
  end
494
- end
495
662
 
496
- def test_x03a_legitimate
497
- assert_sanitized %(<a href="http&#x3a;//legit">), %(<a href="http://legit">)
498
- assert_sanitized %(<a href="http&#x3A;//legit">), %(<a href="http://legit">)
499
- end
663
+ def test_should_not_mangle_urls_with_ampersand
664
+ assert_sanitized %{<a href=\"http://www.domain.com?var1=1&amp;var2=2\">my link</a>}
665
+ end
500
666
 
501
- def test_sanitize_ascii_8bit_string
502
- safe_list_sanitize('<a>hello</a>'.encode('ASCII-8BIT')).tap do |sanitized|
503
- assert_equal '<a>hello</a>', sanitized
504
- assert_equal Encoding::UTF_8, sanitized.encoding
667
+ def test_should_sanitize_neverending_attribute
668
+ # note that assert_dom_equal chokes in this case! so avoid using assert_sanitized
669
+ assert_equal("<span class=\"\\\"></span>", safe_list_sanitize("<span class=\"\\\">"))
505
670
  end
506
- end
507
671
 
508
- def test_sanitize_data_attributes
509
- assert_sanitized %(<a href="/blah" data-method="post">foo</a>), %(<a href="/blah">foo</a>)
510
- assert_sanitized %(<a data-remote="true" data-type="script" data-method="get" data-cross-domain="true" href="attack.js">Launch the missiles</a>), %(<a href="attack.js">Launch the missiles</a>)
511
- end
672
+ [
673
+ %(<a href="javascript&#x3a;alert('XSS');">),
674
+ %(<a href="javascript&#x003a;alert('XSS');">),
675
+ %(<a href="javascript&#x3A;alert('XSS');">),
676
+ %(<a href="javascript&#x003A;alert('XSS');">)
677
+ ].each_with_index do |enc_hack, i|
678
+ define_method "test_x03a_handling_#{i + 1}" do
679
+ assert_sanitized enc_hack, "<a></a>"
680
+ end
681
+ end
512
682
 
513
- def test_allow_data_attribute_if_requested
514
- text = %(<a data-foo="foo">foo</a>)
515
- assert_equal %(<a data-foo="foo">foo</a>), safe_list_sanitize(text, attributes: ['data-foo'])
516
- end
683
+ def test_x03a_legitimate
684
+ assert_sanitized %(<a href="http&#x3a;//legit">asdf</a>), %(<a href="http://legit">asdf</a>)
685
+ assert_sanitized %(<a href="http&#x3A;//legit">asdf</a>), %(<a href="http://legit">asdf</a>)
686
+ end
517
687
 
518
- def test_uri_escaping_of_href_attr_in_a_tag_in_safe_list_sanitizer
519
- skip if RUBY_VERSION < "2.3"
688
+ def test_sanitize_ascii_8bit_string
689
+ safe_list_sanitize("<div><a>hello</a></div>".encode("ASCII-8BIT")).tap do |sanitized|
690
+ assert_equal "<div><a>hello</a></div>", sanitized
691
+ assert_equal Encoding::UTF_8, sanitized.encoding
692
+ end
693
+ end
520
694
 
521
- html = %{<a href='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
695
+ def test_sanitize_data_attributes
696
+ assert_sanitized %(<a href="/blah" data-method="post">foo</a>), %(<a href="/blah">foo</a>)
697
+ assert_sanitized %(<a data-remote="true" data-type="script" data-method="get" data-cross-domain="true" href="attack.js">Launch the missiles</a>), %(<a href="attack.js">Launch the missiles</a>)
698
+ end
522
699
 
523
- text = safe_list_sanitize(html)
700
+ def test_allow_data_attribute_if_requested
701
+ text = %(<a data-foo="foo">foo</a>)
702
+ assert_equal %(<a data-foo="foo">foo</a>), safe_list_sanitize(text, attributes: ["data-foo"])
703
+ end
524
704
 
525
- acceptable_results = [
526
- # nokogiri w/vendored+patched libxml2
527
- %{<a href="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
528
- # nokogiri w/ system libxml2
529
- %{<a href="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
530
- ]
531
- assert_includes(acceptable_results, text)
532
- end
705
+ # https://developer.mozilla.org/en-US/docs/Glossary/Void_element
706
+ VOID_ELEMENTS = %w[area base br col embed hr img input keygen link meta param source track wbr]
707
+
708
+ %w(strong em b i p code pre tt samp kbd var sub
709
+ sup dfn cite big small address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr
710
+ acronym a img blockquote del ins time).each do |tag_name|
711
+ define_method "test_default_safelist_should_allow_#{tag_name}" do
712
+ if VOID_ELEMENTS.include?(tag_name)
713
+ assert_sanitized("<#{tag_name}>")
714
+ else
715
+ assert_sanitized("<#{tag_name}>foo</#{tag_name}>")
716
+ end
717
+ end
718
+ end
533
719
 
534
- def test_uri_escaping_of_src_attr_in_a_tag_in_safe_list_sanitizer
535
- skip if RUBY_VERSION < "2.3"
720
+ def test_datetime_attribute
721
+ assert_sanitized("<time datetime=\"2023-01-01\">Today</time>")
722
+ end
536
723
 
537
- html = %{<a src='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
724
+ def test_abbr_attribute
725
+ scope_allowed_tags(%w(table tr th td)) do
726
+ assert_sanitized(%(<table><tr><td abbr="UK">United Kingdom</td></tr></table>))
727
+ end
728
+ end
538
729
 
539
- text = safe_list_sanitize(html)
730
+ def test_uri_escaping_of_href_attr_in_a_tag_in_safe_list_sanitizer
731
+ skip if RUBY_VERSION < "2.3"
540
732
 
541
- acceptable_results = [
542
- # nokogiri w/vendored+patched libxml2
543
- %{<a src="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
544
- # nokogiri w/system libxml2
545
- %{<a src="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
546
- ]
547
- assert_includes(acceptable_results, text)
548
- end
733
+ html = %{<a href='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
549
734
 
550
- def test_uri_escaping_of_name_attr_in_a_tag_in_safe_list_sanitizer
551
- skip if RUBY_VERSION < "2.3"
735
+ text = safe_list_sanitize(html)
552
736
 
553
- html = %{<a name='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
737
+ acceptable_results = [
738
+ # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch)
739
+ %{<a href="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
740
+ # system libxml2
741
+ %{<a href="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
742
+ # xerces+neko
743
+ %{<a href="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>}
744
+ ]
554
745
 
555
- text = safe_list_sanitize(html)
746
+ assert_includes(acceptable_results, text)
747
+ end
556
748
 
557
- acceptable_results = [
558
- # nokogiri w/vendored+patched libxml2
559
- %{<a name="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
560
- # nokogiri w/system libxml2
561
- %{<a name="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
562
- ]
563
- assert_includes(acceptable_results, text)
564
- end
749
+ def test_uri_escaping_of_src_attr_in_a_tag_in_safe_list_sanitizer
750
+ skip if RUBY_VERSION < "2.3"
565
751
 
566
- def test_uri_escaping_of_name_action_in_a_tag_in_safe_list_sanitizer
567
- skip if RUBY_VERSION < "2.3"
752
+ html = %{<a src='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
568
753
 
569
- html = %{<a action='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
754
+ text = safe_list_sanitize(html)
570
755
 
571
- text = safe_list_sanitize(html, attributes: ['action'])
756
+ acceptable_results = [
757
+ # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch)
758
+ %{<a src="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
759
+ # system libxml2
760
+ %{<a src="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
761
+ # xerces+neko
762
+ %{<a src="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>}
763
+ ]
572
764
 
573
- acceptable_results = [
574
- # nokogiri w/vendored+patched libxml2
575
- %{<a action="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
576
- # nokogiri w/system libxml2
577
- %{<a action="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
578
- ]
579
- assert_includes(acceptable_results, text)
580
- end
765
+ assert_includes(acceptable_results, text)
766
+ end
581
767
 
582
- def test_exclude_node_type_processing_instructions
583
- assert_equal("<div>text</div><b>text</b>", safe_list_sanitize("<div>text</div><?div content><b>text</b>"))
584
- end
768
+ def test_uri_escaping_of_name_attr_in_a_tag_in_safe_list_sanitizer
769
+ skip if RUBY_VERSION < "2.3"
585
770
 
586
- def test_exclude_node_type_comment
587
- assert_equal("<div>text</div><b>text</b>", safe_list_sanitize("<div>text</div><!-- comment --><b>text</b>"))
588
- end
771
+ html = %{<a name='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
589
772
 
590
- %w[text/plain text/css image/png image/gif image/jpeg].each do |mediatype|
591
- define_method "test_mediatype_#{mediatype}_allowed" do
592
- input = %Q(<img src="data:#{mediatype};base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
593
- expected = input
594
- actual = safe_list_sanitize(input)
595
- assert_equal(expected, actual)
773
+ text = safe_list_sanitize(html)
596
774
 
597
- input = %Q(<img src="DATA:#{mediatype};base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
598
- expected = input
599
- actual = safe_list_sanitize(input)
600
- assert_equal(expected, actual)
775
+ acceptable_results = [
776
+ # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch)
777
+ %{<a name="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
778
+ # system libxml2
779
+ %{<a name="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
780
+ # xerces+neko
781
+ %{<a name="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>}
782
+ ]
783
+
784
+ assert_includes(acceptable_results, text)
601
785
  end
602
- end
603
786
 
604
- def test_mediatype_text_html_disallowed
605
- input = %q(<img src="data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
606
- expected = %q(<img>)
607
- actual = safe_list_sanitize(input)
608
- assert_equal(expected, actual)
787
+ def test_uri_escaping_of_name_action_in_a_tag_in_safe_list_sanitizer
788
+ skip if RUBY_VERSION < "2.3"
609
789
 
610
- input = %q(<img src="DATA:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
611
- expected = %q(<img>)
612
- actual = safe_list_sanitize(input)
613
- assert_equal(expected, actual)
614
- end
790
+ html = %{<a action='examp<!--" unsafeattr=foo()>-->le.com'>test</a>}
615
791
 
616
- def test_mediatype_image_svg_xml_disallowed
617
- input = %q(<img src="data:image/svg+xml;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
618
- expected = %q(<img>)
619
- actual = safe_list_sanitize(input)
620
- assert_equal(expected, actual)
792
+ text = safe_list_sanitize(html, attributes: ["action"])
621
793
 
622
- input = %q(<img src="DATA:image/svg+xml;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
623
- expected = %q(<img>)
624
- actual = safe_list_sanitize(input)
625
- assert_equal(expected, actual)
626
- end
794
+ acceptable_results = [
795
+ # nokogiri's vendored+patched libxml2 (0002-Update-entities-to-remove-handling-of-ssi.patch)
796
+ %{<a action="examp&lt;!--%22%20unsafeattr=foo()&gt;--&gt;le.com">test</a>},
797
+ # system libxml2
798
+ %{<a action="examp<!--%22%20unsafeattr=foo()>-->le.com">test</a>},
799
+ # xerces+neko
800
+ %{<a action="examp&lt;!--%22 unsafeattr=foo()&gt;--&gt;le.com">test</a>},
801
+ ]
627
802
 
628
- def test_mediatype_other_disallowed
629
- input = %q(<a href="data:foo;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">foo</a>)
630
- expected = %q(<a>foo</a>)
631
- actual = safe_list_sanitize(input)
632
- assert_equal(expected, actual)
803
+ assert_includes(acceptable_results, text)
804
+ end
633
805
 
634
- input = %q(<a href="DATA:foo;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">foo</a>)
635
- expected = %q(<a>foo</a>)
636
- actual = safe_list_sanitize(input)
637
- assert_equal(expected, actual)
638
- end
806
+ def test_exclude_node_type_processing_instructions
807
+ input = "<div>text</div><?div content><b>text</b>"
808
+ result = safe_list_sanitize(input)
809
+ acceptable_results = [
810
+ # jruby cyberneko (nokogiri < 1.14.0)
811
+ "<div>text</div>",
812
+ # everything else
813
+ "<div>text</div><b>text</b>",
814
+ ]
815
+
816
+ assert_includes(acceptable_results, result)
817
+ end
639
818
 
640
- def test_scrubbing_svg_attr_values_that_allow_ref
641
- input = %Q(<div fill="yellow url(http://bad.com/) #fff">hey</div>)
642
- expected = %Q(<div fill="yellow #fff">hey</div>)
643
- actual = scope_allowed_attributes %w(fill) do
644
- safe_list_sanitize(input)
819
+ def test_exclude_node_type_comment
820
+ assert_equal("<div>text</div><b>text</b>", safe_list_sanitize("<div>text</div><!-- comment --><b>text</b>"))
645
821
  end
646
822
 
647
- assert_equal(expected, actual)
648
- end
823
+ %w[text/plain text/css image/png image/gif image/jpeg].each do |mediatype|
824
+ define_method "test_mediatype_#{mediatype}_allowed" do
825
+ input = %Q(<img src="data:#{mediatype};base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
826
+ expected = input
827
+ actual = safe_list_sanitize(input)
828
+ assert_equal(expected, actual)
829
+
830
+ input = %Q(<img src="DATA:#{mediatype};base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">)
831
+ expected = input
832
+ actual = safe_list_sanitize(input)
833
+ assert_equal(expected, actual)
834
+ end
835
+ end
649
836
 
650
- def test_style_with_css_payload
651
- input, tags = "<style>div > span { background: \"red\"; }</style>", ["style"]
652
- expected = "<style>div &gt; span { background: \"red\"; }</style>"
653
- actual = safe_list_sanitize(input, tags: tags)
837
+ def test_mediatype_text_html_disallowed
838
+ input = '<img src="data:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">'
839
+ expected = "<img>"
840
+ actual = safe_list_sanitize(input)
841
+ assert_equal(expected, actual)
654
842
 
655
- assert_equal(expected, actual)
656
- end
843
+ input = '<img src="DATA:text/html;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">'
844
+ expected = "<img>"
845
+ actual = safe_list_sanitize(input)
846
+ assert_equal(expected, actual)
847
+ end
657
848
 
658
- def test_combination_of_select_and_style_with_css_payload
659
- input, tags = "<select><style>div > span { background: \"red\"; }</style></select>", ["select", "style"]
660
- expected = "<select><style>div &gt; span { background: \"red\"; }</style></select>"
661
- actual = safe_list_sanitize(input, tags: tags)
849
+ def test_mediatype_image_svg_xml_disallowed
850
+ input = '<img src="data:image/svg+xml;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">'
851
+ expected = "<img>"
852
+ actual = safe_list_sanitize(input)
853
+ assert_equal(expected, actual)
662
854
 
663
- assert_equal(expected, actual)
664
- end
855
+ input = '<img src="DATA:image/svg+xml;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">'
856
+ expected = "<img>"
857
+ actual = safe_list_sanitize(input)
858
+ assert_equal(expected, actual)
859
+ end
665
860
 
666
- def test_combination_of_select_and_style_with_script_payload
667
- input, tags = "<select><style><script>alert(1)</script></style></select>", ["select", "style"]
668
- expected = "<select><style>&lt;script&gt;alert(1)&lt;/script&gt;</style></select>"
669
- actual = safe_list_sanitize(input, tags: tags)
861
+ def test_mediatype_other_disallowed
862
+ input = '<a href="data:foo;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">foo</a>'
863
+ expected = "<a>foo</a>"
864
+ actual = safe_list_sanitize(input)
865
+ assert_equal(expected, actual)
670
866
 
671
- assert_equal(expected, actual)
672
- end
867
+ input = '<a href="DATA:foo;base64,PHNjcmlwdD5hbGVydCgnWFNTJyk8L3NjcmlwdD4=">foo</a>'
868
+ expected = "<a>foo</a>"
869
+ actual = safe_list_sanitize(input)
870
+ assert_equal(expected, actual)
871
+ end
872
+
873
+ def test_scrubbing_svg_attr_values_that_allow_ref
874
+ input = '<div fill="yellow url(http://bad.com/) #fff">hey</div>'
875
+ expected = '<div fill="yellow #fff">hey</div>'
876
+ actual = scope_allowed_attributes %w(fill) do
877
+ safe_list_sanitize(input)
878
+ end
673
879
 
674
- def test_combination_of_svg_and_style_with_script_payload
675
- input, tags = "<svg><style><script>alert(1)</script></style></svg>", ["svg", "style"]
676
- expected = "<svg><style>&lt;script&gt;alert(1)&lt;/script&gt;</style></svg>"
677
- actual = safe_list_sanitize(input, tags: tags)
880
+ assert_equal(expected, actual)
881
+ end
678
882
 
679
- assert_equal(expected, actual)
680
- end
883
+ def test_style_with_css_payload
884
+ input, tags = "<style>div > span { background: \"red\"; }</style>", ["style"]
885
+ actual = safe_list_sanitize(input, tags: tags)
886
+ acceptable_results = [
887
+ # libxml2
888
+ "<style>div &gt; span { background: \"red\"; }</style>",
889
+ # libgumbo
890
+ "<style>div > span { background: \"red\"; }</style>",
891
+ ]
892
+
893
+ assert_includes(acceptable_results, actual)
894
+ end
681
895
 
682
- def test_combination_of_math_and_style_with_img_payload
683
- input, tags = "<math><style><img src=x onerror=alert(1)></style></math>", ["math", "style"]
684
- expected = "<math><style>&lt;img src=x onerror=alert(1)&gt;</style></math>"
685
- actual = safe_list_sanitize(input, tags: tags)
896
+ def test_combination_of_select_and_style_with_css_payload
897
+ input, tags = "<select><style>div > span { background: \"red\"; }</style></select>", ["select", "style"]
898
+ actual = safe_list_sanitize(input, tags: tags)
899
+ acceptable_results = [
900
+ # libxml2
901
+ "<select><style>div &gt; span { background: \"red\"; }</style></select>",
902
+ # libgumbo
903
+ "<select>div &gt; span { background: \"red\"; }</select>",
904
+ ]
905
+
906
+ assert_includes(acceptable_results, actual)
907
+ end
686
908
 
687
- assert_equal(expected, actual)
909
+ def test_combination_of_select_and_style_with_script_payload
910
+ input, tags = "<select><style><script>alert(1)</script></style></select>", ["select", "style"]
911
+ actual = safe_list_sanitize(input, tags: tags)
912
+ acceptable_results = [
913
+ # libxml2
914
+ "<select><style>&lt;script&gt;alert(1)&lt;/script&gt;</style></select>",
915
+ # libgumbo
916
+ "<select>alert(1)</select>",
917
+ ]
918
+
919
+ assert_includes(acceptable_results, actual)
920
+ end
688
921
 
689
- input, tags = "<math><style><img src=x onerror=alert(1)></style></math>", ["math", "style", "img"]
690
- expected = "<math><style>&lt;img src=x onerror=alert(1)&gt;</style></math>"
691
- actual = safe_list_sanitize(input, tags: tags)
922
+ def test_combination_of_svg_and_style_with_script_payload
923
+ input, tags = "<svg><style><script>alert(1)</script></style></svg>", ["svg", "style"]
924
+ actual = safe_list_sanitize(input, tags: tags)
925
+ acceptable_results = [
926
+ # libxml2
927
+ "<svg><style>&lt;script&gt;alert(1)&lt;/script&gt;</style></svg>",
928
+ # libgumbo
929
+ "<svg><style>alert(1)</style></svg>"
930
+ ]
931
+
932
+ assert_includes(acceptable_results, actual)
933
+ end
692
934
 
693
- assert_equal(expected, actual)
694
- end
935
+ def test_combination_of_math_and_style_with_img_payload
936
+ input, tags = "<math><style><img src=x onerror=alert(1)></style></math>", ["math", "style"]
937
+ actual = safe_list_sanitize(input, tags: tags)
938
+ acceptable_results = [
939
+ # libxml2
940
+ "<math><style>&lt;img src=x onerror=alert(1)&gt;</style></math>",
941
+ # libgumbo
942
+ "<math><style></style></math>",
943
+ ]
944
+
945
+ assert_includes(acceptable_results, actual)
946
+ end
695
947
 
696
- def test_combination_of_svg_and_style_with_img_payload
697
- input, tags = "<svg><style><img src=x onerror=alert(1)></style></svg>", ["svg", "style"]
698
- expected = "<svg><style>&lt;img src=x onerror=alert(1)&gt;</style></svg>"
699
- actual = safe_list_sanitize(input, tags: tags)
948
+ def test_combination_of_math_and_style_with_img_payload_2
949
+ input, tags = "<math><style><img src=x onerror=alert(1)></style></math>", ["math", "style", "img"]
950
+ actual = safe_list_sanitize(input, tags: tags)
951
+ acceptable_results = [
952
+ # libxml2
953
+ "<math><style>&lt;img src=x onerror=alert(1)&gt;</style></math>",
954
+ # libgumbo
955
+ "<math><style></style></math><img src=\"x\">",
956
+ ]
957
+
958
+ assert_includes(acceptable_results, actual)
959
+ end
700
960
 
701
- assert_equal(expected, actual)
961
+ def test_combination_of_svg_and_style_with_img_payload
962
+ input, tags = "<svg><style><img src=x onerror=alert(1)></style></svg>", ["svg", "style"]
963
+ actual = safe_list_sanitize(input, tags: tags)
964
+ acceptable_results = [
965
+ # libxml2
966
+ "<svg><style>&lt;img src=x onerror=alert(1)&gt;</style></svg>",
967
+ # libgumbo
968
+ "<svg><style></style></svg>",
969
+ ]
970
+
971
+ assert_includes(acceptable_results, actual)
972
+ end
702
973
 
703
- input, tags = "<svg><style><img src=x onerror=alert(1)></style></svg>", ["svg", "style", "img"]
704
- expected = "<svg><style>&lt;img src=x onerror=alert(1)&gt;</style></svg>"
705
- actual = safe_list_sanitize(input, tags: tags)
974
+ def test_combination_of_svg_and_style_with_img_payload_2
975
+ input, tags = "<svg><style><img src=x onerror=alert(1)></style></svg>", ["svg", "style", "img"]
976
+ actual = safe_list_sanitize(input, tags: tags)
977
+ acceptable_results = [
978
+ # libxml2
979
+ "<svg><style>&lt;img src=x onerror=alert(1)&gt;</style></svg>",
980
+ # libgumbo
981
+ "<svg><style></style></svg><img src=\"x\">",
982
+ ]
983
+
984
+ assert_includes(acceptable_results, actual)
985
+ end
706
986
 
707
- assert_equal(expected, actual)
708
- end
987
+ def test_should_sanitize_illegal_style_properties
988
+ raw = %(display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;)
989
+ expected = %(display:block;width:100%;height:100%;background-color:black;background-x:center;background-y:center;)
990
+ assert_equal expected, sanitize_css(raw)
991
+ end
709
992
 
710
- protected
993
+ def test_should_sanitize_with_trailing_space
994
+ raw = "display:block; "
995
+ expected = "display:block;"
996
+ assert_equal expected, sanitize_css(raw)
997
+ end
711
998
 
712
- def xpath_sanitize(input, options = {})
713
- XpathRemovalTestSanitizer.new.sanitize(input, options)
714
- end
999
+ def test_should_sanitize_xul_style_attributes
1000
+ raw = %(-moz-binding:url('http://ha.ckers.org/xssmoz.xml#xss'))
1001
+ assert_equal "", sanitize_css(raw)
1002
+ end
715
1003
 
716
- def full_sanitize(input, options = {})
717
- Rails::Html::FullSanitizer.new.sanitize(input, options)
718
- end
1004
+ def test_should_sanitize_div_background_image_unicode_encoded
1005
+ [
1006
+ convert_to_css_hex("url(javascript:alert(1))", false),
1007
+ convert_to_css_hex("url(javascript:alert(1))", true),
1008
+ convert_to_css_hex("url(https://example.com)", false),
1009
+ convert_to_css_hex("url(https://example.com)", true),
1010
+ ].each do |propval|
1011
+ raw = "background-image:" + propval
1012
+ assert_empty(sanitize_css(raw))
1013
+ end
1014
+ end
719
1015
 
720
- def link_sanitize(input, options = {})
721
- Rails::Html::LinkSanitizer.new.sanitize(input, options)
722
- end
1016
+ def test_should_allow_div_background_image_unicode_encoded_safe_functions
1017
+ [
1018
+ convert_to_css_hex("rgb(255,0,0)", false),
1019
+ convert_to_css_hex("rgb(255,0,0)", true),
1020
+ ].each do |propval|
1021
+ raw = "background-image:" + propval
723
1022
 
724
- def safe_list_sanitize(input, options = {})
725
- Rails::Html::SafeListSanitizer.new.sanitize(input, options)
726
- end
1023
+ assert_includes(sanitize_css(raw), "background-image")
1024
+ end
1025
+ end
727
1026
 
728
- def assert_sanitized(input, expected = nil)
729
- if input
730
- assert_dom_equal expected || input, safe_list_sanitize(input)
731
- else
732
- assert_nil safe_list_sanitize(input)
1027
+ def test_should_sanitize_div_style_expression
1028
+ raw = %(width: expression(alert('XSS'));)
1029
+ assert_equal "", sanitize_css(raw)
733
1030
  end
734
- end
735
1031
 
736
- def sanitize_css(input)
737
- Rails::Html::SafeListSanitizer.new.sanitize_css(input)
738
- end
1032
+ def test_should_sanitize_across_newlines
1033
+ raw = %(\nwidth:\nexpression(alert('XSS'));\n)
1034
+ assert_equal "", sanitize_css(raw)
1035
+ end
739
1036
 
740
- def scope_allowed_tags(tags)
741
- old_tags = Rails::Html::SafeListSanitizer.allowed_tags
742
- Rails::Html::SafeListSanitizer.allowed_tags = tags
743
- yield Rails::Html::SafeListSanitizer.new
744
- ensure
745
- Rails::Html::SafeListSanitizer.allowed_tags = old_tags
746
- end
1037
+ protected
1038
+ def safe_list_sanitize(input, options = {})
1039
+ module_under_test::SafeListSanitizer.new.sanitize(input, options)
1040
+ end
747
1041
 
748
- def scope_allowed_attributes(attributes)
749
- old_attributes = Rails::Html::SafeListSanitizer.allowed_attributes
750
- Rails::Html::SafeListSanitizer.allowed_attributes = attributes
751
- yield Rails::Html::SafeListSanitizer.new
752
- ensure
753
- Rails::Html::SafeListSanitizer.allowed_attributes = old_attributes
754
- end
1042
+ def assert_sanitized(input, expected = nil)
1043
+ assert_equal((expected || input), safe_list_sanitize(input))
1044
+ end
755
1045
 
756
- # note that this is used for testing CSS hex encoding: \\[0-9a-f]{1,6}
757
- def convert_to_css_hex(string, escape_parens=false)
758
- string.chars.map do |c|
759
- if !escape_parens && (c == "(" || c == ")")
760
- c
761
- else
762
- format('\00%02X', c.ord)
1046
+ def scope_allowed_tags(tags)
1047
+ old_tags = module_under_test::SafeListSanitizer.allowed_tags
1048
+ module_under_test::SafeListSanitizer.allowed_tags = tags
1049
+ yield module_under_test::SafeListSanitizer.new
1050
+ ensure
1051
+ module_under_test::SafeListSanitizer.allowed_tags = old_tags
1052
+ end
1053
+
1054
+ def scope_allowed_attributes(attributes)
1055
+ old_attributes = module_under_test::SafeListSanitizer.allowed_attributes
1056
+ module_under_test::SafeListSanitizer.allowed_attributes = attributes
1057
+ yield module_under_test::SafeListSanitizer.new
1058
+ ensure
1059
+ module_under_test::SafeListSanitizer.allowed_attributes = old_attributes
763
1060
  end
764
- end.join
765
- end
766
1061
 
767
- def libxml_2_9_14_recovery_lt?
768
- # changed in 2.9.14, see https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5
769
- Nokogiri.method(:uses_libxml?).arity == -1 && Nokogiri.uses_libxml?(">= 2.9.14")
1062
+ def sanitize_css(input)
1063
+ module_under_test::SafeListSanitizer.new.sanitize_css(input)
1064
+ end
1065
+
1066
+ # note that this is used for testing CSS hex encoding: \\[0-9a-f]{1,6}
1067
+ def convert_to_css_hex(string, escape_parens = false)
1068
+ string.chars.map do |c|
1069
+ if !escape_parens && (c == "(" || c == ")")
1070
+ c
1071
+ else
1072
+ format('\00%02X', c.ord)
1073
+ end
1074
+ end.join
1075
+ end
770
1076
  end
771
1077
 
772
- def libxml_2_9_14_recovery_lt_bang?
773
- # changed in 2.9.14, see https://github.com/sparklemotion/nokogiri/releases/tag/v1.13.5
774
- # then reverted in 2.10.0, see https://gitlab.gnome.org/GNOME/libxml2/-/issues/380
775
- Nokogiri.method(:uses_libxml?).arity == -1 && Nokogiri.uses_libxml?("= 2.9.14")
1078
+ class HTML4SafeListSanitizerTest < Minitest::Test
1079
+ @module_under_test = Rails::HTML4
1080
+ include SafeListSanitizerTest
776
1081
  end
1082
+
1083
+ class HTML5SafeListSanitizerTest < Minitest::Test
1084
+ @module_under_test = Rails::HTML5
1085
+ include SafeListSanitizerTest
1086
+ end if loofah_html5_support?
777
1087
  end