loofah 2.3.1 → 2.19.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +193 -40
  3. data/README.md +13 -12
  4. data/lib/loofah/elements.rb +79 -75
  5. data/lib/loofah/helpers.rb +5 -4
  6. data/lib/loofah/html/document.rb +1 -0
  7. data/lib/loofah/html/document_fragment.rb +4 -2
  8. data/lib/loofah/html5/libxml2_workarounds.rb +8 -7
  9. data/lib/loofah/html5/safelist.rb +273 -27
  10. data/lib/loofah/html5/scrub.rb +147 -52
  11. data/lib/loofah/instance_methods.rb +14 -8
  12. data/lib/loofah/metahelpers.rb +2 -1
  13. data/lib/loofah/scrubber.rb +12 -7
  14. data/lib/loofah/scrubbers.rb +20 -18
  15. data/lib/loofah/version.rb +5 -0
  16. data/lib/loofah/xml/document.rb +1 -0
  17. data/lib/loofah/xml/document_fragment.rb +2 -1
  18. data/lib/loofah.rb +33 -16
  19. metadata +45 -125
  20. data/.gemtest +0 -0
  21. data/Gemfile +0 -22
  22. data/Manifest.txt +0 -41
  23. data/Rakefile +0 -81
  24. data/benchmark/benchmark.rb +0 -149
  25. data/benchmark/fragment.html +0 -96
  26. data/benchmark/helper.rb +0 -73
  27. data/benchmark/www.slashdot.com.html +0 -2560
  28. data/test/assets/msword.html +0 -63
  29. data/test/assets/testdata_sanitizer_tests1.dat +0 -502
  30. data/test/helper.rb +0 -18
  31. data/test/html5/test_sanitizer.rb +0 -401
  32. data/test/html5/test_scrub.rb +0 -10
  33. data/test/integration/test_ad_hoc.rb +0 -220
  34. data/test/integration/test_helpers.rb +0 -43
  35. data/test/integration/test_html.rb +0 -72
  36. data/test/integration/test_scrubbers.rb +0 -400
  37. data/test/integration/test_xml.rb +0 -55
  38. data/test/unit/test_api.rb +0 -142
  39. data/test/unit/test_encoding.rb +0 -20
  40. data/test/unit/test_helpers.rb +0 -62
  41. data/test/unit/test_scrubber.rb +0 -229
  42. data/test/unit/test_scrubbers.rb +0 -14
@@ -1,401 +0,0 @@
1
- #
2
- # these tests taken from the HTML5 sanitization project and modified for use with Loofah
3
- # see the original here: http://code.google.com/p/html5lib/source/browse/ruby/test/test_sanitizer.rb
4
- #
5
- # license text at the bottom of this file
6
- #
7
- require "helper"
8
-
9
- class Html5TestSanitizer < Loofah::TestCase
10
- include Loofah
11
-
12
- def sanitize_xhtml stream
13
- Loofah.fragment(stream).scrub!(:escape).to_xhtml
14
- end
15
-
16
- def sanitize_html stream
17
- Loofah.fragment(stream).scrub!(:escape).to_html
18
- end
19
-
20
- def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
21
- ## libxml uses double-quotes, so let's swappo-boppo our quotes before comparing.
22
- sane = sanitize_html(input).gsub('"',"'")
23
- htmloutput = htmloutput.gsub('"',"'")
24
- xhtmloutput = xhtmloutput.gsub('"',"'")
25
- rexmloutput = rexmloutput.gsub('"',"'")
26
-
27
- ## HTML5's parsers are shit. there's so much inconsistency with what has closing tags, etc, that
28
- ## it would require a lot of manual hacking to make the tests match libxml's output.
29
- ## instead, I'm taking the shotgun approach, and trying to match any of the described outputs.
30
- assert((htmloutput == sane) || (rexmloutput == sane) || (xhtmloutput == sane),
31
- %Q{given: "#{input}"\nexpected: "#{htmloutput}"\ngot: "#{sane}"})
32
- end
33
-
34
- def assert_completes_in_reasonable_time &block
35
- t0 = Time.now
36
- block.call
37
- assert_in_delta t0, Time.now, 0.1 # arbitrary seconds
38
- end
39
-
40
- (HTML5::SafeList::ALLOWED_ELEMENTS).each do |tag_name|
41
- define_method "test_should_allow_#{tag_name}_tag" do
42
- input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
43
- htmloutput = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
44
- xhtmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name}>"
45
- rexmloutput = xhtmloutput
46
-
47
- if %w[caption colgroup optgroup option tbody td tfoot th thead tr].include?(tag_name)
48
- htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt; baz"
49
- xhtmloutput = htmloutput
50
- elsif tag_name == 'col'
51
- htmloutput = "<col title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
52
- xhtmloutput = htmloutput
53
- rexmloutput = "<col title='1' />"
54
- elsif tag_name == 'table'
55
- htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt;baz<table title='1'> </table>"
56
- xhtmloutput = htmloutput
57
- elsif tag_name == 'image'
58
- htmloutput = "<img title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
59
- xhtmloutput = htmloutput
60
- rexmloutput = "<image title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</image>"
61
- elsif HTML5::SafeList::VOID_ELEMENTS.include?(tag_name)
62
- htmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
63
- xhtmloutput = htmloutput
64
- htmloutput += '<br/>' if tag_name == 'br'
65
- rexmloutput = "<#{tag_name} title='1' />"
66
- end
67
- check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
68
- end
69
- end
70
-
71
- ##
72
- ## libxml2 downcases elements, so this is moot.
73
- ##
74
- # HTML5::SafeList::ALLOWED_ELEMENTS.each do |tag_name|
75
- # define_method "test_should_forbid_#{tag_name.upcase}_tag" do
76
- # input = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
77
- # output = "&lt;#{tag_name.upcase} title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/#{tag_name.upcase}&gt;"
78
- # check_sanitization(input, output, output, output)
79
- # end
80
- # end
81
-
82
- HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name|
83
- next if attribute_name == 'style'
84
- define_method "test_should_allow_#{attribute_name}_attribute" do
85
- input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
86
- if %w[checked compact disabled ismap multiple nohref noshade nowrap readonly selected].include?(attribute_name)
87
- output = "<p #{attribute_name}>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
88
- htmloutput = "<p #{attribute_name.downcase}>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
89
- else
90
- output = "<p #{attribute_name}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
91
- htmloutput = "<p #{attribute_name.downcase}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
92
- end
93
- check_sanitization(input, htmloutput, output, output)
94
- end
95
- end
96
-
97
- def test_should_allow_data_attributes
98
- input = "<p data-foo='foo'>foo <bad>bar</bad> baz</p>"
99
-
100
- output = "<p data-foo='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
101
- htmloutput = "<p data-foo='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
102
-
103
- check_sanitization(input, htmloutput, output, output)
104
- end
105
-
106
- def test_should_allow_multi_word_data_attributes
107
- input = "<p data-foo-bar-id='11'>foo <bad>bar</bad> baz</p>"
108
- output = htmloutput = "<p data-foo-bar-id='11'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
109
-
110
- check_sanitization(input, htmloutput, output, output)
111
- end
112
-
113
- def test_should_allow_contenteditable
114
- input = '<p contenteditable="false">Hi!</p>'
115
- output = '<p contenteditable="false">Hi!</p>'
116
-
117
- check_sanitization(input, output, output, output)
118
- end
119
-
120
- ##
121
- ## libxml2 downcases attributes, so this is moot.
122
- ##
123
- # HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name|
124
- # define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
125
- # input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
126
- # output = "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
127
- # check_sanitization(input, output, output, output)
128
- # end
129
- # end
130
-
131
- HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
132
- define_method "test_should_allow_#{protocol}_uris" do
133
- input = %(<a href="#{protocol}">foo</a>)
134
- output = "<a href='#{protocol}'>foo</a>"
135
- check_sanitization(input, output, output, output)
136
- end
137
- end
138
-
139
- HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
140
- define_method "test_should_allow_uppercase_#{protocol}_uris" do
141
- input = %(<a href="#{protocol.upcase}">foo</a>)
142
- output = "<a href='#{protocol.upcase}'>foo</a>"
143
- check_sanitization(input, output, output, output)
144
- end
145
- end
146
-
147
- HTML5::SafeList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
148
- define_method "test_should_allow_data_#{data_uri_type}_uris" do
149
- input = %(<a href="data:#{data_uri_type}">foo</a>)
150
- output = "<a href='data:#{data_uri_type}'>foo</a>"
151
- check_sanitization(input, output, output, output)
152
-
153
- input = %(<a href="data:#{data_uri_type};base64,R0lGODlhAQABA">foo</a>)
154
- output = "<a href='data:#{data_uri_type};base64,R0lGODlhAQABA'>foo</a>"
155
- check_sanitization(input, output, output, output)
156
- end
157
- end
158
-
159
- HTML5::SafeList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
160
- define_method "test_should_allow_uppercase_data_#{data_uri_type}_uris" do
161
- input = %(<a href="DATA:#{data_uri_type.upcase}">foo</a>)
162
- output = "<a href='DATA:#{data_uri_type.upcase}'>foo</a>"
163
- check_sanitization(input, output, output, output)
164
- end
165
- end
166
-
167
- def test_should_disallow_other_uri_mediatypes
168
- input = %(<a href="data:foo">foo</a>)
169
- output = "<a>foo</a>"
170
- check_sanitization(input, output, output, output)
171
-
172
- input = %(<a href="data:image/xxx">foo</a>)
173
- output = "<a>foo</a>"
174
- check_sanitization(input, output, output, output)
175
-
176
- input = %(<a href="data:image/xxx;base64,R0lGODlhAQABA">foo</a>)
177
- output = "<a>foo</a>"
178
- check_sanitization(input, output, output, output)
179
- end
180
-
181
-
182
- HTML5::SafeList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
183
- next unless HTML5::SafeList::ALLOWED_ELEMENTS.include?(tag_name)
184
- define_method "test_#{tag_name}_should_allow_local_href" do
185
- input = %(<#{tag_name} xlink:href="#foo"/>)
186
- output = "<#{tag_name.downcase} xlink:href='#foo'></#{tag_name.downcase}>"
187
- xhtmloutput = "<#{tag_name} xlink:href='#foo'></#{tag_name}>"
188
- check_sanitization(input, output, xhtmloutput, xhtmloutput)
189
- end
190
-
191
- define_method "test_#{tag_name}_should_allow_local_href_with_newline" do
192
- input = %(<#{tag_name} xlink:href="\n#foo"/>)
193
- output = "<#{tag_name.downcase} xlink:href='\n#foo'></#{tag_name.downcase}>"
194
- xhtmloutput = "<#{tag_name} xlink:href='\n#foo'></#{tag_name}>"
195
- check_sanitization(input, output, xhtmloutput, xhtmloutput)
196
- end
197
-
198
- define_method "test_#{tag_name}_should_forbid_nonlocal_href" do
199
- input = %(<#{tag_name} xlink:href="http://bad.com/foo"/>)
200
- output = "<#{tag_name.downcase}></#{tag_name.downcase}>"
201
- xhtmloutput = "<#{tag_name}></#{tag_name}>"
202
- check_sanitization(input, output, xhtmloutput, xhtmloutput)
203
- end
204
-
205
- define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_newline" do
206
- input = %(<#{tag_name} xlink:href="\nhttp://bad.com/foo"/>)
207
- output = "<#{tag_name.downcase}></#{tag_name.downcase}>"
208
- xhtmloutput = "<#{tag_name}></#{tag_name}>"
209
- check_sanitization(input, output, xhtmloutput, xhtmloutput)
210
- end
211
- end
212
-
213
- def test_figure_element_is_valid
214
- fragment = Loofah.scrub_fragment("<span>hello</span> <figure>asd</figure>", :prune)
215
- assert fragment.at_css("figure"), "<figure> tag was scrubbed"
216
- end
217
-
218
- ##
219
- ## as tenderlove says, "care < 0"
220
- ##
221
- # def test_should_handle_astral_plane_characters
222
- # input = "<p>&#x1d4b5; &#x1d538;</p>"
223
- # output = "<p>\360\235\222\265 \360\235\224\270</p>"
224
- # check_sanitization(input, output, output, output)
225
-
226
- # input = "<p><tspan>\360\235\224\270</tspan> a</p>"
227
- # output = "<p><tspan>\360\235\224\270</tspan> a</p>"
228
- # check_sanitization(input, output, output, output)
229
- # end
230
-
231
- # This affects only NS4. Is it worth fixing?
232
- # def test_javascript_includes
233
- # input = %(<div size="&{alert('XSS')}">foo</div>)
234
- # output = "<div>foo</div>"
235
- # check_sanitization(input, output, output, output)
236
- # end
237
-
238
- ##
239
- ## these tests primarily test the parser logic, not the sanitizer
240
- ## logic. i call bullshit. we're not writing a test suite for
241
- ## libxml2 here, so let's rely on the unit tests above to take care
242
- ## of our valid elements and attributes.
243
- ##
244
- require 'json'
245
- Dir[File.join(File.dirname(__FILE__), '..', 'assets', 'testdata_sanitizer_tests1.dat')].each do |filename|
246
- JSON::parse(open(filename).read).each do |test|
247
- it "testdata sanitizer #{test['name']}" do
248
- check_sanitization(
249
- test['input'],
250
- test['output'],
251
- test['xhtml'] || test['output'],
252
- test['rexml'] || test['output']
253
- )
254
- end
255
- end
256
- end
257
-
258
- ## added because we don't have any coverage above on SVG_ATTR_VAL_ALLOWS_REF
259
- HTML5::SafeList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
260
- define_method "test_should_allow_uri_refs_in_svg_attribute_#{attr_name}" do
261
- input = "<rect fill='url(#foo)' />"
262
- output = "<rect fill='url(#foo)'></rect>"
263
- check_sanitization(input, output, output, output)
264
- end
265
-
266
- define_method "test_absolute_uri_refs_in_svg_attribute_#{attr_name}" do
267
- input = "<rect fill='url(http://bad.com/) #fff' />"
268
- output = "<rect fill=' #fff'></rect>"
269
- check_sanitization(input, output, output, output)
270
- end
271
- end
272
-
273
- def test_css_list_style
274
- html = '<ul style="list-style: none"></ul>'
275
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
276
- assert_match %r/list-style/, sane.inner_html
277
- end
278
-
279
- def test_css_negative_value_sanitization
280
- html = "<span style=\"letter-spacing:-0.03em;\">"
281
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
282
- assert_match %r/-0.03em/, sane.inner_html
283
- end
284
-
285
- def test_css_negative_value_sanitization_shorthand_css_properties
286
- html = "<span style=\"margin-left:-0.05em;\">"
287
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
288
- assert_match %r/-0.05em/, sane.inner_html
289
- end
290
-
291
- def test_css_high_precision_value_shorthand_css_properties
292
- html = "<span style=\"margin-left:0.3333333334em;\">"
293
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
294
- assert_match %r/0.3333333334em/, sane.inner_html
295
- end
296
-
297
- def test_css_function_sanitization_leaves_safelisted_functions_calc
298
- html = "<span style=\"width:calc(5%)\">"
299
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
300
- assert_match %r/calc\(5%\)/, sane.inner_html
301
-
302
- html = "<span style=\"width: calc(5%)\">"
303
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
304
- assert_match %r/calc\(5%\)/, sane.inner_html
305
- end
306
-
307
- def test_css_function_sanitization_leaves_safelisted_functions_rgb
308
- html = '<span style="color: rgb(255, 0, 0)">'
309
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
310
- assert_match %r/rgb\(255, 0, 0\)/, sane.inner_html
311
- end
312
-
313
- def test_css_function_sanitization_leaves_safelisted_list_style_type
314
- html = "<ol style='list-style-type:lower-greek;'></ol>"
315
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
316
- assert_match %r/list-style-type:lower-greek/, sane.inner_html
317
- end
318
-
319
- def test_css_function_sanitization_strips_style_attributes_with_unsafe_functions
320
- html = "<span style=\"width:url(data-evil-url)\">"
321
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
322
- assert_match %r/<span><\/span>/, sane.inner_html
323
-
324
- html = "<span style=\"width: url(data-evil-url)\">"
325
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
326
- assert_match %r/<span><\/span>/, sane.inner_html
327
- end
328
-
329
- def test_issue_90_slow_regex
330
- skip("timing tests are hard to make pass and have little regression-testing value")
331
-
332
- html = %q{<span style="background: url('data:image/svg&#43;xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20width%3D%2232%22%20height%3D%2232%22%20viewBox%3D%220%200%2032%2032%22%3E%3Cpath%20fill%3D%22%23D4C8AE%22%20d%3D%22M0%200h32v32h-32z%22%2F%3E%3Cpath%20fill%3D%22%2383604B%22%20d%3D%22M0%200h31.99v11.75h-31.99z%22%2F%3E%3Cpath%20fill%3D%22%233D2319%22%20d%3D%22M0%2011.5h32v.5h-32z%22%2F%3E%3Cpath%20fill%3D%22%23F83651%22%20d%3D%22M5%200h1v10.5h-1z%22%2F%3E%3Cpath%20fill%3D%22%23FCD050%22%20d%3D%22M6%200h1v10.5h-1z%22%2F%3E%3Cpath%20fill%3D%22%2371C797%22%20d%3D%22M7%200h1v10.5h-1z%22%2F%3E%3Cpath%20fill%3D%22%23509CF9%22%20d%3D%22M8%200h1v10.5h-1z%22%2F%3E%3ClinearGradient%20id%3D%22a%22%20gradientUnits%3D%22userSpaceOnUse%22%20x1%3D%2224.996%22%20y1%3D%2210.5%22%20x2%3D%2224.996%22%20y2%3D%224.5%22%3E%3Cstop%20offset%3D%220%22%20stop-color%3D%22%23796055%22%2F%3E%3Cstop%20offset%3D%22.434%22%20stop-color%3D%22%23614C43%22%2F%3E%3Cstop%20offset%3D%221%22%20stop-color%3D%22%233D2D28%22%2F%3E%3C%2FlinearGradient%3E%3Cpath%20fill%3D%22url(%23a)%22%20d%3D%22M28%208.5c0%201.1-.9%202-2%202h-2c-1.1%200-2-.9-2-2v-2c0-1.1.9-2%202-2h2c1.1%200%202%20.9%202%202v2z%22%2F%3E%3Cpath%20fill%3D%22%235F402E%22%20d%3D%22M28%208c0%201.1-.9%202-2%202h-2c-1.1%200-2-.9-2-2v-2c0-1.1.9-2%202-2h2c1.1%200%202%20.9%202%202v2z%22%2F%3E%3C');"></span>}
333
-
334
- assert_completes_in_reasonable_time {
335
- Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
336
- }
337
- end
338
-
339
- def test_upper_case_css_property
340
- html = "<div style=\"COLOR: BLUE; NOTAPROPERTY: RED;\">asdf</div>"
341
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)
342
- assert_match(/COLOR:\s*BLUE/i, sane.at_css("div")["style"])
343
- refute_match(/NOTAPROPERTY/i, sane.at_css("div")["style"])
344
- end
345
-
346
- def test_many_properties_some_allowed
347
- html = "<div style=\"background: bold notaproperty center alsonotaproperty 10px;\">asdf</div>"
348
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)
349
- assert_match(/bold\s+center\s+10px/, sane.at_css("div")["style"])
350
- end
351
-
352
- def test_many_properties_non_allowed
353
- html = "<div style=\"background: notaproperty alsonotaproperty;\">asdf</div>"
354
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)
355
- assert_nil sane.at_css("div")["style"]
356
- end
357
-
358
- def test_svg_properties
359
- html = "<line style='stroke-width: 10px;'></line>"
360
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)
361
- assert_match(/stroke-width:\s*10px/, sane.at_css("line")["style"])
362
- end
363
- end
364
-
365
- # <html5_license>
366
- #
367
- # Copyright (c) 2006-2008 The Authors
368
- #
369
- # Contributors:
370
- # James Graham - jg307@cam.ac.uk
371
- # Anne van Kesteren - annevankesteren@gmail.com
372
- # Lachlan Hunt - lachlan.hunt@lachy.id.au
373
- # Matt McDonald - kanashii@kanashii.ca
374
- # Sam Ruby - rubys@intertwingly.net
375
- # Ian Hickson (Google) - ian@hixie.ch
376
- # Thomas Broyer - t.broyer@ltgt.net
377
- # Jacques Distler - distler@golem.ph.utexas.edu
378
- # Henri Sivonen - hsivonen@iki.fi
379
- # The Mozilla Foundation (contributions from Henri Sivonen since 2008)
380
- #
381
- # Permission is hereby granted, free of charge, to any person
382
- # obtaining a copy of this software and associated documentation files
383
- # (the "Software"), to deal in the Software without restriction,
384
- # including without limitation the rights to use, copy, modify, merge,
385
- # publish, distribute, sublicense, and/or sell copies of the Software,
386
- # and to permit persons to whom the Software is furnished to do so,
387
- # subject to the following conditions:
388
- #
389
- # The above copyright notice and this permission notice shall be
390
- # included in all copies or substantial portions of the Software.
391
- #
392
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
393
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
394
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
395
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
396
- # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
397
- # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
398
- # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
399
- # SOFTWARE.
400
- #
401
- # </html5_license>
@@ -1,10 +0,0 @@
1
- require "helper"
2
-
3
- class UnitHTML5Scrub < Loofah::TestCase
4
- include Loofah
5
-
6
- def test_scrub_css
7
- assert_equal Loofah::HTML5::Scrub.scrub_css("background: #ABC012"), "background:#ABC012;"
8
- assert_equal Loofah::HTML5::Scrub.scrub_css("background: #abc012"), "background:#abc012;"
9
- end
10
- end
@@ -1,220 +0,0 @@
1
- require "helper"
2
-
3
- class IntegrationTestAdHoc < Loofah::TestCase
4
- context "blank input string" do
5
- context "fragment" do
6
- it "return a blank string" do
7
- assert_equal "", Loofah.scrub_fragment("", :prune).to_s
8
- end
9
- end
10
-
11
- context "document" do
12
- it "return a blank string" do
13
- assert_equal "", Loofah.scrub_document("", :prune).root.to_s
14
- end
15
- end
16
- end
17
-
18
- context "tests" do
19
- MSWORD_HTML = File.read(File.join(File.dirname(__FILE__), "..", "assets", "msword.html")).freeze
20
-
21
- def test_removal_of_illegal_tag
22
- html = <<-HTML
23
- following this there should be no jim tag
24
- <jim>jim</jim>
25
- was there?
26
- HTML
27
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
28
- assert sane.xpath("//jim").empty?
29
- end
30
-
31
- def test_removal_of_illegal_attribute
32
- html = "<p class=bar foo=bar abbr=bar />"
33
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
34
- node = sane.xpath("//p").first
35
- assert node.attributes["class"]
36
- assert node.attributes["abbr"]
37
- assert_nil node.attributes["foo"]
38
- end
39
-
40
- def test_removal_of_illegal_url_in_href
41
- html = <<-HTML
42
- <a href='jimbo://jim.jim/'>this link should have its href removed because of illegal url</a>
43
- <a href='http://jim.jim/'>this link should be fine</a>
44
- HTML
45
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
46
- nodes = sane.xpath("//a")
47
- assert_nil nodes.first.attributes["href"]
48
- assert nodes.last.attributes["href"]
49
- end
50
-
51
- def test_css_sanitization
52
- html = "<p style='background-color: url(\"http://foo.com/\") ; background-color: #000 ;' />"
53
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
54
- assert_match %r/#000/, sane.inner_html
55
- refute_match %r/foo\.com/, sane.inner_html
56
- end
57
-
58
- def test_fragment_with_no_tags
59
- assert_equal "This fragment has no tags.", Loofah.scrub_fragment("This fragment has no tags.", :escape).to_xml
60
- end
61
-
62
- def test_fragment_in_p_tag
63
- assert_equal "<p>This fragment is in a p.</p>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>", :escape).to_xml
64
- end
65
-
66
- def test_fragment_in_p_tag_plus_stuff
67
- assert_equal "<p>This fragment is in a p.</p>foo<strong>bar</strong>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>foo<strong>bar</strong>", :escape).to_xml
68
- end
69
-
70
- def test_fragment_with_text_nodes_leading_and_trailing
71
- assert_equal "text<p>fragment</p>text", Loofah.scrub_fragment("text<p>fragment</p>text", :escape).to_xml
72
- end
73
-
74
- def test_whitewash_on_fragment
75
- html = "safe<frameset rows=\"*\"><frame src=\"http://example.com\"></frameset> <b>description</b>"
76
- whitewashed = Loofah.scrub_document(html, :whitewash).xpath("/html/body/*").to_s
77
- assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n", "")
78
- end
79
-
80
- def test_fragment_whitewash_on_microsofty_markup
81
- whitewashed = Loofah.fragment(MSWORD_HTML).scrub!(:whitewash)
82
- assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.to_s.strip
83
- end
84
-
85
- def test_document_whitewash_on_microsofty_markup
86
- whitewashed = Loofah.document(MSWORD_HTML).scrub!(:whitewash)
87
- assert_match %r(<p>Foo <b>BOLD</b></p>), whitewashed.to_s
88
- assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.xpath("/html/body/*").to_s
89
- end
90
-
91
- def test_return_empty_string_when_nothing_left
92
- assert_equal "", Loofah.scrub_document("<script>test</script>", :prune).text
93
- end
94
-
95
- def test_nested_script_cdata_tags_should_be_scrubbed
96
- html = "<script><script src='malicious.js'></script>"
97
- stripped = Loofah.fragment(html).scrub!(:strip)
98
- assert_empty stripped.xpath("//script")
99
- refute_match("<script", stripped.to_html)
100
- end
101
-
102
- def test_nested_script_cdata_tags_should_be_scrubbed_2
103
- html = "<script><script>alert('a');</script></script>"
104
- stripped = Loofah.fragment(html).scrub!(:strip)
105
- assert_empty stripped.xpath("//script")
106
- refute_match("<script", stripped.to_html)
107
- end
108
-
109
- def test_removal_of_all_tags
110
- html = <<-HTML
111
- What's up <strong>doc</strong>?
112
- HTML
113
- stripped = Loofah.scrub_document(html, :prune).text
114
- assert_equal %Q(What\'s up doc?).strip, stripped.strip
115
- end
116
-
117
- def test_dont_remove_whitespace
118
- html = "Foo\nBar"
119
- assert_equal html, Loofah.scrub_document(html, :prune).text
120
- end
121
-
122
- def test_dont_remove_whitespace_between_tags
123
- html = "<p>Foo</p>\n<p>Bar</p>"
124
- assert_equal "Foo\nBar", Loofah.scrub_document(html, :prune).text
125
- end
126
-
127
- #
128
- # tests for CVE-2018-8048 (see https://github.com/flavorjones/loofah/issues/144)
129
- #
130
- # libxml2 >= 2.9.2 fails to escape comments within some attributes. It
131
- # wants to ensure these comments can be treated as "server-side includes",
132
- # but as a result fails to ensure that serialization is well-formed,
133
- # resulting in an opportunity for XSS injection of code into a final
134
- # re-parsed document (presumably in a browser).
135
- #
136
- # we'll test this by parsing the HTML, serializing it, then
137
- # re-parsing it to ensure there isn't any ambiguity in the output
138
- # that might allow code injection into a browser consuming
139
- # "sanitized" output.
140
- #
141
- [
142
- #
143
- # these tags and attributes are determined by the code at:
144
- #
145
- # https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
146
- #
147
- { tag: "a", attr: "href" },
148
- { tag: "div", attr: "href" },
149
- { tag: "a", attr: "action" },
150
- { tag: "div", attr: "action" },
151
- { tag: "a", attr: "src" },
152
- { tag: "div", attr: "src" },
153
- { tag: "a", attr: "name" },
154
- #
155
- # note that div+name is _not_ affected by the libxml2 issue.
156
- # but we test it anyway to ensure our logic isn't modifying
157
- # attributes that don't need modifying.
158
- #
159
- { tag: "div", attr: "name", unescaped: true },
160
- ].each do |config|
161
- define_method "test_uri_escaping_of_#{config[:attr]}_attr_in_#{config[:tag]}_tag" do
162
- html = %{<#{config[:tag]} #{config[:attr]}='examp<!--" unsafeattr=foo()>-->le.com'>test</#{config[:tag]}>}
163
-
164
- reparsed = Loofah.fragment(Loofah.fragment(html).scrub!(:prune).to_html)
165
- attributes = reparsed.at_css(config[:tag]).attribute_nodes
166
-
167
- assert_equal [config[:attr]], attributes.collect(&:name)
168
- if Nokogiri::VersionInfo.instance.libxml2?
169
- if config[:unescaped]
170
- #
171
- # this attribute was emitted wrapped in single-quotes, so a double quote is A-OK.
172
- # assert that this attribute's serialization is unaffected.
173
- #
174
- assert_equal %{examp<!--" unsafeattr=foo()>-->le.com}, attributes.first.value
175
- else
176
- #
177
- # let's match the behavior in libxml < 2.9.2.
178
- # test that this attribute's serialization is well-formed and sanitized.
179
- #
180
- assert_equal %{examp<!--%22%20unsafeattr=foo()>-->le.com}, attributes.first.value
181
- end
182
- else
183
- #
184
- # yay for consistency in javaland. move along, nothing to see here.
185
- #
186
- assert_equal %{examp<!--%22 unsafeattr=foo()>-->le.com}, attributes.first.value
187
- end
188
- end
189
- end
190
-
191
- context "xss protection from svg animate attributes" do
192
- # see recommendation from https://html5sec.org/#137
193
- # to sanitize "to", "from", "values", and "by" attributes
194
-
195
- it "sanitizes 'from', 'to', and 'by' attributes" do
196
- # for CVE-2018-16468
197
- # see:
198
- # - https://github.com/flavorjones/loofah/issues/154
199
- # - https://hackerone.com/reports/429267
200
- html = %Q{<svg><a xmlns:xlink=http://www.w3.org/1999/xlink xlink:href=?><circle r=400 /><animate attributeName=xlink:href begin=0 from=javascript:alert(1) to=%26 by=5>}
201
-
202
- sanitized = Loofah.scrub_fragment(html, :escape)
203
- assert_nil sanitized.at_css("animate")["from"]
204
- assert_nil sanitized.at_css("animate")["to"]
205
- assert_nil sanitized.at_css("animate")["by"]
206
- end
207
-
208
- it "sanitizes 'values' attribute" do
209
- # for CVE-2019-15587
210
- # see:
211
- # - https://github.com/flavorjones/loofah/issues/171
212
- # - https://hackerone.com/reports/709009
213
- html = %Q{<svg> <animate href="#foo" attributeName="href" values="javascript:alert('xss')"/> <a id="foo"> <circle r=400 /> </a> </svg>}
214
-
215
- sanitized = Loofah.scrub_fragment(html, :escape)
216
- assert_nil sanitized.at_css("animate")["values"]
217
- end
218
- end
219
- end
220
- end
@@ -1,43 +0,0 @@
1
- require "helper"
2
-
3
- class IntegrationTestHelpers < Loofah::TestCase
4
- context ".strip_tags" do
5
- context "on safe markup" do
6
- it "strip out tags" do
7
- assert_equal "omgwtfbbq!!1!", Loofah::Helpers.strip_tags("<div>omgwtfbbq</div><span>!!1!</span>")
8
- end
9
- end
10
-
11
- context "on hack attack" do
12
- it "strip escape html entities" do
13
- bad_shit = "&lt;script&gt;alert('evil')&lt;/script&gt;"
14
- assert_equal bad_shit, Loofah::Helpers.strip_tags(bad_shit)
15
- end
16
- end
17
- end
18
-
19
- context ".sanitize" do
20
- context "on safe markup" do
21
- it "render the safe html" do
22
- html = "<div>omgwtfbbq</div><span>!!1!</span>"
23
- assert_equal html, Loofah::Helpers.sanitize(html)
24
- end
25
- end
26
-
27
- context "on hack attack" do
28
- it "strip the unsafe tags" do
29
- assert_equal "alert('evil')<span>w00t</span>", Loofah::Helpers.sanitize("<script>alert('evil')</script><span>w00t</span>")
30
- end
31
-
32
- it "strips form tags" do
33
- assert_equal "alert('evil')<span>w00t</span>", Loofah::Helpers.sanitize("<script>alert('evil')</script><form action=\"/foo/bar\" method=\"post\"><input></form><span>w00t</span>")
34
- end
35
- end
36
- end
37
-
38
- context ".sanitize_css" do
39
- it "removes unsafe css properties" do
40
- assert_match(/display:\s*block;\s*background-color:\s*blue;/, Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg);background-color:blue"))
41
- end
42
- end
43
- end