loofah 2.3.0 → 2.7.0

Sign up to get free protection for your applications and to gain access to all of the features.

Potentially problematic release.


This version of loofah might be problematic. Click here for more details.

@@ -1,18 +0,0 @@
1
- require 'rubygems'
2
- require 'minitest/unit'
3
- require 'minitest/spec'
4
- require 'minitest/autorun'
5
- require 'rr'
6
-
7
- require File.expand_path(File.join(File.dirname(__FILE__), "..", "lib", "loofah"))
8
-
9
- # require the ActionView helpers here, since they are no longer required automatically
10
- require File.expand_path(File.join(File.dirname(__FILE__), "..", "lib", "loofah", "helpers"))
11
-
12
- puts "=> testing with Nokogiri #{Nokogiri::VERSION_INFO.inspect}"
13
-
14
- class Loofah::TestCase < MiniTest::Spec
15
- class << self
16
- alias_method :context, :describe
17
- end
18
- end
@@ -1,401 +0,0 @@
1
- #
2
- # these tests taken from the HTML5 sanitization project and modified for use with Loofah
3
- # see the original here: http://code.google.com/p/html5lib/source/browse/ruby/test/test_sanitizer.rb
4
- #
5
- # license text at the bottom of this file
6
- #
7
- require "helper"
8
-
9
- class Html5TestSanitizer < Loofah::TestCase
10
- include Loofah
11
-
12
- def sanitize_xhtml stream
13
- Loofah.fragment(stream).scrub!(:escape).to_xhtml
14
- end
15
-
16
- def sanitize_html stream
17
- Loofah.fragment(stream).scrub!(:escape).to_html
18
- end
19
-
20
- def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
21
- ## libxml uses double-quotes, so let's swappo-boppo our quotes before comparing.
22
- sane = sanitize_html(input).gsub('"',"'")
23
- htmloutput = htmloutput.gsub('"',"'")
24
- xhtmloutput = xhtmloutput.gsub('"',"'")
25
- rexmloutput = rexmloutput.gsub('"',"'")
26
-
27
- ## HTML5's parsers are shit. there's so much inconsistency with what has closing tags, etc, that
28
- ## it would require a lot of manual hacking to make the tests match libxml's output.
29
- ## instead, I'm taking the shotgun approach, and trying to match any of the described outputs.
30
- assert((htmloutput == sane) || (rexmloutput == sane) || (xhtmloutput == sane),
31
- %Q{given: "#{input}"\nexpected: "#{htmloutput}"\ngot: "#{sane}"})
32
- end
33
-
34
- def assert_completes_in_reasonable_time &block
35
- t0 = Time.now
36
- block.call
37
- assert_in_delta t0, Time.now, 0.1 # arbitrary seconds
38
- end
39
-
40
- (HTML5::SafeList::ALLOWED_ELEMENTS).each do |tag_name|
41
- define_method "test_should_allow_#{tag_name}_tag" do
42
- input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
43
- htmloutput = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
44
- xhtmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name}>"
45
- rexmloutput = xhtmloutput
46
-
47
- if %w[caption colgroup optgroup option tbody td tfoot th thead tr].include?(tag_name)
48
- htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt; baz"
49
- xhtmloutput = htmloutput
50
- elsif tag_name == 'col'
51
- htmloutput = "<col title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
52
- xhtmloutput = htmloutput
53
- rexmloutput = "<col title='1' />"
54
- elsif tag_name == 'table'
55
- htmloutput = "foo &lt;bad&gt;bar&lt;/bad&gt;baz<table title='1'> </table>"
56
- xhtmloutput = htmloutput
57
- elsif tag_name == 'image'
58
- htmloutput = "<img title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
59
- xhtmloutput = htmloutput
60
- rexmloutput = "<image title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</image>"
61
- elsif HTML5::SafeList::VOID_ELEMENTS.include?(tag_name)
62
- htmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
63
- xhtmloutput = htmloutput
64
- htmloutput += '<br/>' if tag_name == 'br'
65
- rexmloutput = "<#{tag_name} title='1' />"
66
- end
67
- check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
68
- end
69
- end
70
-
71
- ##
72
- ## libxml2 downcases elements, so this is moot.
73
- ##
74
- # HTML5::SafeList::ALLOWED_ELEMENTS.each do |tag_name|
75
- # define_method "test_should_forbid_#{tag_name.upcase}_tag" do
76
- # input = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
77
- # output = "&lt;#{tag_name.upcase} title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/#{tag_name.upcase}&gt;"
78
- # check_sanitization(input, output, output, output)
79
- # end
80
- # end
81
-
82
- HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name|
83
- next if attribute_name == 'style'
84
- define_method "test_should_allow_#{attribute_name}_attribute" do
85
- input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
86
- if %w[checked compact disabled ismap multiple nohref noshade nowrap readonly selected].include?(attribute_name)
87
- output = "<p #{attribute_name}>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
88
- htmloutput = "<p #{attribute_name.downcase}>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
89
- else
90
- output = "<p #{attribute_name}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
91
- htmloutput = "<p #{attribute_name.downcase}='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
92
- end
93
- check_sanitization(input, htmloutput, output, output)
94
- end
95
- end
96
-
97
- def test_should_allow_data_attributes
98
- input = "<p data-foo='foo'>foo <bad>bar</bad> baz</p>"
99
-
100
- output = "<p data-foo='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
101
- htmloutput = "<p data-foo='foo'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
102
-
103
- check_sanitization(input, htmloutput, output, output)
104
- end
105
-
106
- def test_should_allow_multi_word_data_attributes
107
- input = "<p data-foo-bar-id='11'>foo <bad>bar</bad> baz</p>"
108
- output = htmloutput = "<p data-foo-bar-id='11'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
109
-
110
- check_sanitization(input, htmloutput, output, output)
111
- end
112
-
113
- def test_should_allow_contenteditable
114
- input = '<p contenteditable="false">Hi!</p>'
115
- output = '<p contenteditable="false">Hi!</p>'
116
-
117
- check_sanitization(input, output, output, output)
118
- end
119
-
120
- ##
121
- ## libxml2 downcases attributes, so this is moot.
122
- ##
123
- # HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name|
124
- # define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
125
- # input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
126
- # output = "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
127
- # check_sanitization(input, output, output, output)
128
- # end
129
- # end
130
-
131
- HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
132
- define_method "test_should_allow_#{protocol}_uris" do
133
- input = %(<a href="#{protocol}">foo</a>)
134
- output = "<a href='#{protocol}'>foo</a>"
135
- check_sanitization(input, output, output, output)
136
- end
137
- end
138
-
139
- HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
140
- define_method "test_should_allow_uppercase_#{protocol}_uris" do
141
- input = %(<a href="#{protocol.upcase}">foo</a>)
142
- output = "<a href='#{protocol.upcase}'>foo</a>"
143
- check_sanitization(input, output, output, output)
144
- end
145
- end
146
-
147
- HTML5::SafeList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
148
- define_method "test_should_allow_data_#{data_uri_type}_uris" do
149
- input = %(<a href="data:#{data_uri_type}">foo</a>)
150
- output = "<a href='data:#{data_uri_type}'>foo</a>"
151
- check_sanitization(input, output, output, output)
152
-
153
- input = %(<a href="data:#{data_uri_type};base64,R0lGODlhAQABA">foo</a>)
154
- output = "<a href='data:#{data_uri_type};base64,R0lGODlhAQABA'>foo</a>"
155
- check_sanitization(input, output, output, output)
156
- end
157
- end
158
-
159
- HTML5::SafeList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
160
- define_method "test_should_allow_uppercase_data_#{data_uri_type}_uris" do
161
- input = %(<a href="DATA:#{data_uri_type.upcase}">foo</a>)
162
- output = "<a href='DATA:#{data_uri_type.upcase}'>foo</a>"
163
- check_sanitization(input, output, output, output)
164
- end
165
- end
166
-
167
- def test_should_disallow_other_uri_mediatypes
168
- input = %(<a href="data:foo">foo</a>)
169
- output = "<a>foo</a>"
170
- check_sanitization(input, output, output, output)
171
-
172
- input = %(<a href="data:image/xxx">foo</a>)
173
- output = "<a>foo</a>"
174
- check_sanitization(input, output, output, output)
175
-
176
- input = %(<a href="data:image/xxx;base64,R0lGODlhAQABA">foo</a>)
177
- output = "<a>foo</a>"
178
- check_sanitization(input, output, output, output)
179
- end
180
-
181
-
182
- HTML5::SafeList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
183
- next unless HTML5::SafeList::ALLOWED_ELEMENTS.include?(tag_name)
184
- define_method "test_#{tag_name}_should_allow_local_href" do
185
- input = %(<#{tag_name} xlink:href="#foo"/>)
186
- output = "<#{tag_name.downcase} xlink:href='#foo'></#{tag_name.downcase}>"
187
- xhtmloutput = "<#{tag_name} xlink:href='#foo'></#{tag_name}>"
188
- check_sanitization(input, output, xhtmloutput, xhtmloutput)
189
- end
190
-
191
- define_method "test_#{tag_name}_should_allow_local_href_with_newline" do
192
- input = %(<#{tag_name} xlink:href="\n#foo"/>)
193
- output = "<#{tag_name.downcase} xlink:href='\n#foo'></#{tag_name.downcase}>"
194
- xhtmloutput = "<#{tag_name} xlink:href='\n#foo'></#{tag_name}>"
195
- check_sanitization(input, output, xhtmloutput, xhtmloutput)
196
- end
197
-
198
- define_method "test_#{tag_name}_should_forbid_nonlocal_href" do
199
- input = %(<#{tag_name} xlink:href="http://bad.com/foo"/>)
200
- output = "<#{tag_name.downcase}></#{tag_name.downcase}>"
201
- xhtmloutput = "<#{tag_name}></#{tag_name}>"
202
- check_sanitization(input, output, xhtmloutput, xhtmloutput)
203
- end
204
-
205
- define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_newline" do
206
- input = %(<#{tag_name} xlink:href="\nhttp://bad.com/foo"/>)
207
- output = "<#{tag_name.downcase}></#{tag_name.downcase}>"
208
- xhtmloutput = "<#{tag_name}></#{tag_name}>"
209
- check_sanitization(input, output, xhtmloutput, xhtmloutput)
210
- end
211
- end
212
-
213
- def test_figure_element_is_valid
214
- fragment = Loofah.scrub_fragment("<span>hello</span> <figure>asd</figure>", :prune)
215
- assert fragment.at_css("figure"), "<figure> tag was scrubbed"
216
- end
217
-
218
- ##
219
- ## as tenderlove says, "care < 0"
220
- ##
221
- # def test_should_handle_astral_plane_characters
222
- # input = "<p>&#x1d4b5; &#x1d538;</p>"
223
- # output = "<p>\360\235\222\265 \360\235\224\270</p>"
224
- # check_sanitization(input, output, output, output)
225
-
226
- # input = "<p><tspan>\360\235\224\270</tspan> a</p>"
227
- # output = "<p><tspan>\360\235\224\270</tspan> a</p>"
228
- # check_sanitization(input, output, output, output)
229
- # end
230
-
231
- # This affects only NS4. Is it worth fixing?
232
- # def test_javascript_includes
233
- # input = %(<div size="&{alert('XSS')}">foo</div>)
234
- # output = "<div>foo</div>"
235
- # check_sanitization(input, output, output, output)
236
- # end
237
-
238
- ##
239
- ## these tests primarily test the parser logic, not the sanitizer
240
- ## logic. i call bullshit. we're not writing a test suite for
241
- ## libxml2 here, so let's rely on the unit tests above to take care
242
- ## of our valid elements and attributes.
243
- ##
244
- require 'json'
245
- Dir[File.join(File.dirname(__FILE__), '..', 'assets', 'testdata_sanitizer_tests1.dat')].each do |filename|
246
- JSON::parse(open(filename).read).each do |test|
247
- it "testdata sanitizer #{test['name']}" do
248
- check_sanitization(
249
- test['input'],
250
- test['output'],
251
- test['xhtml'] || test['output'],
252
- test['rexml'] || test['output']
253
- )
254
- end
255
- end
256
- end
257
-
258
- ## added because we don't have any coverage above on SVG_ATTR_VAL_ALLOWS_REF
259
- HTML5::SafeList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
260
- define_method "test_should_allow_uri_refs_in_svg_attribute_#{attr_name}" do
261
- input = "<rect fill='url(#foo)' />"
262
- output = "<rect fill='url(#foo)'></rect>"
263
- check_sanitization(input, output, output, output)
264
- end
265
-
266
- define_method "test_absolute_uri_refs_in_svg_attribute_#{attr_name}" do
267
- input = "<rect fill='url(http://bad.com/) #fff' />"
268
- output = "<rect fill=' #fff'></rect>"
269
- check_sanitization(input, output, output, output)
270
- end
271
- end
272
-
273
- def test_css_list_style
274
- html = '<ul style="list-style: none"></ul>'
275
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
276
- assert_match %r/list-style/, sane.inner_html
277
- end
278
-
279
- def test_css_negative_value_sanitization
280
- html = "<span style=\"letter-spacing:-0.03em;\">"
281
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
282
- assert_match %r/-0.03em/, sane.inner_html
283
- end
284
-
285
- def test_css_negative_value_sanitization_shorthand_css_properties
286
- html = "<span style=\"margin-left:-0.05em;\">"
287
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
288
- assert_match %r/-0.05em/, sane.inner_html
289
- end
290
-
291
- def test_css_high_precision_value_shorthand_css_properties
292
- html = "<span style=\"margin-left:0.3333333334em;\">"
293
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
294
- assert_match %r/0.3333333334em/, sane.inner_html
295
- end
296
-
297
- def test_css_function_sanitization_leaves_safelisted_functions_calc
298
- html = "<span style=\"width:calc(5%)\">"
299
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
300
- assert_match %r/calc\(5%\)/, sane.inner_html
301
-
302
- html = "<span style=\"width: calc(5%)\">"
303
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
304
- assert_match %r/calc\(5%\)/, sane.inner_html
305
- end
306
-
307
- def test_css_function_sanitization_leaves_safelisted_functions_rgb
308
- html = '<span style="color: rgb(255, 0, 0)">'
309
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
310
- assert_match %r/rgb\(255, 0, 0\)/, sane.inner_html
311
- end
312
-
313
- def test_css_function_sanitization_leaves_safelisted_list_style_type
314
- html = "<ol style='list-style-type:lower-greek;'></ol>"
315
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
316
- assert_match %r/list-style-type:lower-greek/, sane.inner_html
317
- end
318
-
319
- def test_css_function_sanitization_strips_style_attributes_with_unsafe_functions
320
- html = "<span style=\"width:url(data-evil-url)\">"
321
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
322
- assert_match %r/<span><\/span>/, sane.inner_html
323
-
324
- html = "<span style=\"width: url(data-evil-url)\">"
325
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
326
- assert_match %r/<span><\/span>/, sane.inner_html
327
- end
328
-
329
- def test_issue_90_slow_regex
330
- skip("timing tests are hard to make pass and have little regression-testing value")
331
-
332
- html = %q{<span style="background: url('data:image/svg&#43;xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20width%3D%2232%22%20height%3D%2232%22%20viewBox%3D%220%200%2032%2032%22%3E%3Cpath%20fill%3D%22%23D4C8AE%22%20d%3D%22M0%200h32v32h-32z%22%2F%3E%3Cpath%20fill%3D%22%2383604B%22%20d%3D%22M0%200h31.99v11.75h-31.99z%22%2F%3E%3Cpath%20fill%3D%22%233D2319%22%20d%3D%22M0%2011.5h32v.5h-32z%22%2F%3E%3Cpath%20fill%3D%22%23F83651%22%20d%3D%22M5%200h1v10.5h-1z%22%2F%3E%3Cpath%20fill%3D%22%23FCD050%22%20d%3D%22M6%200h1v10.5h-1z%22%2F%3E%3Cpath%20fill%3D%22%2371C797%22%20d%3D%22M7%200h1v10.5h-1z%22%2F%3E%3Cpath%20fill%3D%22%23509CF9%22%20d%3D%22M8%200h1v10.5h-1z%22%2F%3E%3ClinearGradient%20id%3D%22a%22%20gradientUnits%3D%22userSpaceOnUse%22%20x1%3D%2224.996%22%20y1%3D%2210.5%22%20x2%3D%2224.996%22%20y2%3D%224.5%22%3E%3Cstop%20offset%3D%220%22%20stop-color%3D%22%23796055%22%2F%3E%3Cstop%20offset%3D%22.434%22%20stop-color%3D%22%23614C43%22%2F%3E%3Cstop%20offset%3D%221%22%20stop-color%3D%22%233D2D28%22%2F%3E%3C%2FlinearGradient%3E%3Cpath%20fill%3D%22url(%23a)%22%20d%3D%22M28%208.5c0%201.1-.9%202-2%202h-2c-1.1%200-2-.9-2-2v-2c0-1.1.9-2%202-2h2c1.1%200%202%20.9%202%202v2z%22%2F%3E%3Cpath%20fill%3D%22%235F402E%22%20d%3D%22M28%208c0%201.1-.9%202-2%202h-2c-1.1%200-2-.9-2-2v-2c0-1.1.9-2%202-2h2c1.1%200%202%20.9%202%202v2z%22%2F%3E%3C');"></span>}
333
-
334
- assert_completes_in_reasonable_time {
335
- Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
336
- }
337
- end
338
-
339
- def test_upper_case_css_property
340
- html = "<div style=\"COLOR: BLUE; NOTAPROPERTY: RED;\">asdf</div>"
341
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)
342
- assert_match(/COLOR:\s*BLUE/i, sane.at_css("div")["style"])
343
- refute_match(/NOTAPROPERTY/i, sane.at_css("div")["style"])
344
- end
345
-
346
- def test_many_properties_some_allowed
347
- html = "<div style=\"background: bold notaproperty center alsonotaproperty 10px;\">asdf</div>"
348
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)
349
- assert_match(/bold\s+center\s+10px/, sane.at_css("div")["style"])
350
- end
351
-
352
- def test_many_properties_non_allowed
353
- html = "<div style=\"background: notaproperty alsonotaproperty;\">asdf</div>"
354
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)
355
- assert_nil sane.at_css("div")["style"]
356
- end
357
-
358
- def test_svg_properties
359
- html = "<line style='stroke-width: 10px;'></line>"
360
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)
361
- assert_match(/stroke-width:\s*10px/, sane.at_css("line")["style"])
362
- end
363
- end
364
-
365
- # <html5_license>
366
- #
367
- # Copyright (c) 2006-2008 The Authors
368
- #
369
- # Contributors:
370
- # James Graham - jg307@cam.ac.uk
371
- # Anne van Kesteren - annevankesteren@gmail.com
372
- # Lachlan Hunt - lachlan.hunt@lachy.id.au
373
- # Matt McDonald - kanashii@kanashii.ca
374
- # Sam Ruby - rubys@intertwingly.net
375
- # Ian Hickson (Google) - ian@hixie.ch
376
- # Thomas Broyer - t.broyer@ltgt.net
377
- # Jacques Distler - distler@golem.ph.utexas.edu
378
- # Henri Sivonen - hsivonen@iki.fi
379
- # The Mozilla Foundation (contributions from Henri Sivonen since 2008)
380
- #
381
- # Permission is hereby granted, free of charge, to any person
382
- # obtaining a copy of this software and associated documentation files
383
- # (the "Software"), to deal in the Software without restriction,
384
- # including without limitation the rights to use, copy, modify, merge,
385
- # publish, distribute, sublicense, and/or sell copies of the Software,
386
- # and to permit persons to whom the Software is furnished to do so,
387
- # subject to the following conditions:
388
- #
389
- # The above copyright notice and this permission notice shall be
390
- # included in all copies or substantial portions of the Software.
391
- #
392
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
393
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
394
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
395
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
396
- # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
397
- # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
398
- # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
399
- # SOFTWARE.
400
- #
401
- # </html5_license>
@@ -1,10 +0,0 @@
1
- require "helper"
2
-
3
- class UnitHTML5Scrub < Loofah::TestCase
4
- include Loofah
5
-
6
- def test_scrub_css
7
- assert_equal Loofah::HTML5::Scrub.scrub_css("background: #ABC012"), "background:#ABC012;"
8
- assert_equal Loofah::HTML5::Scrub.scrub_css("background: #abc012"), "background:#abc012;"
9
- end
10
- end
@@ -1,204 +0,0 @@
1
- require "helper"
2
-
3
- class IntegrationTestAdHoc < Loofah::TestCase
4
-
5
- context "blank input string" do
6
- context "fragment" do
7
- it "return a blank string" do
8
- assert_equal "", Loofah.scrub_fragment("", :prune).to_s
9
- end
10
- end
11
-
12
- context "document" do
13
- it "return a blank string" do
14
- assert_equal "", Loofah.scrub_document("", :prune).root.to_s
15
- end
16
- end
17
- end
18
-
19
- context "tests" do
20
- MSWORD_HTML = File.read(File.join(File.dirname(__FILE__), "..", "assets", "msword.html")).freeze
21
-
22
- def test_removal_of_illegal_tag
23
- html = <<-HTML
24
- following this there should be no jim tag
25
- <jim>jim</jim>
26
- was there?
27
- HTML
28
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
29
- assert sane.xpath("//jim").empty?
30
- end
31
-
32
- def test_removal_of_illegal_attribute
33
- html = "<p class=bar foo=bar abbr=bar />"
34
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
35
- node = sane.xpath("//p").first
36
- assert node.attributes['class']
37
- assert node.attributes['abbr']
38
- assert_nil node.attributes['foo']
39
- end
40
-
41
- def test_removal_of_illegal_url_in_href
42
- html = <<-HTML
43
- <a href='jimbo://jim.jim/'>this link should have its href removed because of illegal url</a>
44
- <a href='http://jim.jim/'>this link should be fine</a>
45
- HTML
46
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
47
- nodes = sane.xpath("//a")
48
- assert_nil nodes.first.attributes['href']
49
- assert nodes.last.attributes['href']
50
- end
51
-
52
- def test_css_sanitization
53
- html = "<p style='background-color: url(\"http://foo.com/\") ; background-color: #000 ;' />"
54
- sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
55
- assert_match %r/#000/, sane.inner_html
56
- refute_match %r/foo\.com/, sane.inner_html
57
- end
58
-
59
- def test_fragment_with_no_tags
60
- assert_equal "This fragment has no tags.", Loofah.scrub_fragment("This fragment has no tags.", :escape).to_xml
61
- end
62
-
63
- def test_fragment_in_p_tag
64
- assert_equal "<p>This fragment is in a p.</p>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>", :escape).to_xml
65
- end
66
-
67
- def test_fragment_in_p_tag_plus_stuff
68
- assert_equal "<p>This fragment is in a p.</p>foo<strong>bar</strong>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>foo<strong>bar</strong>", :escape).to_xml
69
- end
70
-
71
- def test_fragment_with_text_nodes_leading_and_trailing
72
- assert_equal "text<p>fragment</p>text", Loofah.scrub_fragment("text<p>fragment</p>text", :escape).to_xml
73
- end
74
-
75
- def test_whitewash_on_fragment
76
- html = "safe<frameset rows=\"*\"><frame src=\"http://example.com\"></frameset> <b>description</b>"
77
- whitewashed = Loofah.scrub_document(html, :whitewash).xpath("/html/body/*").to_s
78
- assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n","")
79
- end
80
-
81
- def test_fragment_whitewash_on_microsofty_markup
82
- whitewashed = Loofah.fragment(MSWORD_HTML).scrub!(:whitewash)
83
- assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.to_s.strip
84
- end
85
-
86
- def test_document_whitewash_on_microsofty_markup
87
- whitewashed = Loofah.document(MSWORD_HTML).scrub!(:whitewash)
88
- assert_match %r(<p>Foo <b>BOLD</b></p>), whitewashed.to_s
89
- assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.xpath("/html/body/*").to_s
90
- end
91
-
92
- def test_return_empty_string_when_nothing_left
93
- assert_equal "", Loofah.scrub_document('<script>test</script>', :prune).text
94
- end
95
-
96
- def test_nested_script_cdata_tags_should_be_scrubbed
97
- html = "<script><script src='malicious.js'></script>"
98
- stripped = Loofah.fragment(html).scrub!(:strip)
99
- assert_empty stripped.xpath("//script")
100
- refute_match("<script", stripped.to_html)
101
- end
102
-
103
- def test_nested_script_cdata_tags_should_be_scrubbed_2
104
- html = "<script><script>alert('a');</script></script>"
105
- stripped = Loofah.fragment(html).scrub!(:strip)
106
- assert_empty stripped.xpath("//script")
107
- refute_match("<script", stripped.to_html)
108
- end
109
-
110
- def test_removal_of_all_tags
111
- html = <<-HTML
112
- What's up <strong>doc</strong>?
113
- HTML
114
- stripped = Loofah.scrub_document(html, :prune).text
115
- assert_equal %Q(What\'s up doc?).strip, stripped.strip
116
- end
117
-
118
- def test_dont_remove_whitespace
119
- html = "Foo\nBar"
120
- assert_equal html, Loofah.scrub_document(html, :prune).text
121
- end
122
-
123
- def test_dont_remove_whitespace_between_tags
124
- html = "<p>Foo</p>\n<p>Bar</p>"
125
- assert_equal "Foo\nBar", Loofah.scrub_document(html, :prune).text
126
- end
127
-
128
- #
129
- # tests for CVE-2018-8048 (see https://github.com/flavorjones/loofah/issues/144)
130
- #
131
- # libxml2 >= 2.9.2 fails to escape comments within some attributes. It
132
- # wants to ensure these comments can be treated as "server-side includes",
133
- # but as a result fails to ensure that serialization is well-formed,
134
- # resulting in an opportunity for XSS injection of code into a final
135
- # re-parsed document (presumably in a browser).
136
- #
137
- # we'll test this by parsing the HTML, serializing it, then
138
- # re-parsing it to ensure there isn't any ambiguity in the output
139
- # that might allow code injection into a browser consuming
140
- # "sanitized" output.
141
- #
142
- [
143
- #
144
- # these tags and attributes are determined by the code at:
145
- #
146
- # https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
147
- #
148
- {tag: "a", attr: "href"},
149
- {tag: "div", attr: "href"},
150
- {tag: "a", attr: "action"},
151
- {tag: "div", attr: "action"},
152
- {tag: "a", attr: "src"},
153
- {tag: "div", attr: "src"},
154
- {tag: "a", attr: "name"},
155
- #
156
- # note that div+name is _not_ affected by the libxml2 issue.
157
- # but we test it anyway to ensure our logic isn't modifying
158
- # attributes that don't need modifying.
159
- #
160
- {tag: "div", attr: "name", unescaped: true},
161
- ].each do |config|
162
-
163
- define_method "test_uri_escaping_of_#{config[:attr]}_attr_in_#{config[:tag]}_tag" do
164
- html = %{<#{config[:tag]} #{config[:attr]}='examp<!--" unsafeattr=foo()>-->le.com'>test</#{config[:tag]}>}
165
-
166
- reparsed = Loofah.fragment(Loofah.fragment(html).scrub!(:prune).to_html)
167
- attributes = reparsed.at_css(config[:tag]).attribute_nodes
168
-
169
- assert_equal [config[:attr]], attributes.collect(&:name)
170
- if Nokogiri::VersionInfo.instance.libxml2?
171
- if config[:unescaped]
172
- #
173
- # this attribute was emitted wrapped in single-quotes, so a double quote is A-OK.
174
- # assert that this attribute's serialization is unaffected.
175
- #
176
- assert_equal %{examp<!--" unsafeattr=foo()>-->le.com}, attributes.first.value
177
- else
178
- #
179
- # let's match the behavior in libxml < 2.9.2.
180
- # test that this attribute's serialization is well-formed and sanitized.
181
- #
182
- assert_equal %{examp<!--%22%20unsafeattr=foo()>-->le.com}, attributes.first.value
183
- end
184
- else
185
- #
186
- # yay for consistency in javaland. move along, nothing to see here.
187
- #
188
- assert_equal %{examp<!--%22 unsafeattr=foo()>-->le.com}, attributes.first.value
189
- end
190
- end
191
- end
192
-
193
- # see:
194
- # - https://github.com/flavorjones/loofah/issues/154
195
- # - https://hackerone.com/reports/429267
196
- context "xss protection from svg xmlns:xlink animate attribute" do
197
- it "sanitizes appropriate attributes" do
198
- html = %Q{<svg><a xmlns:xlink=http://www.w3.org/1999/xlink xlink:href=?><circle r=400 /><animate attributeName=xlink:href begin=0 from=javascript:alert(1) to=%26>}
199
- sanitized = Loofah.scrub_fragment(html, :escape)
200
- assert_nil sanitized.at_css("animate")["from"]
201
- end
202
- end
203
- end
204
- end