loofah 2.2.3 → 2.21.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +269 -31
- data/README.md +109 -124
- data/lib/loofah/concerns.rb +207 -0
- data/lib/loofah/elements.rb +85 -79
- data/lib/loofah/helpers.rb +37 -20
- data/lib/loofah/{html → html4}/document.rb +6 -7
- data/lib/loofah/html4/document_fragment.rb +15 -0
- data/lib/loofah/html5/document.rb +17 -0
- data/lib/loofah/html5/document_fragment.rb +15 -0
- data/lib/loofah/html5/libxml2_workarounds.rb +10 -8
- data/lib/loofah/html5/safelist.rb +1055 -0
- data/lib/loofah/html5/scrub.rb +153 -58
- data/lib/loofah/metahelpers.rb +11 -6
- data/lib/loofah/scrubber.rb +22 -15
- data/lib/loofah/scrubbers.rb +66 -55
- data/lib/loofah/version.rb +6 -0
- data/lib/loofah/xml/document.rb +2 -0
- data/lib/loofah/xml/document_fragment.rb +4 -7
- data/lib/loofah.rb +131 -38
- metadata +28 -216
- data/.gemtest +0 -0
- data/Gemfile +0 -22
- data/Manifest.txt +0 -40
- data/Rakefile +0 -79
- data/benchmark/benchmark.rb +0 -149
- data/benchmark/fragment.html +0 -96
- data/benchmark/helper.rb +0 -73
- data/benchmark/www.slashdot.com.html +0 -2560
- data/lib/loofah/html/document_fragment.rb +0 -40
- data/lib/loofah/html5/whitelist.rb +0 -186
- data/lib/loofah/instance_methods.rb +0 -127
- data/test/assets/msword.html +0 -63
- data/test/assets/testdata_sanitizer_tests1.dat +0 -502
- data/test/helper.rb +0 -18
- data/test/html5/test_sanitizer.rb +0 -382
- data/test/integration/test_ad_hoc.rb +0 -204
- data/test/integration/test_helpers.rb +0 -43
- data/test/integration/test_html.rb +0 -72
- data/test/integration/test_scrubbers.rb +0 -400
- data/test/integration/test_xml.rb +0 -55
- data/test/unit/test_api.rb +0 -142
- data/test/unit/test_encoding.rb +0 -20
- data/test/unit/test_helpers.rb +0 -62
- data/test/unit/test_scrubber.rb +0 -229
- data/test/unit/test_scrubbers.rb +0 -14
@@ -1,382 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# these tests taken from the HTML5 sanitization project and modified for use with Loofah
|
3
|
-
# see the original here: http://code.google.com/p/html5lib/source/browse/ruby/test/test_sanitizer.rb
|
4
|
-
#
|
5
|
-
# license text at the bottom of this file
|
6
|
-
#
|
7
|
-
require "helper"
|
8
|
-
|
9
|
-
class Html5TestSanitizer < Loofah::TestCase
|
10
|
-
include Loofah
|
11
|
-
|
12
|
-
def sanitize_xhtml stream
|
13
|
-
Loofah.fragment(stream).scrub!(:escape).to_xhtml
|
14
|
-
end
|
15
|
-
|
16
|
-
def sanitize_html stream
|
17
|
-
Loofah.fragment(stream).scrub!(:escape).to_html
|
18
|
-
end
|
19
|
-
|
20
|
-
def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
|
21
|
-
## libxml uses double-quotes, so let's swappo-boppo our quotes before comparing.
|
22
|
-
sane = sanitize_html(input).gsub('"',"'")
|
23
|
-
htmloutput = htmloutput.gsub('"',"'")
|
24
|
-
xhtmloutput = xhtmloutput.gsub('"',"'")
|
25
|
-
rexmloutput = rexmloutput.gsub('"',"'")
|
26
|
-
|
27
|
-
## HTML5's parsers are shit. there's so much inconsistency with what has closing tags, etc, that
|
28
|
-
## it would require a lot of manual hacking to make the tests match libxml's output.
|
29
|
-
## instead, I'm taking the shotgun approach, and trying to match any of the described outputs.
|
30
|
-
assert((htmloutput == sane) || (rexmloutput == sane) || (xhtmloutput == sane),
|
31
|
-
%Q{given: "#{input}"\nexpected: "#{htmloutput}"\ngot: "#{sane}"})
|
32
|
-
end
|
33
|
-
|
34
|
-
def assert_completes_in_reasonable_time &block
|
35
|
-
t0 = Time.now
|
36
|
-
block.call
|
37
|
-
assert_in_delta t0, Time.now, 0.1 # arbitrary seconds
|
38
|
-
end
|
39
|
-
|
40
|
-
(HTML5::WhiteList::ALLOWED_ELEMENTS).each do |tag_name|
|
41
|
-
define_method "test_should_allow_#{tag_name}_tag" do
|
42
|
-
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
|
43
|
-
htmloutput = "<#{tag_name.downcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.downcase}>"
|
44
|
-
xhtmloutput = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
|
45
|
-
rexmloutput = xhtmloutput
|
46
|
-
|
47
|
-
if %w[caption colgroup optgroup option tbody td tfoot th thead tr].include?(tag_name)
|
48
|
-
htmloutput = "foo <bad>bar</bad> baz"
|
49
|
-
xhtmloutput = htmloutput
|
50
|
-
elsif tag_name == 'col'
|
51
|
-
htmloutput = "<col title='1'>foo <bad>bar</bad> baz"
|
52
|
-
xhtmloutput = htmloutput
|
53
|
-
rexmloutput = "<col title='1' />"
|
54
|
-
elsif tag_name == 'table'
|
55
|
-
htmloutput = "foo <bad>bar</bad>baz<table title='1'> </table>"
|
56
|
-
xhtmloutput = htmloutput
|
57
|
-
elsif tag_name == 'image'
|
58
|
-
htmloutput = "<img title='1'/>foo <bad>bar</bad> baz"
|
59
|
-
xhtmloutput = htmloutput
|
60
|
-
rexmloutput = "<image title='1'>foo <bad>bar</bad> baz</image>"
|
61
|
-
elsif HTML5::WhiteList::VOID_ELEMENTS.include?(tag_name)
|
62
|
-
htmloutput = "<#{tag_name} title='1'>foo <bad>bar</bad> baz"
|
63
|
-
xhtmloutput = htmloutput
|
64
|
-
htmloutput += '<br/>' if tag_name == 'br'
|
65
|
-
rexmloutput = "<#{tag_name} title='1' />"
|
66
|
-
end
|
67
|
-
check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
##
|
72
|
-
## libxml2 downcases elements, so this is moot.
|
73
|
-
##
|
74
|
-
# HTML5::WhiteList::ALLOWED_ELEMENTS.each do |tag_name|
|
75
|
-
# define_method "test_should_forbid_#{tag_name.upcase}_tag" do
|
76
|
-
# input = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
|
77
|
-
# output = "<#{tag_name.upcase} title=\"1\">foo <bad>bar</bad> baz</#{tag_name.upcase}>"
|
78
|
-
# check_sanitization(input, output, output, output)
|
79
|
-
# end
|
80
|
-
# end
|
81
|
-
|
82
|
-
HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
|
83
|
-
next if attribute_name == 'style'
|
84
|
-
define_method "test_should_allow_#{attribute_name}_attribute" do
|
85
|
-
input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
|
86
|
-
if %w[checked compact disabled ismap multiple nohref noshade nowrap readonly selected].include?(attribute_name)
|
87
|
-
output = "<p #{attribute_name}>foo <bad>bar</bad> baz</p>"
|
88
|
-
htmloutput = "<p #{attribute_name.downcase}>foo <bad>bar</bad> baz</p>"
|
89
|
-
else
|
90
|
-
output = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
|
91
|
-
htmloutput = "<p #{attribute_name.downcase}='foo'>foo <bad>bar</bad> baz</p>"
|
92
|
-
end
|
93
|
-
check_sanitization(input, htmloutput, output, output)
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
def test_should_allow_data_attributes
|
98
|
-
input = "<p data-foo='foo'>foo <bad>bar</bad> baz</p>"
|
99
|
-
|
100
|
-
output = "<p data-foo='foo'>foo <bad>bar</bad> baz</p>"
|
101
|
-
htmloutput = "<p data-foo='foo'>foo <bad>bar</bad> baz</p>"
|
102
|
-
|
103
|
-
check_sanitization(input, htmloutput, output, output)
|
104
|
-
end
|
105
|
-
|
106
|
-
def test_should_allow_multi_word_data_attributes
|
107
|
-
input = "<p data-foo-bar-id='11'>foo <bad>bar</bad> baz</p>"
|
108
|
-
output = htmloutput = "<p data-foo-bar-id='11'>foo <bad>bar</bad> baz</p>"
|
109
|
-
|
110
|
-
check_sanitization(input, htmloutput, output, output)
|
111
|
-
end
|
112
|
-
|
113
|
-
##
|
114
|
-
## libxml2 downcases attributes, so this is moot.
|
115
|
-
##
|
116
|
-
# HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
|
117
|
-
# define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
|
118
|
-
# input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
|
119
|
-
# output = "<p>foo <bad>bar</bad> baz</p>"
|
120
|
-
# check_sanitization(input, output, output, output)
|
121
|
-
# end
|
122
|
-
# end
|
123
|
-
|
124
|
-
HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
|
125
|
-
define_method "test_should_allow_#{protocol}_uris" do
|
126
|
-
input = %(<a href="#{protocol}">foo</a>)
|
127
|
-
output = "<a href='#{protocol}'>foo</a>"
|
128
|
-
check_sanitization(input, output, output, output)
|
129
|
-
end
|
130
|
-
end
|
131
|
-
|
132
|
-
HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
|
133
|
-
define_method "test_should_allow_uppercase_#{protocol}_uris" do
|
134
|
-
input = %(<a href="#{protocol.upcase}">foo</a>)
|
135
|
-
output = "<a href='#{protocol.upcase}'>foo</a>"
|
136
|
-
check_sanitization(input, output, output, output)
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
HTML5::WhiteList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
|
141
|
-
define_method "test_should_allow_data_#{data_uri_type}_uris" do
|
142
|
-
input = %(<a href="data:#{data_uri_type}">foo</a>)
|
143
|
-
output = "<a href='data:#{data_uri_type}'>foo</a>"
|
144
|
-
check_sanitization(input, output, output, output)
|
145
|
-
|
146
|
-
input = %(<a href="data:#{data_uri_type};base64,R0lGODlhAQABA">foo</a>)
|
147
|
-
output = "<a href='data:#{data_uri_type};base64,R0lGODlhAQABA'>foo</a>"
|
148
|
-
check_sanitization(input, output, output, output)
|
149
|
-
end
|
150
|
-
end
|
151
|
-
|
152
|
-
HTML5::WhiteList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
|
153
|
-
define_method "test_should_allow_uppercase_data_#{data_uri_type}_uris" do
|
154
|
-
input = %(<a href="DATA:#{data_uri_type.upcase}">foo</a>)
|
155
|
-
output = "<a href='DATA:#{data_uri_type.upcase}'>foo</a>"
|
156
|
-
check_sanitization(input, output, output, output)
|
157
|
-
end
|
158
|
-
end
|
159
|
-
|
160
|
-
def test_should_disallow_other_uri_mediatypes
|
161
|
-
input = %(<a href="data:foo">foo</a>)
|
162
|
-
output = "<a>foo</a>"
|
163
|
-
check_sanitization(input, output, output, output)
|
164
|
-
|
165
|
-
input = %(<a href="data:image/xxx">foo</a>)
|
166
|
-
output = "<a>foo</a>"
|
167
|
-
check_sanitization(input, output, output, output)
|
168
|
-
|
169
|
-
input = %(<a href="data:image/xxx;base64,R0lGODlhAQABA">foo</a>)
|
170
|
-
output = "<a>foo</a>"
|
171
|
-
check_sanitization(input, output, output, output)
|
172
|
-
end
|
173
|
-
|
174
|
-
|
175
|
-
HTML5::WhiteList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
|
176
|
-
next unless HTML5::WhiteList::ALLOWED_ELEMENTS.include?(tag_name)
|
177
|
-
define_method "test_#{tag_name}_should_allow_local_href" do
|
178
|
-
input = %(<#{tag_name} xlink:href="#foo"/>)
|
179
|
-
output = "<#{tag_name.downcase} xlink:href='#foo'></#{tag_name.downcase}>"
|
180
|
-
xhtmloutput = "<#{tag_name} xlink:href='#foo'></#{tag_name}>"
|
181
|
-
check_sanitization(input, output, xhtmloutput, xhtmloutput)
|
182
|
-
end
|
183
|
-
|
184
|
-
define_method "test_#{tag_name}_should_allow_local_href_with_newline" do
|
185
|
-
input = %(<#{tag_name} xlink:href="\n#foo"/>)
|
186
|
-
output = "<#{tag_name.downcase} xlink:href='\n#foo'></#{tag_name.downcase}>"
|
187
|
-
xhtmloutput = "<#{tag_name} xlink:href='\n#foo'></#{tag_name}>"
|
188
|
-
check_sanitization(input, output, xhtmloutput, xhtmloutput)
|
189
|
-
end
|
190
|
-
|
191
|
-
define_method "test_#{tag_name}_should_forbid_nonlocal_href" do
|
192
|
-
input = %(<#{tag_name} xlink:href="http://bad.com/foo"/>)
|
193
|
-
output = "<#{tag_name.downcase}></#{tag_name.downcase}>"
|
194
|
-
xhtmloutput = "<#{tag_name}></#{tag_name}>"
|
195
|
-
check_sanitization(input, output, xhtmloutput, xhtmloutput)
|
196
|
-
end
|
197
|
-
|
198
|
-
define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_newline" do
|
199
|
-
input = %(<#{tag_name} xlink:href="\nhttp://bad.com/foo"/>)
|
200
|
-
output = "<#{tag_name.downcase}></#{tag_name.downcase}>"
|
201
|
-
xhtmloutput = "<#{tag_name}></#{tag_name}>"
|
202
|
-
check_sanitization(input, output, xhtmloutput, xhtmloutput)
|
203
|
-
end
|
204
|
-
end
|
205
|
-
|
206
|
-
def test_figure_element_is_valid
|
207
|
-
fragment = Loofah.scrub_fragment("<span>hello</span> <figure>asd</figure>", :prune)
|
208
|
-
assert fragment.at_css("figure"), "<figure> tag was scrubbed"
|
209
|
-
end
|
210
|
-
|
211
|
-
##
|
212
|
-
## as tenderlove says, "care < 0"
|
213
|
-
##
|
214
|
-
# def test_should_handle_astral_plane_characters
|
215
|
-
# input = "<p>𝒵 𝔸</p>"
|
216
|
-
# output = "<p>\360\235\222\265 \360\235\224\270</p>"
|
217
|
-
# check_sanitization(input, output, output, output)
|
218
|
-
|
219
|
-
# input = "<p><tspan>\360\235\224\270</tspan> a</p>"
|
220
|
-
# output = "<p><tspan>\360\235\224\270</tspan> a</p>"
|
221
|
-
# check_sanitization(input, output, output, output)
|
222
|
-
# end
|
223
|
-
|
224
|
-
# This affects only NS4. Is it worth fixing?
|
225
|
-
# def test_javascript_includes
|
226
|
-
# input = %(<div size="&{alert('XSS')}">foo</div>)
|
227
|
-
# output = "<div>foo</div>"
|
228
|
-
# check_sanitization(input, output, output, output)
|
229
|
-
# end
|
230
|
-
|
231
|
-
##
|
232
|
-
## these tests primarily test the parser logic, not the sanitizer
|
233
|
-
## logic. i call bullshit. we're not writing a test suite for
|
234
|
-
## libxml2 here, so let's rely on the unit tests above to take care
|
235
|
-
## of our valid elements and attributes.
|
236
|
-
##
|
237
|
-
require 'json'
|
238
|
-
Dir[File.join(File.dirname(__FILE__), '..', 'assets', 'testdata_sanitizer_tests1.dat')].each do |filename|
|
239
|
-
JSON::parse(open(filename).read).each do |test|
|
240
|
-
it "testdata sanitizer #{test['name']}" do
|
241
|
-
check_sanitization(
|
242
|
-
test['input'],
|
243
|
-
test['output'],
|
244
|
-
test['xhtml'] || test['output'],
|
245
|
-
test['rexml'] || test['output']
|
246
|
-
)
|
247
|
-
end
|
248
|
-
end
|
249
|
-
end
|
250
|
-
|
251
|
-
## added because we don't have any coverage above on SVG_ATTR_VAL_ALLOWS_REF
|
252
|
-
HTML5::WhiteList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
|
253
|
-
define_method "test_should_allow_uri_refs_in_svg_attribute_#{attr_name}" do
|
254
|
-
input = "<rect fill='url(#foo)' />"
|
255
|
-
output = "<rect fill='url(#foo)'></rect>"
|
256
|
-
check_sanitization(input, output, output, output)
|
257
|
-
end
|
258
|
-
|
259
|
-
define_method "test_absolute_uri_refs_in_svg_attribute_#{attr_name}" do
|
260
|
-
input = "<rect fill='url(http://bad.com/) #fff' />"
|
261
|
-
output = "<rect fill=' #fff'></rect>"
|
262
|
-
check_sanitization(input, output, output, output)
|
263
|
-
end
|
264
|
-
end
|
265
|
-
|
266
|
-
def test_css_negative_value_sanitization
|
267
|
-
html = "<span style=\"letter-spacing:-0.03em;\">"
|
268
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
269
|
-
assert_match %r/-0.03em/, sane.inner_html
|
270
|
-
end
|
271
|
-
|
272
|
-
def test_css_negative_value_sanitization_shorthand_css_properties
|
273
|
-
html = "<span style=\"margin-left:-0.05em;\">"
|
274
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
275
|
-
assert_match %r/-0.05em/, sane.inner_html
|
276
|
-
end
|
277
|
-
|
278
|
-
def test_css_function_sanitization_leaves_whitelisted_functions_calc
|
279
|
-
html = "<span style=\"width:calc(5%)\">"
|
280
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
281
|
-
assert_match %r/calc\(5%\)/, sane.inner_html
|
282
|
-
|
283
|
-
html = "<span style=\"width: calc(5%)\">"
|
284
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
285
|
-
assert_match %r/calc\(5%\)/, sane.inner_html
|
286
|
-
end
|
287
|
-
|
288
|
-
def test_css_function_sanitization_leaves_whitelisted_functions_rgb
|
289
|
-
html = '<span style="color: rgb(255, 0, 0)">'
|
290
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
291
|
-
assert_match %r/rgb\(255, 0, 0\)/, sane.inner_html
|
292
|
-
end
|
293
|
-
|
294
|
-
def test_css_function_sanitization_leaves_whitelisted_list_style_type
|
295
|
-
html = "<ol style='list-style-type:lower-greek;'></ol>"
|
296
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
297
|
-
assert_match %r/list-style-type:lower-greek/, sane.inner_html
|
298
|
-
end
|
299
|
-
|
300
|
-
def test_css_function_sanitization_strips_style_attributes_with_unsafe_functions
|
301
|
-
html = "<span style=\"width:attr(data-evil-attr)\">"
|
302
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
303
|
-
assert_match %r/<span><\/span>/, sane.inner_html
|
304
|
-
|
305
|
-
html = "<span style=\"width: attr(data-evil-attr)\">"
|
306
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
307
|
-
assert_match %r/<span><\/span>/, sane.inner_html
|
308
|
-
end
|
309
|
-
|
310
|
-
def test_issue_90_slow_regex
|
311
|
-
skip("timing tests are hard to make pass and have little regression-testing value")
|
312
|
-
|
313
|
-
html = %q{<span style="background: url('data:image/svg+xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20width%3D%2232%22%20height%3D%2232%22%20viewBox%3D%220%200%2032%2032%22%3E%3Cpath%20fill%3D%22%23D4C8AE%22%20d%3D%22M0%200h32v32h-32z%22%2F%3E%3Cpath%20fill%3D%22%2383604B%22%20d%3D%22M0%200h31.99v11.75h-31.99z%22%2F%3E%3Cpath%20fill%3D%22%233D2319%22%20d%3D%22M0%2011.5h32v.5h-32z%22%2F%3E%3Cpath%20fill%3D%22%23F83651%22%20d%3D%22M5%200h1v10.5h-1z%22%2F%3E%3Cpath%20fill%3D%22%23FCD050%22%20d%3D%22M6%200h1v10.5h-1z%22%2F%3E%3Cpath%20fill%3D%22%2371C797%22%20d%3D%22M7%200h1v10.5h-1z%22%2F%3E%3Cpath%20fill%3D%22%23509CF9%22%20d%3D%22M8%200h1v10.5h-1z%22%2F%3E%3ClinearGradient%20id%3D%22a%22%20gradientUnits%3D%22userSpaceOnUse%22%20x1%3D%2224.996%22%20y1%3D%2210.5%22%20x2%3D%2224.996%22%20y2%3D%224.5%22%3E%3Cstop%20offset%3D%220%22%20stop-color%3D%22%23796055%22%2F%3E%3Cstop%20offset%3D%22.434%22%20stop-color%3D%22%23614C43%22%2F%3E%3Cstop%20offset%3D%221%22%20stop-color%3D%22%233D2D28%22%2F%3E%3C%2FlinearGradient%3E%3Cpath%20fill%3D%22url(%23a)%22%20d%3D%22M28%208.5c0%201.1-.9%202-2%202h-2c-1.1%200-2-.9-2-2v-2c0-1.1.9-2%202-2h2c1.1%200%202%20.9%202%202v2z%22%2F%3E%3Cpath%20fill%3D%22%235F402E%22%20d%3D%22M28%208c0%201.1-.9%202-2%202h-2c-1.1%200-2-.9-2-2v-2c0-1.1.9-2%202-2h2c1.1%200%202%20.9%202%202v2z%22%2F%3E%3C');"></span>}
|
314
|
-
|
315
|
-
assert_completes_in_reasonable_time {
|
316
|
-
Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
317
|
-
}
|
318
|
-
end
|
319
|
-
|
320
|
-
def test_upper_case_css_property
|
321
|
-
html = "<div style=\"COLOR: BLUE; NOTAPROPERTY: RED;\">asdf</div>"
|
322
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)
|
323
|
-
assert_match(/COLOR:\s*BLUE/i, sane.at_css("div")["style"])
|
324
|
-
refute_match(/NOTAPROPERTY/i, sane.at_css("div")["style"])
|
325
|
-
end
|
326
|
-
|
327
|
-
def test_many_properties_some_allowed
|
328
|
-
html = "<div style=\"background: bold notaproperty center alsonotaproperty 10px;\">asdf</div>"
|
329
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)
|
330
|
-
assert_match(/bold\s+center\s+10px/, sane.at_css("div")["style"])
|
331
|
-
end
|
332
|
-
|
333
|
-
def test_many_properties_non_allowed
|
334
|
-
html = "<div style=\"background: notaproperty alsonotaproperty;\">asdf</div>"
|
335
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)
|
336
|
-
assert_nil sane.at_css("div")["style"]
|
337
|
-
end
|
338
|
-
|
339
|
-
def test_svg_properties
|
340
|
-
html = "<line style='stroke-width: 10px;'></line>"
|
341
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)
|
342
|
-
assert_match(/stroke-width:\s*10px/, sane.at_css("line")["style"])
|
343
|
-
end
|
344
|
-
end
|
345
|
-
|
346
|
-
# <html5_license>
|
347
|
-
#
|
348
|
-
# Copyright (c) 2006-2008 The Authors
|
349
|
-
#
|
350
|
-
# Contributors:
|
351
|
-
# James Graham - jg307@cam.ac.uk
|
352
|
-
# Anne van Kesteren - annevankesteren@gmail.com
|
353
|
-
# Lachlan Hunt - lachlan.hunt@lachy.id.au
|
354
|
-
# Matt McDonald - kanashii@kanashii.ca
|
355
|
-
# Sam Ruby - rubys@intertwingly.net
|
356
|
-
# Ian Hickson (Google) - ian@hixie.ch
|
357
|
-
# Thomas Broyer - t.broyer@ltgt.net
|
358
|
-
# Jacques Distler - distler@golem.ph.utexas.edu
|
359
|
-
# Henri Sivonen - hsivonen@iki.fi
|
360
|
-
# The Mozilla Foundation (contributions from Henri Sivonen since 2008)
|
361
|
-
#
|
362
|
-
# Permission is hereby granted, free of charge, to any person
|
363
|
-
# obtaining a copy of this software and associated documentation files
|
364
|
-
# (the "Software"), to deal in the Software without restriction,
|
365
|
-
# including without limitation the rights to use, copy, modify, merge,
|
366
|
-
# publish, distribute, sublicense, and/or sell copies of the Software,
|
367
|
-
# and to permit persons to whom the Software is furnished to do so,
|
368
|
-
# subject to the following conditions:
|
369
|
-
#
|
370
|
-
# The above copyright notice and this permission notice shall be
|
371
|
-
# included in all copies or substantial portions of the Software.
|
372
|
-
#
|
373
|
-
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
374
|
-
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
375
|
-
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
376
|
-
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
377
|
-
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
378
|
-
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
379
|
-
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
380
|
-
# SOFTWARE.
|
381
|
-
#
|
382
|
-
# </html5_license>
|
@@ -1,204 +0,0 @@
|
|
1
|
-
require "helper"
|
2
|
-
|
3
|
-
class IntegrationTestAdHoc < Loofah::TestCase
|
4
|
-
|
5
|
-
context "blank input string" do
|
6
|
-
context "fragment" do
|
7
|
-
it "return a blank string" do
|
8
|
-
assert_equal "", Loofah.scrub_fragment("", :prune).to_s
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
context "document" do
|
13
|
-
it "return a blank string" do
|
14
|
-
assert_equal "", Loofah.scrub_document("", :prune).root.to_s
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
context "tests" do
|
20
|
-
MSWORD_HTML = File.read(File.join(File.dirname(__FILE__), "..", "assets", "msword.html")).freeze
|
21
|
-
|
22
|
-
def test_removal_of_illegal_tag
|
23
|
-
html = <<-HTML
|
24
|
-
following this there should be no jim tag
|
25
|
-
<jim>jim</jim>
|
26
|
-
was there?
|
27
|
-
HTML
|
28
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
29
|
-
assert sane.xpath("//jim").empty?
|
30
|
-
end
|
31
|
-
|
32
|
-
def test_removal_of_illegal_attribute
|
33
|
-
html = "<p class=bar foo=bar abbr=bar />"
|
34
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
35
|
-
node = sane.xpath("//p").first
|
36
|
-
assert node.attributes['class']
|
37
|
-
assert node.attributes['abbr']
|
38
|
-
assert_nil node.attributes['foo']
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_removal_of_illegal_url_in_href
|
42
|
-
html = <<-HTML
|
43
|
-
<a href='jimbo://jim.jim/'>this link should have its href removed because of illegal url</a>
|
44
|
-
<a href='http://jim.jim/'>this link should be fine</a>
|
45
|
-
HTML
|
46
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
47
|
-
nodes = sane.xpath("//a")
|
48
|
-
assert_nil nodes.first.attributes['href']
|
49
|
-
assert nodes.last.attributes['href']
|
50
|
-
end
|
51
|
-
|
52
|
-
def test_css_sanitization
|
53
|
-
html = "<p style='background-color: url(\"http://foo.com/\") ; background-color: #000 ;' />"
|
54
|
-
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
55
|
-
assert_match %r/#000/, sane.inner_html
|
56
|
-
refute_match %r/foo\.com/, sane.inner_html
|
57
|
-
end
|
58
|
-
|
59
|
-
def test_fragment_with_no_tags
|
60
|
-
assert_equal "This fragment has no tags.", Loofah.scrub_fragment("This fragment has no tags.", :escape).to_xml
|
61
|
-
end
|
62
|
-
|
63
|
-
def test_fragment_in_p_tag
|
64
|
-
assert_equal "<p>This fragment is in a p.</p>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>", :escape).to_xml
|
65
|
-
end
|
66
|
-
|
67
|
-
def test_fragment_in_p_tag_plus_stuff
|
68
|
-
assert_equal "<p>This fragment is in a p.</p>foo<strong>bar</strong>", Loofah.scrub_fragment("<p>This fragment is in a p.</p>foo<strong>bar</strong>", :escape).to_xml
|
69
|
-
end
|
70
|
-
|
71
|
-
def test_fragment_with_text_nodes_leading_and_trailing
|
72
|
-
assert_equal "text<p>fragment</p>text", Loofah.scrub_fragment("text<p>fragment</p>text", :escape).to_xml
|
73
|
-
end
|
74
|
-
|
75
|
-
def test_whitewash_on_fragment
|
76
|
-
html = "safe<frameset rows=\"*\"><frame src=\"http://example.com\"></frameset> <b>description</b>"
|
77
|
-
whitewashed = Loofah.scrub_document(html, :whitewash).xpath("/html/body/*").to_s
|
78
|
-
assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n","")
|
79
|
-
end
|
80
|
-
|
81
|
-
def test_fragment_whitewash_on_microsofty_markup
|
82
|
-
whitewashed = Loofah.fragment(MSWORD_HTML).scrub!(:whitewash)
|
83
|
-
assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.to_s.strip
|
84
|
-
end
|
85
|
-
|
86
|
-
def test_document_whitewash_on_microsofty_markup
|
87
|
-
whitewashed = Loofah.document(MSWORD_HTML).scrub!(:whitewash)
|
88
|
-
assert_match %r(<p>Foo <b>BOLD</b></p>), whitewashed.to_s
|
89
|
-
assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.xpath("/html/body/*").to_s
|
90
|
-
end
|
91
|
-
|
92
|
-
def test_return_empty_string_when_nothing_left
|
93
|
-
assert_equal "", Loofah.scrub_document('<script>test</script>', :prune).text
|
94
|
-
end
|
95
|
-
|
96
|
-
def test_nested_script_cdata_tags_should_be_scrubbed
|
97
|
-
html = "<script><script src='malicious.js'></script>"
|
98
|
-
stripped = Loofah.fragment(html).scrub!(:strip)
|
99
|
-
assert_empty stripped.xpath("//script")
|
100
|
-
refute_match("<script", stripped.to_html)
|
101
|
-
end
|
102
|
-
|
103
|
-
def test_nested_script_cdata_tags_should_be_scrubbed_2
|
104
|
-
html = "<script><script>alert('a');</script></script>"
|
105
|
-
stripped = Loofah.fragment(html).scrub!(:strip)
|
106
|
-
assert_empty stripped.xpath("//script")
|
107
|
-
refute_match("<script", stripped.to_html)
|
108
|
-
end
|
109
|
-
|
110
|
-
def test_removal_of_all_tags
|
111
|
-
html = <<-HTML
|
112
|
-
What's up <strong>doc</strong>?
|
113
|
-
HTML
|
114
|
-
stripped = Loofah.scrub_document(html, :prune).text
|
115
|
-
assert_equal %Q(What\'s up doc?).strip, stripped.strip
|
116
|
-
end
|
117
|
-
|
118
|
-
def test_dont_remove_whitespace
|
119
|
-
html = "Foo\nBar"
|
120
|
-
assert_equal html, Loofah.scrub_document(html, :prune).text
|
121
|
-
end
|
122
|
-
|
123
|
-
def test_dont_remove_whitespace_between_tags
|
124
|
-
html = "<p>Foo</p>\n<p>Bar</p>"
|
125
|
-
assert_equal "Foo\nBar", Loofah.scrub_document(html, :prune).text
|
126
|
-
end
|
127
|
-
|
128
|
-
#
|
129
|
-
# tests for CVE-2018-8048 (see https://github.com/flavorjones/loofah/issues/144)
|
130
|
-
#
|
131
|
-
# libxml2 >= 2.9.2 fails to escape comments within some attributes. It
|
132
|
-
# wants to ensure these comments can be treated as "server-side includes",
|
133
|
-
# but as a result fails to ensure that serialization is well-formed,
|
134
|
-
# resulting in an opportunity for XSS injection of code into a final
|
135
|
-
# re-parsed document (presumably in a browser).
|
136
|
-
#
|
137
|
-
# we'll test this by parsing the HTML, serializing it, then
|
138
|
-
# re-parsing it to ensure there isn't any ambiguity in the output
|
139
|
-
# that might allow code injection into a browser consuming
|
140
|
-
# "sanitized" output.
|
141
|
-
#
|
142
|
-
[
|
143
|
-
#
|
144
|
-
# these tags and attributes are determined by the code at:
|
145
|
-
#
|
146
|
-
# https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
|
147
|
-
#
|
148
|
-
{tag: "a", attr: "href"},
|
149
|
-
{tag: "div", attr: "href"},
|
150
|
-
{tag: "a", attr: "action"},
|
151
|
-
{tag: "div", attr: "action"},
|
152
|
-
{tag: "a", attr: "src"},
|
153
|
-
{tag: "div", attr: "src"},
|
154
|
-
{tag: "a", attr: "name"},
|
155
|
-
#
|
156
|
-
# note that div+name is _not_ affected by the libxml2 issue.
|
157
|
-
# but we test it anyway to ensure our logic isn't modifying
|
158
|
-
# attributes that don't need modifying.
|
159
|
-
#
|
160
|
-
{tag: "div", attr: "name", unescaped: true},
|
161
|
-
].each do |config|
|
162
|
-
|
163
|
-
define_method "test_uri_escaping_of_#{config[:attr]}_attr_in_#{config[:tag]}_tag" do
|
164
|
-
html = %{<#{config[:tag]} #{config[:attr]}='examp<!--" unsafeattr=foo()>-->le.com'>test</#{config[:tag]}>}
|
165
|
-
|
166
|
-
reparsed = Loofah.fragment(Loofah.fragment(html).scrub!(:prune).to_html)
|
167
|
-
attributes = reparsed.at_css(config[:tag]).attribute_nodes
|
168
|
-
|
169
|
-
assert_equal [config[:attr]], attributes.collect(&:name)
|
170
|
-
if Nokogiri::VersionInfo.instance.libxml2?
|
171
|
-
if config[:unescaped]
|
172
|
-
#
|
173
|
-
# this attribute was emitted wrapped in single-quotes, so a double quote is A-OK.
|
174
|
-
# assert that this attribute's serialization is unaffected.
|
175
|
-
#
|
176
|
-
assert_equal %{examp<!--" unsafeattr=foo()>-->le.com}, attributes.first.value
|
177
|
-
else
|
178
|
-
#
|
179
|
-
# let's match the behavior in libxml < 2.9.2.
|
180
|
-
# test that this attribute's serialization is well-formed and sanitized.
|
181
|
-
#
|
182
|
-
assert_equal %{examp<!--%22%20unsafeattr=foo()>-->le.com}, attributes.first.value
|
183
|
-
end
|
184
|
-
else
|
185
|
-
#
|
186
|
-
# yay for consistency in javaland. move along, nothing to see here.
|
187
|
-
#
|
188
|
-
assert_equal %{examp<!--%22 unsafeattr=foo()>-->le.com}, attributes.first.value
|
189
|
-
end
|
190
|
-
end
|
191
|
-
end
|
192
|
-
|
193
|
-
# see:
|
194
|
-
# - https://github.com/flavorjones/loofah/issues/154
|
195
|
-
# - https://hackerone.com/reports/429267
|
196
|
-
context "xss protection from svg xmlns:xlink animate attribute" do
|
197
|
-
it "sanitizes appropriate attributes" do
|
198
|
-
html = %Q{<svg><a xmlns:xlink=http://www.w3.org/1999/xlink xlink:href=?><circle r=400 /><animate attributeName=xlink:href begin=0 from=javascript:alert(1) to=%26>}
|
199
|
-
sanitized = Loofah.scrub_fragment(html, :escape)
|
200
|
-
assert_nil sanitized.at_css("animate")["from"]
|
201
|
-
end
|
202
|
-
end
|
203
|
-
end
|
204
|
-
end
|
@@ -1,43 +0,0 @@
|
|
1
|
-
require "helper"
|
2
|
-
|
3
|
-
class IntegrationTestHelpers < Loofah::TestCase
|
4
|
-
context ".strip_tags" do
|
5
|
-
context "on safe markup" do
|
6
|
-
it "strip out tags" do
|
7
|
-
assert_equal "omgwtfbbq!!1!", Loofah::Helpers.strip_tags("<div>omgwtfbbq</div><span>!!1!</span>")
|
8
|
-
end
|
9
|
-
end
|
10
|
-
|
11
|
-
context "on hack attack" do
|
12
|
-
it "strip escape html entities" do
|
13
|
-
bad_shit = "<script>alert('evil')</script>"
|
14
|
-
assert_equal bad_shit, Loofah::Helpers.strip_tags(bad_shit)
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
context ".sanitize" do
|
20
|
-
context "on safe markup" do
|
21
|
-
it "render the safe html" do
|
22
|
-
html = "<div>omgwtfbbq</div><span>!!1!</span>"
|
23
|
-
assert_equal html, Loofah::Helpers.sanitize(html)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
|
27
|
-
context "on hack attack" do
|
28
|
-
it "strip the unsafe tags" do
|
29
|
-
assert_equal "alert('evil')<span>w00t</span>", Loofah::Helpers.sanitize("<script>alert('evil')</script><span>w00t</span>")
|
30
|
-
end
|
31
|
-
|
32
|
-
it "strips form tags" do
|
33
|
-
assert_equal "alert('evil')<span>w00t</span>", Loofah::Helpers.sanitize("<script>alert('evil')</script><form action=\"/foo/bar\" method=\"post\"><input></form><span>w00t</span>")
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
context ".sanitize_css" do
|
39
|
-
it "removes unsafe css properties" do
|
40
|
-
assert_match(/display:\s*block;\s*background-color:\s*blue;/, Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg);background-color:blue"))
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|