loofah 2.2.1 → 2.4.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of loofah might be problematic. Click here for more details.
- checksums.yaml +5 -5
- data/CHANGELOG.md +107 -32
- data/Gemfile +10 -9
- data/Manifest.txt +3 -1
- data/README.md +11 -17
- data/Rakefile +32 -20
- data/SECURITY.md +1 -1
- data/lib/loofah.rb +16 -15
- data/lib/loofah/elements.rb +1 -0
- data/lib/loofah/helpers.rb +14 -3
- data/lib/loofah/html/document.rb +1 -0
- data/lib/loofah/html/document_fragment.rb +1 -0
- data/lib/loofah/html5/libxml2_workarounds.rb +1 -0
- data/lib/loofah/html5/safelist.rb +798 -0
- data/lib/loofah/html5/scrub.rb +15 -16
- data/lib/loofah/instance_methods.rb +1 -0
- data/lib/loofah/metahelpers.rb +1 -0
- data/lib/loofah/scrubber.rb +1 -0
- data/lib/loofah/scrubbers.rb +2 -1
- data/lib/loofah/xml/document.rb +1 -0
- data/lib/loofah/xml/document_fragment.rb +1 -0
- data/test/assets/msword.html +63 -0
- data/test/html5/test_sanitizer.rb +49 -17
- data/test/html5/test_scrub.rb +10 -0
- data/test/integration/test_ad_hoc.rb +48 -86
- data/test/unit/test_helpers.rb +4 -4
- metadata +61 -45
- data/lib/loofah/html5/whitelist.rb +0 -186
data/lib/loofah/html5/scrub.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
require 'cgi'
|
2
3
|
require 'crass'
|
3
4
|
|
@@ -6,13 +7,13 @@ module Loofah
|
|
6
7
|
module Scrub
|
7
8
|
|
8
9
|
CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
|
9
|
-
CSS_KEYWORDISH = /\A(#[0-9a-
|
10
|
+
CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(cm|r?em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
|
10
11
|
CRASS_SEMICOLON = {:node => :semicolon, :raw => ";"}
|
11
12
|
|
12
13
|
class << self
|
13
14
|
|
14
15
|
def allowed_element? element_name
|
15
|
-
::Loofah::HTML5::
|
16
|
+
::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
|
16
17
|
end
|
17
18
|
|
18
19
|
# alternative implementation of the html5lib attribute scrubbing algorithm
|
@@ -28,31 +29,31 @@ module Loofah
|
|
28
29
|
next
|
29
30
|
end
|
30
31
|
|
31
|
-
unless
|
32
|
+
unless SafeList::ALLOWED_ATTRIBUTES.include?(attr_name)
|
32
33
|
attr_node.remove
|
33
34
|
next
|
34
35
|
end
|
35
36
|
|
36
|
-
if
|
37
|
+
if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
|
37
38
|
# this block lifted nearly verbatim from HTML5 sanitization
|
38
39
|
val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS,'').downcase
|
39
|
-
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !
|
40
|
+
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
|
40
41
|
attr_node.remove
|
41
42
|
next
|
42
|
-
elsif val_unescaped.split(
|
43
|
+
elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == 'data'
|
43
44
|
# permit only allowed data mediatypes
|
44
|
-
mediatype = val_unescaped.split(
|
45
|
+
mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
|
45
46
|
mediatype, _ = mediatype.split(';')[0..1] if mediatype
|
46
|
-
if mediatype && !
|
47
|
+
if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
|
47
48
|
attr_node.remove
|
48
49
|
next
|
49
50
|
end
|
50
51
|
end
|
51
52
|
end
|
52
|
-
if
|
53
|
+
if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
|
53
54
|
attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value
|
54
55
|
end
|
55
|
-
if
|
56
|
+
if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
|
56
57
|
attr_node.remove
|
57
58
|
next
|
58
59
|
end
|
@@ -79,14 +80,14 @@ module Loofah
|
|
79
80
|
style_tree.each do |node|
|
80
81
|
next unless node[:node] == :property
|
81
82
|
next if node[:children].any? do |child|
|
82
|
-
[:url, :bad_url].include?(child[:node]) || (child[:node] == :function && !
|
83
|
+
[:url, :bad_url].include?(child[:node]) || (child[:node] == :function && !SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase))
|
83
84
|
end
|
84
85
|
name = node[:name].downcase
|
85
|
-
if
|
86
|
+
if SafeList::ALLOWED_CSS_PROPERTIES.include?(name) || SafeList::ALLOWED_SVG_PROPERTIES.include?(name)
|
86
87
|
sanitized_tree << node << CRASS_SEMICOLON
|
87
|
-
elsif
|
88
|
+
elsif SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split('-').first)
|
88
89
|
value = node[:value].split.map do |keyword|
|
89
|
-
if
|
90
|
+
if SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
|
90
91
|
keyword
|
91
92
|
end
|
92
93
|
end.compact
|
@@ -101,8 +102,6 @@ module Loofah
|
|
101
102
|
Crass::Parser.stringify sanitized_tree
|
102
103
|
end
|
103
104
|
|
104
|
-
private
|
105
|
-
|
106
105
|
#
|
107
106
|
# libxml2 >= 2.9.2 fails to escape comments within some attributes.
|
108
107
|
#
|
data/lib/loofah/metahelpers.rb
CHANGED
data/lib/loofah/scrubber.rb
CHANGED
data/lib/loofah/scrubbers.rb
CHANGED
@@ -1,7 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Loofah
|
2
3
|
#
|
3
4
|
# Loofah provides some built-in scrubbers for sanitizing with
|
4
|
-
# HTML5lib's
|
5
|
+
# HTML5lib's safelist and for accomplishing some common
|
5
6
|
# transformation tasks.
|
6
7
|
#
|
7
8
|
#
|
data/lib/loofah/xml/document.rb
CHANGED
@@ -0,0 +1,63 @@
|
|
1
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
|
2
|
+
<w:WordDocument>
|
3
|
+
<w:View>Normal</w:View>
|
4
|
+
<w:Zoom>0</w:Zoom>
|
5
|
+
<w:PunctuationKerning/>
|
6
|
+
<w:ValidateAgainstSchemas/>
|
7
|
+
<w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
|
8
|
+
<w:IgnoreMixedContent>false</w:IgnoreMixedContent>
|
9
|
+
<w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
|
10
|
+
<w:Compatibility>
|
11
|
+
<w:BreakWrappedTables/>
|
12
|
+
<w:SnapToGridInCell/>
|
13
|
+
<w:WrapTextWithPunct/>
|
14
|
+
<w:UseAsianBreakRules/>
|
15
|
+
<w:DontGrowAutofit/>
|
16
|
+
</w:Compatibility>
|
17
|
+
<w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
|
18
|
+
</w:WordDocument>
|
19
|
+
</xml><![endif]--><!--[if gte mso 9]><xml>
|
20
|
+
<w:LatentStyles DefLockedState="false" LatentStyleCount="156">
|
21
|
+
</w:LatentStyles>
|
22
|
+
</xml><![endif]--><style>
|
23
|
+
<!--
|
24
|
+
/* Style Definitions */
|
25
|
+
p.MsoNormal, li.MsoNormal, div.MsoNormal
|
26
|
+
{mso-style-parent:"";
|
27
|
+
margin:0in;
|
28
|
+
margin-bottom:.0001pt;
|
29
|
+
mso-pagination:widow-orphan;
|
30
|
+
font-size:12.0pt;
|
31
|
+
font-family:"Times New Roman";
|
32
|
+
mso-fareast-font-family:"Times New Roman";}
|
33
|
+
@page Section1
|
34
|
+
{size:8.5in 11.0in;
|
35
|
+
margin:1.0in 1.25in 1.0in 1.25in;
|
36
|
+
mso-header-margin:.5in;
|
37
|
+
mso-footer-margin:.5in;
|
38
|
+
mso-paper-source:0;}
|
39
|
+
div.Section1
|
40
|
+
{page:Section1;}
|
41
|
+
-->
|
42
|
+
</style><!--[if gte mso 10]>
|
43
|
+
<style>
|
44
|
+
/* Style Definitions */
|
45
|
+
table.MsoNormalTable
|
46
|
+
{mso-style-name:"Table Normal";
|
47
|
+
mso-tstyle-rowband-size:0;
|
48
|
+
mso-tstyle-colband-size:0;
|
49
|
+
mso-style-noshow:yes;
|
50
|
+
mso-style-parent:"";
|
51
|
+
mso-padding-alt:0in 5.4pt 0in 5.4pt;
|
52
|
+
mso-para-margin:0in;
|
53
|
+
mso-para-margin-bottom:.0001pt;
|
54
|
+
mso-pagination:widow-orphan;
|
55
|
+
font-size:10.0pt;
|
56
|
+
font-family:"Times New Roman";
|
57
|
+
mso-ansi-language:#0400;
|
58
|
+
mso-fareast-language:#0400;
|
59
|
+
mso-bidi-language:#0400;}
|
60
|
+
</style>
|
61
|
+
<![endif]-->
|
62
|
+
|
63
|
+
<p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
|
@@ -37,7 +37,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
37
37
|
assert_in_delta t0, Time.now, 0.1 # arbitrary seconds
|
38
38
|
end
|
39
39
|
|
40
|
-
(HTML5::
|
40
|
+
(HTML5::SafeList::ALLOWED_ELEMENTS).each do |tag_name|
|
41
41
|
define_method "test_should_allow_#{tag_name}_tag" do
|
42
42
|
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
|
43
43
|
htmloutput = "<#{tag_name.downcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.downcase}>"
|
@@ -58,7 +58,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
58
58
|
htmloutput = "<img title='1'/>foo <bad>bar</bad> baz"
|
59
59
|
xhtmloutput = htmloutput
|
60
60
|
rexmloutput = "<image title='1'>foo <bad>bar</bad> baz</image>"
|
61
|
-
elsif HTML5::
|
61
|
+
elsif HTML5::SafeList::VOID_ELEMENTS.include?(tag_name)
|
62
62
|
htmloutput = "<#{tag_name} title='1'>foo <bad>bar</bad> baz"
|
63
63
|
xhtmloutput = htmloutput
|
64
64
|
htmloutput += '<br/>' if tag_name == 'br'
|
@@ -71,7 +71,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
71
71
|
##
|
72
72
|
## libxml2 downcases elements, so this is moot.
|
73
73
|
##
|
74
|
-
# HTML5::
|
74
|
+
# HTML5::SafeList::ALLOWED_ELEMENTS.each do |tag_name|
|
75
75
|
# define_method "test_should_forbid_#{tag_name.upcase}_tag" do
|
76
76
|
# input = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
|
77
77
|
# output = "<#{tag_name.upcase} title=\"1\">foo <bad>bar</bad> baz</#{tag_name.upcase}>"
|
@@ -79,7 +79,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
79
79
|
# end
|
80
80
|
# end
|
81
81
|
|
82
|
-
HTML5::
|
82
|
+
HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name|
|
83
83
|
next if attribute_name == 'style'
|
84
84
|
define_method "test_should_allow_#{attribute_name}_attribute" do
|
85
85
|
input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
|
@@ -110,10 +110,17 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
110
110
|
check_sanitization(input, htmloutput, output, output)
|
111
111
|
end
|
112
112
|
|
113
|
+
def test_should_allow_contenteditable
|
114
|
+
input = '<p contenteditable="false">Hi!</p>'
|
115
|
+
output = '<p contenteditable="false">Hi!</p>'
|
116
|
+
|
117
|
+
check_sanitization(input, output, output, output)
|
118
|
+
end
|
119
|
+
|
113
120
|
##
|
114
121
|
## libxml2 downcases attributes, so this is moot.
|
115
122
|
##
|
116
|
-
# HTML5::
|
123
|
+
# HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name|
|
117
124
|
# define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
|
118
125
|
# input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
|
119
126
|
# output = "<p>foo <bad>bar</bad> baz</p>"
|
@@ -121,7 +128,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
121
128
|
# end
|
122
129
|
# end
|
123
130
|
|
124
|
-
HTML5::
|
131
|
+
HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
|
125
132
|
define_method "test_should_allow_#{protocol}_uris" do
|
126
133
|
input = %(<a href="#{protocol}">foo</a>)
|
127
134
|
output = "<a href='#{protocol}'>foo</a>"
|
@@ -129,7 +136,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
129
136
|
end
|
130
137
|
end
|
131
138
|
|
132
|
-
HTML5::
|
139
|
+
HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
|
133
140
|
define_method "test_should_allow_uppercase_#{protocol}_uris" do
|
134
141
|
input = %(<a href="#{protocol.upcase}">foo</a>)
|
135
142
|
output = "<a href='#{protocol.upcase}'>foo</a>"
|
@@ -137,7 +144,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
137
144
|
end
|
138
145
|
end
|
139
146
|
|
140
|
-
HTML5::
|
147
|
+
HTML5::SafeList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
|
141
148
|
define_method "test_should_allow_data_#{data_uri_type}_uris" do
|
142
149
|
input = %(<a href="data:#{data_uri_type}">foo</a>)
|
143
150
|
output = "<a href='data:#{data_uri_type}'>foo</a>"
|
@@ -149,7 +156,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
149
156
|
end
|
150
157
|
end
|
151
158
|
|
152
|
-
HTML5::
|
159
|
+
HTML5::SafeList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
|
153
160
|
define_method "test_should_allow_uppercase_data_#{data_uri_type}_uris" do
|
154
161
|
input = %(<a href="DATA:#{data_uri_type.upcase}">foo</a>)
|
155
162
|
output = "<a href='DATA:#{data_uri_type.upcase}'>foo</a>"
|
@@ -172,8 +179,8 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
172
179
|
end
|
173
180
|
|
174
181
|
|
175
|
-
HTML5::
|
176
|
-
next unless HTML5::
|
182
|
+
HTML5::SafeList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
|
183
|
+
next unless HTML5::SafeList::ALLOWED_ELEMENTS.include?(tag_name)
|
177
184
|
define_method "test_#{tag_name}_should_allow_local_href" do
|
178
185
|
input = %(<#{tag_name} xlink:href="#foo"/>)
|
179
186
|
output = "<#{tag_name.downcase} xlink:href='#foo'></#{tag_name.downcase}>"
|
@@ -249,7 +256,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
249
256
|
end
|
250
257
|
|
251
258
|
## added because we don't have any coverage above on SVG_ATTR_VAL_ALLOWS_REF
|
252
|
-
HTML5::
|
259
|
+
HTML5::SafeList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
|
253
260
|
define_method "test_should_allow_uri_refs_in_svg_attribute_#{attr_name}" do
|
254
261
|
input = "<rect fill='url(#foo)' />"
|
255
262
|
output = "<rect fill='url(#foo)'></rect>"
|
@@ -263,6 +270,12 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
263
270
|
end
|
264
271
|
end
|
265
272
|
|
273
|
+
def test_css_list_style
|
274
|
+
html = '<ul style="list-style: none"></ul>'
|
275
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
276
|
+
assert_match %r/list-style/, sane.inner_html
|
277
|
+
end
|
278
|
+
|
266
279
|
def test_css_negative_value_sanitization
|
267
280
|
html = "<span style=\"letter-spacing:-0.03em;\">"
|
268
281
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
@@ -275,7 +288,19 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
275
288
|
assert_match %r/-0.05em/, sane.inner_html
|
276
289
|
end
|
277
290
|
|
278
|
-
def
|
291
|
+
def test_css_high_precision_value_shorthand_css_properties
|
292
|
+
html = "<span style=\"margin-left:0.3333333334em;\">"
|
293
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
294
|
+
assert_match %r/0.3333333334em/, sane.inner_html
|
295
|
+
end
|
296
|
+
|
297
|
+
def test_css_rem_value
|
298
|
+
html = "<span style=\"margin-top:10rem;\">"
|
299
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
300
|
+
assert_match %r/10rem/, sane.inner_html
|
301
|
+
end
|
302
|
+
|
303
|
+
def test_css_function_sanitization_leaves_safelisted_functions_calc
|
279
304
|
html = "<span style=\"width:calc(5%)\">"
|
280
305
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
281
306
|
assert_match %r/calc\(5%\)/, sane.inner_html
|
@@ -285,28 +310,35 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
285
310
|
assert_match %r/calc\(5%\)/, sane.inner_html
|
286
311
|
end
|
287
312
|
|
288
|
-
def
|
313
|
+
def test_css_function_sanitization_leaves_safelisted_functions_rgb
|
289
314
|
html = '<span style="color: rgb(255, 0, 0)">'
|
290
315
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
291
316
|
assert_match %r/rgb\(255, 0, 0\)/, sane.inner_html
|
292
317
|
end
|
293
318
|
|
294
|
-
def
|
319
|
+
def test_css_function_sanitization_leaves_safelisted_list_style_type
|
295
320
|
html = "<ol style='list-style-type:lower-greek;'></ol>"
|
296
321
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
297
322
|
assert_match %r/list-style-type:lower-greek/, sane.inner_html
|
298
323
|
end
|
299
324
|
|
300
325
|
def test_css_function_sanitization_strips_style_attributes_with_unsafe_functions
|
301
|
-
html = "<span style=\"width:
|
326
|
+
html = "<span style=\"width:url(data-evil-url)\">"
|
302
327
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
303
328
|
assert_match %r/<span><\/span>/, sane.inner_html
|
304
329
|
|
305
|
-
html = "<span style=\"width:
|
330
|
+
html = "<span style=\"width: url(data-evil-url)\">"
|
306
331
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
307
332
|
assert_match %r/<span><\/span>/, sane.inner_html
|
308
333
|
end
|
309
334
|
|
335
|
+
def test_css_max_width
|
336
|
+
html = '<div style="max-width: 100%;"></div>'
|
337
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
338
|
+
assert_match %r/max-width/, sane.inner_html
|
339
|
+
end
|
340
|
+
|
341
|
+
|
310
342
|
def test_issue_90_slow_regex
|
311
343
|
skip("timing tests are hard to make pass and have little regression-testing value")
|
312
344
|
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require "helper"
|
2
|
+
|
3
|
+
class UnitHTML5Scrub < Loofah::TestCase
|
4
|
+
include Loofah
|
5
|
+
|
6
|
+
def test_scrub_css
|
7
|
+
assert_equal Loofah::HTML5::Scrub.scrub_css("background: #ABC012"), "background:#ABC012;"
|
8
|
+
assert_equal Loofah::HTML5::Scrub.scrub_css("background: #abc012"), "background:#abc012;"
|
9
|
+
end
|
10
|
+
end
|
@@ -1,7 +1,6 @@
|
|
1
1
|
require "helper"
|
2
2
|
|
3
3
|
class IntegrationTestAdHoc < Loofah::TestCase
|
4
|
-
|
5
4
|
context "blank input string" do
|
6
5
|
context "fragment" do
|
7
6
|
it "return a blank string" do
|
@@ -17,6 +16,8 @@ class IntegrationTestAdHoc < Loofah::TestCase
|
|
17
16
|
end
|
18
17
|
|
19
18
|
context "tests" do
|
19
|
+
MSWORD_HTML = File.read(File.join(File.dirname(__FILE__), "..", "assets", "msword.html")).freeze
|
20
|
+
|
20
21
|
def test_removal_of_illegal_tag
|
21
22
|
html = <<-HTML
|
22
23
|
following this there should be no jim tag
|
@@ -31,9 +32,9 @@ class IntegrationTestAdHoc < Loofah::TestCase
|
|
31
32
|
html = "<p class=bar foo=bar abbr=bar />"
|
32
33
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
33
34
|
node = sane.xpath("//p").first
|
34
|
-
assert node.attributes[
|
35
|
-
assert node.attributes[
|
36
|
-
assert_nil node.attributes[
|
35
|
+
assert node.attributes["class"]
|
36
|
+
assert node.attributes["abbr"]
|
37
|
+
assert_nil node.attributes["foo"]
|
37
38
|
end
|
38
39
|
|
39
40
|
def test_removal_of_illegal_url_in_href
|
@@ -43,14 +44,14 @@ class IntegrationTestAdHoc < Loofah::TestCase
|
|
43
44
|
HTML
|
44
45
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
45
46
|
nodes = sane.xpath("//a")
|
46
|
-
assert_nil nodes.first.attributes[
|
47
|
-
assert nodes.last.attributes[
|
47
|
+
assert_nil nodes.first.attributes["href"]
|
48
|
+
assert nodes.last.attributes["href"]
|
48
49
|
end
|
49
50
|
|
50
51
|
def test_css_sanitization
|
51
52
|
html = "<p style='background-color: url(\"http://foo.com/\") ; background-color: #000 ;' />"
|
52
53
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
53
|
-
assert_match %r/#000/,
|
54
|
+
assert_match %r/#000/, sane.inner_html
|
54
55
|
refute_match %r/foo\.com/, sane.inner_html
|
55
56
|
end
|
56
57
|
|
@@ -73,75 +74,9 @@ class IntegrationTestAdHoc < Loofah::TestCase
|
|
73
74
|
def test_whitewash_on_fragment
|
74
75
|
html = "safe<frameset rows=\"*\"><frame src=\"http://example.com\"></frameset> <b>description</b>"
|
75
76
|
whitewashed = Loofah.scrub_document(html, :whitewash).xpath("/html/body/*").to_s
|
76
|
-
assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n","")
|
77
|
+
assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n", "")
|
77
78
|
end
|
78
79
|
|
79
|
-
MSWORD_HTML = <<-EOHTML
|
80
|
-
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
|
81
|
-
<w:WordDocument>
|
82
|
-
<w:View>Normal</w:View>
|
83
|
-
<w:Zoom>0</w:Zoom>
|
84
|
-
<w:PunctuationKerning/>
|
85
|
-
<w:ValidateAgainstSchemas/>
|
86
|
-
<w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
|
87
|
-
<w:IgnoreMixedContent>false</w:IgnoreMixedContent>
|
88
|
-
<w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
|
89
|
-
<w:Compatibility>
|
90
|
-
<w:BreakWrappedTables/>
|
91
|
-
<w:SnapToGridInCell/>
|
92
|
-
<w:WrapTextWithPunct/>
|
93
|
-
<w:UseAsianBreakRules/>
|
94
|
-
<w:DontGrowAutofit/>
|
95
|
-
</w:Compatibility>
|
96
|
-
<w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
|
97
|
-
</w:WordDocument>
|
98
|
-
</xml><![endif]--><!--[if gte mso 9]><xml>
|
99
|
-
<w:LatentStyles DefLockedState="false" LatentStyleCount="156">
|
100
|
-
</w:LatentStyles>
|
101
|
-
</xml><![endif]--><style>
|
102
|
-
<!--
|
103
|
-
/* Style Definitions */
|
104
|
-
p.MsoNormal, li.MsoNormal, div.MsoNormal
|
105
|
-
{mso-style-parent:"";
|
106
|
-
margin:0in;
|
107
|
-
margin-bottom:.0001pt;
|
108
|
-
mso-pagination:widow-orphan;
|
109
|
-
font-size:12.0pt;
|
110
|
-
font-family:"Times New Roman";
|
111
|
-
mso-fareast-font-family:"Times New Roman";}
|
112
|
-
@page Section1
|
113
|
-
{size:8.5in 11.0in;
|
114
|
-
margin:1.0in 1.25in 1.0in 1.25in;
|
115
|
-
mso-header-margin:.5in;
|
116
|
-
mso-footer-margin:.5in;
|
117
|
-
mso-paper-source:0;}
|
118
|
-
div.Section1
|
119
|
-
{page:Section1;}
|
120
|
-
-->
|
121
|
-
</style><!--[if gte mso 10]>
|
122
|
-
<style>
|
123
|
-
/* Style Definitions */
|
124
|
-
table.MsoNormalTable
|
125
|
-
{mso-style-name:"Table Normal";
|
126
|
-
mso-tstyle-rowband-size:0;
|
127
|
-
mso-tstyle-colband-size:0;
|
128
|
-
mso-style-noshow:yes;
|
129
|
-
mso-style-parent:"";
|
130
|
-
mso-padding-alt:0in 5.4pt 0in 5.4pt;
|
131
|
-
mso-para-margin:0in;
|
132
|
-
mso-para-margin-bottom:.0001pt;
|
133
|
-
mso-pagination:widow-orphan;
|
134
|
-
font-size:10.0pt;
|
135
|
-
font-family:"Times New Roman";
|
136
|
-
mso-ansi-language:#0400;
|
137
|
-
mso-fareast-language:#0400;
|
138
|
-
mso-bidi-language:#0400;}
|
139
|
-
</style>
|
140
|
-
<![endif]-->
|
141
|
-
|
142
|
-
<p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
|
143
|
-
EOHTML
|
144
|
-
|
145
80
|
def test_fragment_whitewash_on_microsofty_markup
|
146
81
|
whitewashed = Loofah.fragment(MSWORD_HTML).scrub!(:whitewash)
|
147
82
|
assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.to_s.strip
|
@@ -150,11 +85,11 @@ mso-bidi-language:#0400;}
|
|
150
85
|
def test_document_whitewash_on_microsofty_markup
|
151
86
|
whitewashed = Loofah.document(MSWORD_HTML).scrub!(:whitewash)
|
152
87
|
assert_match %r(<p>Foo <b>BOLD</b></p>), whitewashed.to_s
|
153
|
-
assert_equal "<p>Foo <b>BOLD</b></p>",
|
88
|
+
assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.xpath("/html/body/*").to_s
|
154
89
|
end
|
155
90
|
|
156
91
|
def test_return_empty_string_when_nothing_left
|
157
|
-
assert_equal "", Loofah.scrub_document(
|
92
|
+
assert_equal "", Loofah.scrub_document("<script>test</script>", :prune).text
|
158
93
|
end
|
159
94
|
|
160
95
|
def test_nested_script_cdata_tags_should_be_scrubbed
|
@@ -209,21 +144,20 @@ mso-bidi-language:#0400;}
|
|
209
144
|
#
|
210
145
|
# https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
|
211
146
|
#
|
212
|
-
{tag: "a",
|
213
|
-
{tag: "div", attr: "href"},
|
214
|
-
{tag: "a",
|
215
|
-
{tag: "div", attr: "action"},
|
216
|
-
{tag: "a",
|
217
|
-
{tag: "div", attr: "src"},
|
218
|
-
{tag: "a",
|
147
|
+
{ tag: "a", attr: "href" },
|
148
|
+
{ tag: "div", attr: "href" },
|
149
|
+
{ tag: "a", attr: "action" },
|
150
|
+
{ tag: "div", attr: "action" },
|
151
|
+
{ tag: "a", attr: "src" },
|
152
|
+
{ tag: "div", attr: "src" },
|
153
|
+
{ tag: "a", attr: "name" },
|
219
154
|
#
|
220
155
|
# note that div+name is _not_ affected by the libxml2 issue.
|
221
156
|
# but we test it anyway to ensure our logic isn't modifying
|
222
157
|
# attributes that don't need modifying.
|
223
158
|
#
|
224
|
-
{tag: "div", attr: "name", unescaped: true},
|
159
|
+
{ tag: "div", attr: "name", unescaped: true },
|
225
160
|
].each do |config|
|
226
|
-
|
227
161
|
define_method "test_uri_escaping_of_#{config[:attr]}_attr_in_#{config[:tag]}_tag" do
|
228
162
|
html = %{<#{config[:tag]} #{config[:attr]}='examp<!--" unsafeattr=foo()>-->le.com'>test</#{config[:tag]}>}
|
229
163
|
|
@@ -231,7 +165,7 @@ mso-bidi-language:#0400;}
|
|
231
165
|
attributes = reparsed.at_css(config[:tag]).attribute_nodes
|
232
166
|
|
233
167
|
assert_equal [config[:attr]], attributes.collect(&:name)
|
234
|
-
if Nokogiri::VersionInfo.
|
168
|
+
if Nokogiri::VersionInfo.instance.libxml2?
|
235
169
|
if config[:unescaped]
|
236
170
|
#
|
237
171
|
# this attribute was emitted wrapped in single-quotes, so a double quote is A-OK.
|
@@ -252,7 +186,35 @@ mso-bidi-language:#0400;}
|
|
252
186
|
assert_equal %{examp<!--%22 unsafeattr=foo()>-->le.com}, attributes.first.value
|
253
187
|
end
|
254
188
|
end
|
189
|
+
end
|
190
|
+
|
191
|
+
context "xss protection from svg animate attributes" do
|
192
|
+
# see recommendation from https://html5sec.org/#137
|
193
|
+
# to sanitize "to", "from", "values", and "by" attributes
|
194
|
+
|
195
|
+
it "sanitizes 'from', 'to', and 'by' attributes" do
|
196
|
+
# for CVE-2018-16468
|
197
|
+
# see:
|
198
|
+
# - https://github.com/flavorjones/loofah/issues/154
|
199
|
+
# - https://hackerone.com/reports/429267
|
200
|
+
html = %Q{<svg><a xmlns:xlink=http://www.w3.org/1999/xlink xlink:href=?><circle r=400 /><animate attributeName=xlink:href begin=0 from=javascript:alert(1) to=%26 by=5>}
|
201
|
+
|
202
|
+
sanitized = Loofah.scrub_fragment(html, :escape)
|
203
|
+
assert_nil sanitized.at_css("animate")["from"]
|
204
|
+
assert_nil sanitized.at_css("animate")["to"]
|
205
|
+
assert_nil sanitized.at_css("animate")["by"]
|
206
|
+
end
|
207
|
+
|
208
|
+
it "sanitizes 'values' attribute" do
|
209
|
+
# for CVE-2019-15587
|
210
|
+
# see:
|
211
|
+
# - https://github.com/flavorjones/loofah/issues/171
|
212
|
+
# - https://hackerone.com/reports/709009
|
213
|
+
html = %Q{<svg> <animate href="#foo" attributeName="href" values="javascript:alert('xss')"/> <a id="foo"> <circle r=400 /> </a> </svg>}
|
255
214
|
|
215
|
+
sanitized = Loofah.scrub_fragment(html, :escape)
|
216
|
+
assert_nil sanitized.at_css("animate")["values"]
|
217
|
+
end
|
256
218
|
end
|
257
219
|
end
|
258
220
|
end
|