loofah 2.2.0 → 2.3.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of loofah might be problematic. Click here for more details.
- checksums.yaml +5 -5
- data/CHANGELOG.md +105 -32
- data/Gemfile +3 -3
- data/Manifest.txt +5 -1
- data/README.md +28 -26
- data/Rakefile +23 -21
- data/SECURITY.md +18 -0
- data/lib/loofah.rb +15 -14
- data/lib/loofah/helpers.rb +13 -3
- data/lib/loofah/html5/libxml2_workarounds.rb +26 -0
- data/lib/loofah/html5/safelist.rb +796 -0
- data/lib/loofah/html5/scrub.rb +43 -16
- data/lib/loofah/scrubbers.rb +1 -1
- data/test/assets/msword.html +63 -0
- data/test/html5/test_sanitizer.rb +36 -17
- data/test/html5/test_scrub.rb +10 -0
- data/test/integration/test_ad_hoc.rb +105 -78
- data/test/unit/test_helpers.rb +4 -4
- metadata +55 -39
- data/lib/loofah/html5/whitelist.rb +0 -186
data/lib/loofah/html5/scrub.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
#encoding: US-ASCII
|
2
|
-
|
3
1
|
require 'cgi'
|
4
2
|
require 'crass'
|
5
3
|
|
@@ -8,13 +6,13 @@ module Loofah
|
|
8
6
|
module Scrub
|
9
7
|
|
10
8
|
CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
|
11
|
-
CSS_KEYWORDISH = /\A(#[0-9a-
|
9
|
+
CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
|
12
10
|
CRASS_SEMICOLON = {:node => :semicolon, :raw => ";"}
|
13
11
|
|
14
12
|
class << self
|
15
13
|
|
16
14
|
def allowed_element? element_name
|
17
|
-
::Loofah::HTML5::
|
15
|
+
::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
|
18
16
|
end
|
19
17
|
|
20
18
|
# alternative implementation of the html5lib attribute scrubbing algorithm
|
@@ -30,31 +28,31 @@ module Loofah
|
|
30
28
|
next
|
31
29
|
end
|
32
30
|
|
33
|
-
unless
|
31
|
+
unless SafeList::ALLOWED_ATTRIBUTES.include?(attr_name)
|
34
32
|
attr_node.remove
|
35
33
|
next
|
36
34
|
end
|
37
35
|
|
38
|
-
if
|
36
|
+
if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
|
39
37
|
# this block lifted nearly verbatim from HTML5 sanitization
|
40
38
|
val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS,'').downcase
|
41
|
-
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !
|
39
|
+
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
|
42
40
|
attr_node.remove
|
43
41
|
next
|
44
|
-
elsif val_unescaped.split(
|
42
|
+
elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == 'data'
|
45
43
|
# permit only allowed data mediatypes
|
46
|
-
mediatype = val_unescaped.split(
|
44
|
+
mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
|
47
45
|
mediatype, _ = mediatype.split(';')[0..1] if mediatype
|
48
|
-
if mediatype && !
|
46
|
+
if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
|
49
47
|
attr_node.remove
|
50
48
|
next
|
51
49
|
end
|
52
50
|
end
|
53
51
|
end
|
54
|
-
if
|
52
|
+
if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
|
55
53
|
attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value
|
56
54
|
end
|
57
|
-
if
|
55
|
+
if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
|
58
56
|
attr_node.remove
|
59
57
|
next
|
60
58
|
end
|
@@ -65,6 +63,8 @@ module Loofah
|
|
65
63
|
node.attribute_nodes.each do |attr_node|
|
66
64
|
node.remove_attribute(attr_node.name) if attr_node.value !~ /[^[:space:]]/
|
67
65
|
end
|
66
|
+
|
67
|
+
force_correct_attribute_escaping! node
|
68
68
|
end
|
69
69
|
|
70
70
|
def scrub_css_attribute node
|
@@ -79,14 +79,14 @@ module Loofah
|
|
79
79
|
style_tree.each do |node|
|
80
80
|
next unless node[:node] == :property
|
81
81
|
next if node[:children].any? do |child|
|
82
|
-
[:url, :bad_url].include?(child[:node]) || (child[:node] == :function && !
|
82
|
+
[:url, :bad_url].include?(child[:node]) || (child[:node] == :function && !SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase))
|
83
83
|
end
|
84
84
|
name = node[:name].downcase
|
85
|
-
if
|
85
|
+
if SafeList::ALLOWED_CSS_PROPERTIES.include?(name) || SafeList::ALLOWED_SVG_PROPERTIES.include?(name)
|
86
86
|
sanitized_tree << node << CRASS_SEMICOLON
|
87
|
-
elsif
|
87
|
+
elsif SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split('-').first)
|
88
88
|
value = node[:value].split.map do |keyword|
|
89
|
-
if
|
89
|
+
if SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
|
90
90
|
keyword
|
91
91
|
end
|
92
92
|
end.compact
|
@@ -100,6 +100,33 @@ module Loofah
|
|
100
100
|
|
101
101
|
Crass::Parser.stringify sanitized_tree
|
102
102
|
end
|
103
|
+
|
104
|
+
#
|
105
|
+
# libxml2 >= 2.9.2 fails to escape comments within some attributes.
|
106
|
+
#
|
107
|
+
# see comments about CVE-2018-8048 within the tests for more information
|
108
|
+
#
|
109
|
+
def force_correct_attribute_escaping! node
|
110
|
+
return unless Nokogiri::VersionInfo.instance.libxml2?
|
111
|
+
|
112
|
+
node.attribute_nodes.each do |attr_node|
|
113
|
+
next unless LibxmlWorkarounds::BROKEN_ESCAPING_ATTRIBUTES.include?(attr_node.name)
|
114
|
+
|
115
|
+
tag_name = LibxmlWorkarounds::BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG[attr_node.name]
|
116
|
+
next unless tag_name.nil? || tag_name == node.name
|
117
|
+
|
118
|
+
#
|
119
|
+
# this block is just like CGI.escape in Ruby 2.4, but
|
120
|
+
# only encodes space and double-quote, to mimic
|
121
|
+
# pre-2.9.2 behavior
|
122
|
+
#
|
123
|
+
encoding = attr_node.value.encoding
|
124
|
+
attr_node.value = attr_node.value.gsub(/[ "]/) do |m|
|
125
|
+
'%' + m.unpack('H2' * m.bytesize).join('%').upcase
|
126
|
+
end.force_encoding(encoding)
|
127
|
+
end
|
128
|
+
end
|
129
|
+
|
103
130
|
end
|
104
131
|
end
|
105
132
|
end
|
data/lib/loofah/scrubbers.rb
CHANGED
data/test/assets/msword.html
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
|
2
|
+
<w:WordDocument>
|
3
|
+
<w:View>Normal</w:View>
|
4
|
+
<w:Zoom>0</w:Zoom>
|
5
|
+
<w:PunctuationKerning/>
|
6
|
+
<w:ValidateAgainstSchemas/>
|
7
|
+
<w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
|
8
|
+
<w:IgnoreMixedContent>false</w:IgnoreMixedContent>
|
9
|
+
<w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
|
10
|
+
<w:Compatibility>
|
11
|
+
<w:BreakWrappedTables/>
|
12
|
+
<w:SnapToGridInCell/>
|
13
|
+
<w:WrapTextWithPunct/>
|
14
|
+
<w:UseAsianBreakRules/>
|
15
|
+
<w:DontGrowAutofit/>
|
16
|
+
</w:Compatibility>
|
17
|
+
<w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
|
18
|
+
</w:WordDocument>
|
19
|
+
</xml><![endif]--><!--[if gte mso 9]><xml>
|
20
|
+
<w:LatentStyles DefLockedState="false" LatentStyleCount="156">
|
21
|
+
</w:LatentStyles>
|
22
|
+
</xml><![endif]--><style>
|
23
|
+
<!--
|
24
|
+
/* Style Definitions */
|
25
|
+
p.MsoNormal, li.MsoNormal, div.MsoNormal
|
26
|
+
{mso-style-parent:"";
|
27
|
+
margin:0in;
|
28
|
+
margin-bottom:.0001pt;
|
29
|
+
mso-pagination:widow-orphan;
|
30
|
+
font-size:12.0pt;
|
31
|
+
font-family:"Times New Roman";
|
32
|
+
mso-fareast-font-family:"Times New Roman";}
|
33
|
+
@page Section1
|
34
|
+
{size:8.5in 11.0in;
|
35
|
+
margin:1.0in 1.25in 1.0in 1.25in;
|
36
|
+
mso-header-margin:.5in;
|
37
|
+
mso-footer-margin:.5in;
|
38
|
+
mso-paper-source:0;}
|
39
|
+
div.Section1
|
40
|
+
{page:Section1;}
|
41
|
+
-->
|
42
|
+
</style><!--[if gte mso 10]>
|
43
|
+
<style>
|
44
|
+
/* Style Definitions */
|
45
|
+
table.MsoNormalTable
|
46
|
+
{mso-style-name:"Table Normal";
|
47
|
+
mso-tstyle-rowband-size:0;
|
48
|
+
mso-tstyle-colband-size:0;
|
49
|
+
mso-style-noshow:yes;
|
50
|
+
mso-style-parent:"";
|
51
|
+
mso-padding-alt:0in 5.4pt 0in 5.4pt;
|
52
|
+
mso-para-margin:0in;
|
53
|
+
mso-para-margin-bottom:.0001pt;
|
54
|
+
mso-pagination:widow-orphan;
|
55
|
+
font-size:10.0pt;
|
56
|
+
font-family:"Times New Roman";
|
57
|
+
mso-ansi-language:#0400;
|
58
|
+
mso-fareast-language:#0400;
|
59
|
+
mso-bidi-language:#0400;}
|
60
|
+
</style>
|
61
|
+
<![endif]-->
|
62
|
+
|
63
|
+
<p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
|
data/test/html5/test_sanitizer.rb
CHANGED
@@ -37,7 +37,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
37
37
|
assert_in_delta t0, Time.now, 0.1 # arbitrary seconds
|
38
38
|
end
|
39
39
|
|
40
|
-
(HTML5::
|
40
|
+
(HTML5::SafeList::ALLOWED_ELEMENTS).each do |tag_name|
|
41
41
|
define_method "test_should_allow_#{tag_name}_tag" do
|
42
42
|
input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
|
43
43
|
htmloutput = "<#{tag_name.downcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.downcase}>"
|
@@ -58,7 +58,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
58
58
|
htmloutput = "<img title='1'/>foo <bad>bar</bad> baz"
|
59
59
|
xhtmloutput = htmloutput
|
60
60
|
rexmloutput = "<image title='1'>foo <bad>bar</bad> baz</image>"
|
61
|
-
elsif HTML5::
|
61
|
+
elsif HTML5::SafeList::VOID_ELEMENTS.include?(tag_name)
|
62
62
|
htmloutput = "<#{tag_name} title='1'>foo <bad>bar</bad> baz"
|
63
63
|
xhtmloutput = htmloutput
|
64
64
|
htmloutput += '<br/>' if tag_name == 'br'
|
@@ -71,7 +71,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
71
71
|
##
|
72
72
|
## libxml2 downcases elements, so this is moot.
|
73
73
|
##
|
74
|
-
# HTML5::
|
74
|
+
# HTML5::SafeList::ALLOWED_ELEMENTS.each do |tag_name|
|
75
75
|
# define_method "test_should_forbid_#{tag_name.upcase}_tag" do
|
76
76
|
# input = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
|
77
77
|
# output = "<#{tag_name.upcase} title=\"1\">foo <bad>bar</bad> baz</#{tag_name.upcase}>"
|
@@ -79,7 +79,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
79
79
|
# end
|
80
80
|
# end
|
81
81
|
|
82
|
-
HTML5::
|
82
|
+
HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name|
|
83
83
|
next if attribute_name == 'style'
|
84
84
|
define_method "test_should_allow_#{attribute_name}_attribute" do
|
85
85
|
input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
|
@@ -110,10 +110,17 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
110
110
|
check_sanitization(input, htmloutput, output, output)
|
111
111
|
end
|
112
112
|
|
113
|
+
def test_should_allow_contenteditable
|
114
|
+
input = '<p contenteditable="false">Hi!</p>'
|
115
|
+
output = '<p contenteditable="false">Hi!</p>'
|
116
|
+
|
117
|
+
check_sanitization(input, output, output, output)
|
118
|
+
end
|
119
|
+
|
113
120
|
##
|
114
121
|
## libxml2 downcases attributes, so this is moot.
|
115
122
|
##
|
116
|
-
# HTML5::
|
123
|
+
# HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name|
|
117
124
|
# define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
|
118
125
|
# input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
|
119
126
|
# output = "<p>foo <bad>bar</bad> baz</p>"
|
@@ -121,7 +128,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
121
128
|
# end
|
122
129
|
# end
|
123
130
|
|
124
|
-
HTML5::
|
131
|
+
HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
|
125
132
|
define_method "test_should_allow_#{protocol}_uris" do
|
126
133
|
input = %(<a href="#{protocol}">foo</a>)
|
127
134
|
output = "<a href='#{protocol}'>foo</a>"
|
@@ -129,7 +136,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
129
136
|
end
|
130
137
|
end
|
131
138
|
|
132
|
-
HTML5::
|
139
|
+
HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
|
133
140
|
define_method "test_should_allow_uppercase_#{protocol}_uris" do
|
134
141
|
input = %(<a href="#{protocol.upcase}">foo</a>)
|
135
142
|
output = "<a href='#{protocol.upcase}'>foo</a>"
|
@@ -137,7 +144,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
137
144
|
end
|
138
145
|
end
|
139
146
|
|
140
|
-
HTML5::
|
147
|
+
HTML5::SafeList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
|
141
148
|
define_method "test_should_allow_data_#{data_uri_type}_uris" do
|
142
149
|
input = %(<a href="data:#{data_uri_type}">foo</a>)
|
143
150
|
output = "<a href='data:#{data_uri_type}'>foo</a>"
|
@@ -149,7 +156,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
149
156
|
end
|
150
157
|
end
|
151
158
|
|
152
|
-
HTML5::
|
159
|
+
HTML5::SafeList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
|
153
160
|
define_method "test_should_allow_uppercase_data_#{data_uri_type}_uris" do
|
154
161
|
input = %(<a href="DATA:#{data_uri_type.upcase}">foo</a>)
|
155
162
|
output = "<a href='DATA:#{data_uri_type.upcase}'>foo</a>"
|
@@ -172,8 +179,8 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
172
179
|
end
|
173
180
|
|
174
181
|
|
175
|
-
HTML5::
|
176
|
-
next unless HTML5::
|
182
|
+
HTML5::SafeList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
|
183
|
+
next unless HTML5::SafeList::ALLOWED_ELEMENTS.include?(tag_name)
|
177
184
|
define_method "test_#{tag_name}_should_allow_local_href" do
|
178
185
|
input = %(<#{tag_name} xlink:href="#foo"/>)
|
179
186
|
output = "<#{tag_name.downcase} xlink:href='#foo'></#{tag_name.downcase}>"
|
@@ -249,7 +256,7 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
249
256
|
end
|
250
257
|
|
251
258
|
## added because we don't have any coverage above on SVG_ATTR_VAL_ALLOWS_REF
|
252
|
-
HTML5::
|
259
|
+
HTML5::SafeList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
|
253
260
|
define_method "test_should_allow_uri_refs_in_svg_attribute_#{attr_name}" do
|
254
261
|
input = "<rect fill='url(#foo)' />"
|
255
262
|
output = "<rect fill='url(#foo)'></rect>"
|
@@ -263,6 +270,12 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
263
270
|
end
|
264
271
|
end
|
265
272
|
|
273
|
+
def test_css_list_style
|
274
|
+
html = '<ul style="list-style: none"></ul>'
|
275
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
276
|
+
assert_match %r/list-style/, sane.inner_html
|
277
|
+
end
|
278
|
+
|
266
279
|
def test_css_negative_value_sanitization
|
267
280
|
html = "<span style=\"letter-spacing:-0.03em;\">"
|
268
281
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
@@ -275,7 +288,13 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
275
288
|
assert_match %r/-0.05em/, sane.inner_html
|
276
289
|
end
|
277
290
|
|
278
|
-
def
|
291
|
+
def test_css_high_precision_value_shorthand_css_properties
|
292
|
+
html = "<span style=\"margin-left:0.3333333334em;\">"
|
293
|
+
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
294
|
+
assert_match %r/0.3333333334em/, sane.inner_html
|
295
|
+
end
|
296
|
+
|
297
|
+
def test_css_function_sanitization_leaves_safelisted_functions_calc
|
279
298
|
html = "<span style=\"width:calc(5%)\">"
|
280
299
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
281
300
|
assert_match %r/calc\(5%\)/, sane.inner_html
|
@@ -285,24 +304,24 @@ class Html5TestSanitizer < Loofah::TestCase
|
|
285
304
|
assert_match %r/calc\(5%\)/, sane.inner_html
|
286
305
|
end
|
287
306
|
|
288
|
-
def
|
307
|
+
def test_css_function_sanitization_leaves_safelisted_functions_rgb
|
289
308
|
html = '<span style="color: rgb(255, 0, 0)">'
|
290
309
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
291
310
|
assert_match %r/rgb\(255, 0, 0\)/, sane.inner_html
|
292
311
|
end
|
293
312
|
|
294
|
-
def
|
313
|
+
def test_css_function_sanitization_leaves_safelisted_list_style_type
|
295
314
|
html = "<ol style='list-style-type:lower-greek;'></ol>"
|
296
315
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
297
316
|
assert_match %r/list-style-type:lower-greek/, sane.inner_html
|
298
317
|
end
|
299
318
|
|
300
319
|
def test_css_function_sanitization_strips_style_attributes_with_unsafe_functions
|
301
|
-
html = "<span style=\"width:
|
320
|
+
html = "<span style=\"width:url(data-evil-url)\">"
|
302
321
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
303
322
|
assert_match %r/<span><\/span>/, sane.inner_html
|
304
323
|
|
305
|
-
html = "<span style=\"width:
|
324
|
+
html = "<span style=\"width: url(data-evil-url)\">"
|
306
325
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
|
307
326
|
assert_match %r/<span><\/span>/, sane.inner_html
|
308
327
|
end
|
data/test/html5/test_scrub.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
require "helper"
|
2
|
+
|
3
|
+
class UnitHTML5Scrub < Loofah::TestCase
|
4
|
+
include Loofah
|
5
|
+
|
6
|
+
def test_scrub_css
|
7
|
+
assert_equal Loofah::HTML5::Scrub.scrub_css("background: #ABC012"), "background:#ABC012;"
|
8
|
+
assert_equal Loofah::HTML5::Scrub.scrub_css("background: #abc012"), "background:#abc012;"
|
9
|
+
end
|
10
|
+
end
|
data/test/integration/test_ad_hoc.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require "helper"
|
2
2
|
|
3
3
|
class IntegrationTestAdHoc < Loofah::TestCase
|
4
|
-
|
5
4
|
context "blank input string" do
|
6
5
|
context "fragment" do
|
7
6
|
it "return a blank string" do
|
@@ -17,6 +16,8 @@ class IntegrationTestAdHoc < Loofah::TestCase
|
|
17
16
|
end
|
18
17
|
|
19
18
|
context "tests" do
|
19
|
+
MSWORD_HTML = File.read(File.join(File.dirname(__FILE__), "..", "assets", "msword.html")).freeze
|
20
|
+
|
20
21
|
def test_removal_of_illegal_tag
|
21
22
|
html = <<-HTML
|
22
23
|
following this there should be no jim tag
|
@@ -31,9 +32,9 @@ class IntegrationTestAdHoc < Loofah::TestCase
|
|
31
32
|
html = "<p class=bar foo=bar abbr=bar />"
|
32
33
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
33
34
|
node = sane.xpath("//p").first
|
34
|
-
assert node.attributes[
|
35
|
-
assert node.attributes[
|
36
|
-
assert_nil node.attributes[
|
35
|
+
assert node.attributes["class"]
|
36
|
+
assert node.attributes["abbr"]
|
37
|
+
assert_nil node.attributes["foo"]
|
37
38
|
end
|
38
39
|
|
39
40
|
def test_removal_of_illegal_url_in_href
|
@@ -43,14 +44,14 @@ class IntegrationTestAdHoc < Loofah::TestCase
|
|
43
44
|
HTML
|
44
45
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
45
46
|
nodes = sane.xpath("//a")
|
46
|
-
assert_nil nodes.first.attributes[
|
47
|
-
assert nodes.last.attributes[
|
47
|
+
assert_nil nodes.first.attributes["href"]
|
48
|
+
assert nodes.last.attributes["href"]
|
48
49
|
end
|
49
50
|
|
50
51
|
def test_css_sanitization
|
51
52
|
html = "<p style='background-color: url(\"http://foo.com/\") ; background-color: #000 ;' />"
|
52
53
|
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
|
53
|
-
assert_match %r/#000/,
|
54
|
+
assert_match %r/#000/, sane.inner_html
|
54
55
|
refute_match %r/foo\.com/, sane.inner_html
|
55
56
|
end
|
56
57
|
|
@@ -73,74 +74,8 @@ class IntegrationTestAdHoc < Loofah::TestCase
|
|
73
74
|
def test_whitewash_on_fragment
|
74
75
|
html = "safe<frameset rows=\"*\"><frame src=\"http://example.com\"></frameset> <b>description</b>"
|
75
76
|
whitewashed = Loofah.scrub_document(html, :whitewash).xpath("/html/body/*").to_s
|
76
|
-
assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n","")
|
77
|
-
end
|
78
|
-
|
79
|
-
MSWORD_HTML = <<-EOHTML
|
80
|
-
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
|
81
|
-
<w:WordDocument>
|
82
|
-
<w:View>Normal</w:View>
|
83
|
-
<w:Zoom>0</w:Zoom>
|
84
|
-
<w:PunctuationKerning/>
|
85
|
-
<w:ValidateAgainstSchemas/>
|
86
|
-
<w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
|
87
|
-
<w:IgnoreMixedContent>false</w:IgnoreMixedContent>
|
88
|
-
<w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
|
89
|
-
<w:Compatibility>
|
90
|
-
<w:BreakWrappedTables/>
|
91
|
-
<w:SnapToGridInCell/>
|
92
|
-
<w:WrapTextWithPunct/>
|
93
|
-
<w:UseAsianBreakRules/>
|
94
|
-
<w:DontGrowAutofit/>
|
95
|
-
</w:Compatibility>
|
96
|
-
<w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
|
97
|
-
</w:WordDocument>
|
98
|
-
</xml><![endif]--><!--[if gte mso 9]><xml>
|
99
|
-
<w:LatentStyles DefLockedState="false" LatentStyleCount="156">
|
100
|
-
</w:LatentStyles>
|
101
|
-
</xml><![endif]--><style>
|
102
|
-
<!--
|
103
|
-
/* Style Definitions */
|
104
|
-
p.MsoNormal, li.MsoNormal, div.MsoNormal
|
105
|
-
{mso-style-parent:"";
|
106
|
-
margin:0in;
|
107
|
-
margin-bottom:.0001pt;
|
108
|
-
mso-pagination:widow-orphan;
|
109
|
-
font-size:12.0pt;
|
110
|
-
font-family:"Times New Roman";
|
111
|
-
mso-fareast-font-family:"Times New Roman";}
|
112
|
-
@page Section1
|
113
|
-
{size:8.5in 11.0in;
|
114
|
-
margin:1.0in 1.25in 1.0in 1.25in;
|
115
|
-
mso-header-margin:.5in;
|
116
|
-
mso-footer-margin:.5in;
|
117
|
-
mso-paper-source:0;}
|
118
|
-
div.Section1
|
119
|
-
{page:Section1;}
|
120
|
-
-->
|
121
|
-
</style><!--[if gte mso 10]>
|
122
|
-
<style>
|
123
|
-
/* Style Definitions */
|
124
|
-
table.MsoNormalTable
|
125
|
-
{mso-style-name:"Table Normal";
|
126
|
-
mso-tstyle-rowband-size:0;
|
127
|
-
mso-tstyle-colband-size:0;
|
128
|
-
mso-style-noshow:yes;
|
129
|
-
mso-style-parent:"";
|
130
|
-
mso-padding-alt:0in 5.4pt 0in 5.4pt;
|
131
|
-
mso-para-margin:0in;
|
132
|
-
mso-para-margin-bottom:.0001pt;
|
133
|
-
mso-pagination:widow-orphan;
|
134
|
-
font-size:10.0pt;
|
135
|
-
font-family:"Times New Roman";
|
136
|
-
mso-ansi-language:#0400;
|
137
|
-
mso-fareast-language:#0400;
|
138
|
-
mso-bidi-language:#0400;}
|
139
|
-
</style>
|
140
|
-
<![endif]-->
|
141
|
-
|
142
|
-
<p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
|
143
|
-
EOHTML
|
77
|
+
assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n", "")
|
78
|
+
end
|
144
79
|
|
145
80
|
def test_fragment_whitewash_on_microsofty_markup
|
146
81
|
whitewashed = Loofah.fragment(MSWORD_HTML).scrub!(:whitewash)
|
@@ -150,11 +85,11 @@ mso-bidi-language:#0400;}
|
|
150
85
|
def test_document_whitewash_on_microsofty_markup
|
151
86
|
whitewashed = Loofah.document(MSWORD_HTML).scrub!(:whitewash)
|
152
87
|
assert_match %r(<p>Foo <b>BOLD</b></p>), whitewashed.to_s
|
153
|
-
assert_equal "<p>Foo <b>BOLD</b></p>",
|
88
|
+
assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.xpath("/html/body/*").to_s
|
154
89
|
end
|
155
90
|
|
156
91
|
def test_return_empty_string_when_nothing_left
|
157
|
-
assert_equal "", Loofah.scrub_document(
|
92
|
+
assert_equal "", Loofah.scrub_document("<script>test</script>", :prune).text
|
158
93
|
end
|
159
94
|
|
160
95
|
def test_nested_script_cdata_tags_should_be_scrubbed
|
@@ -188,6 +123,98 @@ mso-bidi-language:#0400;}
|
|
188
123
|
html = "<p>Foo</p>\n<p>Bar</p>"
|
189
124
|
assert_equal "Foo\nBar", Loofah.scrub_document(html, :prune).text
|
190
125
|
end
|
126
|
+
|
127
|
+
#
|
128
|
+
# tests for CVE-2018-8048 (see https://github.com/flavorjones/loofah/issues/144)
|
129
|
+
#
|
130
|
+
# libxml2 >= 2.9.2 fails to escape comments within some attributes. It
|
131
|
+
# wants to ensure these comments can be treated as "server-side includes",
|
132
|
+
# but as a result fails to ensure that serialization is well-formed,
|
133
|
+
# resulting in an opportunity for XSS injection of code into a final
|
134
|
+
# re-parsed document (presumably in a browser).
|
135
|
+
#
|
136
|
+
# we'll test this by parsing the HTML, serializing it, then
|
137
|
+
# re-parsing it to ensure there isn't any ambiguity in the output
|
138
|
+
# that might allow code injection into a browser consuming
|
139
|
+
# "sanitized" output.
|
140
|
+
#
|
141
|
+
[
|
142
|
+
#
|
143
|
+
# these tags and attributes are determined by the code at:
|
144
|
+
#
|
145
|
+
# https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
|
146
|
+
#
|
147
|
+
{ tag: "a", attr: "href" },
|
148
|
+
{ tag: "div", attr: "href" },
|
149
|
+
{ tag: "a", attr: "action" },
|
150
|
+
{ tag: "div", attr: "action" },
|
151
|
+
{ tag: "a", attr: "src" },
|
152
|
+
{ tag: "div", attr: "src" },
|
153
|
+
{ tag: "a", attr: "name" },
|
154
|
+
#
|
155
|
+
# note that div+name is _not_ affected by the libxml2 issue.
|
156
|
+
# but we test it anyway to ensure our logic isn't modifying
|
157
|
+
# attributes that don't need modifying.
|
158
|
+
#
|
159
|
+
{ tag: "div", attr: "name", unescaped: true },
|
160
|
+
].each do |config|
|
161
|
+
define_method "test_uri_escaping_of_#{config[:attr]}_attr_in_#{config[:tag]}_tag" do
|
162
|
+
html = %{<#{config[:tag]} #{config[:attr]}='examp<!--" unsafeattr=foo()>-->le.com'>test</#{config[:tag]}>}
|
163
|
+
|
164
|
+
reparsed = Loofah.fragment(Loofah.fragment(html).scrub!(:prune).to_html)
|
165
|
+
attributes = reparsed.at_css(config[:tag]).attribute_nodes
|
166
|
+
|
167
|
+
assert_equal [config[:attr]], attributes.collect(&:name)
|
168
|
+
if Nokogiri::VersionInfo.instance.libxml2?
|
169
|
+
if config[:unescaped]
|
170
|
+
#
|
171
|
+
# this attribute was emitted wrapped in single-quotes, so a double quote is A-OK.
|
172
|
+
# assert that this attribute's serialization is unaffected.
|
173
|
+
#
|
174
|
+
assert_equal %{examp<!--" unsafeattr=foo()>-->le.com}, attributes.first.value
|
175
|
+
else
|
176
|
+
#
|
177
|
+
# let's match the behavior in libxml < 2.9.2.
|
178
|
+
# test that this attribute's serialization is well-formed and sanitized.
|
179
|
+
#
|
180
|
+
assert_equal %{examp<!--%22%20unsafeattr=foo()>-->le.com}, attributes.first.value
|
181
|
+
end
|
182
|
+
else
|
183
|
+
#
|
184
|
+
# yay for consistency in javaland. move along, nothing to see here.
|
185
|
+
#
|
186
|
+
assert_equal %{examp<!--%22 unsafeattr=foo()>-->le.com}, attributes.first.value
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
190
|
+
|
191
|
+
context "xss protection from svg animate attributes" do
|
192
|
+
# see recommendation from https://html5sec.org/#137
|
193
|
+
# to sanitize "to", "from", "values", and "by" attributes
|
194
|
+
|
195
|
+
it "sanitizes 'from', 'to', and 'by' attributes" do
|
196
|
+
# for CVE-2018-16468
|
197
|
+
# see:
|
198
|
+
# - https://github.com/flavorjones/loofah/issues/154
|
199
|
+
# - https://hackerone.com/reports/429267
|
200
|
+
html = %Q{<svg><a xmlns:xlink=http://www.w3.org/1999/xlink xlink:href=?><circle r=400 /><animate attributeName=xlink:href begin=0 from=javascript:alert(1) to=%26 by=5>}
|
201
|
+
|
202
|
+
sanitized = Loofah.scrub_fragment(html, :escape)
|
203
|
+
assert_nil sanitized.at_css("animate")["from"]
|
204
|
+
assert_nil sanitized.at_css("animate")["to"]
|
205
|
+
assert_nil sanitized.at_css("animate")["by"]
|
206
|
+
end
|
207
|
+
|
208
|
+
it "sanitizes 'values' attribute" do
|
209
|
+
# for CVE-2019-15587
|
210
|
+
# see:
|
211
|
+
# - https://github.com/flavorjones/loofah/issues/171
|
212
|
+
# - https://hackerone.com/reports/709009
|
213
|
+
html = %Q{<svg> <animate href="#foo" attributeName="href" values="javascript:alert('xss')"/> <a id="foo"> <circle r=400 /> </a> </svg>}
|
214
|
+
|
215
|
+
sanitized = Loofah.scrub_fragment(html, :escape)
|
216
|
+
assert_nil sanitized.at_css("animate")["values"]
|
217
|
+
end
|
218
|
+
end
|
191
219
|
end
|
192
220
|
end
|
193
|
-
|