loofah 2.2.0 → 2.3.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of loofah might be problematic. See the linked release details for more information.

@@ -1,5 +1,3 @@
1
- #encoding: US-ASCII
2
-
3
1
  require 'cgi'
4
2
  require 'crass'
5
3
 
@@ -8,13 +6,13 @@ module Loofah
8
6
  module Scrub
9
7
 
10
8
  CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
11
- CSS_KEYWORDISH = /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
9
+ CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
12
10
  CRASS_SEMICOLON = {:node => :semicolon, :raw => ";"}
13
11
 
14
12
  class << self
15
13
 
16
14
  def allowed_element? element_name
17
- ::Loofah::HTML5::WhiteList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
15
+ ::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
18
16
  end
19
17
 
20
18
  # alternative implementation of the html5lib attribute scrubbing algorithm
@@ -30,31 +28,31 @@ module Loofah
30
28
  next
31
29
  end
32
30
 
33
- unless WhiteList::ALLOWED_ATTRIBUTES.include?(attr_name)
31
+ unless SafeList::ALLOWED_ATTRIBUTES.include?(attr_name)
34
32
  attr_node.remove
35
33
  next
36
34
  end
37
35
 
38
- if WhiteList::ATTR_VAL_IS_URI.include?(attr_name)
36
+ if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
39
37
  # this block lifted nearly verbatim from HTML5 sanitization
40
38
  val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS,'').downcase
41
- if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! WhiteList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(WhiteList::PROTOCOL_SEPARATOR)[0])
39
+ if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
42
40
  attr_node.remove
43
41
  next
44
- elsif val_unescaped.split(WhiteList::PROTOCOL_SEPARATOR)[0] == 'data'
42
+ elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == 'data'
45
43
  # permit only allowed data mediatypes
46
- mediatype = val_unescaped.split(WhiteList::PROTOCOL_SEPARATOR)[1]
44
+ mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
47
45
  mediatype, _ = mediatype.split(';')[0..1] if mediatype
48
- if mediatype && !WhiteList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
46
+ if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
49
47
  attr_node.remove
50
48
  next
51
49
  end
52
50
  end
53
51
  end
54
- if WhiteList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
52
+ if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
55
53
  attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value
56
54
  end
57
- if WhiteList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
55
+ if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
58
56
  attr_node.remove
59
57
  next
60
58
  end
@@ -65,6 +63,8 @@ module Loofah
65
63
  node.attribute_nodes.each do |attr_node|
66
64
  node.remove_attribute(attr_node.name) if attr_node.value !~ /[^[:space:]]/
67
65
  end
66
+
67
+ force_correct_attribute_escaping! node
68
68
  end
69
69
 
70
70
  def scrub_css_attribute node
@@ -79,14 +79,14 @@ module Loofah
79
79
  style_tree.each do |node|
80
80
  next unless node[:node] == :property
81
81
  next if node[:children].any? do |child|
82
- [:url, :bad_url].include?(child[:node]) || (child[:node] == :function && !WhiteList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase))
82
+ [:url, :bad_url].include?(child[:node]) || (child[:node] == :function && !SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase))
83
83
  end
84
84
  name = node[:name].downcase
85
- if WhiteList::ALLOWED_CSS_PROPERTIES.include?(name) || WhiteList::ALLOWED_SVG_PROPERTIES.include?(name)
85
+ if SafeList::ALLOWED_CSS_PROPERTIES.include?(name) || SafeList::ALLOWED_SVG_PROPERTIES.include?(name)
86
86
  sanitized_tree << node << CRASS_SEMICOLON
87
- elsif WhiteList::SHORTHAND_CSS_PROPERTIES.include?(name.split('-').first)
87
+ elsif SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split('-').first)
88
88
  value = node[:value].split.map do |keyword|
89
- if WhiteList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
89
+ if SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
90
90
  keyword
91
91
  end
92
92
  end.compact
@@ -100,6 +100,33 @@ module Loofah
100
100
 
101
101
  Crass::Parser.stringify sanitized_tree
102
102
  end
103
+
104
+ #
105
+ # libxml2 >= 2.9.2 fails to escape comments within some attributes.
106
+ #
107
+ # see comments about CVE-2018-8048 within the tests for more information
108
+ #
109
+ def force_correct_attribute_escaping! node
110
+ return unless Nokogiri::VersionInfo.instance.libxml2?
111
+
112
+ node.attribute_nodes.each do |attr_node|
113
+ next unless LibxmlWorkarounds::BROKEN_ESCAPING_ATTRIBUTES.include?(attr_node.name)
114
+
115
+ tag_name = LibxmlWorkarounds::BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG[attr_node.name]
116
+ next unless tag_name.nil? || tag_name == node.name
117
+
118
+ #
119
+ # this block is just like CGI.escape in Ruby 2.4, but
120
+ # only encodes space and double-quote, to mimic
121
+ # pre-2.9.2 libxml2 behavior
122
+ #
123
+ encoding = attr_node.value.encoding
124
+ attr_node.value = attr_node.value.gsub(/[ "]/) do |m|
125
+ '%' + m.unpack('H2' * m.bytesize).join('%').upcase
126
+ end.force_encoding(encoding)
127
+ end
128
+ end
129
+
103
130
  end
104
131
  end
105
132
  end
@@ -1,7 +1,7 @@
1
1
  module Loofah
2
2
  #
3
3
  # Loofah provides some built-in scrubbers for sanitizing with
4
- # HTML5lib's whitelist and for accomplishing some common
4
+ # HTML5lib's safelist and for accomplishing some common
5
5
  # transformation tasks.
6
6
  #
7
7
  #
@@ -0,0 +1,63 @@
1
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
2
+ <w:WordDocument>
3
+ <w:View>Normal</w:View>
4
+ <w:Zoom>0</w:Zoom>
5
+ <w:PunctuationKerning/>
6
+ <w:ValidateAgainstSchemas/>
7
+ <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
8
+ <w:IgnoreMixedContent>false</w:IgnoreMixedContent>
9
+ <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
10
+ <w:Compatibility>
11
+ <w:BreakWrappedTables/>
12
+ <w:SnapToGridInCell/>
13
+ <w:WrapTextWithPunct/>
14
+ <w:UseAsianBreakRules/>
15
+ <w:DontGrowAutofit/>
16
+ </w:Compatibility>
17
+ <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
18
+ </w:WordDocument>
19
+ </xml><![endif]--><!--[if gte mso 9]><xml>
20
+ <w:LatentStyles DefLockedState="false" LatentStyleCount="156">
21
+ </w:LatentStyles>
22
+ </xml><![endif]--><style>
23
+ <!--
24
+ /* Style Definitions */
25
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
26
+ {mso-style-parent:"";
27
+ margin:0in;
28
+ margin-bottom:.0001pt;
29
+ mso-pagination:widow-orphan;
30
+ font-size:12.0pt;
31
+ font-family:"Times New Roman";
32
+ mso-fareast-font-family:"Times New Roman";}
33
+ @page Section1
34
+ {size:8.5in 11.0in;
35
+ margin:1.0in 1.25in 1.0in 1.25in;
36
+ mso-header-margin:.5in;
37
+ mso-footer-margin:.5in;
38
+ mso-paper-source:0;}
39
+ div.Section1
40
+ {page:Section1;}
41
+ -->
42
+ </style><!--[if gte mso 10]>
43
+ <style>
44
+ /* Style Definitions */
45
+ table.MsoNormalTable
46
+ {mso-style-name:"Table Normal";
47
+ mso-tstyle-rowband-size:0;
48
+ mso-tstyle-colband-size:0;
49
+ mso-style-noshow:yes;
50
+ mso-style-parent:"";
51
+ mso-padding-alt:0in 5.4pt 0in 5.4pt;
52
+ mso-para-margin:0in;
53
+ mso-para-margin-bottom:.0001pt;
54
+ mso-pagination:widow-orphan;
55
+ font-size:10.0pt;
56
+ font-family:"Times New Roman";
57
+ mso-ansi-language:#0400;
58
+ mso-fareast-language:#0400;
59
+ mso-bidi-language:#0400;}
60
+ </style>
61
+ <![endif]-->
62
+
63
+ <p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
@@ -37,7 +37,7 @@ class Html5TestSanitizer < Loofah::TestCase
37
37
  assert_in_delta t0, Time.now, 0.1 # arbitrary seconds
38
38
  end
39
39
 
40
- (HTML5::WhiteList::ALLOWED_ELEMENTS).each do |tag_name|
40
+ (HTML5::SafeList::ALLOWED_ELEMENTS).each do |tag_name|
41
41
  define_method "test_should_allow_#{tag_name}_tag" do
42
42
  input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
43
43
  htmloutput = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
@@ -58,7 +58,7 @@ class Html5TestSanitizer < Loofah::TestCase
58
58
  htmloutput = "<img title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
59
59
  xhtmloutput = htmloutput
60
60
  rexmloutput = "<image title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</image>"
61
- elsif HTML5::WhiteList::VOID_ELEMENTS.include?(tag_name)
61
+ elsif HTML5::SafeList::VOID_ELEMENTS.include?(tag_name)
62
62
  htmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
63
63
  xhtmloutput = htmloutput
64
64
  htmloutput += '<br/>' if tag_name == 'br'
@@ -71,7 +71,7 @@ class Html5TestSanitizer < Loofah::TestCase
71
71
  ##
72
72
  ## libxml2 downcases elements, so this is moot.
73
73
  ##
74
- # HTML5::WhiteList::ALLOWED_ELEMENTS.each do |tag_name|
74
+ # HTML5::SafeList::ALLOWED_ELEMENTS.each do |tag_name|
75
75
  # define_method "test_should_forbid_#{tag_name.upcase}_tag" do
76
76
  # input = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
77
77
  # output = "&lt;#{tag_name.upcase} title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/#{tag_name.upcase}&gt;"
@@ -79,7 +79,7 @@ class Html5TestSanitizer < Loofah::TestCase
79
79
  # end
80
80
  # end
81
81
 
82
- HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
82
+ HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name|
83
83
  next if attribute_name == 'style'
84
84
  define_method "test_should_allow_#{attribute_name}_attribute" do
85
85
  input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
@@ -110,10 +110,17 @@ class Html5TestSanitizer < Loofah::TestCase
110
110
  check_sanitization(input, htmloutput, output, output)
111
111
  end
112
112
 
113
+ def test_should_allow_contenteditable
114
+ input = '<p contenteditable="false">Hi!</p>'
115
+ output = '<p contenteditable="false">Hi!</p>'
116
+
117
+ check_sanitization(input, output, output, output)
118
+ end
119
+
113
120
  ##
114
121
  ## libxml2 downcases attributes, so this is moot.
115
122
  ##
116
- # HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
123
+ # HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name|
117
124
  # define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
118
125
  # input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
119
126
  # output = "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
@@ -121,7 +128,7 @@ class Html5TestSanitizer < Loofah::TestCase
121
128
  # end
122
129
  # end
123
130
 
124
- HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
131
+ HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
125
132
  define_method "test_should_allow_#{protocol}_uris" do
126
133
  input = %(<a href="#{protocol}">foo</a>)
127
134
  output = "<a href='#{protocol}'>foo</a>"
@@ -129,7 +136,7 @@ class Html5TestSanitizer < Loofah::TestCase
129
136
  end
130
137
  end
131
138
 
132
- HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
139
+ HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
133
140
  define_method "test_should_allow_uppercase_#{protocol}_uris" do
134
141
  input = %(<a href="#{protocol.upcase}">foo</a>)
135
142
  output = "<a href='#{protocol.upcase}'>foo</a>"
@@ -137,7 +144,7 @@ class Html5TestSanitizer < Loofah::TestCase
137
144
  end
138
145
  end
139
146
 
140
- HTML5::WhiteList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
147
+ HTML5::SafeList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
141
148
  define_method "test_should_allow_data_#{data_uri_type}_uris" do
142
149
  input = %(<a href="data:#{data_uri_type}">foo</a>)
143
150
  output = "<a href='data:#{data_uri_type}'>foo</a>"
@@ -149,7 +156,7 @@ class Html5TestSanitizer < Loofah::TestCase
149
156
  end
150
157
  end
151
158
 
152
- HTML5::WhiteList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
159
+ HTML5::SafeList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
153
160
  define_method "test_should_allow_uppercase_data_#{data_uri_type}_uris" do
154
161
  input = %(<a href="DATA:#{data_uri_type.upcase}">foo</a>)
155
162
  output = "<a href='DATA:#{data_uri_type.upcase}'>foo</a>"
@@ -172,8 +179,8 @@ class Html5TestSanitizer < Loofah::TestCase
172
179
  end
173
180
 
174
181
 
175
- HTML5::WhiteList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
176
- next unless HTML5::WhiteList::ALLOWED_ELEMENTS.include?(tag_name)
182
+ HTML5::SafeList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
183
+ next unless HTML5::SafeList::ALLOWED_ELEMENTS.include?(tag_name)
177
184
  define_method "test_#{tag_name}_should_allow_local_href" do
178
185
  input = %(<#{tag_name} xlink:href="#foo"/>)
179
186
  output = "<#{tag_name.downcase} xlink:href='#foo'></#{tag_name.downcase}>"
@@ -249,7 +256,7 @@ class Html5TestSanitizer < Loofah::TestCase
249
256
  end
250
257
 
251
258
  ## added because we don't have any coverage above on SVG_ATTR_VAL_ALLOWS_REF
252
- HTML5::WhiteList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
259
+ HTML5::SafeList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
253
260
  define_method "test_should_allow_uri_refs_in_svg_attribute_#{attr_name}" do
254
261
  input = "<rect fill='url(#foo)' />"
255
262
  output = "<rect fill='url(#foo)'></rect>"
@@ -263,6 +270,12 @@ class Html5TestSanitizer < Loofah::TestCase
263
270
  end
264
271
  end
265
272
 
273
+ def test_css_list_style
274
+ html = '<ul style="list-style: none"></ul>'
275
+ sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
276
+ assert_match %r/list-style/, sane.inner_html
277
+ end
278
+
266
279
  def test_css_negative_value_sanitization
267
280
  html = "<span style=\"letter-spacing:-0.03em;\">"
268
281
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
@@ -275,7 +288,13 @@ class Html5TestSanitizer < Loofah::TestCase
275
288
  assert_match %r/-0.05em/, sane.inner_html
276
289
  end
277
290
 
278
- def test_css_function_sanitization_leaves_whitelisted_functions_calc
291
+ def test_css_high_precision_value_shorthand_css_properties
292
+ html = "<span style=\"margin-left:0.3333333334em;\">"
293
+ sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
294
+ assert_match %r/0.3333333334em/, sane.inner_html
295
+ end
296
+
297
+ def test_css_function_sanitization_leaves_safelisted_functions_calc
279
298
  html = "<span style=\"width:calc(5%)\">"
280
299
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
281
300
  assert_match %r/calc\(5%\)/, sane.inner_html
@@ -285,24 +304,24 @@ class Html5TestSanitizer < Loofah::TestCase
285
304
  assert_match %r/calc\(5%\)/, sane.inner_html
286
305
  end
287
306
 
288
- def test_css_function_sanitization_leaves_whitelisted_functions_rgb
307
+ def test_css_function_sanitization_leaves_safelisted_functions_rgb
289
308
  html = '<span style="color: rgb(255, 0, 0)">'
290
309
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
291
310
  assert_match %r/rgb\(255, 0, 0\)/, sane.inner_html
292
311
  end
293
312
 
294
- def test_css_function_sanitization_leaves_whitelisted_list_style_type
313
+ def test_css_function_sanitization_leaves_safelisted_list_style_type
295
314
  html = "<ol style='list-style-type:lower-greek;'></ol>"
296
315
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
297
316
  assert_match %r/list-style-type:lower-greek/, sane.inner_html
298
317
  end
299
318
 
300
319
  def test_css_function_sanitization_strips_style_attributes_with_unsafe_functions
301
- html = "<span style=\"width:attr(data-evil-attr)\">"
320
+ html = "<span style=\"width:url(data-evil-url)\">"
302
321
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
303
322
  assert_match %r/<span><\/span>/, sane.inner_html
304
323
 
305
- html = "<span style=\"width: attr(data-evil-attr)\">"
324
+ html = "<span style=\"width: url(data-evil-url)\">"
306
325
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
307
326
  assert_match %r/<span><\/span>/, sane.inner_html
308
327
  end
@@ -0,0 +1,10 @@
1
+ require "helper"
2
+
3
+ class UnitHTML5Scrub < Loofah::TestCase
4
+ include Loofah
5
+
6
+ def test_scrub_css
7
+ assert_equal Loofah::HTML5::Scrub.scrub_css("background: #ABC012"), "background:#ABC012;"
8
+ assert_equal Loofah::HTML5::Scrub.scrub_css("background: #abc012"), "background:#abc012;"
9
+ end
10
+ end
@@ -1,7 +1,6 @@
1
1
  require "helper"
2
2
 
3
3
  class IntegrationTestAdHoc < Loofah::TestCase
4
-
5
4
  context "blank input string" do
6
5
  context "fragment" do
7
6
  it "return a blank string" do
@@ -17,6 +16,8 @@ class IntegrationTestAdHoc < Loofah::TestCase
17
16
  end
18
17
 
19
18
  context "tests" do
19
+ MSWORD_HTML = File.read(File.join(File.dirname(__FILE__), "..", "assets", "msword.html")).freeze
20
+
20
21
  def test_removal_of_illegal_tag
21
22
  html = <<-HTML
22
23
  following this there should be no jim tag
@@ -31,9 +32,9 @@ class IntegrationTestAdHoc < Loofah::TestCase
31
32
  html = "<p class=bar foo=bar abbr=bar />"
32
33
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
33
34
  node = sane.xpath("//p").first
34
- assert node.attributes['class']
35
- assert node.attributes['abbr']
36
- assert_nil node.attributes['foo']
35
+ assert node.attributes["class"]
36
+ assert node.attributes["abbr"]
37
+ assert_nil node.attributes["foo"]
37
38
  end
38
39
 
39
40
  def test_removal_of_illegal_url_in_href
@@ -43,14 +44,14 @@ class IntegrationTestAdHoc < Loofah::TestCase
43
44
  HTML
44
45
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
45
46
  nodes = sane.xpath("//a")
46
- assert_nil nodes.first.attributes['href']
47
- assert nodes.last.attributes['href']
47
+ assert_nil nodes.first.attributes["href"]
48
+ assert nodes.last.attributes["href"]
48
49
  end
49
50
 
50
51
  def test_css_sanitization
51
52
  html = "<p style='background-color: url(\"http://foo.com/\") ; background-color: #000 ;' />"
52
53
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
53
- assert_match %r/#000/, sane.inner_html
54
+ assert_match %r/#000/, sane.inner_html
54
55
  refute_match %r/foo\.com/, sane.inner_html
55
56
  end
56
57
 
@@ -73,74 +74,8 @@ class IntegrationTestAdHoc < Loofah::TestCase
73
74
  def test_whitewash_on_fragment
74
75
  html = "safe<frameset rows=\"*\"><frame src=\"http://example.com\"></frameset> <b>description</b>"
75
76
  whitewashed = Loofah.scrub_document(html, :whitewash).xpath("/html/body/*").to_s
76
- assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n","")
77
- end
78
-
79
- MSWORD_HTML = <<-EOHTML
80
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
81
- <w:WordDocument>
82
- <w:View>Normal</w:View>
83
- <w:Zoom>0</w:Zoom>
84
- <w:PunctuationKerning/>
85
- <w:ValidateAgainstSchemas/>
86
- <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
87
- <w:IgnoreMixedContent>false</w:IgnoreMixedContent>
88
- <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
89
- <w:Compatibility>
90
- <w:BreakWrappedTables/>
91
- <w:SnapToGridInCell/>
92
- <w:WrapTextWithPunct/>
93
- <w:UseAsianBreakRules/>
94
- <w:DontGrowAutofit/>
95
- </w:Compatibility>
96
- <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
97
- </w:WordDocument>
98
- </xml><![endif]--><!--[if gte mso 9]><xml>
99
- <w:LatentStyles DefLockedState="false" LatentStyleCount="156">
100
- </w:LatentStyles>
101
- </xml><![endif]--><style>
102
- <!--
103
- /* Style Definitions */
104
- p.MsoNormal, li.MsoNormal, div.MsoNormal
105
- {mso-style-parent:"";
106
- margin:0in;
107
- margin-bottom:.0001pt;
108
- mso-pagination:widow-orphan;
109
- font-size:12.0pt;
110
- font-family:"Times New Roman";
111
- mso-fareast-font-family:"Times New Roman";}
112
- @page Section1
113
- {size:8.5in 11.0in;
114
- margin:1.0in 1.25in 1.0in 1.25in;
115
- mso-header-margin:.5in;
116
- mso-footer-margin:.5in;
117
- mso-paper-source:0;}
118
- div.Section1
119
- {page:Section1;}
120
- -->
121
- </style><!--[if gte mso 10]>
122
- <style>
123
- /* Style Definitions */
124
- table.MsoNormalTable
125
- {mso-style-name:"Table Normal";
126
- mso-tstyle-rowband-size:0;
127
- mso-tstyle-colband-size:0;
128
- mso-style-noshow:yes;
129
- mso-style-parent:"";
130
- mso-padding-alt:0in 5.4pt 0in 5.4pt;
131
- mso-para-margin:0in;
132
- mso-para-margin-bottom:.0001pt;
133
- mso-pagination:widow-orphan;
134
- font-size:10.0pt;
135
- font-family:"Times New Roman";
136
- mso-ansi-language:#0400;
137
- mso-fareast-language:#0400;
138
- mso-bidi-language:#0400;}
139
- </style>
140
- <![endif]-->
141
-
142
- <p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
143
- EOHTML
77
+ assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n", "")
78
+ end
144
79
 
145
80
  def test_fragment_whitewash_on_microsofty_markup
146
81
  whitewashed = Loofah.fragment(MSWORD_HTML).scrub!(:whitewash)
@@ -150,11 +85,11 @@ mso-bidi-language:#0400;}
150
85
  def test_document_whitewash_on_microsofty_markup
151
86
  whitewashed = Loofah.document(MSWORD_HTML).scrub!(:whitewash)
152
87
  assert_match %r(<p>Foo <b>BOLD</b></p>), whitewashed.to_s
153
- assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.xpath("/html/body/*").to_s
88
+ assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.xpath("/html/body/*").to_s
154
89
  end
155
90
 
156
91
  def test_return_empty_string_when_nothing_left
157
- assert_equal "", Loofah.scrub_document('<script>test</script>', :prune).text
92
+ assert_equal "", Loofah.scrub_document("<script>test</script>", :prune).text
158
93
  end
159
94
 
160
95
  def test_nested_script_cdata_tags_should_be_scrubbed
@@ -188,6 +123,98 @@ mso-bidi-language:#0400;}
188
123
  html = "<p>Foo</p>\n<p>Bar</p>"
189
124
  assert_equal "Foo\nBar", Loofah.scrub_document(html, :prune).text
190
125
  end
126
+
127
+ #
128
+ # tests for CVE-2018-8048 (see https://github.com/flavorjones/loofah/issues/144)
129
+ #
130
+ # libxml2 >= 2.9.2 fails to escape comments within some attributes. It
131
+ # wants to ensure these comments can be treated as "server-side includes",
132
+ # but as a result fails to ensure that serialization is well-formed,
133
+ # resulting in an opportunity for XSS injection of code into a final
134
+ # re-parsed document (presumably in a browser).
135
+ #
136
+ # we'll test this by parsing the HTML, serializing it, then
137
+ # re-parsing it to ensure there isn't any ambiguity in the output
138
+ # that might allow code injection into a browser consuming
139
+ # "sanitized" output.
140
+ #
141
+ [
142
+ #
143
+ # these tags and attributes are determined by the code at:
144
+ #
145
+ # https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
146
+ #
147
+ { tag: "a", attr: "href" },
148
+ { tag: "div", attr: "href" },
149
+ { tag: "a", attr: "action" },
150
+ { tag: "div", attr: "action" },
151
+ { tag: "a", attr: "src" },
152
+ { tag: "div", attr: "src" },
153
+ { tag: "a", attr: "name" },
154
+ #
155
+ # note that div+name is _not_ affected by the libxml2 issue.
156
+ # but we test it anyway to ensure our logic isn't modifying
157
+ # attributes that don't need modifying.
158
+ #
159
+ { tag: "div", attr: "name", unescaped: true },
160
+ ].each do |config|
161
+ define_method "test_uri_escaping_of_#{config[:attr]}_attr_in_#{config[:tag]}_tag" do
162
+ html = %{<#{config[:tag]} #{config[:attr]}='examp<!--" unsafeattr=foo()>-->le.com'>test</#{config[:tag]}>}
163
+
164
+ reparsed = Loofah.fragment(Loofah.fragment(html).scrub!(:prune).to_html)
165
+ attributes = reparsed.at_css(config[:tag]).attribute_nodes
166
+
167
+ assert_equal [config[:attr]], attributes.collect(&:name)
168
+ if Nokogiri::VersionInfo.instance.libxml2?
169
+ if config[:unescaped]
170
+ #
171
+ # this attribute was emitted wrapped in single-quotes, so a double quote is A-OK.
172
+ # assert that this attribute's serialization is unaffected.
173
+ #
174
+ assert_equal %{examp<!--" unsafeattr=foo()>-->le.com}, attributes.first.value
175
+ else
176
+ #
177
+ # let's match the behavior in libxml2 < 2.9.2.
178
+ # test that this attribute's serialization is well-formed and sanitized.
179
+ #
180
+ assert_equal %{examp<!--%22%20unsafeattr=foo()>-->le.com}, attributes.first.value
181
+ end
182
+ else
183
+ #
184
+ # yay for consistency in javaland. move along, nothing to see here.
185
+ #
186
+ assert_equal %{examp<!--%22 unsafeattr=foo()>-->le.com}, attributes.first.value
187
+ end
188
+ end
189
+ end
190
+
191
+ context "xss protection from svg animate attributes" do
192
+ # see recommendation from https://html5sec.org/#137
193
+ # to sanitize "to", "from", "values", and "by" attributes
194
+
195
+ it "sanitizes 'from', 'to', and 'by' attributes" do
196
+ # for CVE-2018-16468
197
+ # see:
198
+ # - https://github.com/flavorjones/loofah/issues/154
199
+ # - https://hackerone.com/reports/429267
200
+ html = %Q{<svg><a xmlns:xlink=http://www.w3.org/1999/xlink xlink:href=?><circle r=400 /><animate attributeName=xlink:href begin=0 from=javascript:alert(1) to=%26 by=5>}
201
+
202
+ sanitized = Loofah.scrub_fragment(html, :escape)
203
+ assert_nil sanitized.at_css("animate")["from"]
204
+ assert_nil sanitized.at_css("animate")["to"]
205
+ assert_nil sanitized.at_css("animate")["by"]
206
+ end
207
+
208
+ it "sanitizes 'values' attribute" do
209
+ # for CVE-2019-15587
210
+ # see:
211
+ # - https://github.com/flavorjones/loofah/issues/171
212
+ # - https://hackerone.com/reports/709009
213
+ html = %Q{<svg> <animate href="#foo" attributeName="href" values="javascript:alert('xss')"/> <a id="foo"> <circle r=400 /> </a> </svg>}
214
+
215
+ sanitized = Loofah.scrub_fragment(html, :escape)
216
+ assert_nil sanitized.at_css("animate")["values"]
217
+ end
218
+ end
191
219
  end
192
220
  end
193
-