loofah 2.2.1 → 2.4.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of loofah might be problematic. Click here for more details.

@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  require 'cgi'
2
3
  require 'crass'
3
4
 
@@ -6,13 +7,13 @@ module Loofah
6
7
  module Scrub
7
8
 
8
9
  CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
9
- CSS_KEYWORDISH = /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
10
+ CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(cm|r?em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
10
11
  CRASS_SEMICOLON = {:node => :semicolon, :raw => ";"}
11
12
 
12
13
  class << self
13
14
 
14
15
  def allowed_element? element_name
15
- ::Loofah::HTML5::WhiteList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
16
+ ::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
16
17
  end
17
18
 
18
19
  # alternative implementation of the html5lib attribute scrubbing algorithm
@@ -28,31 +29,31 @@ module Loofah
28
29
  next
29
30
  end
30
31
 
31
- unless WhiteList::ALLOWED_ATTRIBUTES.include?(attr_name)
32
+ unless SafeList::ALLOWED_ATTRIBUTES.include?(attr_name)
32
33
  attr_node.remove
33
34
  next
34
35
  end
35
36
 
36
- if WhiteList::ATTR_VAL_IS_URI.include?(attr_name)
37
+ if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
37
38
  # this block lifted nearly verbatim from HTML5 sanitization
38
39
  val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS,'').downcase
39
- if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! WhiteList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(WhiteList::PROTOCOL_SEPARATOR)[0])
40
+ if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
40
41
  attr_node.remove
41
42
  next
42
- elsif val_unescaped.split(WhiteList::PROTOCOL_SEPARATOR)[0] == 'data'
43
+ elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == 'data'
43
44
  # permit only allowed data mediatypes
44
- mediatype = val_unescaped.split(WhiteList::PROTOCOL_SEPARATOR)[1]
45
+ mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
45
46
  mediatype, _ = mediatype.split(';')[0..1] if mediatype
46
- if mediatype && !WhiteList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
47
+ if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
47
48
  attr_node.remove
48
49
  next
49
50
  end
50
51
  end
51
52
  end
52
- if WhiteList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
53
+ if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
53
54
  attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value
54
55
  end
55
- if WhiteList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
56
+ if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
56
57
  attr_node.remove
57
58
  next
58
59
  end
@@ -79,14 +80,14 @@ module Loofah
79
80
  style_tree.each do |node|
80
81
  next unless node[:node] == :property
81
82
  next if node[:children].any? do |child|
82
- [:url, :bad_url].include?(child[:node]) || (child[:node] == :function && !WhiteList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase))
83
+ [:url, :bad_url].include?(child[:node]) || (child[:node] == :function && !SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase))
83
84
  end
84
85
  name = node[:name].downcase
85
- if WhiteList::ALLOWED_CSS_PROPERTIES.include?(name) || WhiteList::ALLOWED_SVG_PROPERTIES.include?(name)
86
+ if SafeList::ALLOWED_CSS_PROPERTIES.include?(name) || SafeList::ALLOWED_SVG_PROPERTIES.include?(name)
86
87
  sanitized_tree << node << CRASS_SEMICOLON
87
- elsif WhiteList::SHORTHAND_CSS_PROPERTIES.include?(name.split('-').first)
88
+ elsif SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split('-').first)
88
89
  value = node[:value].split.map do |keyword|
89
- if WhiteList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
90
+ if SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
90
91
  keyword
91
92
  end
92
93
  end.compact
@@ -101,8 +102,6 @@ module Loofah
101
102
  Crass::Parser.stringify sanitized_tree
102
103
  end
103
104
 
104
- private
105
-
106
105
  #
107
106
  # libxml2 >= 2.9.2 fails to escape comments within some attributes.
108
107
  #
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  #
3
4
  # Mixes +scrub!+ into Document, DocumentFragment, Node and NodeSet.
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module MetaHelpers # :nodoc:
3
4
  def self.add_downcased_set_members_to_all_set_constants mojule
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  #
3
4
  # A RuntimeError raised when Loofah could not find an appropriate scrubber.
@@ -1,7 +1,8 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  #
3
4
  # Loofah provides some built-in scrubbers for sanitizing with
4
- # HTML5lib's whitelist and for accomplishing some common
5
+ # HTML5lib's safelist and for accomplishing some common
5
6
  # transformation tasks.
6
7
  #
7
8
  #
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module XML # :nodoc:
3
4
  #
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Loofah
2
3
  module XML # :nodoc:
3
4
  #
@@ -0,0 +1,63 @@
1
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
2
+ <w:WordDocument>
3
+ <w:View>Normal</w:View>
4
+ <w:Zoom>0</w:Zoom>
5
+ <w:PunctuationKerning/>
6
+ <w:ValidateAgainstSchemas/>
7
+ <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
8
+ <w:IgnoreMixedContent>false</w:IgnoreMixedContent>
9
+ <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
10
+ <w:Compatibility>
11
+ <w:BreakWrappedTables/>
12
+ <w:SnapToGridInCell/>
13
+ <w:WrapTextWithPunct/>
14
+ <w:UseAsianBreakRules/>
15
+ <w:DontGrowAutofit/>
16
+ </w:Compatibility>
17
+ <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
18
+ </w:WordDocument>
19
+ </xml><![endif]--><!--[if gte mso 9]><xml>
20
+ <w:LatentStyles DefLockedState="false" LatentStyleCount="156">
21
+ </w:LatentStyles>
22
+ </xml><![endif]--><style>
23
+ <!--
24
+ /* Style Definitions */
25
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
26
+ {mso-style-parent:"";
27
+ margin:0in;
28
+ margin-bottom:.0001pt;
29
+ mso-pagination:widow-orphan;
30
+ font-size:12.0pt;
31
+ font-family:"Times New Roman";
32
+ mso-fareast-font-family:"Times New Roman";}
33
+ @page Section1
34
+ {size:8.5in 11.0in;
35
+ margin:1.0in 1.25in 1.0in 1.25in;
36
+ mso-header-margin:.5in;
37
+ mso-footer-margin:.5in;
38
+ mso-paper-source:0;}
39
+ div.Section1
40
+ {page:Section1;}
41
+ -->
42
+ </style><!--[if gte mso 10]>
43
+ <style>
44
+ /* Style Definitions */
45
+ table.MsoNormalTable
46
+ {mso-style-name:"Table Normal";
47
+ mso-tstyle-rowband-size:0;
48
+ mso-tstyle-colband-size:0;
49
+ mso-style-noshow:yes;
50
+ mso-style-parent:"";
51
+ mso-padding-alt:0in 5.4pt 0in 5.4pt;
52
+ mso-para-margin:0in;
53
+ mso-para-margin-bottom:.0001pt;
54
+ mso-pagination:widow-orphan;
55
+ font-size:10.0pt;
56
+ font-family:"Times New Roman";
57
+ mso-ansi-language:#0400;
58
+ mso-fareast-language:#0400;
59
+ mso-bidi-language:#0400;}
60
+ </style>
61
+ <![endif]-->
62
+
63
+ <p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
@@ -37,7 +37,7 @@ class Html5TestSanitizer < Loofah::TestCase
37
37
  assert_in_delta t0, Time.now, 0.1 # arbitrary seconds
38
38
  end
39
39
 
40
- (HTML5::WhiteList::ALLOWED_ELEMENTS).each do |tag_name|
40
+ (HTML5::SafeList::ALLOWED_ELEMENTS).each do |tag_name|
41
41
  define_method "test_should_allow_#{tag_name}_tag" do
42
42
  input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
43
43
  htmloutput = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
@@ -58,7 +58,7 @@ class Html5TestSanitizer < Loofah::TestCase
58
58
  htmloutput = "<img title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
59
59
  xhtmloutput = htmloutput
60
60
  rexmloutput = "<image title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</image>"
61
- elsif HTML5::WhiteList::VOID_ELEMENTS.include?(tag_name)
61
+ elsif HTML5::SafeList::VOID_ELEMENTS.include?(tag_name)
62
62
  htmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
63
63
  xhtmloutput = htmloutput
64
64
  htmloutput += '<br/>' if tag_name == 'br'
@@ -71,7 +71,7 @@ class Html5TestSanitizer < Loofah::TestCase
71
71
  ##
72
72
  ## libxml2 downcases elements, so this is moot.
73
73
  ##
74
- # HTML5::WhiteList::ALLOWED_ELEMENTS.each do |tag_name|
74
+ # HTML5::SafeList::ALLOWED_ELEMENTS.each do |tag_name|
75
75
  # define_method "test_should_forbid_#{tag_name.upcase}_tag" do
76
76
  # input = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
77
77
  # output = "&lt;#{tag_name.upcase} title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/#{tag_name.upcase}&gt;"
@@ -79,7 +79,7 @@ class Html5TestSanitizer < Loofah::TestCase
79
79
  # end
80
80
  # end
81
81
 
82
- HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
82
+ HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name|
83
83
  next if attribute_name == 'style'
84
84
  define_method "test_should_allow_#{attribute_name}_attribute" do
85
85
  input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
@@ -110,10 +110,17 @@ class Html5TestSanitizer < Loofah::TestCase
110
110
  check_sanitization(input, htmloutput, output, output)
111
111
  end
112
112
 
113
+ def test_should_allow_contenteditable
114
+ input = '<p contenteditable="false">Hi!</p>'
115
+ output = '<p contenteditable="false">Hi!</p>'
116
+
117
+ check_sanitization(input, output, output, output)
118
+ end
119
+
113
120
  ##
114
121
  ## libxml2 downcases attributes, so this is moot.
115
122
  ##
116
- # HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
123
+ # HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name|
117
124
  # define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
118
125
  # input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
119
126
  # output = "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
@@ -121,7 +128,7 @@ class Html5TestSanitizer < Loofah::TestCase
121
128
  # end
122
129
  # end
123
130
 
124
- HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
131
+ HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
125
132
  define_method "test_should_allow_#{protocol}_uris" do
126
133
  input = %(<a href="#{protocol}">foo</a>)
127
134
  output = "<a href='#{protocol}'>foo</a>"
@@ -129,7 +136,7 @@ class Html5TestSanitizer < Loofah::TestCase
129
136
  end
130
137
  end
131
138
 
132
- HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
139
+ HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
133
140
  define_method "test_should_allow_uppercase_#{protocol}_uris" do
134
141
  input = %(<a href="#{protocol.upcase}">foo</a>)
135
142
  output = "<a href='#{protocol.upcase}'>foo</a>"
@@ -137,7 +144,7 @@ class Html5TestSanitizer < Loofah::TestCase
137
144
  end
138
145
  end
139
146
 
140
- HTML5::WhiteList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
147
+ HTML5::SafeList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
141
148
  define_method "test_should_allow_data_#{data_uri_type}_uris" do
142
149
  input = %(<a href="data:#{data_uri_type}">foo</a>)
143
150
  output = "<a href='data:#{data_uri_type}'>foo</a>"
@@ -149,7 +156,7 @@ class Html5TestSanitizer < Loofah::TestCase
149
156
  end
150
157
  end
151
158
 
152
- HTML5::WhiteList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
159
+ HTML5::SafeList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
153
160
  define_method "test_should_allow_uppercase_data_#{data_uri_type}_uris" do
154
161
  input = %(<a href="DATA:#{data_uri_type.upcase}">foo</a>)
155
162
  output = "<a href='DATA:#{data_uri_type.upcase}'>foo</a>"
@@ -172,8 +179,8 @@ class Html5TestSanitizer < Loofah::TestCase
172
179
  end
173
180
 
174
181
 
175
- HTML5::WhiteList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
176
- next unless HTML5::WhiteList::ALLOWED_ELEMENTS.include?(tag_name)
182
+ HTML5::SafeList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
183
+ next unless HTML5::SafeList::ALLOWED_ELEMENTS.include?(tag_name)
177
184
  define_method "test_#{tag_name}_should_allow_local_href" do
178
185
  input = %(<#{tag_name} xlink:href="#foo"/>)
179
186
  output = "<#{tag_name.downcase} xlink:href='#foo'></#{tag_name.downcase}>"
@@ -249,7 +256,7 @@ class Html5TestSanitizer < Loofah::TestCase
249
256
  end
250
257
 
251
258
  ## added because we don't have any coverage above on SVG_ATTR_VAL_ALLOWS_REF
252
- HTML5::WhiteList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
259
+ HTML5::SafeList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
253
260
  define_method "test_should_allow_uri_refs_in_svg_attribute_#{attr_name}" do
254
261
  input = "<rect fill='url(#foo)' />"
255
262
  output = "<rect fill='url(#foo)'></rect>"
@@ -263,6 +270,12 @@ class Html5TestSanitizer < Loofah::TestCase
263
270
  end
264
271
  end
265
272
 
273
+ def test_css_list_style
274
+ html = '<ul style="list-style: none"></ul>'
275
+ sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
276
+ assert_match %r/list-style/, sane.inner_html
277
+ end
278
+
266
279
  def test_css_negative_value_sanitization
267
280
  html = "<span style=\"letter-spacing:-0.03em;\">"
268
281
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
@@ -275,7 +288,19 @@ class Html5TestSanitizer < Loofah::TestCase
275
288
  assert_match %r/-0.05em/, sane.inner_html
276
289
  end
277
290
 
278
- def test_css_function_sanitization_leaves_whitelisted_functions_calc
291
+ def test_css_high_precision_value_shorthand_css_properties
292
+ html = "<span style=\"margin-left:0.3333333334em;\">"
293
+ sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
294
+ assert_match %r/0.3333333334em/, sane.inner_html
295
+ end
296
+
297
+ def test_css_rem_value
298
+ html = "<span style=\"margin-top:10rem;\">"
299
+ sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
300
+ assert_match %r/10rem/, sane.inner_html
301
+ end
302
+
303
+ def test_css_function_sanitization_leaves_safelisted_functions_calc
279
304
  html = "<span style=\"width:calc(5%)\">"
280
305
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
281
306
  assert_match %r/calc\(5%\)/, sane.inner_html
@@ -285,28 +310,35 @@ class Html5TestSanitizer < Loofah::TestCase
285
310
  assert_match %r/calc\(5%\)/, sane.inner_html
286
311
  end
287
312
 
288
- def test_css_function_sanitization_leaves_whitelisted_functions_rgb
313
+ def test_css_function_sanitization_leaves_safelisted_functions_rgb
289
314
  html = '<span style="color: rgb(255, 0, 0)">'
290
315
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
291
316
  assert_match %r/rgb\(255, 0, 0\)/, sane.inner_html
292
317
  end
293
318
 
294
- def test_css_function_sanitization_leaves_whitelisted_list_style_type
319
+ def test_css_function_sanitization_leaves_safelisted_list_style_type
295
320
  html = "<ol style='list-style-type:lower-greek;'></ol>"
296
321
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
297
322
  assert_match %r/list-style-type:lower-greek/, sane.inner_html
298
323
  end
299
324
 
300
325
  def test_css_function_sanitization_strips_style_attributes_with_unsafe_functions
301
- html = "<span style=\"width:attr(data-evil-attr)\">"
326
+ html = "<span style=\"width:url(data-evil-url)\">"
302
327
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
303
328
  assert_match %r/<span><\/span>/, sane.inner_html
304
329
 
305
- html = "<span style=\"width: attr(data-evil-attr)\">"
330
+ html = "<span style=\"width: url(data-evil-url)\">"
306
331
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
307
332
  assert_match %r/<span><\/span>/, sane.inner_html
308
333
  end
309
334
 
335
+ def test_css_max_width
336
+ html = '<div style="max-width: 100%;"></div>'
337
+ sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
338
+ assert_match %r/max-width/, sane.inner_html
339
+ end
340
+
341
+
310
342
  def test_issue_90_slow_regex
311
343
  skip("timing tests are hard to make pass and have little regression-testing value")
312
344
 
@@ -0,0 +1,10 @@
1
+ require "helper"
2
+
3
+ class UnitHTML5Scrub < Loofah::TestCase
4
+ include Loofah
5
+
6
+ def test_scrub_css
7
+ assert_equal Loofah::HTML5::Scrub.scrub_css("background: #ABC012"), "background:#ABC012;"
8
+ assert_equal Loofah::HTML5::Scrub.scrub_css("background: #abc012"), "background:#abc012;"
9
+ end
10
+ end
@@ -1,7 +1,6 @@
1
1
  require "helper"
2
2
 
3
3
  class IntegrationTestAdHoc < Loofah::TestCase
4
-
5
4
  context "blank input string" do
6
5
  context "fragment" do
7
6
  it "return a blank string" do
@@ -17,6 +16,8 @@ class IntegrationTestAdHoc < Loofah::TestCase
17
16
  end
18
17
 
19
18
  context "tests" do
19
+ MSWORD_HTML = File.read(File.join(File.dirname(__FILE__), "..", "assets", "msword.html")).freeze
20
+
20
21
  def test_removal_of_illegal_tag
21
22
  html = <<-HTML
22
23
  following this there should be no jim tag
@@ -31,9 +32,9 @@ class IntegrationTestAdHoc < Loofah::TestCase
31
32
  html = "<p class=bar foo=bar abbr=bar />"
32
33
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
33
34
  node = sane.xpath("//p").first
34
- assert node.attributes['class']
35
- assert node.attributes['abbr']
36
- assert_nil node.attributes['foo']
35
+ assert node.attributes["class"]
36
+ assert node.attributes["abbr"]
37
+ assert_nil node.attributes["foo"]
37
38
  end
38
39
 
39
40
  def test_removal_of_illegal_url_in_href
@@ -43,14 +44,14 @@ class IntegrationTestAdHoc < Loofah::TestCase
43
44
  HTML
44
45
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
45
46
  nodes = sane.xpath("//a")
46
- assert_nil nodes.first.attributes['href']
47
- assert nodes.last.attributes['href']
47
+ assert_nil nodes.first.attributes["href"]
48
+ assert nodes.last.attributes["href"]
48
49
  end
49
50
 
50
51
  def test_css_sanitization
51
52
  html = "<p style='background-color: url(\"http://foo.com/\") ; background-color: #000 ;' />"
52
53
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
53
- assert_match %r/#000/, sane.inner_html
54
+ assert_match %r/#000/, sane.inner_html
54
55
  refute_match %r/foo\.com/, sane.inner_html
55
56
  end
56
57
 
@@ -73,75 +74,9 @@ class IntegrationTestAdHoc < Loofah::TestCase
73
74
  def test_whitewash_on_fragment
74
75
  html = "safe<frameset rows=\"*\"><frame src=\"http://example.com\"></frameset> <b>description</b>"
75
76
  whitewashed = Loofah.scrub_document(html, :whitewash).xpath("/html/body/*").to_s
76
- assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n","")
77
+ assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n", "")
77
78
  end
78
79
 
79
- MSWORD_HTML = <<-EOHTML
80
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
81
- <w:WordDocument>
82
- <w:View>Normal</w:View>
83
- <w:Zoom>0</w:Zoom>
84
- <w:PunctuationKerning/>
85
- <w:ValidateAgainstSchemas/>
86
- <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
87
- <w:IgnoreMixedContent>false</w:IgnoreMixedContent>
88
- <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
89
- <w:Compatibility>
90
- <w:BreakWrappedTables/>
91
- <w:SnapToGridInCell/>
92
- <w:WrapTextWithPunct/>
93
- <w:UseAsianBreakRules/>
94
- <w:DontGrowAutofit/>
95
- </w:Compatibility>
96
- <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
97
- </w:WordDocument>
98
- </xml><![endif]--><!--[if gte mso 9]><xml>
99
- <w:LatentStyles DefLockedState="false" LatentStyleCount="156">
100
- </w:LatentStyles>
101
- </xml><![endif]--><style>
102
- <!--
103
- /* Style Definitions */
104
- p.MsoNormal, li.MsoNormal, div.MsoNormal
105
- {mso-style-parent:"";
106
- margin:0in;
107
- margin-bottom:.0001pt;
108
- mso-pagination:widow-orphan;
109
- font-size:12.0pt;
110
- font-family:"Times New Roman";
111
- mso-fareast-font-family:"Times New Roman";}
112
- @page Section1
113
- {size:8.5in 11.0in;
114
- margin:1.0in 1.25in 1.0in 1.25in;
115
- mso-header-margin:.5in;
116
- mso-footer-margin:.5in;
117
- mso-paper-source:0;}
118
- div.Section1
119
- {page:Section1;}
120
- -->
121
- </style><!--[if gte mso 10]>
122
- <style>
123
- /* Style Definitions */
124
- table.MsoNormalTable
125
- {mso-style-name:"Table Normal";
126
- mso-tstyle-rowband-size:0;
127
- mso-tstyle-colband-size:0;
128
- mso-style-noshow:yes;
129
- mso-style-parent:"";
130
- mso-padding-alt:0in 5.4pt 0in 5.4pt;
131
- mso-para-margin:0in;
132
- mso-para-margin-bottom:.0001pt;
133
- mso-pagination:widow-orphan;
134
- font-size:10.0pt;
135
- font-family:"Times New Roman";
136
- mso-ansi-language:#0400;
137
- mso-fareast-language:#0400;
138
- mso-bidi-language:#0400;}
139
- </style>
140
- <![endif]-->
141
-
142
- <p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
143
- EOHTML
144
-
145
80
  def test_fragment_whitewash_on_microsofty_markup
146
81
  whitewashed = Loofah.fragment(MSWORD_HTML).scrub!(:whitewash)
147
82
  assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.to_s.strip
@@ -150,11 +85,11 @@ mso-bidi-language:#0400;}
150
85
  def test_document_whitewash_on_microsofty_markup
151
86
  whitewashed = Loofah.document(MSWORD_HTML).scrub!(:whitewash)
152
87
  assert_match %r(<p>Foo <b>BOLD</b></p>), whitewashed.to_s
153
- assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.xpath("/html/body/*").to_s
88
+ assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.xpath("/html/body/*").to_s
154
89
  end
155
90
 
156
91
  def test_return_empty_string_when_nothing_left
157
- assert_equal "", Loofah.scrub_document('<script>test</script>', :prune).text
92
+ assert_equal "", Loofah.scrub_document("<script>test</script>", :prune).text
158
93
  end
159
94
 
160
95
  def test_nested_script_cdata_tags_should_be_scrubbed
@@ -209,21 +144,20 @@ mso-bidi-language:#0400;}
209
144
  #
210
145
  # https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
211
146
  #
212
- {tag: "a", attr: "href"},
213
- {tag: "div", attr: "href"},
214
- {tag: "a", attr: "action"},
215
- {tag: "div", attr: "action"},
216
- {tag: "a", attr: "src"},
217
- {tag: "div", attr: "src"},
218
- {tag: "a", attr: "name"},
147
+ { tag: "a", attr: "href" },
148
+ { tag: "div", attr: "href" },
149
+ { tag: "a", attr: "action" },
150
+ { tag: "div", attr: "action" },
151
+ { tag: "a", attr: "src" },
152
+ { tag: "div", attr: "src" },
153
+ { tag: "a", attr: "name" },
219
154
  #
220
155
  # note that div+name is _not_ affected by the libxml2 issue.
221
156
  # but we test it anyway to ensure our logic isn't modifying
222
157
  # attributes that don't need modifying.
223
158
  #
224
- {tag: "div", attr: "name", unescaped: true},
159
+ { tag: "div", attr: "name", unescaped: true },
225
160
  ].each do |config|
226
-
227
161
  define_method "test_uri_escaping_of_#{config[:attr]}_attr_in_#{config[:tag]}_tag" do
228
162
  html = %{<#{config[:tag]} #{config[:attr]}='examp<!--" unsafeattr=foo()>-->le.com'>test</#{config[:tag]}>}
229
163
 
@@ -231,7 +165,7 @@ mso-bidi-language:#0400;}
231
165
  attributes = reparsed.at_css(config[:tag]).attribute_nodes
232
166
 
233
167
  assert_equal [config[:attr]], attributes.collect(&:name)
234
- if Nokogiri::VersionInfo.new.libxml2?
168
+ if Nokogiri::VersionInfo.instance.libxml2?
235
169
  if config[:unescaped]
236
170
  #
237
171
  # this attribute was emitted wrapped in single-quotes, so a double quote is A-OK.
@@ -252,7 +186,35 @@ mso-bidi-language:#0400;}
252
186
  assert_equal %{examp<!--%22 unsafeattr=foo()>-->le.com}, attributes.first.value
253
187
  end
254
188
  end
189
+ end
190
+
191
+ context "xss protection from svg animate attributes" do
192
+ # see recommendation from https://html5sec.org/#137
193
+ # to sanitize "to", "from", "values", and "by" attributes
194
+
195
+ it "sanitizes 'from', 'to', and 'by' attributes" do
196
+ # for CVE-2018-16468
197
+ # see:
198
+ # - https://github.com/flavorjones/loofah/issues/154
199
+ # - https://hackerone.com/reports/429267
200
+ html = %Q{<svg><a xmlns:xlink=http://www.w3.org/1999/xlink xlink:href=?><circle r=400 /><animate attributeName=xlink:href begin=0 from=javascript:alert(1) to=%26 by=5>}
201
+
202
+ sanitized = Loofah.scrub_fragment(html, :escape)
203
+ assert_nil sanitized.at_css("animate")["from"]
204
+ assert_nil sanitized.at_css("animate")["to"]
205
+ assert_nil sanitized.at_css("animate")["by"]
206
+ end
207
+
208
+ it "sanitizes 'values' attribute" do
209
+ # for CVE-2019-15587
210
+ # see:
211
+ # - https://github.com/flavorjones/loofah/issues/171
212
+ # - https://hackerone.com/reports/709009
213
+ html = %Q{<svg> <animate href="#foo" attributeName="href" values="javascript:alert('xss')"/> <a id="foo"> <circle r=400 /> </a> </svg>}
255
214
 
215
+ sanitized = Loofah.scrub_fragment(html, :escape)
216
+ assert_nil sanitized.at_css("animate")["values"]
217
+ end
256
218
  end
257
219
  end
258
220
  end