loofah 2.2.0 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of loofah might be problematic. Click here for more details.

@@ -1,5 +1,3 @@
1
- #encoding: US-ASCII
2
-
3
1
  require 'cgi'
4
2
  require 'crass'
5
3
 
@@ -8,13 +6,13 @@ module Loofah
8
6
  module Scrub
9
7
 
10
8
  CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
11
- CSS_KEYWORDISH = /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
9
+ CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
12
10
  CRASS_SEMICOLON = {:node => :semicolon, :raw => ";"}
13
11
 
14
12
  class << self
15
13
 
16
14
  def allowed_element? element_name
17
- ::Loofah::HTML5::WhiteList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
15
+ ::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
18
16
  end
19
17
 
20
18
  # alternative implementation of the html5lib attribute scrubbing algorithm
@@ -30,31 +28,31 @@ module Loofah
30
28
  next
31
29
  end
32
30
 
33
- unless WhiteList::ALLOWED_ATTRIBUTES.include?(attr_name)
31
+ unless SafeList::ALLOWED_ATTRIBUTES.include?(attr_name)
34
32
  attr_node.remove
35
33
  next
36
34
  end
37
35
 
38
- if WhiteList::ATTR_VAL_IS_URI.include?(attr_name)
36
+ if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
39
37
  # this block lifted nearly verbatim from HTML5 sanitization
40
38
  val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS,'').downcase
41
- if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! WhiteList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(WhiteList::PROTOCOL_SEPARATOR)[0])
39
+ if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
42
40
  attr_node.remove
43
41
  next
44
- elsif val_unescaped.split(WhiteList::PROTOCOL_SEPARATOR)[0] == 'data'
42
+ elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == 'data'
45
43
  # permit only allowed data mediatypes
46
- mediatype = val_unescaped.split(WhiteList::PROTOCOL_SEPARATOR)[1]
44
+ mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
47
45
  mediatype, _ = mediatype.split(';')[0..1] if mediatype
48
- if mediatype && !WhiteList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
46
+ if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
49
47
  attr_node.remove
50
48
  next
51
49
  end
52
50
  end
53
51
  end
54
- if WhiteList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
52
+ if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
55
53
  attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value
56
54
  end
57
- if WhiteList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
55
+ if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
58
56
  attr_node.remove
59
57
  next
60
58
  end
@@ -65,6 +63,8 @@ module Loofah
65
63
  node.attribute_nodes.each do |attr_node|
66
64
  node.remove_attribute(attr_node.name) if attr_node.value !~ /[^[:space:]]/
67
65
  end
66
+
67
+ force_correct_attribute_escaping! node
68
68
  end
69
69
 
70
70
  def scrub_css_attribute node
@@ -79,14 +79,14 @@ module Loofah
79
79
  style_tree.each do |node|
80
80
  next unless node[:node] == :property
81
81
  next if node[:children].any? do |child|
82
- [:url, :bad_url].include?(child[:node]) || (child[:node] == :function && !WhiteList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase))
82
+ [:url, :bad_url].include?(child[:node]) || (child[:node] == :function && !SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase))
83
83
  end
84
84
  name = node[:name].downcase
85
- if WhiteList::ALLOWED_CSS_PROPERTIES.include?(name) || WhiteList::ALLOWED_SVG_PROPERTIES.include?(name)
85
+ if SafeList::ALLOWED_CSS_PROPERTIES.include?(name) || SafeList::ALLOWED_SVG_PROPERTIES.include?(name)
86
86
  sanitized_tree << node << CRASS_SEMICOLON
87
- elsif WhiteList::SHORTHAND_CSS_PROPERTIES.include?(name.split('-').first)
87
+ elsif SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split('-').first)
88
88
  value = node[:value].split.map do |keyword|
89
- if WhiteList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
89
+ if SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
90
90
  keyword
91
91
  end
92
92
  end.compact
@@ -100,6 +100,33 @@ module Loofah
100
100
 
101
101
  Crass::Parser.stringify sanitized_tree
102
102
  end
103
+
104
+ #
105
+ # libxml2 >= 2.9.2 fails to escape comments within some attributes.
106
+ #
107
+ # see comments about CVE-2018-8048 within the tests for more information
108
+ #
109
+ def force_correct_attribute_escaping! node
110
+ return unless Nokogiri::VersionInfo.instance.libxml2?
111
+
112
+ node.attribute_nodes.each do |attr_node|
113
+ next unless LibxmlWorkarounds::BROKEN_ESCAPING_ATTRIBUTES.include?(attr_node.name)
114
+
115
+ tag_name = LibxmlWorkarounds::BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG[attr_node.name]
116
+ next unless tag_name.nil? || tag_name == node.name
117
+
118
+ #
119
+ # this block is just like CGI.escape in Ruby 2.4, but
120
+ # only encodes space and double-quote, to mimic
121
+ # pre-2.9.2 behavior
122
+ #
123
+ encoding = attr_node.value.encoding
124
+ attr_node.value = attr_node.value.gsub(/[ "]/) do |m|
125
+ '%' + m.unpack('H2' * m.bytesize).join('%').upcase
126
+ end.force_encoding(encoding)
127
+ end
128
+ end
129
+
103
130
  end
104
131
  end
105
132
  end
@@ -1,7 +1,7 @@
1
1
  module Loofah
2
2
  #
3
3
  # Loofah provides some built-in scrubbers for sanitizing with
4
- # HTML5lib's whitelist and for accomplishing some common
4
+ # HTML5lib's safelist and for accomplishing some common
5
5
  # transformation tasks.
6
6
  #
7
7
  #
data/lib/loofah.rb CHANGED
@@ -1,21 +1,22 @@
1
1
  $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
2
2
 
3
- require 'nokogiri'
3
+ require "nokogiri"
4
4
 
5
- require 'loofah/metahelpers'
6
- require 'loofah/elements'
5
+ require "loofah/metahelpers"
6
+ require "loofah/elements"
7
7
 
8
- require 'loofah/html5/whitelist'
9
- require 'loofah/html5/scrub'
8
+ require "loofah/html5/safelist"
9
+ require "loofah/html5/libxml2_workarounds"
10
+ require "loofah/html5/scrub"
10
11
 
11
- require 'loofah/scrubber'
12
- require 'loofah/scrubbers'
12
+ require "loofah/scrubber"
13
+ require "loofah/scrubbers"
13
14
 
14
- require 'loofah/instance_methods'
15
- require 'loofah/xml/document'
16
- require 'loofah/xml/document_fragment'
17
- require 'loofah/html/document'
18
- require 'loofah/html/document_fragment'
15
+ require "loofah/instance_methods"
16
+ require "loofah/xml/document"
17
+ require "loofah/xml/document_fragment"
18
+ require "loofah/html/document"
19
+ require "loofah/html/document_fragment"
19
20
 
20
21
  # == Strings and IO Objects as Input
21
22
  #
@@ -27,7 +28,7 @@ require 'loofah/html/document_fragment'
27
28
  #
28
29
  module Loofah
29
30
  # The version of Loofah you are using
30
- VERSION = '2.2.0'
31
+ VERSION = "2.3.0"
31
32
 
32
33
  class << self
33
34
  # Shortcut for Loofah::HTML::Document.parse
@@ -76,7 +77,7 @@ module Loofah
76
77
 
77
78
  # A helper to remove extraneous whitespace from text-ified HTML
78
79
  def remove_extraneous_whitespace(string)
79
- string.gsub(/\n\s*\n\s*\n/,"\n\n")
80
+ string.gsub(/\n\s*\n\s*\n/, "\n\n")
80
81
  end
81
82
  end
82
83
  end
@@ -0,0 +1,63 @@
1
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
2
+ <w:WordDocument>
3
+ <w:View>Normal</w:View>
4
+ <w:Zoom>0</w:Zoom>
5
+ <w:PunctuationKerning/>
6
+ <w:ValidateAgainstSchemas/>
7
+ <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
8
+ <w:IgnoreMixedContent>false</w:IgnoreMixedContent>
9
+ <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
10
+ <w:Compatibility>
11
+ <w:BreakWrappedTables/>
12
+ <w:SnapToGridInCell/>
13
+ <w:WrapTextWithPunct/>
14
+ <w:UseAsianBreakRules/>
15
+ <w:DontGrowAutofit/>
16
+ </w:Compatibility>
17
+ <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
18
+ </w:WordDocument>
19
+ </xml><![endif]--><!--[if gte mso 9]><xml>
20
+ <w:LatentStyles DefLockedState="false" LatentStyleCount="156">
21
+ </w:LatentStyles>
22
+ </xml><![endif]--><style>
23
+ <!--
24
+ /* Style Definitions */
25
+ p.MsoNormal, li.MsoNormal, div.MsoNormal
26
+ {mso-style-parent:"";
27
+ margin:0in;
28
+ margin-bottom:.0001pt;
29
+ mso-pagination:widow-orphan;
30
+ font-size:12.0pt;
31
+ font-family:"Times New Roman";
32
+ mso-fareast-font-family:"Times New Roman";}
33
+ @page Section1
34
+ {size:8.5in 11.0in;
35
+ margin:1.0in 1.25in 1.0in 1.25in;
36
+ mso-header-margin:.5in;
37
+ mso-footer-margin:.5in;
38
+ mso-paper-source:0;}
39
+ div.Section1
40
+ {page:Section1;}
41
+ -->
42
+ </style><!--[if gte mso 10]>
43
+ <style>
44
+ /* Style Definitions */
45
+ table.MsoNormalTable
46
+ {mso-style-name:"Table Normal";
47
+ mso-tstyle-rowband-size:0;
48
+ mso-tstyle-colband-size:0;
49
+ mso-style-noshow:yes;
50
+ mso-style-parent:"";
51
+ mso-padding-alt:0in 5.4pt 0in 5.4pt;
52
+ mso-para-margin:0in;
53
+ mso-para-margin-bottom:.0001pt;
54
+ mso-pagination:widow-orphan;
55
+ font-size:10.0pt;
56
+ font-family:"Times New Roman";
57
+ mso-ansi-language:#0400;
58
+ mso-fareast-language:#0400;
59
+ mso-bidi-language:#0400;}
60
+ </style>
61
+ <![endif]-->
62
+
63
+ <p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
@@ -37,7 +37,7 @@ class Html5TestSanitizer < Loofah::TestCase
37
37
  assert_in_delta t0, Time.now, 0.1 # arbitrary seconds
38
38
  end
39
39
 
40
- (HTML5::WhiteList::ALLOWED_ELEMENTS).each do |tag_name|
40
+ (HTML5::SafeList::ALLOWED_ELEMENTS).each do |tag_name|
41
41
  define_method "test_should_allow_#{tag_name}_tag" do
42
42
  input = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
43
43
  htmloutput = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
@@ -58,7 +58,7 @@ class Html5TestSanitizer < Loofah::TestCase
58
58
  htmloutput = "<img title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
59
59
  xhtmloutput = htmloutput
60
60
  rexmloutput = "<image title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</image>"
61
- elsif HTML5::WhiteList::VOID_ELEMENTS.include?(tag_name)
61
+ elsif HTML5::SafeList::VOID_ELEMENTS.include?(tag_name)
62
62
  htmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
63
63
  xhtmloutput = htmloutput
64
64
  htmloutput += '<br/>' if tag_name == 'br'
@@ -71,7 +71,7 @@ class Html5TestSanitizer < Loofah::TestCase
71
71
  ##
72
72
  ## libxml2 downcases elements, so this is moot.
73
73
  ##
74
- # HTML5::WhiteList::ALLOWED_ELEMENTS.each do |tag_name|
74
+ # HTML5::SafeList::ALLOWED_ELEMENTS.each do |tag_name|
75
75
  # define_method "test_should_forbid_#{tag_name.upcase}_tag" do
76
76
  # input = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
77
77
  # output = "&lt;#{tag_name.upcase} title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/#{tag_name.upcase}&gt;"
@@ -79,7 +79,7 @@ class Html5TestSanitizer < Loofah::TestCase
79
79
  # end
80
80
  # end
81
81
 
82
- HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
82
+ HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name|
83
83
  next if attribute_name == 'style'
84
84
  define_method "test_should_allow_#{attribute_name}_attribute" do
85
85
  input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
@@ -110,10 +110,17 @@ class Html5TestSanitizer < Loofah::TestCase
110
110
  check_sanitization(input, htmloutput, output, output)
111
111
  end
112
112
 
113
+ def test_should_allow_contenteditable
114
+ input = '<p contenteditable="false">Hi!</p>'
115
+ output = '<p contenteditable="false">Hi!</p>'
116
+
117
+ check_sanitization(input, output, output, output)
118
+ end
119
+
113
120
  ##
114
121
  ## libxml2 downcases attributes, so this is moot.
115
122
  ##
116
- # HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
123
+ # HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name|
117
124
  # define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
118
125
  # input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
119
126
  # output = "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
@@ -121,7 +128,7 @@ class Html5TestSanitizer < Loofah::TestCase
121
128
  # end
122
129
  # end
123
130
 
124
- HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
131
+ HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
125
132
  define_method "test_should_allow_#{protocol}_uris" do
126
133
  input = %(<a href="#{protocol}">foo</a>)
127
134
  output = "<a href='#{protocol}'>foo</a>"
@@ -129,7 +136,7 @@ class Html5TestSanitizer < Loofah::TestCase
129
136
  end
130
137
  end
131
138
 
132
- HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
139
+ HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
133
140
  define_method "test_should_allow_uppercase_#{protocol}_uris" do
134
141
  input = %(<a href="#{protocol.upcase}">foo</a>)
135
142
  output = "<a href='#{protocol.upcase}'>foo</a>"
@@ -137,7 +144,7 @@ class Html5TestSanitizer < Loofah::TestCase
137
144
  end
138
145
  end
139
146
 
140
- HTML5::WhiteList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
147
+ HTML5::SafeList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
141
148
  define_method "test_should_allow_data_#{data_uri_type}_uris" do
142
149
  input = %(<a href="data:#{data_uri_type}">foo</a>)
143
150
  output = "<a href='data:#{data_uri_type}'>foo</a>"
@@ -149,7 +156,7 @@ class Html5TestSanitizer < Loofah::TestCase
149
156
  end
150
157
  end
151
158
 
152
- HTML5::WhiteList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
159
+ HTML5::SafeList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
153
160
  define_method "test_should_allow_uppercase_data_#{data_uri_type}_uris" do
154
161
  input = %(<a href="DATA:#{data_uri_type.upcase}">foo</a>)
155
162
  output = "<a href='DATA:#{data_uri_type.upcase}'>foo</a>"
@@ -172,8 +179,8 @@ class Html5TestSanitizer < Loofah::TestCase
172
179
  end
173
180
 
174
181
 
175
- HTML5::WhiteList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
176
- next unless HTML5::WhiteList::ALLOWED_ELEMENTS.include?(tag_name)
182
+ HTML5::SafeList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
183
+ next unless HTML5::SafeList::ALLOWED_ELEMENTS.include?(tag_name)
177
184
  define_method "test_#{tag_name}_should_allow_local_href" do
178
185
  input = %(<#{tag_name} xlink:href="#foo"/>)
179
186
  output = "<#{tag_name.downcase} xlink:href='#foo'></#{tag_name.downcase}>"
@@ -249,7 +256,7 @@ class Html5TestSanitizer < Loofah::TestCase
249
256
  end
250
257
 
251
258
  ## added because we don't have any coverage above on SVG_ATTR_VAL_ALLOWS_REF
252
- HTML5::WhiteList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
259
+ HTML5::SafeList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
253
260
  define_method "test_should_allow_uri_refs_in_svg_attribute_#{attr_name}" do
254
261
  input = "<rect fill='url(#foo)' />"
255
262
  output = "<rect fill='url(#foo)'></rect>"
@@ -263,6 +270,12 @@ class Html5TestSanitizer < Loofah::TestCase
263
270
  end
264
271
  end
265
272
 
273
+ def test_css_list_style
274
+ html = '<ul style="list-style: none"></ul>'
275
+ sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
276
+ assert_match %r/list-style/, sane.inner_html
277
+ end
278
+
266
279
  def test_css_negative_value_sanitization
267
280
  html = "<span style=\"letter-spacing:-0.03em;\">"
268
281
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
@@ -275,7 +288,13 @@ class Html5TestSanitizer < Loofah::TestCase
275
288
  assert_match %r/-0.05em/, sane.inner_html
276
289
  end
277
290
 
278
- def test_css_function_sanitization_leaves_whitelisted_functions_calc
291
+ def test_css_high_precision_value_shorthand_css_properties
292
+ html = "<span style=\"margin-left:0.3333333334em;\">"
293
+ sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
294
+ assert_match %r/0.3333333334em/, sane.inner_html
295
+ end
296
+
297
+ def test_css_function_sanitization_leaves_safelisted_functions_calc
279
298
  html = "<span style=\"width:calc(5%)\">"
280
299
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
281
300
  assert_match %r/calc\(5%\)/, sane.inner_html
@@ -285,24 +304,24 @@ class Html5TestSanitizer < Loofah::TestCase
285
304
  assert_match %r/calc\(5%\)/, sane.inner_html
286
305
  end
287
306
 
288
- def test_css_function_sanitization_leaves_whitelisted_functions_rgb
307
+ def test_css_function_sanitization_leaves_safelisted_functions_rgb
289
308
  html = '<span style="color: rgb(255, 0, 0)">'
290
309
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
291
310
  assert_match %r/rgb\(255, 0, 0\)/, sane.inner_html
292
311
  end
293
312
 
294
- def test_css_function_sanitization_leaves_whitelisted_list_style_type
313
+ def test_css_function_sanitization_leaves_safelisted_list_style_type
295
314
  html = "<ol style='list-style-type:lower-greek;'></ol>"
296
315
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
297
316
  assert_match %r/list-style-type:lower-greek/, sane.inner_html
298
317
  end
299
318
 
300
319
  def test_css_function_sanitization_strips_style_attributes_with_unsafe_functions
301
- html = "<span style=\"width:attr(data-evil-attr)\">"
320
+ html = "<span style=\"width:url(data-evil-url)\">"
302
321
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
303
322
  assert_match %r/<span><\/span>/, sane.inner_html
304
323
 
305
- html = "<span style=\"width: attr(data-evil-attr)\">"
324
+ html = "<span style=\"width: url(data-evil-url)\">"
306
325
  sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
307
326
  assert_match %r/<span><\/span>/, sane.inner_html
308
327
  end
@@ -0,0 +1,10 @@
1
+ require "helper"
2
+
3
+ class UnitHTML5Scrub < Loofah::TestCase
4
+ include Loofah
5
+
6
+ def test_scrub_css
7
+ assert_equal Loofah::HTML5::Scrub.scrub_css("background: #ABC012"), "background:#ABC012;"
8
+ assert_equal Loofah::HTML5::Scrub.scrub_css("background: #abc012"), "background:#abc012;"
9
+ end
10
+ end
@@ -17,6 +17,8 @@ class IntegrationTestAdHoc < Loofah::TestCase
17
17
  end
18
18
 
19
19
  context "tests" do
20
+ MSWORD_HTML = File.read(File.join(File.dirname(__FILE__), "..", "assets", "msword.html")).freeze
21
+
20
22
  def test_removal_of_illegal_tag
21
23
  html = <<-HTML
22
24
  following this there should be no jim tag
@@ -76,72 +78,6 @@ class IntegrationTestAdHoc < Loofah::TestCase
76
78
  assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n","")
77
79
  end
78
80
 
79
- MSWORD_HTML = <<-EOHTML
80
- <meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
81
- <w:WordDocument>
82
- <w:View>Normal</w:View>
83
- <w:Zoom>0</w:Zoom>
84
- <w:PunctuationKerning/>
85
- <w:ValidateAgainstSchemas/>
86
- <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
87
- <w:IgnoreMixedContent>false</w:IgnoreMixedContent>
88
- <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
89
- <w:Compatibility>
90
- <w:BreakWrappedTables/>
91
- <w:SnapToGridInCell/>
92
- <w:WrapTextWithPunct/>
93
- <w:UseAsianBreakRules/>
94
- <w:DontGrowAutofit/>
95
- </w:Compatibility>
96
- <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
97
- </w:WordDocument>
98
- </xml><![endif]--><!--[if gte mso 9]><xml>
99
- <w:LatentStyles DefLockedState="false" LatentStyleCount="156">
100
- </w:LatentStyles>
101
- </xml><![endif]--><style>
102
- <!--
103
- /* Style Definitions */
104
- p.MsoNormal, li.MsoNormal, div.MsoNormal
105
- {mso-style-parent:"";
106
- margin:0in;
107
- margin-bottom:.0001pt;
108
- mso-pagination:widow-orphan;
109
- font-size:12.0pt;
110
- font-family:"Times New Roman";
111
- mso-fareast-font-family:"Times New Roman";}
112
- @page Section1
113
- {size:8.5in 11.0in;
114
- margin:1.0in 1.25in 1.0in 1.25in;
115
- mso-header-margin:.5in;
116
- mso-footer-margin:.5in;
117
- mso-paper-source:0;}
118
- div.Section1
119
- {page:Section1;}
120
- -->
121
- </style><!--[if gte mso 10]>
122
- <style>
123
- /* Style Definitions */
124
- table.MsoNormalTable
125
- {mso-style-name:"Table Normal";
126
- mso-tstyle-rowband-size:0;
127
- mso-tstyle-colband-size:0;
128
- mso-style-noshow:yes;
129
- mso-style-parent:"";
130
- mso-padding-alt:0in 5.4pt 0in 5.4pt;
131
- mso-para-margin:0in;
132
- mso-para-margin-bottom:.0001pt;
133
- mso-pagination:widow-orphan;
134
- font-size:10.0pt;
135
- font-family:"Times New Roman";
136
- mso-ansi-language:#0400;
137
- mso-fareast-language:#0400;
138
- mso-bidi-language:#0400;}
139
- </style>
140
- <![endif]-->
141
-
142
- <p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
143
- EOHTML
144
-
145
81
  def test_fragment_whitewash_on_microsofty_markup
146
82
  whitewashed = Loofah.fragment(MSWORD_HTML).scrub!(:whitewash)
147
83
  assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.to_s.strip
@@ -188,6 +124,81 @@ mso-bidi-language:#0400;}
188
124
  html = "<p>Foo</p>\n<p>Bar</p>"
189
125
  assert_equal "Foo\nBar", Loofah.scrub_document(html, :prune).text
190
126
  end
127
+
128
+ #
129
+ # tests for CVE-2018-8048 (see https://github.com/flavorjones/loofah/issues/144)
130
+ #
131
+ # libxml2 >= 2.9.2 fails to escape comments within some attributes. It
132
+ # wants to ensure these comments can be treated as "server-side includes",
133
+ # but as a result fails to ensure that serialization is well-formed,
134
+ # resulting in an opportunity for XSS injection of code into a final
135
+ # re-parsed document (presumably in a browser).
136
+ #
137
+ # we'll test this by parsing the HTML, serializing it, then
138
+ # re-parsing it to ensure there isn't any ambiguity in the output
139
+ # that might allow code injection into a browser consuming
140
+ # "sanitized" output.
141
+ #
142
+ [
143
+ #
144
+ # these tags and attributes are determined by the code at:
145
+ #
146
+ # https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
147
+ #
148
+ {tag: "a", attr: "href"},
149
+ {tag: "div", attr: "href"},
150
+ {tag: "a", attr: "action"},
151
+ {tag: "div", attr: "action"},
152
+ {tag: "a", attr: "src"},
153
+ {tag: "div", attr: "src"},
154
+ {tag: "a", attr: "name"},
155
+ #
156
+ # note that div+name is _not_ affected by the libxml2 issue.
157
+ # but we test it anyway to ensure our logic isn't modifying
158
+ # attributes that don't need modifying.
159
+ #
160
+ {tag: "div", attr: "name", unescaped: true},
161
+ ].each do |config|
162
+
163
+ define_method "test_uri_escaping_of_#{config[:attr]}_attr_in_#{config[:tag]}_tag" do
164
+ html = %{<#{config[:tag]} #{config[:attr]}='examp<!--" unsafeattr=foo()>-->le.com'>test</#{config[:tag]}>}
165
+
166
+ reparsed = Loofah.fragment(Loofah.fragment(html).scrub!(:prune).to_html)
167
+ attributes = reparsed.at_css(config[:tag]).attribute_nodes
168
+
169
+ assert_equal [config[:attr]], attributes.collect(&:name)
170
+ if Nokogiri::VersionInfo.instance.libxml2?
171
+ if config[:unescaped]
172
+ #
173
+ # this attribute was emitted wrapped in single-quotes, so a double quote is A-OK.
174
+ # assert that this attribute's serialization is unaffected.
175
+ #
176
+ assert_equal %{examp<!--" unsafeattr=foo()>-->le.com}, attributes.first.value
177
+ else
178
+ #
179
+ # let's match the behavior in libxml < 2.9.2.
180
+ # test that this attribute's serialization is well-formed and sanitized.
181
+ #
182
+ assert_equal %{examp<!--%22%20unsafeattr=foo()>-->le.com}, attributes.first.value
183
+ end
184
+ else
185
+ #
186
+ # yay for consistency in javaland. move along, nothing to see here.
187
+ #
188
+ assert_equal %{examp<!--%22 unsafeattr=foo()>-->le.com}, attributes.first.value
189
+ end
190
+ end
191
+ end
192
+
193
+ # see:
194
+ # - https://github.com/flavorjones/loofah/issues/154
195
+ # - https://hackerone.com/reports/429267
196
+ context "xss protection from svg xmlns:xlink animate attribute" do
197
+ it "sanitizes appropriate attributes" do
198
+ html = %Q{<svg><a xmlns:xlink=http://www.w3.org/1999/xlink xlink:href=?><circle r=400 /><animate attributeName=xlink:href begin=0 from=javascript:alert(1) to=%26>}
199
+ sanitized = Loofah.scrub_fragment(html, :escape)
200
+ assert_nil sanitized.at_css("animate")["from"]
201
+ end
202
+ end
191
203
  end
192
204
  end
193
-
@@ -44,17 +44,17 @@ class UnitTestHelpers < Loofah::TestCase
44
44
  end
45
45
  end
46
46
 
47
- describe "WhiteListSanitizer#sanitize" do
47
+ describe "SafeListSanitizer#sanitize" do
48
48
  it "calls .sanitize" do
49
49
  mock(Loofah::Helpers).sanitize("foobar")
50
- Loofah::Helpers::ActionView::WhiteListSanitizer.new.sanitize "foobar"
50
+ Loofah::Helpers::ActionView::SafeListSanitizer.new.sanitize "foobar"
51
51
  end
52
52
  end
53
53
 
54
- describe "WhiteListSanitizer#sanitize_css" do
54
+ describe "SafeListSanitizer#sanitize_css" do
55
55
  it "calls .sanitize_css" do
56
56
  mock(Loofah::Helpers).sanitize_css("foobar")
57
- Loofah::Helpers::ActionView::WhiteListSanitizer.new.sanitize_css "foobar"
57
+ Loofah::Helpers::ActionView::SafeListSanitizer.new.sanitize_css "foobar"
58
58
  end
59
59
  end
60
60
  end