RubyGems - loofah - Versions diffs - 2.2.0 → 2.3.0 - Mend

loofah 2.2.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of loofah might be problematic. Click here for more details.

Files changed (20)

checksums.yaml +5 -5
data/CHANGELOG.md +96 -32
data/Gemfile +3 -3
data/Manifest.txt +5 -1
data/README.md +26 -18
data/Rakefile +23 -21
data/SECURITY.md +18 -0
data/lib/loofah/helpers.rb +13 -3
data/lib/loofah/html5/libxml2_workarounds.rb +26 -0
data/lib/loofah/html5/safelist.rb +800 -0
data/lib/loofah/html5/scrub.rb +43 -16
data/lib/loofah/scrubbers.rb +1 -1
data/lib/loofah.rb +15 -14
data/test/assets/msword.html +63 -0
data/test/html5/test_sanitizer.rb +36 -17
data/test/html5/test_scrub.rb +10 -0
data/test/integration/test_ad_hoc.rb +78 -67
data/test/unit/test_helpers.rb +4 -4
metadata +61 -39
data/lib/loofah/html5/whitelist.rb +0 -186

data/lib/loofah/html5/scrub.rb CHANGED Viewed

@@ -1,5 +1,3 @@
-#encoding: US-ASCII
 require 'cgi'
 require 'crass'
@@ -8,13 +6,13 @@ module Loofah
     module Scrub
       CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
-      CSS_KEYWORDISH = /\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
+      CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
       CRASS_SEMICOLON = {:node => :semicolon, :raw => ";"}
       class << self
         def allowed_element? element_name
-          ::Loofah::HTML5::WhiteList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
+          ::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
         end
         #  alternative implementation of the html5lib attribute scrubbing algorithm
@@ -30,31 +28,31 @@ module Loofah
               next
             end
-            unless WhiteList::ALLOWED_ATTRIBUTES.include?(attr_name)
+            unless SafeList::ALLOWED_ATTRIBUTES.include?(attr_name)
               attr_node.remove
               next
             end
-            if WhiteList::ATTR_VAL_IS_URI.include?(attr_name)
+            if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
               # this block lifted nearly verbatim from HTML5 sanitization
               val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS,'').downcase
-              if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! WhiteList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(WhiteList::PROTOCOL_SEPARATOR)[0])
+              if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
                 attr_node.remove
                 next
-              elsif val_unescaped.split(WhiteList::PROTOCOL_SEPARATOR)[0] == 'data'
+              elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == 'data'
                 # permit only allowed data mediatypes
-                mediatype = val_unescaped.split(WhiteList::PROTOCOL_SEPARATOR)[1]
+                mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
                 mediatype, _ = mediatype.split(';')[0..1] if mediatype
-                if mediatype && !WhiteList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
+                if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
                   attr_node.remove
                   next
                 end
               end
             end
-            if WhiteList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
+            if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
               attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, ' ') if attr_node.value
             end
-            if WhiteList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
+            if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == 'xlink:href' && attr_node.value =~ /^\s*[^#\s].*/m
               attr_node.remove
               next
             end
@@ -65,6 +63,8 @@ module Loofah
           node.attribute_nodes.each do |attr_node|
             node.remove_attribute(attr_node.name) if attr_node.value !~ /[^[:space:]]/
           end
+          force_correct_attribute_escaping! node
         end
         def scrub_css_attribute node
@@ -79,14 +79,14 @@ module Loofah
           style_tree.each do |node|
             next unless node[:node] == :property
             next if node[:children].any? do |child|
-              [:url, :bad_url].include?(child[:node]) || (child[:node] == :function && !WhiteList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase))
+              [:url, :bad_url].include?(child[:node]) || (child[:node] == :function && !SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase))
             end
             name = node[:name].downcase
-            if WhiteList::ALLOWED_CSS_PROPERTIES.include?(name) || WhiteList::ALLOWED_SVG_PROPERTIES.include?(name)
+            if SafeList::ALLOWED_CSS_PROPERTIES.include?(name) || SafeList::ALLOWED_SVG_PROPERTIES.include?(name)
               sanitized_tree << node << CRASS_SEMICOLON
-            elsif WhiteList::SHORTHAND_CSS_PROPERTIES.include?(name.split('-').first)
+            elsif SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split('-').first)
               value = node[:value].split.map do |keyword|
-                if WhiteList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
+                if SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
                   keyword
                 end
               end.compact
@@ -100,6 +100,33 @@ module Loofah
           Crass::Parser.stringify sanitized_tree
         end
+        #
+        #  libxml2 >= 2.9.2 fails to escape comments within some attributes.
+        #
+        #  see comments about CVE-2018-8048 within the tests for more information
+        #
+        def force_correct_attribute_escaping! node
+          return unless Nokogiri::VersionInfo.instance.libxml2?
+          node.attribute_nodes.each do |attr_node|
+            next unless LibxmlWorkarounds::BROKEN_ESCAPING_ATTRIBUTES.include?(attr_node.name)
+            tag_name = LibxmlWorkarounds::BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG[attr_node.name]
+            next unless tag_name.nil? || tag_name == node.name
+            #
+            #  this block is just like CGI.escape in Ruby 2.4, but
+            #  only encodes space and double-quote, to mimic
+            #  pre-2.9.2 behavior
+            #
+            encoding = attr_node.value.encoding
+            attr_node.value = attr_node.value.gsub(/[ "]/) do |m|
+              '%' + m.unpack('H2' * m.bytesize).join('%').upcase
+            end.force_encoding(encoding)
+          end
+        end
       end
     end
   end

data/lib/loofah/scrubbers.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 module Loofah
   #
   #  Loofah provides some built-in scrubbers for sanitizing with
-  #  HTML5lib's whitelist and for accomplishing some common
+  #  HTML5lib's safelist and for accomplishing some common
   #  transformation tasks.
   #
   #

data/lib/loofah.rb CHANGED Viewed

@@ -1,21 +1,22 @@
 $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
-require 'nokogiri'
+require "nokogiri"
-require 'loofah/metahelpers'
-require 'loofah/elements'
+require "loofah/metahelpers"
+require "loofah/elements"
-require 'loofah/html5/whitelist'
-require 'loofah/html5/scrub'
+require "loofah/html5/safelist"
+require "loofah/html5/libxml2_workarounds"
+require "loofah/html5/scrub"
-require 'loofah/scrubber'
-require 'loofah/scrubbers'
+require "loofah/scrubber"
+require "loofah/scrubbers"
-require 'loofah/instance_methods'
-require 'loofah/xml/document'
-require 'loofah/xml/document_fragment'
-require 'loofah/html/document'
-require 'loofah/html/document_fragment'
+require "loofah/instance_methods"
+require "loofah/xml/document"
+require "loofah/xml/document_fragment"
+require "loofah/html/document"
+require "loofah/html/document_fragment"
 # == Strings and IO Objects as Input
 #
@@ -27,7 +28,7 @@ require 'loofah/html/document_fragment'
 #
 module Loofah
   # The version of Loofah you are using
-  VERSION = '2.2.0'
+  VERSION = "2.3.0"
   class << self
     # Shortcut for Loofah::HTML::Document.parse
@@ -76,7 +77,7 @@ module Loofah
     # A helper to remove extraneous whitespace from text-ified HTML
     def remove_extraneous_whitespace(string)
-      string.gsub(/\n\s*\n\s*\n/,"\n\n")
+      string.gsub(/\n\s*\n\s*\n/, "\n\n")
     end
   end
 end

data/test/assets/msword.html ADDED Viewed

@@ -0,0 +1,63 @@
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
+<w:WordDocument>
+ <w:View>Normal</w:View>
+ <w:Zoom>0</w:Zoom>
+ <w:PunctuationKerning/>
+ <w:ValidateAgainstSchemas/>
+ <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
+ <w:IgnoreMixedContent>false</w:IgnoreMixedContent>
+ <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
+ <w:Compatibility>
+  <w:BreakWrappedTables/>
+  <w:SnapToGridInCell/>
+  <w:WrapTextWithPunct/>
+  <w:UseAsianBreakRules/>
+  <w:DontGrowAutofit/>
+ </w:Compatibility>
+ <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
+</w:WordDocument>
+</xml><![endif]--><!--[if gte mso 9]><xml>
+<w:LatentStyles DefLockedState="false" LatentStyleCount="156">
+</w:LatentStyles>
+</xml><![endif]--><style>
+<!--
+/* Style Definitions */
+p.MsoNormal, li.MsoNormal, div.MsoNormal
+{mso-style-parent:"";
+margin:0in;
+margin-bottom:.0001pt;
+mso-pagination:widow-orphan;
+font-size:12.0pt;
+font-family:"Times New Roman";
+mso-fareast-font-family:"Times New Roman";}
+@page Section1
+{size:8.5in 11.0in;
+margin:1.0in 1.25in 1.0in 1.25in;
+mso-header-margin:.5in;
+mso-footer-margin:.5in;
+mso-paper-source:0;}
+div.Section1
+{page:Section1;}
+-->
+</style><!--[if gte mso 10]>
+<style>
+/* Style Definitions */
+table.MsoNormalTable
+{mso-style-name:"Table Normal";
+mso-tstyle-rowband-size:0;
+mso-tstyle-colband-size:0;
+mso-style-noshow:yes;
+mso-style-parent:"";
+mso-padding-alt:0in 5.4pt 0in 5.4pt;
+mso-para-margin:0in;
+mso-para-margin-bottom:.0001pt;
+mso-pagination:widow-orphan;
+font-size:10.0pt;
+font-family:"Times New Roman";
+mso-ansi-language:#0400;
+mso-fareast-language:#0400;
+mso-bidi-language:#0400;}
+</style>
+<![endif]-->
+<p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>

data/test/html5/test_sanitizer.rb CHANGED Viewed

@@ -37,7 +37,7 @@ class Html5TestSanitizer < Loofah::TestCase
     assert_in_delta t0, Time.now, 0.1 # arbitrary seconds
   end
-  (HTML5::WhiteList::ALLOWED_ELEMENTS).each do |tag_name|
+  (HTML5::SafeList::ALLOWED_ELEMENTS).each do |tag_name|
     define_method "test_should_allow_#{tag_name}_tag" do
       input       = "<#{tag_name} title='1'>foo <bad>bar</bad> baz</#{tag_name}>"
       htmloutput  = "<#{tag_name.downcase} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</#{tag_name.downcase}>"
@@ -58,7 +58,7 @@ class Html5TestSanitizer < Loofah::TestCase
         htmloutput = "<img title='1'/>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
         xhtmloutput = htmloutput
         rexmloutput = "<image title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</image>"
-      elsif HTML5::WhiteList::VOID_ELEMENTS.include?(tag_name)
+      elsif HTML5::SafeList::VOID_ELEMENTS.include?(tag_name)
         htmloutput = "<#{tag_name} title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz"
         xhtmloutput = htmloutput
         htmloutput += '<br/>' if tag_name == 'br'
@@ -71,7 +71,7 @@ class Html5TestSanitizer < Loofah::TestCase
   ##
   ##  libxml2 downcases elements, so this is moot.
   ##
-  # HTML5::WhiteList::ALLOWED_ELEMENTS.each do |tag_name|
+  # HTML5::SafeList::ALLOWED_ELEMENTS.each do |tag_name|
   #   define_method "test_should_forbid_#{tag_name.upcase}_tag" do
   #     input = "<#{tag_name.upcase} title='1'>foo <bad>bar</bad> baz</#{tag_name.upcase}>"
   #     output = "&lt;#{tag_name.upcase} title=\"1\"&gt;foo &lt;bad&gt;bar&lt;/bad&gt; baz&lt;/#{tag_name.upcase}&gt;"
@@ -79,7 +79,7 @@ class Html5TestSanitizer < Loofah::TestCase
   #   end
   # end
-  HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
+  HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name|
     next if attribute_name == 'style'
     define_method "test_should_allow_#{attribute_name}_attribute" do
         input = "<p #{attribute_name}='foo'>foo <bad>bar</bad> baz</p>"
@@ -110,10 +110,17 @@ class Html5TestSanitizer < Loofah::TestCase
     check_sanitization(input, htmloutput, output, output)
   end
+  def test_should_allow_contenteditable
+    input = '<p contenteditable="false">Hi!</p>'
+    output = '<p contenteditable="false">Hi!</p>'
+    check_sanitization(input, output, output, output)
+  end
   ##
   ##  libxml2 downcases attributes, so this is moot.
   ##
-  # HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
+  # HTML5::SafeList::ALLOWED_ATTRIBUTES.each do |attribute_name|
   #   define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
   #     input = "<p #{attribute_name.upcase}='display: none;'>foo <bad>bar</bad> baz</p>"
   #     output =  "<p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"
@@ -121,7 +128,7 @@ class Html5TestSanitizer < Loofah::TestCase
   #   end
   # end
-  HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
+  HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
     define_method "test_should_allow_#{protocol}_uris" do
       input = %(<a href="#{protocol}">foo</a>)
       output = "<a href='#{protocol}'>foo</a>"
@@ -129,7 +136,7 @@ class Html5TestSanitizer < Loofah::TestCase
     end
   end
-  HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |protocol|
+  HTML5::SafeList::ALLOWED_PROTOCOLS.each do |protocol|
     define_method "test_should_allow_uppercase_#{protocol}_uris" do
       input = %(<a href="#{protocol.upcase}">foo</a>)
       output = "<a href='#{protocol.upcase}'>foo</a>"
@@ -137,7 +144,7 @@ class Html5TestSanitizer < Loofah::TestCase
     end
   end
-  HTML5::WhiteList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
+  HTML5::SafeList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
     define_method "test_should_allow_data_#{data_uri_type}_uris" do
       input = %(<a href="data:#{data_uri_type}">foo</a>)
       output = "<a href='data:#{data_uri_type}'>foo</a>"
@@ -149,7 +156,7 @@ class Html5TestSanitizer < Loofah::TestCase
     end
   end
-  HTML5::WhiteList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
+  HTML5::SafeList::ALLOWED_URI_DATA_MEDIATYPES.each do |data_uri_type|
     define_method "test_should_allow_uppercase_data_#{data_uri_type}_uris" do
       input = %(<a href="DATA:#{data_uri_type.upcase}">foo</a>)
       output = "<a href='DATA:#{data_uri_type.upcase}'>foo</a>"
@@ -172,8 +179,8 @@ class Html5TestSanitizer < Loofah::TestCase
   end
-  HTML5::WhiteList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
-    next unless HTML5::WhiteList::ALLOWED_ELEMENTS.include?(tag_name)
+  HTML5::SafeList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
+    next unless HTML5::SafeList::ALLOWED_ELEMENTS.include?(tag_name)
     define_method "test_#{tag_name}_should_allow_local_href" do
       input = %(<#{tag_name} xlink:href="#foo"/>)
       output = "<#{tag_name.downcase} xlink:href='#foo'></#{tag_name.downcase}>"
@@ -249,7 +256,7 @@ class Html5TestSanitizer < Loofah::TestCase
   end
   ## added because we don't have any coverage above on SVG_ATTR_VAL_ALLOWS_REF
-  HTML5::WhiteList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
+  HTML5::SafeList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
     define_method "test_should_allow_uri_refs_in_svg_attribute_#{attr_name}" do
       input = "<rect fill='url(#foo)' />"
       output = "<rect fill='url(#foo)'></rect>"
@@ -263,6 +270,12 @@ class Html5TestSanitizer < Loofah::TestCase
     end
   end
+  def test_css_list_style
+    html = '<ul style="list-style: none"></ul>'
+    sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
+    assert_match %r/list-style/, sane.inner_html
+  end
   def test_css_negative_value_sanitization
     html = "<span style=\"letter-spacing:-0.03em;\">"
     sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
@@ -275,7 +288,13 @@ class Html5TestSanitizer < Loofah::TestCase
     assert_match %r/-0.05em/, sane.inner_html
   end
-  def test_css_function_sanitization_leaves_whitelisted_functions_calc
+  def test_css_high_precision_value_shorthand_css_properties
+    html = "<span style=\"margin-left:0.3333333334em;\">"
+    sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :escape).to_xml)
+    assert_match %r/0.3333333334em/, sane.inner_html
+  end
+  def test_css_function_sanitization_leaves_safelisted_functions_calc
     html = "<span style=\"width:calc(5%)\">"
     sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
     assert_match %r/calc\(5%\)/, sane.inner_html
@@ -285,24 +304,24 @@ class Html5TestSanitizer < Loofah::TestCase
     assert_match %r/calc\(5%\)/, sane.inner_html
   end
-  def test_css_function_sanitization_leaves_whitelisted_functions_rgb
+  def test_css_function_sanitization_leaves_safelisted_functions_rgb
     html = '<span style="color: rgb(255, 0, 0)">'
     sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
     assert_match %r/rgb\(255, 0, 0\)/, sane.inner_html
   end
-  def test_css_function_sanitization_leaves_whitelisted_list_style_type
+  def test_css_function_sanitization_leaves_safelisted_list_style_type
     html = "<ol style='list-style-type:lower-greek;'></ol>"
     sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
     assert_match %r/list-style-type:lower-greek/, sane.inner_html
   end
   def test_css_function_sanitization_strips_style_attributes_with_unsafe_functions
-    html = "<span style=\"width:attr(data-evil-attr)\">"
+    html = "<span style=\"width:url(data-evil-url)\">"
     sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
     assert_match %r/<span><\/span>/, sane.inner_html
-    html = "<span style=\"width: attr(data-evil-attr)\">"
+    html = "<span style=\"width: url(data-evil-url)\">"
     sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
     assert_match %r/<span><\/span>/, sane.inner_html
   end

data/test/html5/test_scrub.rb ADDED Viewed

@@ -0,0 +1,10 @@
+require "helper"
+class UnitHTML5Scrub < Loofah::TestCase
+  include Loofah
+  def test_scrub_css
+    assert_equal Loofah::HTML5::Scrub.scrub_css("background: #ABC012"), "background:#ABC012;"
+    assert_equal Loofah::HTML5::Scrub.scrub_css("background: #abc012"), "background:#abc012;"
+  end
+end

data/test/integration/test_ad_hoc.rb CHANGED Viewed

@@ -17,6 +17,8 @@ class IntegrationTestAdHoc < Loofah::TestCase
   end
   context "tests" do
+    MSWORD_HTML = File.read(File.join(File.dirname(__FILE__), "..", "assets", "msword.html")).freeze
     def test_removal_of_illegal_tag
       html = <<-HTML
       following this there should be no jim tag
@@ -76,72 +78,6 @@ class IntegrationTestAdHoc < Loofah::TestCase
       assert_equal "<p>safe</p><b>description</b>", whitewashed.gsub("\n","")
     end
-    MSWORD_HTML = <<-EOHTML
-<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml>
-<w:WordDocument>
- <w:View>Normal</w:View>
- <w:Zoom>0</w:Zoom>
- <w:PunctuationKerning/>
- <w:ValidateAgainstSchemas/>
- <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid>
- <w:IgnoreMixedContent>false</w:IgnoreMixedContent>
- <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText>
- <w:Compatibility>
-  <w:BreakWrappedTables/>
-  <w:SnapToGridInCell/>
-  <w:WrapTextWithPunct/>
-  <w:UseAsianBreakRules/>
-  <w:DontGrowAutofit/>
- </w:Compatibility>
- <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel>
-</w:WordDocument>
-</xml><![endif]--><!--[if gte mso 9]><xml>
-<w:LatentStyles DefLockedState="false" LatentStyleCount="156">
-</w:LatentStyles>
-</xml><![endif]--><style>
-<!--
-/* Style Definitions */
-p.MsoNormal, li.MsoNormal, div.MsoNormal
-{mso-style-parent:"";
-margin:0in;
-margin-bottom:.0001pt;
-mso-pagination:widow-orphan;
-font-size:12.0pt;
-font-family:"Times New Roman";
-mso-fareast-font-family:"Times New Roman";}
-@page Section1
-{size:8.5in 11.0in;
-margin:1.0in 1.25in 1.0in 1.25in;
-mso-header-margin:.5in;
-mso-footer-margin:.5in;
-mso-paper-source:0;}
-div.Section1
-{page:Section1;}
--->
-</style><!--[if gte mso 10]>
-<style>
-/* Style Definitions */
-table.MsoNormalTable
-{mso-style-name:"Table Normal";
-mso-tstyle-rowband-size:0;
-mso-tstyle-colband-size:0;
-mso-style-noshow:yes;
-mso-style-parent:"";
-mso-padding-alt:0in 5.4pt 0in 5.4pt;
-mso-para-margin:0in;
-mso-para-margin-bottom:.0001pt;
-mso-pagination:widow-orphan;
-font-size:10.0pt;
-font-family:"Times New Roman";
-mso-ansi-language:#0400;
-mso-fareast-language:#0400;
-mso-bidi-language:#0400;}
-</style>
-<![endif]-->
-<p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p>
-  EOHTML
     def test_fragment_whitewash_on_microsofty_markup
       whitewashed = Loofah.fragment(MSWORD_HTML).scrub!(:whitewash)
       assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed.to_s.strip
@@ -188,6 +124,81 @@ mso-bidi-language:#0400;}
       html = "<p>Foo</p>\n<p>Bar</p>"
       assert_equal "Foo\nBar", Loofah.scrub_document(html, :prune).text
     end
+    #
+    #  tests for CVE-2018-8048 (see https://github.com/flavorjones/loofah/issues/144)
+    #
+    #  libxml2 >= 2.9.2 fails to escape comments within some attributes. It
+    #  wants to ensure these comments can be treated as "server-side includes",
+    #  but as a result fails to ensure that serialization is well-formed,
+    #  resulting in an opportunity for XSS injection of code into a final
+    #  re-parsed document (presumably in a browser).
+    #
+    #  we'll test this by parsing the HTML, serializing it, then
+    #  re-parsing it to ensure there isn't any ambiguity in the output
+    #  that might allow code injection into a browser consuming
+    #  "sanitized" output.
+    #
+    [
+      #
+      #  these tags and attributes are determined by the code at:
+      #
+      #    https://git.gnome.org/browse/libxml2/tree/HTMLtree.c?h=v2.9.2#n714
+      #
+      {tag: "a",   attr: "href"},
+      {tag: "div", attr: "href"},
+      {tag: "a",   attr: "action"},
+      {tag: "div", attr: "action"},
+      {tag: "a",   attr: "src"},
+      {tag: "div", attr: "src"},
+      {tag: "a",   attr: "name"},
+      #
+      #  note that div+name is _not_ affected by the libxml2 issue.
+      #  but we test it anyway to ensure our logic isn't modifying
+      #  attributes that don't need modifying.
+      #
+      {tag: "div", attr: "name", unescaped: true},
+    ].each do |config|
+      define_method "test_uri_escaping_of_#{config[:attr]}_attr_in_#{config[:tag]}_tag" do
+        html = %{<#{config[:tag]} #{config[:attr]}='examp<!--" unsafeattr=foo()>-->le.com'>test</#{config[:tag]}>}
+        reparsed = Loofah.fragment(Loofah.fragment(html).scrub!(:prune).to_html)
+        attributes = reparsed.at_css(config[:tag]).attribute_nodes
+        assert_equal [config[:attr]], attributes.collect(&:name)
+        if Nokogiri::VersionInfo.instance.libxml2?
+          if config[:unescaped]
+            #
+            #  this attribute was emitted wrapped in single-quotes, so a double quote is A-OK.
+            #  assert that this attribute's serialization is unaffected.
+            #
+            assert_equal %{examp<!--" unsafeattr=foo()>-->le.com}, attributes.first.value
+          else
+            #
+            #  let's match the behavior in libxml < 2.9.2.
+            #  test that this attribute's serialization is well-formed and sanitized.
+            #
+            assert_equal %{examp<!--%22%20unsafeattr=foo()>-->le.com}, attributes.first.value
+          end
+        else
+          #
+          #  yay for consistency in javaland. move along, nothing to see here.
+          #
+          assert_equal %{examp<!--%22 unsafeattr=foo()>-->le.com}, attributes.first.value
+        end
+      end
+    end
+    # see:
+    # - https://github.com/flavorjones/loofah/issues/154
+    # - https://hackerone.com/reports/429267
+    context "xss protection from svg xmlns:xlink animate attribute" do
+      it "sanitizes appropriate attributes" do
+        html = %Q{<svg><a xmlns:xlink=http://www.w3.org/1999/xlink xlink:href=?><circle r=400 /><animate attributeName=xlink:href begin=0 from=javascript:alert(1) to=%26>}
+        sanitized = Loofah.scrub_fragment(html, :escape)
+        assert_nil sanitized.at_css("animate")["from"]
+      end
+    end
   end
 end

data/test/unit/test_helpers.rb CHANGED Viewed

@@ -44,17 +44,17 @@ class UnitTestHelpers < Loofah::TestCase
         end
       end
-      describe "WhiteListSanitizer#sanitize" do
+      describe "SafeListSanitizer#sanitize" do
         it "calls .sanitize" do
           mock(Loofah::Helpers).sanitize("foobar")
-          Loofah::Helpers::ActionView::WhiteListSanitizer.new.sanitize "foobar"
+          Loofah::Helpers::ActionView::SafeListSanitizer.new.sanitize "foobar"
         end
       end
-      describe "WhiteListSanitizer#sanitize_css" do
+      describe "SafeListSanitizer#sanitize_css" do
         it "calls .sanitize_css" do
           mock(Loofah::Helpers).sanitize_css("foobar")
-          Loofah::Helpers::ActionView::WhiteListSanitizer.new.sanitize_css "foobar"
+          Loofah::Helpers::ActionView::SafeListSanitizer.new.sanitize_css "foobar"
         end
       end
     end