RubyGems - loofah - Versions diffs - 1.0.0 → 2.19.1 - Mend

loofah 1.0.0 → 2.19.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

checksums.yaml +7 -0
data/CHANGELOG.md +489 -0
data/MIT-LICENSE.txt +3 -1
data/README.md +364 -0
data/SECURITY.md +18 -0
data/lib/loofah/elements.rb +88 -11
data/lib/loofah/helpers.rb +76 -2
data/lib/loofah/html/document.rb +1 -0
data/lib/loofah/html/document_fragment.rb +9 -2
data/lib/loofah/html5/libxml2_workarounds.rb +27 -0
data/lib/loofah/html5/safelist.rb +1042 -0
data/lib/loofah/html5/scrub.rb +198 -40
data/lib/loofah/instance_methods.rb +16 -10
data/lib/loofah/metahelpers.rb +9 -10
data/lib/loofah/scrubber.rb +22 -6
data/lib/loofah/scrubbers.rb +96 -16
data/lib/loofah/version.rb +5 -0
data/lib/loofah/xml/document.rb +1 -0
data/lib/loofah/xml/document_fragment.rb +5 -2
data/lib/loofah.rb +38 -25
metadata +159 -172
data/CHANGELOG.rdoc +0 -134
data/Gemfile +0 -1
data/Manifest.txt +0 -34
data/README.rdoc +0 -312
data/Rakefile +0 -53
data/benchmark/benchmark.rb +0 -149
data/benchmark/fragment.html +0 -96
data/benchmark/helper.rb +0 -73
data/benchmark/www.slashdot.com.html +0 -2560
data/lib/loofah/html5/whitelist.rb +0 -168
data/test/helper.rb +0 -7
data/test/html5/test_sanitizer.rb +0 -248
data/test/integration/test_ad_hoc.rb +0 -176
data/test/integration/test_helpers.rb +0 -33
data/test/integration/test_html.rb +0 -51
data/test/integration/test_scrubbers.rb +0 -331
data/test/integration/test_xml.rb +0 -55
data/test/unit/test_api.rb +0 -138
data/test/unit/test_helpers.rb +0 -27
data/test/unit/test_scrubber.rb +0 -229
data/test/unit/test_scrubbers.rb +0 -14

data/test/integration/test_html.rb DELETED Viewed

@@ -1,51 +0,0 @@
-require File.expand_path(File.join(File.dirname(__FILE__), '..', 'helper'))
-class TestHtml < Test::Unit::TestCase
-  context "html fragment" do
-    context "#to_s" do
-      should "not include head tags (like style)" do
-        html = Loofah.fragment "<style>foo</style><div>bar</div>"
-        assert_equal "<div>bar</div>", html.to_s
-      end
-    end
-    context "#text" do
-      should "not include head tags (like style)" do
-        html = Loofah.fragment "<style>foo</style><div>bar</div>"
-        assert_equal "bar", html.text
-      end
-    end
-    context "#to_text" do
-      should "add newlines before and after block elements" do
-        html = Loofah.fragment "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
-        assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
-      end
-      should "remove extraneous whitespace" do
-        html = Loofah.fragment "<div>tweedle\n\n\t\n\s\nbeetle</div>"
-        assert_equal "\ntweedle\n\nbeetle\n", html.to_text
-      end
-    end
-  end
-  context "html document" do
-    should "not include head tags (like style)" do
-      html = Loofah.document "<style>foo</style><div>bar</div>"
-      assert_equal "bar", html.text
-    end
-    context "#to_text" do
-      should "add newlines before and after block elements" do
-        html = Loofah.document "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
-        assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
-      end
-      should "remove extraneous whitespace" do
-        html = Loofah.document "<div>tweedle\n\n\t\n\s\nbeetle</div>"
-        assert_equal "\ntweedle\n\nbeetle\n", html.to_text
-      end
-    end
-  end
-end

data/test/integration/test_scrubbers.rb DELETED Viewed

@@ -1,331 +0,0 @@
-require File.expand_path(File.join(File.dirname(__FILE__), '..', 'helper'))
-class TestScrubbers < Test::Unit::TestCase
-  INVALID_FRAGMENT = "<invalid>foo<p>bar</p>bazz</invalid><div>quux</div>"
-  INVALID_ESCAPED  = "&lt;invalid&gt;foo&lt;p&gt;bar&lt;/p&gt;bazz&lt;/invalid&gt;<div>quux</div>"
-  INVALID_PRUNED   = "<div>quux</div>"
-  INVALID_STRIPPED = "foo<p>bar</p>bazz<div>quux</div>"
-  WHITEWASH_FRAGMENT = "<o:div>no</o:div><div id='no'>foo</div><invalid>bar</invalid><!--[if gts mso9]><div>microsofty stuff</div><![endif]-->"
-  WHITEWASH_RESULT   = "<div>foo</div>"
-  NOFOLLOW_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
-  NOFOLLOW_RESULT   = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'
-  ENTITY_FRAGMENT   = "<p>this is &lt; that &quot;&amp;&quot; the other &gt; boo&apos;ya</p><div>w00t</div>"
-  ENTITY_TEXT       = %Q(this is < that "&" the other > boo\'yaw00t)
-  ENTITY_HACK_ATTACK            = "<div><div>Hack attack!</div><div>&lt;script&gt;alert('evil')&lt;/script&gt;</div></div>"
-  ENTITY_HACK_ATTACK_TEXT_SCRUB = "Hack attack!&lt;script&gt;alert('evil')&lt;/script&gt;"
-  ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC = "Hack attack!<script>alert('evil')</script>"
-  context "Document" do
-    context "#scrub!" do
-      context ":escape" do
-        should "escape bad tags" do
-          doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
-          result = doc.scrub! :escape
-          assert_equal INVALID_ESCAPED, doc.xpath('/html/body').inner_html
-          assert_equal doc, result
-        end
-      end
-      context ":prune" do
-        should "prune bad tags" do
-          doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
-          result = doc.scrub! :prune
-          assert_equal INVALID_PRUNED, doc.xpath('/html/body').inner_html
-          assert_equal doc, result
-        end
-      end
-      context ":strip" do
-        should "strip bad tags" do
-          doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
-          result = doc.scrub! :strip
-          assert_equal INVALID_STRIPPED, doc.xpath('/html/body').inner_html
-          assert_equal doc, result
-        end
-      end
-      context ":whitewash" do
-        should "whitewash the markup" do
-          doc = Loofah::HTML::Document.parse "<html><body>#{WHITEWASH_FRAGMENT}</body></html>"
-          result = doc.scrub! :whitewash
-          assert_equal WHITEWASH_RESULT, doc.xpath('/html/body').inner_html
-          assert_equal doc, result
-        end
-      end
-      context ":nofollow" do
-        should "add a 'nofollow' attribute to hyperlinks" do
-          doc = Loofah::HTML::Document.parse "<html><body>#{NOFOLLOW_FRAGMENT}</body></html>"
-          result = doc.scrub! :nofollow
-          assert_equal NOFOLLOW_RESULT, doc.xpath('/html/body').inner_html
-          assert_equal doc, result
-        end
-      end
-    end
-    context "#scrub_document" do
-      should "be a shortcut for parse-and-scrub" do
-        mock_doc = mock
-        Loofah.expects(:document).with(:string_or_io).returns(mock_doc)
-        mock_doc.expects(:scrub!).with(:method)
-        Loofah.scrub_document(:string_or_io, :method)
-      end
-    end
-    context "#text" do
-      should "leave behind only inner text with html entities still escaped" do
-        doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
-        result = doc.text
-        assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
-      end
-      context "with encode_special_chars => false" do
-        should "leave behind only inner text with html entities unescaped" do
-          doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
-          result = doc.text(:encode_special_chars => false)
-          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
-        end
-      end
-      context "with encode_special_chars => true" do
-        should "leave behind only inner text with html entities still escaped" do
-          doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
-          result = doc.text(:encode_special_chars => true)
-          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
-        end
-      end
-    end
-    context "#to_s" do
-      should "generate HTML" do
-        doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
-        assert_not_nil doc.xpath("/html").first
-        assert_not_nil doc.xpath("/html/head").first
-        assert_not_nil doc.xpath("/html/body").first
-        string = doc.to_s
-        assert_contains string, /<!DOCTYPE/
-        assert_contains string, /<html>/
-        assert_contains string, /<head>/
-        assert_contains string, /<body>/
-      end
-    end
-    context "#serialize" do
-      should "generate HTML" do
-        doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
-        assert_not_nil doc.xpath("/html").first
-        assert_not_nil doc.xpath("/html/head").first
-        assert_not_nil doc.xpath("/html/body").first
-        string = doc.serialize
-        assert_contains string, /<!DOCTYPE/
-        assert_contains string, /<html>/
-        assert_contains string, /<head>/
-        assert_contains string, /<body>/
-      end
-    end
-    context "Node" do
-      context "#scrub!" do
-        should "only scrub subtree" do
-          xml = Loofah.document <<-EOHTML
-           <html><body>
-             <div class='scrub'>
-               <script>I should be removed</script>
-             </div>
-             <div class='noscrub'>
-               <script>I should remain</script>
-             </div>
-           </body></html>
-          EOHTML
-          node = xml.at_css "div.scrub"
-          node.scrub!(:prune)
-          assert_contains         xml.to_s, /I should remain/
-          assert_does_not_contain xml.to_s, /I should be removed/
-        end
-      end
-    end
-    context "NodeSet" do
-      context "#scrub!" do
-        should "only scrub subtrees" do
-          xml = Loofah.document <<-EOHTML
-            <html><body>
-              <div class='scrub'>
-                <script>I should be removed</script>
-              </div>
-              <div class='noscrub'>
-                <script>I should remain</script>
-              </div>
-              <div class='scrub'>
-                <script>I should also be removed</script>
-              </div>
-            </body></html>
-          EOHTML
-          node_set = xml.css "div.scrub"
-          assert_equal 2, node_set.length
-          node_set.scrub!(:prune)
-          assert_contains         xml.to_s, /I should remain/
-          assert_does_not_contain xml.to_s, /I should be removed/
-          assert_does_not_contain xml.to_s, /I should also be removed/
-        end
-      end
-    end
-  end
-  context "DocumentFragment" do
-    context "#scrub!" do
-      context ":escape" do
-        should "escape bad tags" do
-          doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
-          result = doc.scrub! :escape
-          assert_equal INVALID_ESCAPED, doc.xpath("./div").inner_html
-          assert_equal doc, result
-        end
-      end
-      context ":prune" do
-        should "prune bad tags" do
-          doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
-          result = doc.scrub! :prune
-          assert_equal INVALID_PRUNED, doc.xpath("./div").inner_html
-          assert_equal doc, result
-        end
-      end
-      context ":strip" do
-        should "strip bad tags" do
-          doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
-          result = doc.scrub! :strip
-          assert_equal INVALID_STRIPPED, doc.xpath("./div").inner_html
-          assert_equal doc, result
-        end
-      end
-      context ":whitewash" do
-        should "whitewash the markup" do
-          doc = Loofah::HTML::DocumentFragment.parse "<div>#{WHITEWASH_FRAGMENT}</div>"
-          result = doc.scrub! :whitewash
-          assert_equal WHITEWASH_RESULT, doc.xpath("./div").inner_html
-          assert_equal doc, result
-        end
-      end
-      context ":nofollow" do
-        should "add a 'nofollow' attribute to hyperlinks" do
-          doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOFOLLOW_FRAGMENT}</div>"
-          result = doc.scrub! :nofollow
-          assert_equal NOFOLLOW_RESULT, doc.xpath("./div").inner_html
-          assert_equal doc, result
-        end
-      end
-    end
-    context "#scrub_fragment" do
-      should "be a shortcut for parse-and-scrub" do
-        mock_doc = mock
-        Loofah.expects(:fragment).with(:string_or_io).returns(mock_doc)
-        mock_doc.expects(:scrub!).with(:method)
-        Loofah.scrub_fragment(:string_or_io, :method)
-      end
-    end
-    context "#text" do
-      should "leave behind only inner text with html entities still escaped" do
-        doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
-        result = doc.text
-        assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
-      end
-      context "with encode_special_chars => false" do
-        should "leave behind only inner text with html entities unescaped" do
-          doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
-          result = doc.text(:encode_special_chars => false)
-          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
-        end
-      end
-      context "with encode_special_chars => true" do
-        should "leave behind only inner text with html entities still escaped" do
-          doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
-          result = doc.text(:encode_special_chars => true)
-          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
-        end
-      end
-    end
-    context "#to_s" do
-      should "not remove entities" do
-        string = Loofah.scrub_fragment(ENTITY_FRAGMENT, :prune).to_s
-        assert_contains string, /this is &lt;/
-      end
-    end
-    context "Node" do
-      context "#scrub!" do
-        should "only scrub subtree" do
-          xml = Loofah.fragment <<-EOHTML
-            <div class='scrub'>
-              <script>I should be removed</script>
-            </div>
-            <div class='noscrub'>
-              <script>I should remain</script>
-            </div>
-          EOHTML
-          node = xml.at_css "div.scrub"
-          node.scrub!(:prune)
-          assert_contains         xml.to_s, /I should remain/
-          assert_does_not_contain xml.to_s, /I should be removed/
-        end
-      end
-    end
-    context "NodeSet" do
-      context "#scrub!" do
-        should "only scrub subtrees" do
-          xml = Loofah.fragment <<-EOHTML
-            <div class='scrub'>
-              <script>I should be removed</script>
-            </div>
-            <div class='noscrub'>
-              <script>I should remain</script>
-            </div>
-            <div class='scrub'>
-              <script>I should also be removed</script>
-            </div>
-          EOHTML
-          node_set = xml.css "div.scrub"
-          assert_equal 2, node_set.length
-          node_set.scrub!(:prune)
-          assert_contains         xml.to_s, /I should remain/
-          assert_does_not_contain xml.to_s, /I should be removed/
-          assert_does_not_contain xml.to_s, /I should also be removed/
-        end
-      end
-    end
-  end
-end

data/test/integration/test_xml.rb DELETED Viewed

@@ -1,55 +0,0 @@
-require File.expand_path(File.join(File.dirname(__FILE__), '..', 'helper'))
-class TestXml < Test::Unit::TestCase
-  context "integration test" do
-    context "xml document" do
-      context "custom scrubber" do
-        should "act as expected" do
-          xml = Loofah.xml_document <<-EOXML
-            <root>
-              <employee deceased='true'>Abraham Lincoln</employee>
-              <employee deceased='false'>Abe Vigoda</employee>
-            </root>
-          EOXML
-          bring_out_your_dead = Loofah::Scrubber.new do |node|
-            if node.name == "employee" and node["deceased"] == "true"
-              node.remove
-              Loofah::Scrubber::STOP # don't bother with the rest of the subtree
-            end
-          end
-          assert_equal 2, xml.css("employee").length
-          xml.scrub!(bring_out_your_dead)
-          employees = xml.css "employee"
-          assert_equal 1, employees.length
-          assert_equal "Abe Vigoda", employees.first.inner_text
-        end
-      end
-    end
-    context "xml fragment" do
-      context "custom scrubber" do
-        should "act as expected" do
-          xml = Loofah.xml_fragment <<-EOXML
-            <employee deceased='true'>Abraham Lincoln</employee>
-            <employee deceased='false'>Abe Vigoda</employee>
-          EOXML
-          bring_out_your_dead = Loofah::Scrubber.new do |node|
-            if node.name == "employee" and node["deceased"] == "true"
-              node.remove
-              Loofah::Scrubber::STOP # don't bother with the rest of the subtree
-            end
-          end
-          assert_equal 2, xml.css("employee").length
-          xml.scrub!(bring_out_your_dead)
-          employees = xml.css "employee"
-          assert_equal 1, employees.length
-          assert_equal "Abe Vigoda", employees.first.inner_text
-        end
-      end
-    end
-  end
-end

data/test/unit/test_api.rb DELETED Viewed

@@ -1,138 +0,0 @@
-require File.expand_path(File.join(File.dirname(__FILE__), '..', 'helper'))
-class TestApi < Test::Unit::TestCase
-  HTML          = "<div>a</div>\n<div>b</div>"
-  XML_FRAGMENT  = "<div>a</div>\n<div>b</div>"
-  XML           = "<root>#{XML_FRAGMENT}</root>"
-  def test_loofah_document
-    doc = Loofah.document(HTML)
-    assert_html_documentish doc
-  end
-  def test_loofah_fragment
-    doc = Loofah.fragment(HTML)
-    assert_html_fragmentish doc
-  end
-  def test_loofah_xml_document
-    doc = Loofah.xml_document(XML)
-    assert_xml_documentish doc
-  end
-  def test_loofah_xml_fragment
-    doc = Loofah.xml_fragment(XML_FRAGMENT)
-    assert_xml_fragmentish doc
-  end
-  def test_loofah_html_document_parse_method
-    doc = Loofah::HTML::Document.parse(HTML)
-    assert_html_documentish doc
-  end
-  def test_loofah_xml_document_parse_method
-    doc = Loofah::XML::Document.parse(XML)
-    assert_xml_documentish doc
-  end
-  def test_loofah_html_document_fragment_parse_method
-    doc = Loofah::HTML::DocumentFragment.parse(HTML)
-    assert_html_fragmentish doc
-  end
-  def test_loofah_xml_document_fragment_parse_method
-    doc = Loofah::XML::DocumentFragment.parse(XML_FRAGMENT)
-    assert_xml_fragmentish doc
-  end
-  def test_loofah_document_scrub!
-    doc = Loofah.document(HTML).scrub!(:strip)
-    assert_html_documentish doc
-  end
-  def test_loofah_fragment_scrub!
-    doc = Loofah.fragment(HTML).scrub!(:strip)
-    assert_html_fragmentish doc
-  end
-  def test_loofah_xml_document_scrub!
-    scrubber = Loofah::Scrubber.new { |node| }
-    doc = Loofah.xml_document(XML).scrub!(scrubber)
-    assert_xml_documentish doc
-  end
-  def test_loofah_xml_fragment_scrub!
-    scrubber = Loofah::Scrubber.new { |node| }
-    doc = Loofah.xml_fragment(XML_FRAGMENT).scrub!(scrubber)
-    assert_xml_fragmentish doc
-  end
-  def test_loofah_html_document_node_scrub!
-    doc = Loofah.document(HTML)
-    assert(node = doc.at_css("div"))
-    node.scrub!(:strip)
-  end
-  def test_loofah_html_fragment_node_scrub!
-    doc = Loofah.fragment(HTML)
-    assert(node = doc.at_css("div"))
-    node.scrub!(:strip)
-  end
-  def test_loofah_xml_document_node_scrub!
-    doc = Loofah.xml_document(XML)
-    assert(node = doc.at_css("div"))
-    node.scrub!(:strip)
-  end
-  def test_loofah_xml_fragment_node_scrub!
-    doc = Loofah.xml_fragment(XML)
-    assert(node = doc.at_css("div"))
-    node.scrub!(:strip)
-  end
-  def test_loofah_nodeset_scrub!
-    doc = Loofah.document(HTML)
-    assert(node_set = doc.css("div"))
-    assert_instance_of Nokogiri::XML::NodeSet, node_set
-    node_set.scrub!(:strip)
-  end
-  should "HTML::DocumentFragment exposes serialize_root" do
-    doc = Loofah.fragment(HTML)
-    assert_equal HTML, doc.serialize_root.to_html
-  end
-  should "HTML::Document exposes serialize_root" do
-    doc = Loofah.document(HTML)
-    assert_equal HTML, doc.serialize_root.children.to_html
-  end
-  private
-  def assert_html_documentish(doc)
-    assert_kind_of Nokogiri::HTML::Document, doc
-    assert_kind_of Loofah::HTML::Document,   doc
-    assert_equal HTML, doc.xpath("/html/body").inner_html
-  end
-  def assert_html_fragmentish(doc)
-    assert_kind_of Nokogiri::HTML::DocumentFragment, doc
-    assert_kind_of Loofah::HTML::DocumentFragment,   doc
-    assert_equal HTML, doc.inner_html
-  end
-  def assert_xml_documentish(doc)
-    assert_kind_of Nokogiri::XML::Document, doc
-    assert_kind_of Loofah::XML::Document,   doc
-    assert_equal XML, doc.root.to_xml
-  end
-  def assert_xml_fragmentish(doc)
-    assert_kind_of Nokogiri::XML::DocumentFragment, doc
-    assert_kind_of Loofah::XML::DocumentFragment,   doc
-    assert_equal XML_FRAGMENT, doc.children.to_xml
-  end
-end

data/test/unit/test_helpers.rb DELETED Viewed

@@ -1,27 +0,0 @@
-require File.expand_path(File.join(File.dirname(__FILE__), '..', 'helper'))
-class TestHelpers < Test::Unit::TestCase
-  HTML_STRING = "<div>omgwtfbbq</div>"
-  context "#strip_tags" do
-    should "invoke Loofah.fragment.text" do
-      mock_doc = mock
-      Loofah.expects(:fragment).with(HTML_STRING).returns(mock_doc)
-      mock_doc.expects(:text)
-      Loofah::Helpers.strip_tags HTML_STRING
-    end
-  end
-  context "#sanitize" do
-    should "invoke Loofah.scrub_fragment(:strip).to_s" do
-      mock_doc = mock
-      Loofah.expects(:fragment).with(HTML_STRING).returns(mock_doc)
-      mock_doc.expects(:scrub!).with(:strip).returns(mock_doc)
-      mock_doc.expects(:to_s)
-      Loofah::Helpers.sanitize HTML_STRING
-    end
-  end
-end