RubyGems - loofah - Versions diffs - 2.4.0 → 2.9.0 - Mend

loofah 2.4.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40)

checksums.yaml +4 -4
data/CHANGELOG.md +90 -43
data/README.md +7 -4
data/lib/loofah.rb +32 -16
data/lib/loofah/elements.rb +73 -73
data/lib/loofah/helpers.rb +4 -4
data/lib/loofah/html/document_fragment.rb +3 -2
data/lib/loofah/html5/libxml2_workarounds.rb +7 -7
data/lib/loofah/html5/safelist.rb +21 -0
data/lib/loofah/html5/scrub.rb +57 -41
data/lib/loofah/instance_methods.rb +4 -3
data/lib/loofah/metahelpers.rb +1 -1
data/lib/loofah/scrubber.rb +7 -7
data/lib/loofah/scrubbers.rb +10 -10
data/lib/loofah/version.rb +5 -0
data/lib/loofah/xml/document_fragment.rb +1 -1
metadata +25 -106
data/.gemtest +0 -0
data/Gemfile +0 -23
data/Manifest.txt +0 -41
data/Rakefile +0 -91
data/benchmark/benchmark.rb +0 -149
data/benchmark/fragment.html +0 -96
data/benchmark/helper.rb +0 -73
data/benchmark/www.slashdot.com.html +0 -2560
data/test/assets/msword.html +0 -63
data/test/assets/testdata_sanitizer_tests1.dat +0 -502
data/test/helper.rb +0 -18
data/test/html5/test_sanitizer.rb +0 -414
data/test/html5/test_scrub.rb +0 -10
data/test/integration/test_ad_hoc.rb +0 -220
data/test/integration/test_helpers.rb +0 -43
data/test/integration/test_html.rb +0 -72
data/test/integration/test_scrubbers.rb +0 -400
data/test/integration/test_xml.rb +0 -55
data/test/unit/test_api.rb +0 -142
data/test/unit/test_encoding.rb +0 -20
data/test/unit/test_helpers.rb +0 -62
data/test/unit/test_scrubber.rb +0 -229
data/test/unit/test_scrubbers.rb +0 -14

data/test/integration/test_helpers.rb DELETED

@@ -1,43 +0,0 @@
-require "helper"
-class IntegrationTestHelpers < Loofah::TestCase
-  context ".strip_tags" do
-    context "on safe markup" do
-      it "strip out tags" do
-        assert_equal "omgwtfbbq!!1!", Loofah::Helpers.strip_tags("<div>omgwtfbbq</div><span>!!1!</span>")
-      end
-    end
-    context "on hack attack" do
-      it "strip escape html entities" do
-        bad_shit = "&lt;script&gt;alert('evil')&lt;/script&gt;"
-        assert_equal bad_shit, Loofah::Helpers.strip_tags(bad_shit)
-      end
-    end
-  end
-  context ".sanitize" do
-    context "on safe markup" do
-      it "render the safe html" do
-        html = "<div>omgwtfbbq</div><span>!!1!</span>"
-        assert_equal html, Loofah::Helpers.sanitize(html)
-      end
-    end
-    context "on hack attack" do
-      it "strip the unsafe tags" do
-        assert_equal "alert('evil')<span>w00t</span>", Loofah::Helpers.sanitize("<script>alert('evil')</script><span>w00t</span>")
-      end
-      it "strips form tags" do
-        assert_equal "alert('evil')<span>w00t</span>", Loofah::Helpers.sanitize("<script>alert('evil')</script><form action=\"/foo/bar\" method=\"post\"><input></form><span>w00t</span>")
-      end
-    end
-  end
-  context ".sanitize_css" do
-    it "removes unsafe css properties" do
-      assert_match(/display:\s*block;\s*background-color:\s*blue;/, Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg);background-color:blue"))
-    end
-  end
-end

data/test/integration/test_html.rb DELETED

@@ -1,72 +0,0 @@
-require "helper"
-class IntegrationTestHtml < Loofah::TestCase
-  context "html fragment" do
-    context "#to_s" do
-      it "not include head tags (like style)" do
-        skip "depends on nokogiri version"
-        html = Loofah.fragment "<style>foo</style><div>bar</div>"
-        assert_equal "<div>bar</div>", html.to_s
-      end
-    end
-    context "#text" do
-      it "not include head tags (like style)" do
-        skip "depends on nokogiri version"
-        html = Loofah.fragment "<style>foo</style><div>bar</div>"
-        assert_equal "bar", html.text
-      end
-    end
-    context "#to_text" do
-      it "add newlines before and after html4 block elements" do
-        html = Loofah.fragment "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
-        assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
-      end
-      it "add newlines before and after html5 block elements" do
-        html = Loofah.fragment "<div>tweedle<section>beetle</section>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
-        assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
-      end
-      it "remove extraneous whitespace" do
-        html = Loofah.fragment "<div>tweedle\n\n\t\n\s\nbeetle</div>"
-        assert_equal "\ntweedle\n\nbeetle\n", html.to_text
-      end
-    end
-    context 'with an `encoding` arg' do
-      it "sets the parent document's encoding to accordingly" do
-        html = Loofah.fragment "<style>foo</style><div>bar</div>", 'US-ASCII'
-        assert_equal 'US-ASCII', html.document.encoding
-      end
-    end
-  end
-  context "html document" do
-    context "#text" do
-      it "not include head tags (like style)" do
-        html = Loofah.document "<style>foo</style><div>bar</div>"
-        assert_equal "bar", html.text
-      end
-    end
-    context "#to_text" do
-      it "add newlines before and after html4 block elements" do
-        html = Loofah.document "<div>tweedle<h1>beetle</h1>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
-        assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
-      end
-      it "add newlines before and after html5 block elements" do
-        html = Loofah.document "<div>tweedle<section>beetle</section>bottle<span>puddle</span>paddle<div>battle</div>muddle</div>"
-        assert_equal "\ntweedle\nbeetle\nbottlepuddlepaddle\nbattle\nmuddle\n", html.to_text
-      end
-      it "remove extraneous whitespace" do
-        html = Loofah.document "<div>tweedle\n\n\t\n\s\nbeetle</div>"
-        assert_equal "\ntweedle\n\nbeetle\n", html.to_text
-      end
-    end
-  end
-end

data/test/integration/test_scrubbers.rb DELETED

@@ -1,400 +0,0 @@
-require "helper"
-class IntegrationTestScrubbers < Loofah::TestCase
-  INVALID_FRAGMENT = "<invalid>foo<p>bar</p>bazz</invalid><div>quux</div>"
-  INVALID_ESCAPED  = "&lt;invalid&gt;foo&lt;p&gt;bar&lt;/p&gt;bazz&lt;/invalid&gt;<div>quux</div>"
-  INVALID_PRUNED   = "<div>quux</div>"
-  INVALID_STRIPPED = "foo<p>bar</p>bazz<div>quux</div>"
-  WHITEWASH_FRAGMENT = "<o:div>no</o:div><div id='no'>foo</div><invalid>bar</invalid><!--[if gts mso9]><div>microsofty stuff</div><![endif]-->"
-  WHITEWASH_RESULT   = "<div>foo</div>"
-  NOFOLLOW_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
-  NOFOLLOW_RESULT   = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'
-  NOFOLLOW_WITH_REL_FRAGMENT = '<a href="http://www.example.com/" rel="noopener">Click here</a>'
-  NOFOLLOW_WITH_REL_RESULT   = '<a href="http://www.example.com/" rel="noopener nofollow">Click here</a>'
-  NOOPENER_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
-  NOOPENER_RESULT   = '<a href="http://www.example.com/" rel="noopener">Click here</a>'
-  NOOPENER_WITH_REL_FRAGMENT = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'
-  NOOPENER_WITH_REL_RESULT   = '<a href="http://www.example.com/" rel="nofollow noopener">Click here</a>'
-  UNPRINTABLE_FRAGMENT = "<b>Lo\u2029ofah ro\u2028cks!</b><script>x\u2028y</script>"
-  UNPRINTABLE_RESULT = "<b>Loofah rocks!</b><script>xy</script>"
-  ENTITY_FRAGMENT   = "<p>this is &lt; that &quot;&amp;&quot; the other &gt; boo&apos;ya</p><div>w00t</div>"
-  ENTITY_TEXT       = %Q(this is < that "&" the other > boo\'yaw00t)
-  ENTITY_HACK_ATTACK            = "<div><div>Hack attack!</div><div>&lt;script&gt;alert('evil')&lt;/script&gt;</div></div>"
-  ENTITY_HACK_ATTACK_TEXT_SCRUB = "Hack attack!&lt;script&gt;alert('evil')&lt;/script&gt;"
-  ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC = "Hack attack!<script>alert('evil')</script>"
-  context "Document" do
-    context "#scrub!" do
-      context ":escape" do
-        it "escape bad tags" do
-          doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
-          result = doc.scrub! :escape
-          assert_equal INVALID_ESCAPED, doc.xpath('/html/body').inner_html
-          assert_equal doc, result
-        end
-      end
-      context ":prune" do
-        it "prune bad tags" do
-          doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
-          result = doc.scrub! :prune
-          assert_equal INVALID_PRUNED, doc.xpath('/html/body').inner_html
-          assert_equal doc, result
-        end
-      end
-      context ":strip" do
-        it "strip bad tags" do
-          doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
-          result = doc.scrub! :strip
-          assert_equal INVALID_STRIPPED, doc.xpath('/html/body').inner_html
-          assert_equal doc, result
-        end
-      end
-      context ":whitewash" do
-        it "whitewash the markup" do
-          doc = Loofah::HTML::Document.parse "<html><body>#{WHITEWASH_FRAGMENT}</body></html>"
-          result = doc.scrub! :whitewash
-          assert_equal WHITEWASH_RESULT, doc.xpath('/html/body').inner_html
-          assert_equal doc, result
-        end
-      end
-      context ":nofollow" do
-        it "add a 'nofollow' attribute to hyperlinks" do
-          doc = Loofah::HTML::Document.parse "<html><body>#{NOFOLLOW_FRAGMENT}</body></html>"
-          result = doc.scrub! :nofollow
-          assert_equal NOFOLLOW_RESULT, doc.xpath('/html/body').inner_html
-          assert_equal doc, result
-        end
-      end
-      context ":unprintable" do
-        it "removes unprintable unicode characters" do
-          doc = Loofah::HTML::Document.parse "<html><body>#{UNPRINTABLE_FRAGMENT}</body></html>"
-          result = doc.scrub! :unprintable
-          assert_equal UNPRINTABLE_RESULT, doc.xpath("/html/body").inner_html
-          assert_equal doc, result
-        end
-      end
-    end
-    context "#scrub_document" do
-      it "be a shortcut for parse-and-scrub" do
-        mock_doc = Object.new
-        mock(Loofah).document(:string_or_io) { mock_doc }
-        mock(mock_doc).scrub!(:method)
-        Loofah.scrub_document(:string_or_io, :method)
-      end
-    end
-    context "#text" do
-      it "leave behind only inner text with html entities still escaped" do
-        doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
-        result = doc.text
-        assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
-      end
-      context "with encode_special_chars => false" do
-        it "leave behind only inner text with html entities unescaped" do
-          doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
-          result = doc.text(:encode_special_chars => false)
-          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
-        end
-      end
-      context "with encode_special_chars => true" do
-        it "leave behind only inner text with html entities still escaped" do
-          doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
-          result = doc.text(:encode_special_chars => true)
-          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
-        end
-      end
-    end
-    context "#to_s" do
-      it "generate HTML" do
-        doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
-        refute_nil doc.xpath("/html").first
-        refute_nil doc.xpath("/html/head").first
-        refute_nil doc.xpath("/html/body").first
-        string = doc.to_s
-        assert_match %r/<!DOCTYPE/, string
-        assert_match %r/<html>/, string
-        assert_match %r/<head>/, string
-        assert_match %r/<body>/, string
-      end
-    end
-    context "#serialize" do
-      it "generate HTML" do
-        doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
-        refute_nil doc.xpath("/html").first
-        refute_nil doc.xpath("/html/head").first
-        refute_nil doc.xpath("/html/body").first
-        string = doc.serialize
-        assert_match %r/<!DOCTYPE/, string
-        assert_match %r/<html>/, string
-        assert_match %r/<head>/, string
-        assert_match %r/<body>/, string
-      end
-    end
-    context "Node" do
-      context "#scrub!" do
-        it "only scrub subtree" do
-          xml = Loofah.document <<-EOHTML
-           <html><body>
-             <div class='scrub'>
-               <script>I should be removed</script>
-             </div>
-             <div class='noscrub'>
-               <script>I should remain</script>
-             </div>
-           </body></html>
-          EOHTML
-          node = xml.at_css "div.scrub"
-          node.scrub!(:prune)
-          assert_match %r/I should remain/,     xml.to_s
-          refute_match %r/I should be removed/, xml.to_s
-        end
-      end
-    end
-    context "NodeSet" do
-      context "#scrub!" do
-        it "only scrub subtrees" do
-          xml = Loofah.document <<-EOHTML
-            <html><body>
-              <div class='scrub'>
-                <script>I should be removed</script>
-              </div>
-              <div class='noscrub'>
-                <script>I should remain</script>
-              </div>
-              <div class='scrub'>
-                <script>I should also be removed</script>
-              </div>
-            </body></html>
-          EOHTML
-          node_set = xml.css "div.scrub"
-          assert_equal 2, node_set.length
-          node_set.scrub!(:prune)
-          assert_match %r/I should remain/,          xml.to_s
-          refute_match %r/I should be removed/,      xml.to_s
-          refute_match %r/I should also be removed/, xml.to_s
-        end
-      end
-    end
-  end
-  context "DocumentFragment" do
-    context "#scrub!" do
-      context ":escape" do
-        it "escape bad tags" do
-          doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
-          result = doc.scrub! :escape
-          assert_equal INVALID_ESCAPED, doc.xpath("./div").inner_html
-          assert_equal doc, result
-        end
-      end
-      context ":prune" do
-        it "prune bad tags" do
-          doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
-          result = doc.scrub! :prune
-          assert_equal INVALID_PRUNED, doc.xpath("./div").inner_html
-          assert_equal doc, result
-        end
-      end
-      context ":strip" do
-        it "strip bad tags" do
-          doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
-          result = doc.scrub! :strip
-          assert_equal INVALID_STRIPPED, doc.xpath("./div").inner_html
-          assert_equal doc, result
-        end
-      end
-      context ":whitewash" do
-        it "whitewash the markup" do
-          doc = Loofah::HTML::DocumentFragment.parse "<div>#{WHITEWASH_FRAGMENT}</div>"
-          result = doc.scrub! :whitewash
-          assert_equal WHITEWASH_RESULT, doc.xpath("./div").inner_html
-          assert_equal doc, result
-        end
-      end
-      context ":nofollow" do
-        context "for a hyperlink that does not have a rel attribute" do
-          it "add a 'nofollow' attribute to hyperlinks" do
-            doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOFOLLOW_FRAGMENT}</div>"
-            result = doc.scrub! :nofollow
-            assert_equal NOFOLLOW_RESULT, doc.xpath("./div").inner_html
-            assert_equal doc, result
-          end
-        end
-        context "for a hyperlink that does have a rel attribute" do
-          it "appends nofollow to rel attribute" do
-              doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOFOLLOW_WITH_REL_FRAGMENT}</div>"
-              result = doc.scrub! :nofollow
-              assert_equal NOFOLLOW_WITH_REL_RESULT, doc.xpath("./div").inner_html
-              assert_equal doc, result
-          end
-        end
-      end
-      context ":noopener" do
-        context "for a hyperlink without a 'rel' attribute" do
-          it "add a 'noopener' attribute to hyperlinks" do
-            doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOOPENER_FRAGMENT}</div>"
-            result = doc.scrub! :noopener
-            assert_equal NOOPENER_RESULT, doc.xpath("./div").inner_html
-            assert_equal doc, result
-          end
-        end
-        context "for a hyperlink that does have a rel attribute" do
-          it "appends 'noopener' to 'rel' attribute" do
-            doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOOPENER_WITH_REL_FRAGMENT}</div>"
-            result = doc.scrub! :noopener
-            assert_equal NOOPENER_WITH_REL_RESULT, doc.xpath("./div").inner_html
-            assert_equal doc, result
-          end
-        end
-      end
-      context ":unprintable" do
-        it "removes unprintable unicode characters" do
-          doc = Loofah::HTML::DocumentFragment.parse "<div>#{UNPRINTABLE_FRAGMENT}</div>"
-          result = doc.scrub! :unprintable
-          assert_equal UNPRINTABLE_RESULT, doc.xpath("./div").inner_html
-          assert_equal doc, result
-        end
-      end
-    end
-    context "#scrub_fragment" do
-      it "be a shortcut for parse-and-scrub" do
-        mock_doc = Object.new
-        mock(Loofah).fragment(:string_or_io) { mock_doc }
-        mock(mock_doc).scrub!(:method)
-        Loofah.scrub_fragment(:string_or_io, :method)
-      end
-    end
-    context "#text" do
-      it "leave behind only inner text with html entities still escaped" do
-        doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
-        result = doc.text
-        assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
-      end
-      context "with encode_special_chars => false" do
-        it "leave behind only inner text with html entities unescaped" do
-          doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
-          result = doc.text(:encode_special_chars => false)
-          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
-        end
-      end
-      context "with encode_special_chars => true" do
-        it "leave behind only inner text with html entities still escaped" do
-          doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
-          result = doc.text(:encode_special_chars => true)
-          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
-        end
-      end
-    end
-    context "#to_s" do
-      it "not remove entities" do
-        string = Loofah.scrub_fragment(ENTITY_FRAGMENT, :prune).to_s
-        assert_match %r/this is &lt;/, string
-      end
-    end
-    context "Node" do
-      context "#scrub!" do
-        it "only scrub subtree" do
-          xml = Loofah.fragment <<-EOHTML
-            <div class='scrub'>
-              <script>I should be removed</script>
-            </div>
-            <div class='noscrub'>
-              <script>I should remain</script>
-            </div>
-          EOHTML
-          node = xml.at_css "div.scrub"
-          node.scrub!(:prune)
-          assert_match %r(I should remain),     xml.to_s
-          refute_match %r(I should be removed), xml.to_s
-        end
-      end
-    end
-    context "NodeSet" do
-      context "#scrub!" do
-        it "only scrub subtrees" do
-          xml = Loofah.fragment <<-EOHTML
-            <div class='scrub'>
-              <script>I should be removed</script>
-            </div>
-            <div class='noscrub'>
-              <script>I should remain</script>
-            </div>
-            <div class='scrub'>
-              <script>I should also be removed</script>
-            </div>
-          EOHTML
-          node_set = xml.css "div.scrub"
-          assert_equal 2, node_set.length
-          node_set.scrub!(:prune)
-          assert_match %r/I should remain/,          xml.to_s
-          refute_match %r/I should be removed/,      xml.to_s
-          refute_match %r/I should also be removed/, xml.to_s
-        end
-      end
-    end
-  end
-end