RubyGems - loofah - Versions diffs - 2.19.1 → 2.24.0 - Mend

loofah 2.19.1 → 2.24.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23)

checksums.yaml +4 -4
data/CHANGELOG.md +102 -0
data/README.md +161 -115
data/lib/loofah/concerns.rb +207 -0
data/lib/loofah/elements.rb +78 -76
data/lib/loofah/helpers.rb +21 -15
data/lib/loofah/{html → html4}/document.rb +5 -7
data/lib/loofah/html4/document_fragment.rb +15 -0
data/lib/loofah/html5/document.rb +17 -0
data/lib/loofah/html5/document_fragment.rb +15 -0
data/lib/loofah/html5/libxml2_workarounds.rb +7 -6
data/lib/loofah/html5/safelist.rb +940 -924
data/lib/loofah/html5/scrub.rb +36 -35
data/lib/loofah/metahelpers.rb +10 -6
data/lib/loofah/scrubber.rb +10 -8
data/lib/loofah/scrubbers.rb +174 -43
data/lib/loofah/version.rb +2 -1
data/lib/loofah/xml/document.rb +1 -0
data/lib/loofah/xml/document_fragment.rb +2 -6
data/lib/loofah.rb +116 -43
metadata +18 -122
data/lib/loofah/html/document_fragment.rb +0 -42
data/lib/loofah/instance_methods.rb +0 -133

data/lib/loofah/html5/scrub.rb CHANGED

@@ -1,4 +1,5 @@
 # frozen_string_literal: true
 require "cgi"
 require "crass"
@@ -6,9 +7,10 @@ module Loofah
   module HTML5 # :nodoc:
     module Scrub
       CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
-      CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/
+      CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/ # rubocop:disable Layout/LineLength
       CRASS_SEMICOLON = { node: :semicolon, raw: ";" }
-      CSS_IMPORTANT = '!important'
+      CSS_IMPORTANT = "!important"
+      CSS_WHITESPACE = " "
       CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES = /\A(["'])?[^"']+\1\z/
       DATA_ATTRIBUTE_NAME = /\Adata-[\w-]+\z/
@@ -26,7 +28,7 @@ module Loofah
               attr_node.node_name
             end
-            if attr_name =~ DATA_ATTRIBUTE_NAME
+            if DATA_ATTRIBUTE_NAME.match?(attr_name)
               next
             end
@@ -43,10 +45,12 @@ module Loofah
               scrub_attribute_that_allows_local_ref(attr_node)
             end
-            if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
-              attr_node.remove
-              next
-            end
+            next unless SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) &&
+              attr_name == "xlink:href" &&
+              attr_node.value =~ /^\s*[^#\s].*/m
+            attr_node.remove
+            next
           end
           scrub_css_attribute(node)
@@ -66,29 +70,28 @@ module Loofah
         end
         def scrub_css(style)
+          url_flags = [:url, :bad_url]
           style_tree = Crass.parse_properties(style)
           sanitized_tree = []
           style_tree.each do |node|
             next unless node[:node] == :property
             next if node[:children].any? do |child|
-              [:url, :bad_url].include?(child[:node])
+              url_flags.include?(child[:node])
             end
             name = node[:name].downcase
             next unless SafeList::ALLOWED_CSS_PROPERTIES.include?(name) ||
-                SafeList::ALLOWED_SVG_PROPERTIES.include?(name) ||
-                SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
+              SafeList::ALLOWED_SVG_PROPERTIES.include?(name) ||
+              SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
             value = node[:children].map do |child|
               case child[:node]
               when :whitespace
-                nil
+                CSS_WHITESPACE
               when :string
-                if child[:raw] =~ CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES
+                if CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES.match?(child[:raw])
                   Crass::Parser.stringify(child)
-                else
-                  nil
                 end
               when :function
                 if SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase)
@@ -97,18 +100,19 @@ module Loofah
               when :ident
                 keyword = child[:value]
                 if !SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first) ||
-                   SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) ||
-                   (keyword =~ CSS_KEYWORDISH)
+                    SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) ||
+                    (keyword =~ CSS_KEYWORDISH)
                   keyword
                 end
               else
                 child[:raw]
               end
-            end.compact
+            end.compact.join.strip
             next if value.empty?
-            value << CSS_IMPORTANT if node[:important]
-            propstring = format("%s:%s", name, value.join(" "))
+            value << CSS_WHITESPACE << CSS_IMPORTANT if node[:important]
+            propstring = format("%s:%s", name, value)
             sanitized_node = Crass.parse_properties(propstring).first
             sanitized_tree << sanitized_node << CRASS_SEMICOLON
           end
@@ -126,13 +130,9 @@ module Loofah
             when :url
               if node[:value].start_with?("#")
                 node[:raw]
-              else
-                nil
               end
             when :hash, :ident, :string
               node[:raw]
-            else
-              nil
             end
           end.compact
@@ -142,7 +142,8 @@ module Loofah
         def scrub_uri_attribute(attr_node)
           # this block lifted nearly verbatim from HTML5 sanitization
           val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
-          if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
+          if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ &&
+              !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
             attr_node.remove
             return true
           elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
@@ -184,8 +185,8 @@ module Loofah
         end
         def cdata_needs_escaping?(node)
-          # Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` or `script` tag as cdata, but it acts that way
-          node.cdata? || (Nokogiri.jruby? && node.text? && (node.parent.name == "style" || node.parent.name == "script"))
+          # Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` tag as cdata, but it acts that way
+          node.cdata? || (Nokogiri.jruby? && node.text? && node.parent.name == "style")
         end
         def cdata_escape(node)
@@ -198,28 +199,28 @@ module Loofah
         end
         TABLE_FOR_ESCAPE_HTML__ = {
-          '<' => '&lt;',
-          '>' => '&gt;',
-          '&' => '&amp;',
+          "<" => "&lt;",
+          ">" => "&gt;",
+          "&" => "&amp;",
         }
         def escape_tags(string)
           # modified version of CGI.escapeHTML from ruby 3.1
           enc = string.encoding
-          unless enc.ascii_compatible?
+          if enc.ascii_compatible?
+            string = string.b
+            string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
+            string.force_encoding(enc)
+          else
             if enc.dummy?
               origenc = enc
               enc = Encoding::Converter.asciicompat_encoding(enc)
               string = enc ? string.encode(enc) : string.b
             end
-            table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
+            table = Hash[TABLE_FOR_ESCAPE_HTML__.map { |pair| pair.map { |s| s.encode(enc) } }]
             string = string.gsub(/#{"[<>&]".encode(enc)}/, table)
             string.encode!(origenc) if origenc
             string
-          else
-            string = string.b
-            string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
-            string.force_encoding(enc)
           end
         end
       end

data/lib/loofah/metahelpers.rb CHANGED

@@ -1,12 +1,16 @@
 # frozen_string_literal: true
 module Loofah
   module MetaHelpers # :nodoc:
-    def self.add_downcased_set_members_to_all_set_constants(mojule)
-      mojule.constants.each do |constant_sym|
-        constant = mojule.const_get constant_sym
-        next unless Set === constant
-        constant.dup.each do |member|
-          constant.add member.downcase
+    class << self
+      def add_downcased_set_members_to_all_set_constants(mojule)
+        mojule.constants.each do |constant_sym|
+          constant = mojule.const_get(constant_sym)
+          next unless Set === constant
+          constant.dup.each do |member|
+            constant.add(member.downcase)
+          end
         end
       end
     end

data/lib/loofah/scrubber.rb CHANGED

@@ -1,4 +1,5 @@
 # frozen_string_literal: true
 module Loofah
   #
   #  A RuntimeError raised when Loofah could not find an appropriate scrubber.
@@ -24,7 +25,7 @@ module Loofah
   #
   #  This can then be run on a document:
   #
-  #    Loofah.fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
+  #    Loofah.html5_fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
   #    # => "<div>foo</div><p>bar</p>"
   #
   #  Scrubbers can be run on a document in either a top-down traversal (the
@@ -32,7 +33,6 @@ module Loofah
   #  Scrubber::STOP to terminate the traversal of a subtree.
   #
   class Scrubber
     # Top-down Scrubbers may return CONTINUE to indicate that the subtree should be traversed.
     CONTINUE = Object.new.freeze
@@ -67,7 +67,9 @@ module Loofah
       unless [:top_down, :bottom_up].include?(direction)
         raise ArgumentError, "direction #{direction} must be one of :top_down or :bottom_up"
       end
-      @direction, @block = direction, block
+      @direction = direction
+      @block = block
     end
     #
@@ -84,7 +86,7 @@ module Loofah
     #  +scrub+, which will be called for each document node.
     #
     def scrub(node)
-      raise ScrubberNotFound, "No scrub method has been defined on #{self.class.to_s}"
+      raise ScrubberNotFound, "No scrub method has been defined on #{self.class}"
     end
     #
@@ -103,8 +105,8 @@ module Loofah
     def html5lib_sanitize(node)
       case node.type
       when Nokogiri::XML::Node::ELEMENT_NODE
-        if HTML5::Scrub.allowed_element? node.name
-          HTML5::Scrub.scrub_attributes node
+        if HTML5::Scrub.allowed_element?(node.name)
+          HTML5::Scrub.scrub_attributes(node)
           return Scrubber::CONTINUE
         end
       when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
@@ -120,8 +122,8 @@ module Loofah
     def traverse_conditionally_top_down(node)
       if block
         return if block.call(node) == STOP
-      else
-        return if scrub(node) == STOP
+      elsif scrub(node) == STOP
+        return
       end
       node.children.each { |j| traverse_conditionally_top_down(j) }
     end

data/lib/loofah/scrubbers.rb CHANGED

@@ -1,4 +1,5 @@
 # frozen_string_literal: true
 module Loofah
   #
   #  Loofah provides some built-in scrubbers for sanitizing with
@@ -11,7 +12,7 @@ module Loofah
   #  +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
   #
   #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
-  #     Loofah.fragment(unsafe_html).scrub!(:strip)
+  #     Loofah.html5_fragment(unsafe_html).scrub!(:strip)
   #     => "ohai! <div>div is safe</div> but foo is <b>not</b>"
   #
   #
@@ -20,7 +21,7 @@ module Loofah
   #  +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
   #
   #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
-  #     Loofah.fragment(unsafe_html).scrub!(:prune)
+  #     Loofah.html5_fragment(unsafe_html).scrub!(:prune)
   #     => "ohai! <div>div is safe</div> "
   #
   #
@@ -29,7 +30,7 @@ module Loofah
   #  +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
   #
   #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
-  #     Loofah.fragment(unsafe_html).scrub!(:escape)
+  #     Loofah.html5_fragment(unsafe_html).scrub!(:escape)
   #     => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
   #
   #
@@ -41,7 +42,7 @@ module Loofah
   #  layer of paint on top of the HTML input to make it look nice.
   #
   #     messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
-  #     Loofah.fragment(messy_markup).scrub!(:whitewash)
+  #     Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
   #     => "ohai! <div>div with attributes</div>"
   #
   #  One use case for this scrubber is to clean up HTML that was
@@ -56,25 +57,42 @@ module Loofah
   #  +:nofollow+ adds a rel="nofollow" attribute to all links
   #
   #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
-  #     Loofah.fragment(link_farmers_markup).scrub!(:nofollow)
+  #     Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
   #     => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
   #
   #
+  #  === Loofah::Scrubbers::TargetBlank / scrub!(:targetblank)
+  #
+  #  +:targetblank+ adds a target="_blank" attribute to all links
+  #
+  #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
+  #     Loofah.html5_fragment(link_farmers_markup).scrub!(:targetblank)
+  #     => "ohai! <a href='http://www.myswarmysite.com/' target="_blank">I like your blog post</a>"
+  #
+  #
   #  === Loofah::Scrubbers::NoOpener / scrub!(:noopener)
   #
   #  +:noopener+ adds a rel="noopener" attribute to all links
   #
   #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
-  #     Loofah.fragment(link_farmers_markup).scrub!(:noopener)
+  #     Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
   #     => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
   #
+  #  === Loofah::Scrubbers::NoReferrer / scrub!(:noreferrer)
+  #
+  #  +:noreferrer+ adds a rel="noreferrer" attribute to all links
+  #
+  #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
+  #     Loofah.html5_fragment(link_farmers_markup).scrub!(:noreferrer)
+  #     => "ohai! <a href='http://www.myswarmysite.com/' rel="noreferrer">I like your blog post</a>"
+  #
   #
   #  === Loofah::Scrubbers::Unprintable / scrub!(:unprintable)
   #
   #  +:unprintable+ removes unprintable Unicode characters.
   #
   #     markup = "<p>Some text with an unprintable character at the end\u2028</p>"
-  #     Loofah.fragment(markup).scrub!(:unprintable)
+  #     Loofah.html5_fragment(markup).scrub!(:unprintable)
   #     => "<p>Some text with an unprintable character at the end</p>"
   #
   #  You may not be able to see the unprintable character in the above example, but there is a
@@ -90,19 +108,20 @@ module Loofah
     #  +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
     #
     #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
-    #     Loofah.fragment(unsafe_html).scrub!(:strip)
+    #     Loofah.html5_fragment(unsafe_html).scrub!(:strip)
     #     => "ohai! <div>div is safe</div> but foo is <b>not</b>"
     #
     class Strip < Scrubber
-      def initialize
+      def initialize # rubocop:disable Lint/MissingSuper
         @direction = :bottom_up
       end
       def scrub(node)
         return CONTINUE if html5lib_sanitize(node) == CONTINUE
         node.before(node.children)
         node.remove
-        return STOP
+        STOP
       end
     end
@@ -112,18 +131,19 @@ module Loofah
     #  +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
     #
     #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
-    #     Loofah.fragment(unsafe_html).scrub!(:prune)
+    #     Loofah.html5_fragment(unsafe_html).scrub!(:prune)
     #     => "ohai! <div>div is safe</div> "
     #
     class Prune < Scrubber
-      def initialize
+      def initialize # rubocop:disable Lint/MissingSuper
         @direction = :top_down
       end
       def scrub(node)
         return CONTINUE if html5lib_sanitize(node) == CONTINUE
         node.remove
-        return STOP
+        STOP
       end
     end
@@ -133,19 +153,20 @@ module Loofah
     #  +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
     #
     #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
-    #     Loofah.fragment(unsafe_html).scrub!(:escape)
+    #     Loofah.html5_fragment(unsafe_html).scrub!(:escape)
     #     => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
     #
     class Escape < Scrubber
-      def initialize
+      def initialize # rubocop:disable Lint/MissingSuper
         @direction = :top_down
       end
       def scrub(node)
         return CONTINUE if html5lib_sanitize(node) == CONTINUE
-        node.add_next_sibling Nokogiri::XML::Text.new(node.to_s, node.document)
+        node.add_next_sibling(Nokogiri::XML::Text.new(node.to_s, node.document))
         node.remove
-        return STOP
+        STOP
       end
     end
@@ -158,7 +179,7 @@ module Loofah
     #  layer of paint on top of the HTML input to make it look nice.
     #
     #     messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
-    #     Loofah.fragment(messy_markup).scrub!(:whitewash)
+    #     Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
     #     => "ohai! <div>div with attributes</div>"
     #
     #  One use case for this scrubber is to clean up HTML that was
@@ -168,14 +189,14 @@ module Loofah
     #  Certainly not me.
     #
     class Whitewash < Scrubber
-      def initialize
+      def initialize # rubocop:disable Lint/MissingSuper
         @direction = :top_down
       end
       def scrub(node)
         case node.type
         when Nokogiri::XML::Node::ELEMENT_NODE
-          if HTML5::Scrub.allowed_element? node.name
+          if HTML5::Scrub.allowed_element?(node.name)
             node.attributes.each { |attr| node.remove_attribute(attr.first) }
             return CONTINUE if node.namespaces.empty?
           end
@@ -193,18 +214,48 @@ module Loofah
     #  +:nofollow+ adds a rel="nofollow" attribute to all links
     #
     #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
-    #     Loofah.fragment(link_farmers_markup).scrub!(:nofollow)
+    #     Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
     #     => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
     #
     class NoFollow < Scrubber
-      def initialize
+      def initialize # rubocop:disable Lint/MissingSuper
         @direction = :top_down
       end
       def scrub(node)
         return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
         append_attribute(node, "rel", "nofollow")
-        return STOP
+        STOP
+      end
+    end
+    #
+    #  === scrub!(:targetblank)
+    #
+    #  +:targetblank+ adds a target="_blank" attribute to all links.
+    #  If there is a target already set, replaces it with target="_blank".
+    #
+    #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
+    #     Loofah.html5_fragment(link_farmers_markup).scrub!(:targetblank)
+    #     => "ohai! <a href='http://www.myswarmysite.com/' target="_blank">I like your blog post</a>"
+    #
+    #  On modern browsers, setting target="_blank" on anchor elements implicitly provides the same
+    #  behavior as setting rel="noopener".
+    #
+    class TargetBlank < Scrubber
+      def initialize # rubocop:disable Lint/MissingSuper
+        @direction = :top_down
+      end
+      def scrub(node)
+        return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
+        href = node["href"]
+        node.set_attribute("target", "_blank") if href && href[0] != "#"
+        STOP
       end
     end
@@ -214,35 +265,59 @@ module Loofah
     #  +:noopener+ adds a rel="noopener" attribute to all links
     #
     #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
-    #     Loofah.fragment(link_farmers_markup).scrub!(:noopener)
+    #     Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
     #     => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
     #
     class NoOpener < Scrubber
-      def initialize
+      def initialize # rubocop:disable Lint/MissingSuper
         @direction = :top_down
       end
       def scrub(node)
         return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
         append_attribute(node, "rel", "noopener")
-        return STOP
+        STOP
+      end
+    end
+    #
+    #  === scrub!(:noreferrer)
+    #
+    #  +:noreferrer+ adds a rel="noreferrer" attribute to all links
+    #
+    #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
+    #     Loofah.html5_fragment(link_farmers_markup).scrub!(:noreferrer)
+    #     => "ohai! <a href='http://www.myswarmysite.com/' rel="noreferrer">I like your blog post</a>"
+    #
+    class NoReferrer < Scrubber
+      def initialize # rubocop:disable Lint/MissingSuper
+        @direction = :top_down
+      end
+      def scrub(node)
+        return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
+        append_attribute(node, "rel", "noreferrer")
+        STOP
       end
     end
     # This class probably isn't useful publicly, but is used for #to_text's current implemention
     class NewlineBlockElements < Scrubber # :nodoc:
-      def initialize
+      def initialize # rubocop:disable Lint/MissingSuper
         @direction = :bottom_up
       end
       def scrub(node)
         return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
         replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
           "\n"
         else
           "\n#{node.content}\n"
         end
-        node.add_next_sibling Nokogiri::XML::Text.new(replacement, node.document)
+        node.add_next_sibling(Nokogiri::XML::Text.new(replacement, node.document))
         node.remove
       end
     end
@@ -253,7 +328,7 @@ module Loofah
     #  +:unprintable+ removes unprintable Unicode characters.
     #
     #     markup = "<p>Some text with an unprintable character at the end\u2028</p>"
-    #     Loofah.fragment(markup).scrub!(:unprintable)
+    #     Loofah.html5_fragment(markup).scrub!(:unprintable)
     #     => "<p>Some text with an unprintable character at the end</p>"
     #
     #  You may not be able to see the unprintable character in the above example, but there is a
@@ -263,7 +338,7 @@ module Loofah
     #     http://timelessrepo.com/json-isnt-a-javascript-subset
     #
     class Unprintable < Scrubber
-      def initialize
+      def initialize # rubocop:disable Lint/MissingSuper
         @direction = :top_down
       end
@@ -275,25 +350,81 @@ module Loofah
       end
     end
+    #
+    #  === scrub!(:double_breakpoint)
+    #
+    #  +:double_breakpoint+ replaces double-break tags with closing/opening paragraph tags.
+    #
+    #     markup = "<p>Some text here in a logical paragraph.<br><br>Some more text, apparently a second paragraph.</p>"
+    #     Loofah.html5_fragment(markup).scrub!(:double_breakpoint)
+    #     => "<p>Some text here in a logical paragraph.</p><p>Some more text, apparently a second paragraph.</p>"
+    #
+    class DoubleBreakpoint < Scrubber
+      def initialize # rubocop:disable Lint/MissingSuper
+        @direction = :top_down
+      end
+      def scrub(node)
+        return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "p")
+        paragraph_with_break_point_nodes = node.xpath("//p[br[following-sibling::br]]")
+        paragraph_with_break_point_nodes.each do |paragraph_node|
+          new_paragraph = paragraph_node.add_previous_sibling("<p>").first
+          paragraph_node.children.each do |child|
+            remove_blank_text_nodes(child)
+          end
+          paragraph_node.children.each do |child|
+            # already unlinked
+            next if child.parent.nil?
+            if child.name == "br" && child.next_sibling.name == "br"
+              new_paragraph = paragraph_node.add_previous_sibling("<p>").first
+              child.next_sibling.unlink
+              child.unlink
+            else
+              child.parent = new_paragraph
+            end
+          end
+          paragraph_node.unlink
+        end
+        CONTINUE
+      end
+      private
+      def remove_blank_text_nodes(node)
+        node.unlink if node.text? && node.blank?
+      end
+    end
     #
     #  A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
     #
     MAP = {
-      :escape => Escape,
-      :prune => Prune,
-      :whitewash => Whitewash,
-      :strip => Strip,
-      :nofollow => NoFollow,
-      :noopener => NoOpener,
-      :newline_block_elements => NewlineBlockElements,
-      :unprintable => Unprintable,
+      escape: Escape,
+      prune: Prune,
+      whitewash: Whitewash,
+      strip: Strip,
+      nofollow: NoFollow,
+      noopener: NoOpener,
+      noreferrer: NoReferrer,
+      targetblank: TargetBlank,
+      newline_block_elements: NewlineBlockElements,
+      unprintable: Unprintable,
+      double_breakpoint: DoubleBreakpoint,
     }
-    #
-    #  Returns an array of symbols representing the built-in scrubbers
-    #
-    def self.scrubber_symbols
-      MAP.keys
+    class << self
+      #
+      #  Returns an array of symbols representing the built-in scrubbers
+      #
+      def scrubber_symbols
+        MAP.keys
+      end
     end
   end
 end

data/lib/loofah/version.rb CHANGED

@@ -1,5 +1,6 @@
 # frozen_string_literal: true
 module Loofah
   # The version of Loofah you are using
-  VERSION = "2.19.1"
+  VERSION = "2.24.0"
 end