loofah 2.19.1 → 2.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/CHANGELOG.md +102 -0
 - data/README.md +161 -115
 - data/lib/loofah/concerns.rb +207 -0
 - data/lib/loofah/elements.rb +78 -76
 - data/lib/loofah/helpers.rb +21 -15
 - data/lib/loofah/{html → html4}/document.rb +5 -7
 - data/lib/loofah/html4/document_fragment.rb +15 -0
 - data/lib/loofah/html5/document.rb +17 -0
 - data/lib/loofah/html5/document_fragment.rb +15 -0
 - data/lib/loofah/html5/libxml2_workarounds.rb +7 -6
 - data/lib/loofah/html5/safelist.rb +940 -924
 - data/lib/loofah/html5/scrub.rb +36 -35
 - data/lib/loofah/metahelpers.rb +10 -6
 - data/lib/loofah/scrubber.rb +10 -8
 - data/lib/loofah/scrubbers.rb +174 -43
 - data/lib/loofah/version.rb +2 -1
 - data/lib/loofah/xml/document.rb +1 -0
 - data/lib/loofah/xml/document_fragment.rb +2 -6
 - data/lib/loofah.rb +116 -43
 - metadata +18 -122
 - data/lib/loofah/html/document_fragment.rb +0 -42
 - data/lib/loofah/instance_methods.rb +0 -133
 
    
        data/lib/loofah/html5/scrub.rb
    CHANGED
    
    | 
         @@ -1,4 +1,5 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       2 
3 
     | 
    
         
             
            require "cgi"
         
     | 
| 
       3 
4 
     | 
    
         
             
            require "crass"
         
     | 
| 
       4 
5 
     | 
    
         | 
| 
         @@ -6,9 +7,10 @@ module Loofah 
     | 
|
| 
       6 
7 
     | 
    
         
             
              module HTML5 # :nodoc:
         
     | 
| 
       7 
8 
     | 
    
         
             
                module Scrub
         
     | 
| 
       8 
9 
     | 
    
         
             
                  CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
         
     | 
| 
       9 
     | 
    
         
            -
                  CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/
         
     | 
| 
      
 10 
     | 
    
         
            +
                  CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/ # rubocop:disable Layout/LineLength
         
     | 
| 
       10 
11 
     | 
    
         
             
                  CRASS_SEMICOLON = { node: :semicolon, raw: ";" }
         
     | 
| 
       11 
     | 
    
         
            -
                  CSS_IMPORTANT =  
     | 
| 
      
 12 
     | 
    
         
            +
                  CSS_IMPORTANT = "!important"
         
     | 
| 
      
 13 
     | 
    
         
            +
                  CSS_WHITESPACE = " "
         
     | 
| 
       12 
14 
     | 
    
         
             
                  CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES = /\A(["'])?[^"']+\1\z/
         
     | 
| 
       13 
15 
     | 
    
         
             
                  DATA_ATTRIBUTE_NAME = /\Adata-[\w-]+\z/
         
     | 
| 
       14 
16 
     | 
    
         | 
| 
         @@ -26,7 +28,7 @@ module Loofah 
     | 
|
| 
       26 
28 
     | 
    
         
             
                          attr_node.node_name
         
     | 
| 
       27 
29 
     | 
    
         
             
                        end
         
     | 
| 
       28 
30 
     | 
    
         | 
| 
       29 
     | 
    
         
            -
                        if attr_name 
     | 
| 
      
 31 
     | 
    
         
            +
                        if DATA_ATTRIBUTE_NAME.match?(attr_name)
         
     | 
| 
       30 
32 
     | 
    
         
             
                          next
         
     | 
| 
       31 
33 
     | 
    
         
             
                        end
         
     | 
| 
       32 
34 
     | 
    
         | 
| 
         @@ -43,10 +45,12 @@ module Loofah 
     | 
|
| 
       43 
45 
     | 
    
         
             
                          scrub_attribute_that_allows_local_ref(attr_node)
         
     | 
| 
       44 
46 
     | 
    
         
             
                        end
         
     | 
| 
       45 
47 
     | 
    
         | 
| 
       46 
     | 
    
         
            -
                         
     | 
| 
       47 
     | 
    
         
            -
                           
     | 
| 
       48 
     | 
    
         
            -
                           
     | 
| 
       49 
     | 
    
         
            -
             
     | 
| 
      
 48 
     | 
    
         
            +
                        next unless SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) &&
         
     | 
| 
      
 49 
     | 
    
         
            +
                          attr_name == "xlink:href" &&
         
     | 
| 
      
 50 
     | 
    
         
            +
                          attr_node.value =~ /^\s*[^#\s].*/m
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
      
 52 
     | 
    
         
            +
                        attr_node.remove
         
     | 
| 
      
 53 
     | 
    
         
            +
                        next
         
     | 
| 
       50 
54 
     | 
    
         
             
                      end
         
     | 
| 
       51 
55 
     | 
    
         | 
| 
       52 
56 
     | 
    
         
             
                      scrub_css_attribute(node)
         
     | 
| 
         @@ -66,29 +70,28 @@ module Loofah 
     | 
|
| 
       66 
70 
     | 
    
         
             
                    end
         
     | 
| 
       67 
71 
     | 
    
         | 
| 
       68 
72 
     | 
    
         
             
                    def scrub_css(style)
         
     | 
| 
      
 73 
     | 
    
         
            +
                      url_flags = [:url, :bad_url]
         
     | 
| 
       69 
74 
     | 
    
         
             
                      style_tree = Crass.parse_properties(style)
         
     | 
| 
       70 
75 
     | 
    
         
             
                      sanitized_tree = []
         
     | 
| 
       71 
76 
     | 
    
         | 
| 
       72 
77 
     | 
    
         
             
                      style_tree.each do |node|
         
     | 
| 
       73 
78 
     | 
    
         
             
                        next unless node[:node] == :property
         
     | 
| 
       74 
79 
     | 
    
         
             
                        next if node[:children].any? do |child|
         
     | 
| 
       75 
     | 
    
         
            -
                           
     | 
| 
      
 80 
     | 
    
         
            +
                          url_flags.include?(child[:node])
         
     | 
| 
       76 
81 
     | 
    
         
             
                        end
         
     | 
| 
       77 
82 
     | 
    
         | 
| 
       78 
83 
     | 
    
         
             
                        name = node[:name].downcase
         
     | 
| 
       79 
84 
     | 
    
         
             
                        next unless SafeList::ALLOWED_CSS_PROPERTIES.include?(name) ||
         
     | 
| 
       80 
     | 
    
         
            -
             
     | 
| 
       81 
     | 
    
         
            -
             
     | 
| 
      
 85 
     | 
    
         
            +
                          SafeList::ALLOWED_SVG_PROPERTIES.include?(name) ||
         
     | 
| 
      
 86 
     | 
    
         
            +
                          SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
         
     | 
| 
       82 
87 
     | 
    
         | 
| 
       83 
88 
     | 
    
         
             
                        value = node[:children].map do |child|
         
     | 
| 
       84 
89 
     | 
    
         
             
                          case child[:node]
         
     | 
| 
       85 
90 
     | 
    
         
             
                          when :whitespace
         
     | 
| 
       86 
     | 
    
         
            -
                             
     | 
| 
      
 91 
     | 
    
         
            +
                            CSS_WHITESPACE
         
     | 
| 
       87 
92 
     | 
    
         
             
                          when :string
         
     | 
| 
       88 
     | 
    
         
            -
                            if child[:raw] 
     | 
| 
      
 93 
     | 
    
         
            +
                            if CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES.match?(child[:raw])
         
     | 
| 
       89 
94 
     | 
    
         
             
                              Crass::Parser.stringify(child)
         
     | 
| 
       90 
     | 
    
         
            -
                            else
         
     | 
| 
       91 
     | 
    
         
            -
                              nil
         
     | 
| 
       92 
95 
     | 
    
         
             
                            end
         
     | 
| 
       93 
96 
     | 
    
         
             
                          when :function
         
     | 
| 
       94 
97 
     | 
    
         
             
                            if SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase)
         
     | 
| 
         @@ -97,18 +100,19 @@ module Loofah 
     | 
|
| 
       97 
100 
     | 
    
         
             
                          when :ident
         
     | 
| 
       98 
101 
     | 
    
         
             
                            keyword = child[:value]
         
     | 
| 
       99 
102 
     | 
    
         
             
                            if !SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first) ||
         
     | 
| 
       100 
     | 
    
         
            -
             
     | 
| 
       101 
     | 
    
         
            -
             
     | 
| 
      
 103 
     | 
    
         
            +
                                SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) ||
         
     | 
| 
      
 104 
     | 
    
         
            +
                                (keyword =~ CSS_KEYWORDISH)
         
     | 
| 
       102 
105 
     | 
    
         
             
                              keyword
         
     | 
| 
       103 
106 
     | 
    
         
             
                            end
         
     | 
| 
       104 
107 
     | 
    
         
             
                          else
         
     | 
| 
       105 
108 
     | 
    
         
             
                            child[:raw]
         
     | 
| 
       106 
109 
     | 
    
         
             
                          end
         
     | 
| 
       107 
     | 
    
         
            -
                        end.compact
         
     | 
| 
      
 110 
     | 
    
         
            +
                        end.compact.join.strip
         
     | 
| 
       108 
111 
     | 
    
         | 
| 
       109 
112 
     | 
    
         
             
                        next if value.empty?
         
     | 
| 
       110 
     | 
    
         
            -
             
     | 
| 
       111 
     | 
    
         
            -
                         
     | 
| 
      
 113 
     | 
    
         
            +
             
     | 
| 
      
 114 
     | 
    
         
            +
                        value << CSS_WHITESPACE << CSS_IMPORTANT if node[:important]
         
     | 
| 
      
 115 
     | 
    
         
            +
                        propstring = format("%s:%s", name, value)
         
     | 
| 
       112 
116 
     | 
    
         
             
                        sanitized_node = Crass.parse_properties(propstring).first
         
     | 
| 
       113 
117 
     | 
    
         
             
                        sanitized_tree << sanitized_node << CRASS_SEMICOLON
         
     | 
| 
       114 
118 
     | 
    
         
             
                      end
         
     | 
| 
         @@ -126,13 +130,9 @@ module Loofah 
     | 
|
| 
       126 
130 
     | 
    
         
             
                        when :url
         
     | 
| 
       127 
131 
     | 
    
         
             
                          if node[:value].start_with?("#")
         
     | 
| 
       128 
132 
     | 
    
         
             
                            node[:raw]
         
     | 
| 
       129 
     | 
    
         
            -
                          else
         
     | 
| 
       130 
     | 
    
         
            -
                            nil
         
     | 
| 
       131 
133 
     | 
    
         
             
                          end
         
     | 
| 
       132 
134 
     | 
    
         
             
                        when :hash, :ident, :string
         
     | 
| 
       133 
135 
     | 
    
         
             
                          node[:raw]
         
     | 
| 
       134 
     | 
    
         
            -
                        else
         
     | 
| 
       135 
     | 
    
         
            -
                          nil
         
     | 
| 
       136 
136 
     | 
    
         
             
                        end
         
     | 
| 
       137 
137 
     | 
    
         
             
                      end.compact
         
     | 
| 
       138 
138 
     | 
    
         | 
| 
         @@ -142,7 +142,8 @@ module Loofah 
     | 
|
| 
       142 
142 
     | 
    
         
             
                    def scrub_uri_attribute(attr_node)
         
     | 
| 
       143 
143 
     | 
    
         
             
                      # this block lifted nearly verbatim from HTML5 sanitization
         
     | 
| 
       144 
144 
     | 
    
         
             
                      val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
         
     | 
| 
       145 
     | 
    
         
            -
                      if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && 
     | 
| 
      
 145 
     | 
    
         
            +
                      if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ &&
         
     | 
| 
      
 146 
     | 
    
         
            +
                          !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
         
     | 
| 
       146 
147 
     | 
    
         
             
                        attr_node.remove
         
     | 
| 
       147 
148 
     | 
    
         
             
                        return true
         
     | 
| 
       148 
149 
     | 
    
         
             
                      elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
         
     | 
| 
         @@ -184,8 +185,8 @@ module Loofah 
     | 
|
| 
       184 
185 
     | 
    
         
             
                    end
         
     | 
| 
       185 
186 
     | 
    
         | 
| 
       186 
187 
     | 
    
         
             
                    def cdata_needs_escaping?(node)
         
     | 
| 
       187 
     | 
    
         
            -
                      # Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style`  
     | 
| 
       188 
     | 
    
         
            -
                      node.cdata? || (Nokogiri.jruby? && node.text? &&  
     | 
| 
      
 188 
     | 
    
         
            +
                      # Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` tag as cdata, but it acts that way
         
     | 
| 
      
 189 
     | 
    
         
            +
                      node.cdata? || (Nokogiri.jruby? && node.text? && node.parent.name == "style")
         
     | 
| 
       189 
190 
     | 
    
         
             
                    end
         
     | 
| 
       190 
191 
     | 
    
         | 
| 
       191 
192 
     | 
    
         
             
                    def cdata_escape(node)
         
     | 
| 
         @@ -198,28 +199,28 @@ module Loofah 
     | 
|
| 
       198 
199 
     | 
    
         
             
                    end
         
     | 
| 
       199 
200 
     | 
    
         | 
| 
       200 
201 
     | 
    
         
             
                    TABLE_FOR_ESCAPE_HTML__ = {
         
     | 
| 
       201 
     | 
    
         
            -
                       
     | 
| 
       202 
     | 
    
         
            -
                       
     | 
| 
       203 
     | 
    
         
            -
                       
     | 
| 
      
 202 
     | 
    
         
            +
                      "<" => "&lt;",
         
     | 
| 
      
 203 
     | 
    
         
            +
                      ">" => "&gt;",
         
     | 
| 
      
 204 
     | 
    
         
            +
                      "&" => "&amp;",
         
     | 
| 
       204 
205 
     | 
    
         
             
                    }
         
     | 
| 
       205 
206 
     | 
    
         | 
| 
       206 
207 
     | 
    
         
             
                    def escape_tags(string)
         
     | 
| 
       207 
208 
     | 
    
         
             
                      # modified version of CGI.escapeHTML from ruby 3.1
         
     | 
| 
       208 
209 
     | 
    
         
             
                      enc = string.encoding
         
     | 
| 
       209 
     | 
    
         
            -
                       
     | 
| 
      
 210 
     | 
    
         
            +
                      if enc.ascii_compatible?
         
     | 
| 
      
 211 
     | 
    
         
            +
                        string = string.b
         
     | 
| 
      
 212 
     | 
    
         
            +
                        string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
         
     | 
| 
      
 213 
     | 
    
         
            +
                        string.force_encoding(enc)
         
     | 
| 
      
 214 
     | 
    
         
            +
                      else
         
     | 
| 
       210 
215 
     | 
    
         
             
                        if enc.dummy?
         
     | 
| 
       211 
216 
     | 
    
         
             
                          origenc = enc
         
     | 
| 
       212 
217 
     | 
    
         
             
                          enc = Encoding::Converter.asciicompat_encoding(enc)
         
     | 
| 
       213 
218 
     | 
    
         
             
                          string = enc ? string.encode(enc) : string.b
         
     | 
| 
       214 
219 
     | 
    
         
             
                        end
         
     | 
| 
       215 
     | 
    
         
            -
                        table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
         
     | 
| 
      
 220 
     | 
    
         
            +
                        table = Hash[TABLE_FOR_ESCAPE_HTML__.map { |pair| pair.map { |s| s.encode(enc) } }]
         
     | 
| 
       216 
221 
     | 
    
         
             
                        string = string.gsub(/#{"[<>&]".encode(enc)}/, table)
         
     | 
| 
       217 
222 
     | 
    
         
             
                        string.encode!(origenc) if origenc
         
     | 
| 
       218 
223 
     | 
    
         
             
                        string
         
     | 
| 
       219 
     | 
    
         
            -
                      else
         
     | 
| 
       220 
     | 
    
         
            -
                        string = string.b
         
     | 
| 
       221 
     | 
    
         
            -
                        string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
         
     | 
| 
       222 
     | 
    
         
            -
                        string.force_encoding(enc)
         
     | 
| 
       223 
224 
     | 
    
         
             
                      end
         
     | 
| 
       224 
225 
     | 
    
         
             
                    end
         
     | 
| 
       225 
226 
     | 
    
         
             
                  end
         
     | 
    
        data/lib/loofah/metahelpers.rb
    CHANGED
    
    | 
         @@ -1,12 +1,16 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       2 
3 
     | 
    
         
             
            module Loofah
         
     | 
| 
       3 
4 
     | 
    
         
             
              module MetaHelpers # :nodoc:
         
     | 
| 
       4 
     | 
    
         
            -
                 
     | 
| 
       5 
     | 
    
         
            -
                  mojule 
     | 
| 
       6 
     | 
    
         
            -
                     
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
      
 5 
     | 
    
         
            +
                class << self
         
     | 
| 
      
 6 
     | 
    
         
            +
                  def add_downcased_set_members_to_all_set_constants(mojule)
         
     | 
| 
      
 7 
     | 
    
         
            +
                    mojule.constants.each do |constant_sym|
         
     | 
| 
      
 8 
     | 
    
         
            +
                      constant = mojule.const_get(constant_sym)
         
     | 
| 
      
 9 
     | 
    
         
            +
                      next unless Set === constant
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
                      constant.dup.each do |member|
         
     | 
| 
      
 12 
     | 
    
         
            +
                        constant.add(member.downcase)
         
     | 
| 
      
 13 
     | 
    
         
            +
                      end
         
     | 
| 
       10 
14 
     | 
    
         
             
                    end
         
     | 
| 
       11 
15 
     | 
    
         
             
                  end
         
     | 
| 
       12 
16 
     | 
    
         
             
                end
         
     | 
    
        data/lib/loofah/scrubber.rb
    CHANGED
    
    | 
         @@ -1,4 +1,5 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       2 
3 
     | 
    
         
             
            module Loofah
         
     | 
| 
       3 
4 
     | 
    
         
             
              #
         
     | 
| 
       4 
5 
     | 
    
         
             
              #  A RuntimeError raised when Loofah could not find an appropriate scrubber.
         
     | 
| 
         @@ -24,7 +25,7 @@ module Loofah 
     | 
|
| 
       24 
25 
     | 
    
         
             
              #
         
     | 
| 
       25 
26 
     | 
    
         
             
              #  This can then be run on a document:
         
     | 
| 
       26 
27 
     | 
    
         
             
              #
         
     | 
| 
       27 
     | 
    
         
            -
              #    Loofah. 
     | 
| 
      
 28 
     | 
    
         
            +
              #    Loofah.html5_fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
         
     | 
| 
       28 
29 
     | 
    
         
             
              #    # => "<div>foo</div><p>bar</p>"
         
     | 
| 
       29 
30 
     | 
    
         
             
              #
         
     | 
| 
       30 
31 
     | 
    
         
             
              #  Scrubbers can be run on a document in either a top-down traversal (the
         
     | 
| 
         @@ -32,7 +33,6 @@ module Loofah 
     | 
|
| 
       32 
33 
     | 
    
         
             
              #  Scrubber::STOP to terminate the traversal of a subtree.
         
     | 
| 
       33 
34 
     | 
    
         
             
              #
         
     | 
| 
       34 
35 
     | 
    
         
             
              class Scrubber
         
     | 
| 
       35 
     | 
    
         
            -
             
     | 
| 
       36 
36 
     | 
    
         
             
                # Top-down Scrubbers may return CONTINUE to indicate that the subtree should be traversed.
         
     | 
| 
       37 
37 
     | 
    
         
             
                CONTINUE = Object.new.freeze
         
     | 
| 
       38 
38 
     | 
    
         | 
| 
         @@ -67,7 +67,9 @@ module Loofah 
     | 
|
| 
       67 
67 
     | 
    
         
             
                  unless [:top_down, :bottom_up].include?(direction)
         
     | 
| 
       68 
68 
     | 
    
         
             
                    raise ArgumentError, "direction #{direction} must be one of :top_down or :bottom_up"
         
     | 
| 
       69 
69 
     | 
    
         
             
                  end
         
     | 
| 
       70 
     | 
    
         
            -
             
     | 
| 
      
 70 
     | 
    
         
            +
             
     | 
| 
      
 71 
     | 
    
         
            +
                  @direction = direction
         
     | 
| 
      
 72 
     | 
    
         
            +
                  @block = block
         
     | 
| 
       71 
73 
     | 
    
         
             
                end
         
     | 
| 
       72 
74 
     | 
    
         | 
| 
       73 
75 
     | 
    
         
             
                #
         
     | 
| 
         @@ -84,7 +86,7 @@ module Loofah 
     | 
|
| 
       84 
86 
     | 
    
         
             
                #  +scrub+, which will be called for each document node.
         
     | 
| 
       85 
87 
     | 
    
         
             
                #
         
     | 
| 
       86 
88 
     | 
    
         
             
                def scrub(node)
         
     | 
| 
       87 
     | 
    
         
            -
                  raise ScrubberNotFound, "No scrub method has been defined on #{self.class 
     | 
| 
      
 89 
     | 
    
         
            +
                  raise ScrubberNotFound, "No scrub method has been defined on #{self.class}"
         
     | 
| 
       88 
90 
     | 
    
         
             
                end
         
     | 
| 
       89 
91 
     | 
    
         | 
| 
       90 
92 
     | 
    
         
             
                #
         
     | 
| 
         @@ -103,8 +105,8 @@ module Loofah 
     | 
|
| 
       103 
105 
     | 
    
         
             
                def html5lib_sanitize(node)
         
     | 
| 
       104 
106 
     | 
    
         
             
                  case node.type
         
     | 
| 
       105 
107 
     | 
    
         
             
                  when Nokogiri::XML::Node::ELEMENT_NODE
         
     | 
| 
       106 
     | 
    
         
            -
                    if HTML5::Scrub.allowed_element? 
     | 
| 
       107 
     | 
    
         
            -
                      HTML5::Scrub.scrub_attributes 
     | 
| 
      
 108 
     | 
    
         
            +
                    if HTML5::Scrub.allowed_element?(node.name)
         
     | 
| 
      
 109 
     | 
    
         
            +
                      HTML5::Scrub.scrub_attributes(node)
         
     | 
| 
       108 
110 
     | 
    
         
             
                      return Scrubber::CONTINUE
         
     | 
| 
       109 
111 
     | 
    
         
             
                    end
         
     | 
| 
       110 
112 
     | 
    
         
             
                  when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
         
     | 
| 
         @@ -120,8 +122,8 @@ module Loofah 
     | 
|
| 
       120 
122 
     | 
    
         
             
                def traverse_conditionally_top_down(node)
         
     | 
| 
       121 
123 
     | 
    
         
             
                  if block
         
     | 
| 
       122 
124 
     | 
    
         
             
                    return if block.call(node) == STOP
         
     | 
| 
       123 
     | 
    
         
            -
                   
     | 
| 
       124 
     | 
    
         
            -
                    return 
     | 
| 
      
 125 
     | 
    
         
            +
                  elsif scrub(node) == STOP
         
     | 
| 
      
 126 
     | 
    
         
            +
                    return
         
     | 
| 
       125 
127 
     | 
    
         
             
                  end
         
     | 
| 
       126 
128 
     | 
    
         
             
                  node.children.each { |j| traverse_conditionally_top_down(j) }
         
     | 
| 
       127 
129 
     | 
    
         
             
                end
         
     | 
    
        data/lib/loofah/scrubbers.rb
    CHANGED
    
    | 
         @@ -1,4 +1,5 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # frozen_string_literal: true
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       2 
3 
     | 
    
         
             
            module Loofah
         
     | 
| 
       3 
4 
     | 
    
         
             
              #
         
     | 
| 
       4 
5 
     | 
    
         
             
              #  Loofah provides some built-in scrubbers for sanitizing with
         
     | 
| 
         @@ -11,7 +12,7 @@ module Loofah 
     | 
|
| 
       11 
12 
     | 
    
         
             
              #  +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
         
     | 
| 
       12 
13 
     | 
    
         
             
              #
         
     | 
| 
       13 
14 
     | 
    
         
             
              #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
         
     | 
| 
       14 
     | 
    
         
            -
              #     Loofah. 
     | 
| 
      
 15 
     | 
    
         
            +
              #     Loofah.html5_fragment(unsafe_html).scrub!(:strip)
         
     | 
| 
       15 
16 
     | 
    
         
             
              #     => "ohai! <div>div is safe</div> but foo is <b>not</b>"
         
     | 
| 
       16 
17 
     | 
    
         
             
              #
         
     | 
| 
       17 
18 
     | 
    
         
             
              #
         
     | 
| 
         @@ -20,7 +21,7 @@ module Loofah 
     | 
|
| 
       20 
21 
     | 
    
         
             
              #  +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
         
     | 
| 
       21 
22 
     | 
    
         
             
              #
         
     | 
| 
       22 
23 
     | 
    
         
             
              #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
         
     | 
| 
       23 
     | 
    
         
            -
              #     Loofah.fragment(unsafe_html).scrub!(:prune)
     | 
| 
      
 24 
     | 
    
         
            +
              #     Loofah.html5_fragment(unsafe_html).scrub!(:prune)
         
     | 
| 
       24 
25 
     | 
    
         
             
              #     => "ohai! <div>div is safe</div> "
         
     | 
| 
       25 
26 
     | 
    
         
             
              #
         
     | 
| 
       26 
27 
     | 
    
         
             
              #
         
     | 
| 
         @@ -29,7 +30,7 @@ module Loofah 
     | 
|
| 
       29 
30 
     | 
    
         
             
              #  +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
         
     | 
| 
       30 
31 
     | 
    
         
             
              #
         
     | 
| 
       31 
32 
     | 
    
         
             
              #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
         
     | 
| 
       32 
     | 
    
         
            -
              #     Loofah.fragment(unsafe_html).scrub!(:escape)
     | 
| 
      
 33 
     | 
    
         
            +
              #     Loofah.html5_fragment(unsafe_html).scrub!(:escape)
         
     | 
| 
       33 
34 
     | 
    
         
             
              #     => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
         
     | 
| 
       34 
35 
     | 
    
         
             
              #
         
     | 
| 
       35 
36 
     | 
    
         
             
              #
         
     | 
| 
         @@ -41,7 +42,7 @@ module Loofah 
     | 
|
| 
       41 
42 
     | 
    
         
             
              #  layer of paint on top of the HTML input to make it look nice.
         
     | 
| 
       42 
43 
     | 
    
         
             
              #
         
     | 
| 
       43 
44 
     | 
    
         
             
              #     messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
         
     | 
| 
       44 
     | 
    
         
            -
              #     Loofah.fragment(messy_markup).scrub!(:whitewash)
     | 
| 
      
 45 
     | 
    
         
            +
              #     Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
         
     | 
| 
       45 
46 
     | 
    
         
             
              #     => "ohai! <div>div with attributes</div>"
         
     | 
| 
       46 
47 
     | 
    
         
             
              #
         
     | 
| 
       47 
48 
     | 
    
         
             
              #  One use case for this scrubber is to clean up HTML that was
         
     | 
| 
         @@ -56,25 +57,42 @@ module Loofah 
     | 
|
| 
       56 
57 
     | 
    
         
             
              #  +:nofollow+ adds a rel="nofollow" attribute to all links
         
     | 
| 
       57 
58 
     | 
    
         
             
              #
         
     | 
| 
       58 
59 
     | 
    
         
             
              #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
         
     | 
| 
       59 
     | 
    
         
            -
              #     Loofah.fragment(link_farmers_markup).scrub!(:nofollow)
     | 
| 
      
 60 
     | 
    
         
            +
              #     Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
         
     | 
| 
       60 
61 
     | 
    
         
             
              #     => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
         
     | 
| 
       61 
62 
     | 
    
         
             
              #
         
     | 
| 
       62 
63 
     | 
    
         
             
              #
         
     | 
| 
      
 64 
     | 
    
         
            +
              #  === Loofah::Scrubbers::TargetBlank / scrub!(:targetblank)
         
     | 
| 
      
 65 
     | 
    
         
            +
              #
         
     | 
| 
      
 66 
     | 
    
         
            +
              #  +:targetblank+ adds a target="_blank" attribute to all links
         
     | 
| 
      
 67 
     | 
    
         
            +
              #
         
     | 
| 
      
 68 
     | 
    
         
            +
              #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
         
     | 
| 
      
 69 
     | 
    
         
            +
              #     Loofah.html5_fragment(link_farmers_markup).scrub!(:targetblank)
         
     | 
| 
      
 70 
     | 
    
         
            +
              #     => "ohai! <a href='http://www.myswarmysite.com/' target="_blank">I like your blog post</a>"
         
     | 
| 
      
 71 
     | 
    
         
            +
              #
         
     | 
| 
      
 72 
     | 
    
         
            +
              #
         
     | 
| 
       63 
73 
     | 
    
         
             
              #  === Loofah::Scrubbers::NoOpener / scrub!(:noopener)
         
     | 
| 
       64 
74 
     | 
    
         
             
              #
         
     | 
| 
       65 
75 
     | 
    
         
             
              #  +:noopener+ adds a rel="noopener" attribute to all links
         
     | 
| 
       66 
76 
     | 
    
         
             
              #
         
     | 
| 
       67 
77 
     | 
    
         
             
              #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
         
     | 
| 
       68 
     | 
    
         
            -
              #     Loofah.fragment(link_farmers_markup).scrub!(:noopener)
     | 
| 
      
 78 
     | 
    
         
            +
              #     Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
         
     | 
| 
       69 
79 
     | 
    
         
             
              #     => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
         
     | 
| 
       70 
80 
     | 
    
         
             
              #
         
     | 
| 
      
 81 
     | 
    
         
            +
              #  === Loofah::Scrubbers::NoReferrer / scrub!(:noreferrer)
         
     | 
| 
      
 82 
     | 
    
         
            +
              #
         
     | 
| 
      
 83 
     | 
    
         
            +
              #  +:noreferrer+ adds a rel="noreferrer" attribute to all links
         
     | 
| 
      
 84 
     | 
    
         
            +
              #
         
     | 
| 
      
 85 
     | 
    
         
            +
              #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
         
     | 
| 
      
 86 
     | 
    
         
            +
              #     Loofah.html5_fragment(link_farmers_markup).scrub!(:noreferrer)
         
     | 
| 
      
 87 
     | 
    
         
            +
              #     => "ohai! <a href='http://www.myswarmysite.com/' rel="noreferrer">I like your blog post</a>"
         
     | 
| 
      
 88 
     | 
    
         
            +
              #
         
     | 
| 
       71 
89 
     | 
    
         
             
              #
         
     | 
| 
       72 
90 
     | 
    
         
             
              #  === Loofah::Scrubbers::Unprintable / scrub!(:unprintable)
         
     | 
| 
       73 
91 
     | 
    
         
             
              #
         
     | 
| 
       74 
92 
     | 
    
         
             
              #  +:unprintable+ removes unprintable Unicode characters.
         
     | 
| 
       75 
93 
     | 
    
         
             
              #
         
     | 
| 
       76 
94 
     | 
    
         
             
              #     markup = "<p>Some text with an unprintable character at the end\u2028</p>"
         
     | 
| 
       77 
     | 
    
         
            -
              #     Loofah.fragment(markup).scrub!(:unprintable)
     | 
| 
      
 95 
     | 
    
         
            +
              #     Loofah.html5_fragment(markup).scrub!(:unprintable)
         
     | 
| 
       78 
96 
     | 
    
         
             
              #     => "<p>Some text with an unprintable character at the end</p>"
         
     | 
| 
       79 
97 
     | 
    
         
             
              #
         
     | 
| 
       80 
98 
     | 
    
         
             
              #  You may not be able to see the unprintable character in the above example, but there is a
         
     | 
| 
         @@ -90,19 +108,20 @@ module Loofah 
     | 
|
| 
       90 
108 
     | 
    
         
             
                #  +:strip+ removes unknown/unsafe tags, but leaves behind the pristine contents:
         
     | 
| 
       91 
109 
     | 
    
         
             
                #
         
     | 
| 
       92 
110 
     | 
    
         
             
                #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
         
     | 
| 
       93 
     | 
    
         
            -
                #     Loofah.fragment(unsafe_html).scrub!(:strip)
     | 
| 
      
 111 
     | 
    
         
            +
                #     Loofah.html5_fragment(unsafe_html).scrub!(:strip)
         
     | 
| 
       94 
112 
     | 
    
         
             
                #     => "ohai! <div>div is safe</div> but foo is <b>not</b>"
         
     | 
| 
       95 
113 
     | 
    
         
             
                #
         
     | 
| 
       96 
114 
     | 
    
         
             
                class Strip < Scrubber
         
     | 
| 
       97 
     | 
    
         
            -
                  def initialize
         
     | 
| 
      
 115 
     | 
    
         
            +
                  def initialize # rubocop:disable Lint/MissingSuper
         
     | 
| 
       98 
116 
     | 
    
         
             
                    @direction = :bottom_up
         
     | 
| 
       99 
117 
     | 
    
         
             
                  end
         
     | 
| 
       100 
118 
     | 
    
         | 
| 
       101 
119 
     | 
    
         
             
                  def scrub(node)
         
     | 
| 
       102 
120 
     | 
    
         
             
                    return CONTINUE if html5lib_sanitize(node) == CONTINUE
         
     | 
| 
      
 121 
     | 
    
         
            +
             
     | 
| 
       103 
122 
     | 
    
         
             
                    node.before(node.children)
         
     | 
| 
       104 
123 
     | 
    
         
             
                    node.remove
         
     | 
| 
       105 
     | 
    
         
            -
                     
     | 
| 
      
 124 
     | 
    
         
            +
                    STOP
         
     | 
| 
       106 
125 
     | 
    
         
             
                  end
         
     | 
| 
       107 
126 
     | 
    
         
             
                end
         
     | 
| 
       108 
127 
     | 
    
         | 
| 
         @@ -112,18 +131,19 @@ module Loofah 
     | 
|
| 
       112 
131 
     | 
    
         
             
                #  +:prune+ removes unknown/unsafe tags and their contents (including their subtrees):
         
     | 
| 
       113 
132 
     | 
    
         
             
                #
         
     | 
| 
       114 
133 
     | 
    
         
             
                #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
         
     | 
| 
       115 
     | 
    
         
            -
                #     Loofah.fragment(unsafe_html).scrub!(:prune)
     | 
| 
      
 134 
     | 
    
         
            +
                #     Loofah.html5_fragment(unsafe_html).scrub!(:prune)
         
     | 
| 
       116 
135 
     | 
    
         
             
                #     => "ohai! <div>div is safe</div> "
         
     | 
| 
       117 
136 
     | 
    
         
             
                #
         
     | 
| 
       118 
137 
     | 
    
         
             
                class Prune < Scrubber
         
     | 
| 
       119 
     | 
    
         
            -
                  def initialize
         
     | 
| 
      
 138 
     | 
    
         
            +
                  def initialize # rubocop:disable Lint/MissingSuper
         
     | 
| 
       120 
139 
     | 
    
         
             
                    @direction = :top_down
         
     | 
| 
       121 
140 
     | 
    
         
             
                  end
         
     | 
| 
       122 
141 
     | 
    
         | 
| 
       123 
142 
     | 
    
         
             
                  def scrub(node)
         
     | 
| 
       124 
143 
     | 
    
         
             
                    return CONTINUE if html5lib_sanitize(node) == CONTINUE
         
     | 
| 
      
 144 
     | 
    
         
            +
             
     | 
| 
       125 
145 
     | 
    
         
             
                    node.remove
         
     | 
| 
       126 
     | 
    
         
            -
                     
     | 
| 
      
 146 
     | 
    
         
            +
                    STOP
         
     | 
| 
       127 
147 
     | 
    
         
             
                  end
         
     | 
| 
       128 
148 
     | 
    
         
             
                end
         
     | 
| 
       129 
149 
     | 
    
         | 
| 
         @@ -133,19 +153,20 @@ module Loofah 
     | 
|
| 
       133 
153 
     | 
    
         
             
                #  +:escape+ performs HTML entity escaping on the unknown/unsafe tags:
         
     | 
| 
       134 
154 
     | 
    
         
             
                #
         
     | 
| 
       135 
155 
     | 
    
         
             
                #     unsafe_html = "ohai! <div>div is safe</div> <foo>but foo is <b>not</b></foo>"
         
     | 
| 
       136 
     | 
    
         
            -
                #     Loofah.fragment(unsafe_html).scrub!(:escape)
     | 
| 
      
 156 
     | 
    
         
            +
                #     Loofah.html5_fragment(unsafe_html).scrub!(:escape)
         
     | 
| 
       137 
157 
     | 
    
         
             
                #     => "ohai! <div>div is safe</div> &lt;foo&gt;but foo is &lt;b&gt;not&lt;/b&gt;&lt;/foo&gt;"
         
     | 
| 
       138 
158 
     | 
    
         
             
                #
         
     | 
| 
       139 
159 
     | 
    
         
             
                class Escape < Scrubber
         
     | 
| 
       140 
     | 
    
         
            -
                  def initialize
         
     | 
| 
      
 160 
     | 
    
         
            +
                  def initialize # rubocop:disable Lint/MissingSuper
         
     | 
| 
       141 
161 
     | 
    
         
             
                    @direction = :top_down
         
     | 
| 
       142 
162 
     | 
    
         
             
                  end
         
     | 
| 
       143 
163 
     | 
    
         | 
| 
       144 
164 
     | 
    
         
             
                  def scrub(node)
         
     | 
| 
       145 
165 
     | 
    
         
             
                    return CONTINUE if html5lib_sanitize(node) == CONTINUE
         
     | 
| 
       146 
     | 
    
         
            -
             
     | 
| 
      
 166 
     | 
    
         
            +
             
     | 
| 
      
 167 
     | 
    
         
            +
                    node.add_next_sibling(Nokogiri::XML::Text.new(node.to_s, node.document))
         
     | 
| 
       147 
168 
     | 
    
         
             
                    node.remove
         
     | 
| 
       148 
     | 
    
         
            -
                     
     | 
| 
      
 169 
     | 
    
         
            +
                    STOP
         
     | 
| 
       149 
170 
     | 
    
         
             
                  end
         
     | 
| 
       150 
171 
     | 
    
         
             
                end
         
     | 
| 
       151 
172 
     | 
    
         | 
| 
         @@ -158,7 +179,7 @@ module Loofah 
     | 
|
| 
       158 
179 
     | 
    
         
             
                #  layer of paint on top of the HTML input to make it look nice.
         
     | 
| 
       159 
180 
     | 
    
         
             
                #
         
     | 
| 
       160 
181 
     | 
    
         
             
                #     messy_markup = "ohai! <div id='foo' class='bar' style='margin: 10px'>div with attributes</div>"
         
     | 
| 
       161 
     | 
    
         
            -
                #     Loofah.fragment(messy_markup).scrub!(:whitewash)
     | 
| 
      
 182 
     | 
    
         
            +
                #     Loofah.html5_fragment(messy_markup).scrub!(:whitewash)
         
     | 
| 
       162 
183 
     | 
    
         
             
                #     => "ohai! <div>div with attributes</div>"
         
     | 
| 
       163 
184 
     | 
    
         
             
                #
         
     | 
| 
       164 
185 
     | 
    
         
             
                #  One use case for this scrubber is to clean up HTML that was
         
     | 
| 
         @@ -168,14 +189,14 @@ module Loofah 
     | 
|
| 
       168 
189 
     | 
    
         
             
                #  Certainly not me.
         
     | 
| 
       169 
190 
     | 
    
         
             
                #
         
     | 
| 
       170 
191 
     | 
    
         
             
                class Whitewash < Scrubber
         
     | 
| 
       171 
     | 
    
         
            -
                  def initialize
         
     | 
| 
      
 192 
     | 
    
         
            +
                  def initialize # rubocop:disable Lint/MissingSuper
         
     | 
| 
       172 
193 
     | 
    
         
             
                    @direction = :top_down
         
     | 
| 
       173 
194 
     | 
    
         
             
                  end
         
     | 
| 
       174 
195 
     | 
    
         | 
| 
       175 
196 
     | 
    
         
             
                  def scrub(node)
         
     | 
| 
       176 
197 
     | 
    
         
             
                    case node.type
         
     | 
| 
       177 
198 
     | 
    
         
             
                    when Nokogiri::XML::Node::ELEMENT_NODE
         
     | 
| 
       178 
     | 
    
         
            -
                      if HTML5::Scrub.allowed_element? node.name
     | 
| 
      
 199 
     | 
    
         
            +
                      if HTML5::Scrub.allowed_element?(node.name)
         
     | 
| 
       179 
200 
     | 
    
         
             
                        node.attributes.each { |attr| node.remove_attribute(attr.first) }
         
     | 
| 
       180 
201 
     | 
    
         
             
                        return CONTINUE if node.namespaces.empty?
         
     | 
| 
       181 
202 
     | 
    
         
             
                      end
         
     | 
| 
         @@ -193,18 +214,48 @@ module Loofah 
     | 
|
| 
       193 
214 
     | 
    
         
             
                #  +:nofollow+ adds a rel="nofollow" attribute to all links
         
     | 
| 
       194 
215 
     | 
    
         
             
                #
         
     | 
| 
       195 
216 
     | 
    
         
             
                #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
         
     | 
| 
       196 
     | 
    
         
            -
                #     Loofah.fragment(link_farmers_markup).scrub!(:nofollow)
     | 
| 
      
 217 
     | 
    
         
            +
                #     Loofah.html5_fragment(link_farmers_markup).scrub!(:nofollow)
         
     | 
| 
       197 
218 
     | 
    
         
             
                #     => "ohai! <a href='http://www.myswarmysite.com/' rel="nofollow">I like your blog post</a>"
         
     | 
| 
       198 
219 
     | 
    
         
             
                #
         
     | 
| 
       199 
220 
     | 
    
         
             
                class NoFollow < Scrubber
         
     | 
| 
       200 
     | 
    
         
            -
                  def initialize
         
     | 
| 
      
 221 
     | 
    
         
            +
                  def initialize # rubocop:disable Lint/MissingSuper
         
     | 
| 
       201 
222 
     | 
    
         
             
                    @direction = :top_down
         
     | 
| 
       202 
223 
     | 
    
         
             
                  end
         
     | 
| 
       203 
224 
     | 
    
         | 
| 
       204 
225 
     | 
    
         
             
                  def scrub(node)
         
     | 
| 
       205 
226 
     | 
    
         
             
                    return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
         
     | 
| 
      
 227 
     | 
    
         
            +
             
     | 
| 
       206 
228 
     | 
    
         
             
                    append_attribute(node, "rel", "nofollow")
         
     | 
| 
       207 
     | 
    
         
            -
                     
     | 
| 
      
 229 
     | 
    
         
            +
                    STOP
         
     | 
| 
      
 230 
     | 
    
         
            +
                  end
         
     | 
| 
      
 231 
     | 
    
         
            +
                end
         
     | 
| 
      
 232 
     | 
    
         
            +
             
     | 
| 
      
 233 
     | 
    
         
            +
                #
         
     | 
| 
      
 234 
     | 
    
         
            +
                #  === scrub!(:targetblank)
         
     | 
| 
      
 235 
     | 
    
         
            +
                #
         
     | 
| 
      
 236 
     | 
    
         
            +
                #  +:targetblank+ adds a target="_blank" attribute to all links.
         
     | 
| 
      
 237 
     | 
    
         
            +
                #  If there is a target already set, replaces it with target="_blank".
         
     | 
| 
      
 238 
     | 
    
         
            +
                #
         
     | 
| 
      
 239 
     | 
    
         
            +
                #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
         
     | 
| 
      
 240 
     | 
    
         
            +
                #     Loofah.html5_fragment(link_farmers_markup).scrub!(:targetblank)
         
     | 
| 
      
 241 
     | 
    
         
            +
                #     => "ohai! <a href='http://www.myswarmysite.com/' target="_blank">I like your blog post</a>"
         
     | 
| 
      
 242 
     | 
    
         
            +
                #
         
     | 
| 
      
 243 
     | 
    
         
            +
                #  On modern browsers, setting target="_blank" on anchor elements implicitly provides the same
         
     | 
| 
      
 244 
     | 
    
         
            +
                #  behavior as setting rel="noopener".
         
     | 
| 
      
 245 
     | 
    
         
            +
                #
         
     | 
| 
      
 246 
     | 
    
         
            +
                class TargetBlank < Scrubber
         
     | 
| 
      
 247 
     | 
    
         
            +
                  def initialize # rubocop:disable Lint/MissingSuper
         
     | 
| 
      
 248 
     | 
    
         
            +
                    @direction = :top_down
         
     | 
| 
      
 249 
     | 
    
         
            +
                  end
         
     | 
| 
      
 250 
     | 
    
         
            +
             
     | 
| 
      
 251 
     | 
    
         
            +
                  def scrub(node)
         
     | 
| 
      
 252 
     | 
    
         
            +
                    return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
         
     | 
| 
      
 253 
     | 
    
         
            +
             
     | 
| 
      
 254 
     | 
    
         
            +
                    href = node["href"]
         
     | 
| 
      
 255 
     | 
    
         
            +
             
     | 
| 
      
 256 
     | 
    
         
            +
                    node.set_attribute("target", "_blank") if href && href[0] != "#"
         
     | 
| 
      
 257 
     | 
    
         
            +
             
     | 
| 
      
 258 
     | 
    
         
            +
                    STOP
         
     | 
| 
       208 
259 
     | 
    
         
             
                  end
         
     | 
| 
       209 
260 
     | 
    
         
             
                end
         
     | 
| 
       210 
261 
     | 
    
         | 
| 
         @@ -214,35 +265,59 @@ module Loofah 
     | 
|
| 
       214 
265 
     | 
    
         
             
                #  +:noopener+ adds a rel="noopener" attribute to all links
         
     | 
| 
       215 
266 
     | 
    
         
             
                #
         
     | 
| 
       216 
267 
     | 
    
         
             
                #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
         
     | 
| 
       217 
     | 
    
         
            -
                #     Loofah.fragment(link_farmers_markup).scrub!(:noopener)
     | 
| 
      
 268 
     | 
    
         
            +
                #     Loofah.html5_fragment(link_farmers_markup).scrub!(:noopener)
         
     | 
| 
       218 
269 
     | 
    
         
             
                #     => "ohai! <a href='http://www.myswarmysite.com/' rel="noopener">I like your blog post</a>"
         
     | 
| 
       219 
270 
     | 
    
         
             
                #
         
     | 
| 
       220 
271 
     | 
    
         
             
                class NoOpener < Scrubber
         
     | 
| 
       221 
     | 
    
         
            -
                  def initialize
         
     | 
| 
      
 272 
     | 
    
         
            +
                  def initialize # rubocop:disable Lint/MissingSuper
         
     | 
| 
       222 
273 
     | 
    
         
             
                    @direction = :top_down
         
     | 
| 
       223 
274 
     | 
    
         
             
                  end
         
     | 
| 
       224 
275 
     | 
    
         | 
| 
       225 
276 
     | 
    
         
             
                  def scrub(node)
         
     | 
| 
       226 
277 
     | 
    
         
             
                    return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
         
     | 
| 
      
 278 
     | 
    
         
            +
             
     | 
| 
       227 
279 
     | 
    
         
             
                    append_attribute(node, "rel", "noopener")
         
     | 
| 
       228 
     | 
    
         
            -
                     
     | 
| 
      
 280 
     | 
    
         
            +
                    STOP
         
     | 
| 
      
 281 
     | 
    
         
            +
                  end
         
     | 
| 
      
 282 
     | 
    
         
            +
                end
         
     | 
| 
      
 283 
     | 
    
         
            +
             
     | 
| 
      
 284 
     | 
    
         
            +
                #
         
     | 
| 
      
 285 
     | 
    
         
            +
                #  === scrub!(:noreferrer)
         
     | 
| 
      
 286 
     | 
    
         
            +
                #
         
     | 
| 
      
 287 
     | 
    
         
            +
                #  +:noreferrer+ adds a rel="noreferrer" attribute to all links
         
     | 
| 
      
 288 
     | 
    
         
            +
                #
         
     | 
| 
      
 289 
     | 
    
         
            +
                #     link_farmers_markup = "ohai! <a href='http://www.myswarmysite.com/'>I like your blog post</a>"
         
     | 
| 
      
 290 
     | 
    
         
            +
                #     Loofah.html5_fragment(link_farmers_markup).scrub!(:noreferrer)
         
     | 
| 
      
 291 
     | 
    
         
            +
                #     => "ohai! <a href='http://www.myswarmysite.com/' rel="noreferrer">I like your blog post</a>"
         
     | 
| 
      
 292 
     | 
    
         
            +
                #
         
     | 
| 
      
 293 
     | 
    
         
            +
                class NoReferrer < Scrubber
         
     | 
| 
      
 294 
     | 
    
         
            +
                  def initialize # rubocop:disable Lint/MissingSuper
         
     | 
| 
      
 295 
     | 
    
         
            +
                    @direction = :top_down
         
     | 
| 
      
 296 
     | 
    
         
            +
                  end
         
     | 
| 
      
 297 
     | 
    
         
            +
             
     | 
| 
      
 298 
     | 
    
         
            +
                  def scrub(node)
         
     | 
| 
      
 299 
     | 
    
         
            +
                    return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "a")
         
     | 
| 
      
 300 
     | 
    
         
            +
             
     | 
| 
      
 301 
     | 
    
         
            +
                    append_attribute(node, "rel", "noreferrer")
         
     | 
| 
      
 302 
     | 
    
         
            +
                    STOP
         
     | 
| 
       229 
303 
     | 
    
         
             
                  end
         
     | 
| 
       230 
304 
     | 
    
         
             
                end
         
     | 
| 
       231 
305 
     | 
    
         | 
| 
       232 
306 
     | 
    
         
             
                # This class probably isn't useful publicly, but is used for #to_text's current implementation
         
     | 
| 
       233 
307 
     | 
    
         
             
                class NewlineBlockElements < Scrubber # :nodoc:
         
     | 
| 
       234 
     | 
    
         
            -
                  def initialize
         
     | 
| 
      
 308 
     | 
    
         
            +
                  def initialize # rubocop:disable Lint/MissingSuper
         
     | 
| 
       235 
309 
     | 
    
         
             
                    @direction = :bottom_up
         
     | 
| 
       236 
310 
     | 
    
         
             
                  end
         
     | 
| 
       237 
311 
     | 
    
         | 
| 
       238 
312 
     | 
    
         
             
                  def scrub(node)
         
     | 
| 
       239 
313 
     | 
    
         
             
                    return CONTINUE unless Loofah::Elements::LINEBREAKERS.include?(node.name)
         
     | 
| 
      
 314 
     | 
    
         
            +
             
     | 
| 
       240 
315 
     | 
    
         
             
                    replacement = if Loofah::Elements::INLINE_LINE_BREAK.include?(node.name)
         
     | 
| 
       241 
316 
     | 
    
         
             
                      "\n"
         
     | 
| 
       242 
317 
     | 
    
         
             
                    else
         
     | 
| 
       243 
318 
     | 
    
         
             
                      "\n#{node.content}\n"
         
     | 
| 
       244 
319 
     | 
    
         
             
                    end
         
     | 
| 
       245 
     | 
    
         
            -
                    node.add_next_sibling Nokogiri::XML::Text.new(replacement, node.document)
     | 
| 
      
 320 
     | 
    
         
            +
                    node.add_next_sibling(Nokogiri::XML::Text.new(replacement, node.document))
         
     | 
| 
       246 
321 
     | 
    
         
             
                    node.remove
         
     | 
| 
       247 
322 
     | 
    
         
             
                  end
         
     | 
| 
       248 
323 
     | 
    
         
             
                end
         
     | 
| 
         @@ -253,7 +328,7 @@ module Loofah 
     | 
|
| 
       253 
328 
     | 
    
         
             
                #  +:unprintable+ removes unprintable Unicode characters.
         
     | 
| 
       254 
329 
     | 
    
         
             
                #
         
     | 
| 
       255 
330 
     | 
    
         
             
                #     markup = "<p>Some text with an unprintable character at the end\u2028</p>"
         
     | 
| 
       256 
     | 
    
         
            -
                #     Loofah.fragment(markup).scrub!(:unprintable)
     | 
| 
      
 331 
     | 
    
         
            +
                #     Loofah.html5_fragment(markup).scrub!(:unprintable)
         
     | 
| 
       257 
332 
     | 
    
         
             
                #     => "<p>Some text with an unprintable character at the end</p>"
         
     | 
| 
       258 
333 
     | 
    
         
             
                #
         
     | 
| 
       259 
334 
     | 
    
         
             
                #  You may not be able to see the unprintable character in the above example, but there is a
         
     | 
| 
         @@ -263,7 +338,7 @@ module Loofah 
     | 
|
| 
       263 
338 
     | 
    
         
             
                #     http://timelessrepo.com/json-isnt-a-javascript-subset
         
     | 
| 
       264 
339 
     | 
    
         
             
                #
         
     | 
| 
       265 
340 
     | 
    
         
             
                class Unprintable < Scrubber
         
     | 
| 
       266 
     | 
    
         
            -
                  def initialize
         
     | 
| 
      
 341 
     | 
    
         
            +
                  def initialize # rubocop:disable Lint/MissingSuper
         
     | 
| 
       267 
342 
     | 
    
         
             
                    @direction = :top_down
         
     | 
| 
       268 
343 
     | 
    
         
             
                  end
         
     | 
| 
       269 
344 
     | 
    
         | 
| 
         @@ -275,25 +350,81 @@ module Loofah 
     | 
|
| 
       275 
350 
     | 
    
         
             
                  end
         
     | 
| 
       276 
351 
     | 
    
         
             
                end
         
     | 
| 
       277 
352 
     | 
    
         | 
| 
      
 353 
     | 
    
         
            +
                #
         
     | 
| 
      
 354 
     | 
    
         
            +
                #  === scrub!(:double_breakpoint)
         
     | 
| 
      
 355 
     | 
    
         
            +
                #
         
     | 
| 
      
 356 
     | 
    
         
            +
                #  +:double_breakpoint+ replaces double-break tags with closing/opening paragraph tags.
         
     | 
| 
      
 357 
     | 
    
         
            +
                #
         
     | 
| 
      
 358 
     | 
    
         
            +
                #     markup = "<p>Some text here in a logical paragraph.<br><br>Some more text, apparently a second paragraph.</p>"
         
     | 
| 
      
 359 
     | 
    
         
            +
                #     Loofah.html5_fragment(markup).scrub!(:double_breakpoint)
         
     | 
| 
      
 360 
     | 
    
         
            +
                #     => "<p>Some text here in a logical paragraph.</p><p>Some more text, apparently a second paragraph.</p>"
         
     | 
| 
      
 361 
     | 
    
         
            +
                #
         
     | 
| 
      
 362 
     | 
    
         
            +
                class DoubleBreakpoint < Scrubber
         
     | 
| 
      
 363 
     | 
    
         
            +
                  def initialize # rubocop:disable Lint/MissingSuper
         
     | 
| 
      
 364 
     | 
    
         
            +
                    @direction = :top_down
         
     | 
| 
      
 365 
     | 
    
         
            +
                  end
         
     | 
| 
      
 366 
     | 
    
         
            +
             
     | 
| 
      
 367 
     | 
    
         
            +
                  def scrub(node)
         
     | 
| 
      
 368 
     | 
    
         
            +
                    return CONTINUE unless (node.type == Nokogiri::XML::Node::ELEMENT_NODE) && (node.name == "p")
         
     | 
| 
      
 369 
     | 
    
         
            +
             
     | 
| 
      
 370 
     | 
    
         
            +
                    paragraph_with_break_point_nodes = node.xpath("//p[br[following-sibling::br]]")
         
     | 
| 
      
 371 
     | 
    
         
            +
             
     | 
| 
      
 372 
     | 
    
         
            +
                    paragraph_with_break_point_nodes.each do |paragraph_node|
         
     | 
| 
      
 373 
     | 
    
         
            +
                      new_paragraph = paragraph_node.add_previous_sibling("<p>").first
         
     | 
| 
      
 374 
     | 
    
         
            +
             
     | 
| 
      
 375 
     | 
    
         
            +
                      paragraph_node.children.each do |child|
         
     | 
| 
      
 376 
     | 
    
         
            +
                        remove_blank_text_nodes(child)
         
     | 
| 
      
 377 
     | 
    
         
            +
                      end
         
     | 
| 
      
 378 
     | 
    
         
            +
             
     | 
| 
      
 379 
     | 
    
         
            +
                      paragraph_node.children.each do |child|
         
     | 
| 
      
 380 
     | 
    
         
            +
                        # already unlinked
         
     | 
| 
      
 381 
     | 
    
         
            +
                        next if child.parent.nil?
         
     | 
| 
      
 382 
     | 
    
         
            +
             
     | 
| 
      
 383 
     | 
    
         
            +
                        if child.name == "br" && child.next_sibling.name == "br"
         
     | 
| 
      
 384 
     | 
    
         
            +
                          new_paragraph = paragraph_node.add_previous_sibling("<p>").first
         
     | 
| 
      
 385 
     | 
    
         
            +
                          child.next_sibling.unlink
         
     | 
| 
      
 386 
     | 
    
         
            +
                          child.unlink
         
     | 
| 
      
 387 
     | 
    
         
            +
                        else
         
     | 
| 
      
 388 
     | 
    
         
            +
                          child.parent = new_paragraph
         
     | 
| 
      
 389 
     | 
    
         
            +
                        end
         
     | 
| 
      
 390 
     | 
    
         
            +
                      end
         
     | 
| 
      
 391 
     | 
    
         
            +
             
     | 
| 
      
 392 
     | 
    
         
            +
                      paragraph_node.unlink
         
     | 
| 
      
 393 
     | 
    
         
            +
                    end
         
     | 
| 
      
 394 
     | 
    
         
            +
             
     | 
| 
      
 395 
     | 
    
         
            +
                    CONTINUE
         
     | 
| 
      
 396 
     | 
    
         
            +
                  end
         
     | 
| 
      
 397 
     | 
    
         
            +
             
     | 
| 
      
 398 
     | 
    
         
            +
                  private
         
     | 
| 
      
 399 
     | 
    
         
            +
             
     | 
| 
      
 400 
     | 
    
         
            +
                  def remove_blank_text_nodes(node)
         
     | 
| 
      
 401 
     | 
    
         
            +
                    node.unlink if node.text? && node.blank?
         
     | 
| 
      
 402 
     | 
    
         
            +
                  end
         
     | 
| 
      
 403 
     | 
    
         
            +
                end
         
     | 
| 
       278 
404 
     | 
    
         
             
                #
         
     | 
| 
       279 
405 
     | 
    
         
             
                #  A hash that maps a symbol (like +:prune+) to the appropriate Scrubber (Loofah::Scrubbers::Prune).
         
     | 
| 
       280 
406 
     | 
    
         
             
                #
         
     | 
| 
       281 
407 
     | 
    
         
             
                MAP = {
         
     | 
| 
       282 
     | 
    
         
            -
                  :escape => Escape,
     | 
| 
       283 
     | 
    
         
            -
                  :prune => Prune,
     | 
| 
       284 
     | 
    
         
            -
                  :whitewash => Whitewash,
     | 
| 
       285 
     | 
    
         
            -
                  :strip => Strip,
     | 
| 
       286 
     | 
    
         
            -
                  :nofollow => NoFollow,
     | 
| 
       287 
     | 
    
         
            -
                  :noopener => NoOpener,
     | 
| 
       288 
     | 
    
         
            -
                  :newline_block_elements => NewlineBlockElements,
     | 
| 
       289 
     | 
    
         
            -
                  :unprintable => Unprintable,
     | 
| 
      
 408 
     | 
    
         
            +
                  escape: Escape,
         
     | 
| 
      
 409 
     | 
    
         
            +
                  prune: Prune,
         
     | 
| 
      
 410 
     | 
    
         
            +
                  whitewash: Whitewash,
         
     | 
| 
      
 411 
     | 
    
         
            +
                  strip: Strip,
         
     | 
| 
      
 412 
     | 
    
         
            +
                  nofollow: NoFollow,
         
     | 
| 
      
 413 
     | 
    
         
            +
                  noopener: NoOpener,
         
     | 
| 
      
 414 
     | 
    
         
            +
                  noreferrer: NoReferrer,
         
     | 
| 
      
 415 
     | 
    
         
            +
                  targetblank: TargetBlank,
         
     | 
| 
      
 416 
     | 
    
         
            +
                  newline_block_elements: NewlineBlockElements,
         
     | 
| 
      
 417 
     | 
    
         
            +
                  unprintable: Unprintable,
         
     | 
| 
      
 418 
     | 
    
         
            +
                  double_breakpoint: DoubleBreakpoint,
         
     | 
| 
       290 
419 
     | 
    
         
             
                }
         
     | 
| 
       291 
420 
     | 
    
         | 
| 
       292 
     | 
    
         
            -
                 
     | 
| 
       293 
     | 
    
         
            -
             
     | 
| 
       294 
     | 
    
         
            -
             
     | 
| 
       295 
     | 
    
         
            -
             
     | 
| 
       296 
     | 
    
         
            -
                   
     | 
| 
      
 421 
     | 
    
         
            +
                class << self
         
     | 
| 
      
 422 
     | 
    
         
            +
                  #
         
     | 
| 
      
 423 
     | 
    
         
            +
                  #  Returns an array of symbols representing the built-in scrubbers
         
     | 
| 
      
 424 
     | 
    
         
            +
                  #
         
     | 
| 
      
 425 
     | 
    
         
            +
                  def scrubber_symbols
         
     | 
| 
      
 426 
     | 
    
         
            +
                    MAP.keys
         
     | 
| 
      
 427 
     | 
    
         
            +
                  end
         
     | 
| 
       297 
428 
     | 
    
         
             
                end
         
     | 
| 
       298 
429 
     | 
    
         
             
              end
         
     | 
| 
       299 
430 
     | 
    
         
             
            end
         
     |