loofah 2.19.0 → 2.23.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +100 -0
- data/README.md +157 -114
- data/lib/loofah/concerns.rb +207 -0
- data/lib/loofah/elements.rb +78 -76
- data/lib/loofah/helpers.rb +21 -15
- data/lib/loofah/{html → html4}/document.rb +5 -7
- data/lib/loofah/html4/document_fragment.rb +15 -0
- data/lib/loofah/html5/document.rb +17 -0
- data/lib/loofah/html5/document_fragment.rb +15 -0
- data/lib/loofah/html5/libxml2_workarounds.rb +7 -6
- data/lib/loofah/html5/safelist.rb +940 -925
- data/lib/loofah/html5/scrub.rb +105 -34
- data/lib/loofah/metahelpers.rb +10 -6
- data/lib/loofah/scrubber.rb +14 -8
- data/lib/loofah/scrubbers.rb +121 -48
- data/lib/loofah/version.rb +2 -1
- data/lib/loofah/xml/document.rb +1 -0
- data/lib/loofah/xml/document_fragment.rb +2 -6
- data/lib/loofah.rb +116 -43
- metadata +20 -122
- data/lib/loofah/html/document_fragment.rb +0 -42
- data/lib/loofah/instance_methods.rb +0 -133
| @@ -0,0 +1,207 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Loofah
         | 
| 4 | 
            +
              #
         | 
| 5 | 
            +
              #  Mixes +scrub!+ into Document, DocumentFragment, Node and NodeSet.
         | 
| 6 | 
            +
              #
         | 
| 7 | 
            +
              #  Traverse the document or fragment, invoking the +scrubber+ on each node.
         | 
| 8 | 
            +
              #
         | 
| 9 | 
            +
              #  +scrubber+ must either be one of the symbols representing the built-in scrubbers (see
         | 
| 10 | 
            +
              #  Scrubbers), or a Scrubber instance.
         | 
| 11 | 
            +
              #
         | 
| 12 | 
            +
              #    span2div = Loofah::Scrubber.new do |node|
         | 
| 13 | 
            +
              #      node.name = "div" if node.name == "span"
         | 
| 14 | 
            +
              #    end
         | 
| 15 | 
            +
              #    Loofah.html5_fragment("<span>foo</span><p>bar</p>").scrub!(span2div).to_s
         | 
| 16 | 
            +
              #    # => "<div>foo</div><p>bar</p>"
         | 
| 17 | 
            +
              #
         | 
| 18 | 
            +
              #  or
         | 
| 19 | 
            +
              #
         | 
| 20 | 
            +
              #    unsafe_html = "ohai! <div>div is safe</div> <script>but script is not</script>"
         | 
| 21 | 
            +
              #    Loofah.html5_fragment(unsafe_html).scrub!(:strip).to_s
         | 
| 22 | 
            +
              #    # => "ohai! <div>div is safe</div> "
         | 
| 23 | 
            +
              #
         | 
| 24 | 
            +
              #  Note that this method is called implicitly from the shortcuts Loofah.scrub_html5_fragment et
         | 
| 25 | 
            +
              #  al.
         | 
| 26 | 
            +
              #
         | 
| 27 | 
            +
              #  Please see Scrubber for more information on implementation and traversal, and README.md for
         | 
| 28 | 
            +
              #  more example usage.
         | 
| 29 | 
            +
              #
         | 
| 30 | 
            +
              module ScrubBehavior
         | 
| 31 | 
            +
                module Node # :nodoc:
         | 
| 32 | 
            +
                  def scrub!(scrubber)
         | 
| 33 | 
            +
                    #
         | 
| 34 | 
            +
                    #  yes. this should be three separate methods. but nokogiri decorates (or not) based on
         | 
| 35 | 
            +
                    #  whether the module name has already been included. and since documents get decorated just
         | 
| 36 | 
            +
                    #  like their constituent nodes, we need to jam all the logic into a single module.
         | 
| 37 | 
            +
                    #
         | 
| 38 | 
            +
                    scrubber = ScrubBehavior.resolve_scrubber(scrubber)
         | 
| 39 | 
            +
                    case self
         | 
| 40 | 
            +
                    when Nokogiri::XML::Document
         | 
| 41 | 
            +
                      scrubber.traverse(root) if root
         | 
| 42 | 
            +
                    when Nokogiri::XML::DocumentFragment
         | 
| 43 | 
            +
                      children.scrub!(scrubber)
         | 
| 44 | 
            +
                    else
         | 
| 45 | 
            +
                      scrubber.traverse(self)
         | 
| 46 | 
            +
                    end
         | 
| 47 | 
            +
                    self
         | 
| 48 | 
            +
                  end
         | 
| 49 | 
            +
                end
         | 
| 50 | 
            +
             | 
| 51 | 
            +
                module NodeSet # :nodoc:
         | 
| 52 | 
            +
                  def scrub!(scrubber)
         | 
| 53 | 
            +
                    each { |node| node.scrub!(scrubber) }
         | 
| 54 | 
            +
                    self
         | 
| 55 | 
            +
                  end
         | 
| 56 | 
            +
                end
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                class << self
         | 
| 59 | 
            +
                  def resolve_scrubber(scrubber) # :nodoc:
         | 
| 60 | 
            +
                    scrubber = Scrubbers::MAP[scrubber].new if Scrubbers::MAP[scrubber]
         | 
| 61 | 
            +
                    unless scrubber.is_a?(Loofah::Scrubber)
         | 
| 62 | 
            +
                      raise Loofah::ScrubberNotFound, "not a Scrubber or a scrubber name: #{scrubber.inspect}"
         | 
| 63 | 
            +
                    end
         | 
| 64 | 
            +
             | 
| 65 | 
            +
                    scrubber
         | 
| 66 | 
            +
                  end
         | 
| 67 | 
            +
                end
         | 
| 68 | 
            +
              end
         | 
| 69 | 
            +
             | 
| 70 | 
            +
              #
         | 
| 71 | 
            +
              #  Overrides +text+ in Document and DocumentFragment classes, and mixes in +to_text+.
         | 
| 72 | 
            +
              #
         | 
| 73 | 
            +
              module TextBehavior
         | 
| 74 | 
            +
                #
         | 
| 75 | 
            +
                #  Returns a plain-text version of the markup contained by the document, with HTML entities
         | 
| 76 | 
            +
                #  encoded.
         | 
| 77 | 
            +
                #
         | 
| 78 | 
            +
                #  This method is significantly faster than #to_text, but isn't clever about whitespace around
         | 
| 79 | 
            +
                #  block elements.
         | 
| 80 | 
            +
                #
         | 
| 81 | 
            +
                #    Loofah.html5_document("<h1>Title</h1><div>Content</div>").text
         | 
| 82 | 
            +
                #    # => "TitleContent"
         | 
| 83 | 
            +
                #
         | 
| 84 | 
            +
                #  By default, the returned text will have HTML entities escaped. If you want unescaped
         | 
| 85 | 
            +
                #  entities, and you understand that the result is unsafe to render in a browser, then you can
         | 
| 86 | 
            +
                #  pass an argument as shown:
         | 
| 87 | 
            +
                #
         | 
| 88 | 
            +
                #    frag = Loofah.html5_fragment("&lt;script&gt;alert('EVIL');&lt;/script&gt;")
         | 
| 89 | 
            +
                #    # ok for browser:
         | 
| 90 | 
            +
                #    frag.text                                 # => "&lt;script&gt;alert('EVIL');&lt;/script&gt;"
         | 
| 91 | 
            +
                #    # decidedly not ok for browser:
         | 
| 92 | 
            +
                #    frag.text(:encode_special_chars => false) # => "<script>alert('EVIL');</script>"
         | 
| 93 | 
            +
                #
         | 
| 94 | 
            +
                def text(options = {})
         | 
| 95 | 
            +
                  result = if serialize_root
         | 
| 96 | 
            +
                    serialize_root.children.reject(&:comment?).map(&:inner_text).join("")
         | 
| 97 | 
            +
                  else
         | 
| 98 | 
            +
                    ""
         | 
| 99 | 
            +
                  end
         | 
| 100 | 
            +
                  if options[:encode_special_chars] == false
         | 
| 101 | 
            +
                    result # possibly dangerous if rendered in a browser
         | 
| 102 | 
            +
                  else
         | 
| 103 | 
            +
                    encode_special_chars(result)
         | 
| 104 | 
            +
                  end
         | 
| 105 | 
            +
                end
         | 
| 106 | 
            +
             | 
| 107 | 
            +
                alias_method :inner_text, :text
         | 
| 108 | 
            +
                alias_method :to_str, :text
         | 
| 109 | 
            +
             | 
| 110 | 
            +
                #
         | 
| 111 | 
            +
                #  Returns a plain-text version of the markup contained by the fragment, with HTML entities
         | 
| 112 | 
            +
                #  encoded.
         | 
| 113 | 
            +
                #
         | 
| 114 | 
            +
                #  This method is slower than #text, but is clever about whitespace around block elements and
         | 
| 115 | 
            +
                #  line break elements.
         | 
| 116 | 
            +
                #
         | 
| 117 | 
            +
                #    Loofah.html5_document("<h1>Title</h1><div>Content<br>Next line</div>").to_text
         | 
| 118 | 
            +
                #    # => "\nTitle\n\nContent\nNext line\n"
         | 
| 119 | 
            +
                #
         | 
| 120 | 
            +
                def to_text(options = {})
         | 
| 121 | 
            +
                  Loofah.remove_extraneous_whitespace(dup.scrub!(:newline_block_elements).text(options))
         | 
| 122 | 
            +
                end
         | 
| 123 | 
            +
              end
         | 
| 124 | 
            +
             | 
| 125 | 
            +
              module DocumentDecorator # :nodoc:
         | 
| 126 | 
            +
                def initialize(*args, &block)
         | 
| 127 | 
            +
                  super
         | 
| 128 | 
            +
                  decorators(Nokogiri::XML::Node) << ScrubBehavior::Node
         | 
| 129 | 
            +
                  decorators(Nokogiri::XML::NodeSet) << ScrubBehavior::NodeSet
         | 
| 130 | 
            +
                end
         | 
| 131 | 
            +
              end
         | 
| 132 | 
            +
             | 
| 133 | 
            +
              module HtmlDocumentBehavior # :nodoc:
         | 
| 134 | 
            +
                module ClassMethods
         | 
| 135 | 
            +
                  def parse(*args, &block)
         | 
| 136 | 
            +
                    remove_comments_before_html_element(super)
         | 
| 137 | 
            +
                  end
         | 
| 138 | 
            +
             | 
| 139 | 
            +
                  private
         | 
| 140 | 
            +
             | 
| 141 | 
            +
                  # remove comments that exist outside of the HTML element.
         | 
| 142 | 
            +
                  #
         | 
| 143 | 
            +
                  # these comments are allowed by the HTML spec:
         | 
| 144 | 
            +
                  #
         | 
| 145 | 
            +
                  #    https://www.w3.org/TR/html401/struct/global.html#h-7.1
         | 
| 146 | 
            +
                  #
         | 
| 147 | 
            +
                  # but are not scrubbed by Loofah because these nodes don't meet
         | 
| 148 | 
            +
                  # the contract that scrubbers expect of a node (e.g., it can be
         | 
| 149 | 
            +
                  # replaced, sibling and children nodes can be created).
         | 
| 150 | 
            +
                  def remove_comments_before_html_element(doc)
         | 
| 151 | 
            +
                    doc.children.each do |child|
         | 
| 152 | 
            +
                      child.unlink if child.comment?
         | 
| 153 | 
            +
                    end
         | 
| 154 | 
            +
                    doc
         | 
| 155 | 
            +
                  end
         | 
| 156 | 
            +
                end
         | 
| 157 | 
            +
             | 
| 158 | 
            +
                class << self
         | 
| 159 | 
            +
                  def included(base)
         | 
| 160 | 
            +
                    base.extend(ClassMethods)
         | 
| 161 | 
            +
                  end
         | 
| 162 | 
            +
                end
         | 
| 163 | 
            +
             | 
| 164 | 
            +
                def serialize_root
         | 
| 165 | 
            +
                  at_xpath("/html/body")
         | 
| 166 | 
            +
                end
         | 
| 167 | 
            +
              end
         | 
| 168 | 
            +
             | 
| 169 | 
            +
              module HtmlFragmentBehavior # :nodoc:
         | 
| 170 | 
            +
                module ClassMethods
         | 
| 171 | 
            +
                  def parse(tags, encoding = nil)
         | 
| 172 | 
            +
                    doc = document_klass.new
         | 
| 173 | 
            +
             | 
| 174 | 
            +
                    encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : "UTF-8"
         | 
| 175 | 
            +
                    doc.encoding = encoding
         | 
| 176 | 
            +
             | 
| 177 | 
            +
                    new(doc, tags)
         | 
| 178 | 
            +
                  end
         | 
| 179 | 
            +
             | 
| 180 | 
            +
                  def document_klass
         | 
| 181 | 
            +
                    @document_klass ||= if Loofah.html5_support? && self == Loofah::HTML5::DocumentFragment
         | 
| 182 | 
            +
                      Loofah::HTML5::Document
         | 
| 183 | 
            +
                    elsif self == Loofah::HTML4::DocumentFragment
         | 
| 184 | 
            +
                      Loofah::HTML4::Document
         | 
| 185 | 
            +
                    else
         | 
| 186 | 
            +
                      raise ArgumentError, "unexpected class: #{self}"
         | 
| 187 | 
            +
                    end
         | 
| 188 | 
            +
                  end
         | 
| 189 | 
            +
                end
         | 
| 190 | 
            +
             | 
| 191 | 
            +
                class << self
         | 
| 192 | 
            +
                  def included(base)
         | 
| 193 | 
            +
                    base.extend(ClassMethods)
         | 
| 194 | 
            +
                  end
         | 
| 195 | 
            +
                end
         | 
| 196 | 
            +
             | 
| 197 | 
            +
                def to_s
         | 
| 198 | 
            +
                  serialize_root.children.to_s
         | 
| 199 | 
            +
                end
         | 
| 200 | 
            +
             | 
| 201 | 
            +
                alias_method :serialize, :to_s
         | 
| 202 | 
            +
             | 
| 203 | 
            +
                def serialize_root
         | 
| 204 | 
            +
                  at_xpath("./body") || self
         | 
| 205 | 
            +
                end
         | 
| 206 | 
            +
              end
         | 
| 207 | 
            +
            end
         | 
    
        data/lib/loofah/elements.rb
    CHANGED
    
    | @@ -1,88 +1,90 @@ | |
| 1 1 | 
             
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 2 3 | 
             
            require "set"
         | 
| 3 4 |  | 
| 4 5 | 
             
            module Loofah
         | 
| 5 6 | 
             
              module Elements
         | 
| 6 | 
            -
                STRICT_BLOCK_LEVEL_HTML4 = Set.new | 
| 7 | 
            -
             | 
| 8 | 
            -
             | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 11 | 
            -
             | 
| 12 | 
            -
             | 
| 13 | 
            -
             | 
| 14 | 
            -
             | 
| 15 | 
            -
             | 
| 16 | 
            -
             | 
| 17 | 
            -
             | 
| 18 | 
            -
             | 
| 19 | 
            -
             | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 27 | 
            -
             | 
| 28 | 
            -
             | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 31 | 
            -
             | 
| 7 | 
            +
                STRICT_BLOCK_LEVEL_HTML4 = Set.new([
         | 
| 8 | 
            +
                  "address",
         | 
| 9 | 
            +
                  "blockquote",
         | 
| 10 | 
            +
                  "center",
         | 
| 11 | 
            +
                  "dir",
         | 
| 12 | 
            +
                  "div",
         | 
| 13 | 
            +
                  "dl",
         | 
| 14 | 
            +
                  "fieldset",
         | 
| 15 | 
            +
                  "form",
         | 
| 16 | 
            +
                  "h1",
         | 
| 17 | 
            +
                  "h2",
         | 
| 18 | 
            +
                  "h3",
         | 
| 19 | 
            +
                  "h4",
         | 
| 20 | 
            +
                  "h5",
         | 
| 21 | 
            +
                  "h6",
         | 
| 22 | 
            +
                  "hr",
         | 
| 23 | 
            +
                  "isindex",
         | 
| 24 | 
            +
                  "menu",
         | 
| 25 | 
            +
                  "noframes",
         | 
| 26 | 
            +
                  "noscript",
         | 
| 27 | 
            +
                  "ol",
         | 
| 28 | 
            +
                  "p",
         | 
| 29 | 
            +
                  "pre",
         | 
| 30 | 
            +
                  "table",
         | 
| 31 | 
            +
                  "ul",
         | 
| 32 | 
            +
                ])
         | 
| 32 33 |  | 
| 33 34 | 
             
                # https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
         | 
| 34 | 
            -
                STRICT_BLOCK_LEVEL_HTML5 = Set.new | 
| 35 | 
            -
             | 
| 36 | 
            -
             | 
| 37 | 
            -
             | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 41 | 
            -
             | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 44 | 
            -
             | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 47 | 
            -
             | 
| 48 | 
            -
             | 
| 49 | 
            -
             | 
| 50 | 
            -
             | 
| 51 | 
            -
             | 
| 52 | 
            -
             | 
| 53 | 
            -
             | 
| 54 | 
            -
             | 
| 55 | 
            -
             | 
| 56 | 
            -
             | 
| 57 | 
            -
             | 
| 58 | 
            -
             | 
| 59 | 
            -
             | 
| 60 | 
            -
             | 
| 61 | 
            -
             | 
| 62 | 
            -
             | 
| 63 | 
            -
             | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 66 | 
            -
             | 
| 67 | 
            -
             | 
| 68 | 
            -
             | 
| 69 | 
            -
             | 
| 70 | 
            -
             | 
| 71 | 
            -
             | 
| 35 | 
            +
                STRICT_BLOCK_LEVEL_HTML5 = Set.new([
         | 
| 36 | 
            +
                  "address",
         | 
| 37 | 
            +
                  "article",
         | 
| 38 | 
            +
                  "aside",
         | 
| 39 | 
            +
                  "blockquote",
         | 
| 40 | 
            +
                  "canvas",
         | 
| 41 | 
            +
                  "dd",
         | 
| 42 | 
            +
                  "div",
         | 
| 43 | 
            +
                  "dl",
         | 
| 44 | 
            +
                  "dt",
         | 
| 45 | 
            +
                  "fieldset",
         | 
| 46 | 
            +
                  "figcaption",
         | 
| 47 | 
            +
                  "figure",
         | 
| 48 | 
            +
                  "footer",
         | 
| 49 | 
            +
                  "form",
         | 
| 50 | 
            +
                  "h1",
         | 
| 51 | 
            +
                  "h2",
         | 
| 52 | 
            +
                  "h3",
         | 
| 53 | 
            +
                  "h4",
         | 
| 54 | 
            +
                  "h5",
         | 
| 55 | 
            +
                  "h6",
         | 
| 56 | 
            +
                  "header",
         | 
| 57 | 
            +
                  "hgroup",
         | 
| 58 | 
            +
                  "hr",
         | 
| 59 | 
            +
                  "li",
         | 
| 60 | 
            +
                  "main",
         | 
| 61 | 
            +
                  "nav",
         | 
| 62 | 
            +
                  "noscript",
         | 
| 63 | 
            +
                  "ol",
         | 
| 64 | 
            +
                  "output",
         | 
| 65 | 
            +
                  "p",
         | 
| 66 | 
            +
                  "pre",
         | 
| 67 | 
            +
                  "section",
         | 
| 68 | 
            +
                  "table",
         | 
| 69 | 
            +
                  "tfoot",
         | 
| 70 | 
            +
                  "ul",
         | 
| 71 | 
            +
                  "video",
         | 
| 72 | 
            +
                ])
         | 
| 72 73 |  | 
| 73 74 | 
             
                # The following elements may also be considered block-level
         | 
| 74 75 | 
             
                # elements since they may contain block-level elements
         | 
| 75 | 
            -
                LOOSE_BLOCK_LEVEL = Set.new | 
| 76 | 
            -
             | 
| 77 | 
            -
             | 
| 78 | 
            -
             | 
| 79 | 
            -
             | 
| 80 | 
            -
             | 
| 81 | 
            -
             | 
| 82 | 
            -
             | 
| 83 | 
            -
             | 
| 84 | 
            -
             | 
| 85 | 
            -
             | 
| 76 | 
            +
                LOOSE_BLOCK_LEVEL = Set.new([
         | 
| 77 | 
            +
                  "dd",
         | 
| 78 | 
            +
                  "dt",
         | 
| 79 | 
            +
                  "frameset",
         | 
| 80 | 
            +
                  "li",
         | 
| 81 | 
            +
                  "tbody",
         | 
| 82 | 
            +
                  "td",
         | 
| 83 | 
            +
                  "tfoot",
         | 
| 84 | 
            +
                  "th",
         | 
| 85 | 
            +
                  "thead",
         | 
| 86 | 
            +
                  "tr",
         | 
| 87 | 
            +
                ])
         | 
| 86 88 |  | 
| 87 89 | 
             
                # Elements that aren't block but should generate a newline in #to_text
         | 
| 88 90 | 
             
                INLINE_LINE_BREAK = Set.new(["br"])
         | 
| @@ -92,5 +94,5 @@ module Loofah | |
| 92 94 | 
             
                LINEBREAKERS = BLOCK_LEVEL + INLINE_LINE_BREAK
         | 
| 93 95 | 
             
              end
         | 
| 94 96 |  | 
| 95 | 
            -
              ::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants | 
| 97 | 
            +
              ::Loofah::MetaHelpers.add_downcased_set_members_to_all_set_constants(::Loofah::Elements)
         | 
| 96 98 | 
             
            end
         | 
    
        data/lib/loofah/helpers.rb
    CHANGED
    
    | @@ -1,43 +1,47 @@ | |
| 1 1 | 
             
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 2 3 | 
             
            module Loofah
         | 
| 3 4 | 
             
              module Helpers
         | 
| 4 5 | 
             
                class << self
         | 
| 5 6 | 
             
                  #
         | 
| 6 7 | 
             
                  #  A replacement for Rails's built-in +strip_tags+ helper.
         | 
| 7 8 | 
             
                  #
         | 
| 8 | 
            -
                  # | 
| 9 | 
            +
                  #    Loofah::Helpers.strip_tags("<div>Hello <b>there</b></div>") # => "Hello there"
         | 
| 9 10 | 
             
                  #
         | 
| 10 11 | 
             
                  def strip_tags(string_or_io)
         | 
| 11 | 
            -
                    Loofah. | 
| 12 | 
            +
                    Loofah.html4_fragment(string_or_io).text
         | 
| 12 13 | 
             
                  end
         | 
| 13 14 |  | 
| 14 15 | 
             
                  #
         | 
| 15 16 | 
             
                  #  A replacement for Rails's built-in +sanitize+ helper.
         | 
| 16 17 | 
             
                  #
         | 
| 17 | 
            -
                  # | 
| 18 | 
            +
                  #    Loofah::Helpers.sanitize("<script src=http://ha.ckers.org/xss.js></script>")
         | 
| 19 | 
            +
                  #    # => "&lt;script src=\"http://ha.ckers.org/xss.js\"&gt;&lt;/script&gt;"
         | 
| 18 20 | 
             
                  #
         | 
| 19 21 | 
             
                  def sanitize(string_or_io)
         | 
| 20 | 
            -
                    loofah_fragment = Loofah. | 
| 22 | 
            +
                    loofah_fragment = Loofah.html4_fragment(string_or_io)
         | 
| 21 23 | 
             
                    loofah_fragment.scrub!(:strip)
         | 
| 22 | 
            -
                    loofah_fragment.xpath("./form").each | 
| 24 | 
            +
                    loofah_fragment.xpath("./form").each(&:remove)
         | 
| 23 25 | 
             
                    loofah_fragment.to_s
         | 
| 24 26 | 
             
                  end
         | 
| 25 27 |  | 
| 26 28 | 
             
                  #
         | 
| 27 29 | 
             
                  #  A replacement for Rails's built-in +sanitize_css+ helper.
         | 
| 28 30 | 
             
                  #
         | 
| 29 | 
            -
                  #    Loofah::Helpers.sanitize_css("display:block;background-image:url(http:// | 
| 31 | 
            +
                  #    Loofah::Helpers.sanitize_css("display:block;background-image:url(http://example.com/foo.jpg)")
         | 
| 32 | 
            +
                  #    # => "display: block;"
         | 
| 30 33 | 
             
                  #
         | 
| 31 34 | 
             
                  def sanitize_css(style_string)
         | 
| 32 | 
            -
                    ::Loofah::HTML5::Scrub.scrub_css | 
| 35 | 
            +
                    ::Loofah::HTML5::Scrub.scrub_css(style_string)
         | 
| 33 36 | 
             
                  end
         | 
| 34 37 |  | 
| 35 38 | 
             
                  #
         | 
| 36 | 
            -
                  #  A helper to remove extraneous whitespace from text-ified HTML
         | 
| 39 | 
            +
                  #  A helper to remove extraneous whitespace from text-ified HTML.
         | 
| 40 | 
            +
                  #
         | 
| 37 41 | 
             
                  #  TODO: remove this in a future major-point-release.
         | 
| 38 42 | 
             
                  #
         | 
| 39 43 | 
             
                  def remove_extraneous_whitespace(string)
         | 
| 40 | 
            -
                    Loofah.remove_extraneous_whitespace | 
| 44 | 
            +
                    Loofah.remove_extraneous_whitespace(string)
         | 
| 41 45 | 
             
                  end
         | 
| 42 46 | 
             
                end
         | 
| 43 47 |  | 
| @@ -52,7 +56,7 @@ module Loofah | |
| 52 56 | 
             
                    end
         | 
| 53 57 |  | 
| 54 58 | 
             
                    def white_list_sanitizer
         | 
| 55 | 
            -
                      warn | 
| 59 | 
            +
                      warn("warning: white_list_sanitizer is deprecated, please use safe_list_sanitizer instead.")
         | 
| 56 60 | 
             
                      safe_list_sanitizer
         | 
| 57 61 | 
             
                    end
         | 
| 58 62 | 
             
                  end
         | 
| @@ -62,7 +66,8 @@ module Loofah | |
| 62 66 | 
             
                  #
         | 
| 63 67 | 
             
                  #  To use by default, call this in an application initializer:
         | 
| 64 68 | 
             
                  #
         | 
| 65 | 
            -
                  #    ActionView::Helpers::SanitizeHelper.full_sanitizer =  | 
| 69 | 
            +
                  #    ActionView::Helpers::SanitizeHelper.full_sanitizer = \
         | 
| 70 | 
            +
                  #      Loofah::Helpers::ActionView::FullSanitizer.new
         | 
| 66 71 | 
             
                  #
         | 
| 67 72 | 
             
                  #  Or, to generally opt-in to Loofah's view sanitizers:
         | 
| 68 73 | 
             
                  #
         | 
| @@ -70,7 +75,7 @@ module Loofah | |
| 70 75 | 
             
                  #
         | 
| 71 76 | 
             
                  class FullSanitizer
         | 
| 72 77 | 
             
                    def sanitize(html, *args)
         | 
| 73 | 
            -
                      Loofah::Helpers.strip_tags | 
| 78 | 
            +
                      Loofah::Helpers.strip_tags(html)
         | 
| 74 79 | 
             
                    end
         | 
| 75 80 | 
             
                  end
         | 
| 76 81 |  | 
| @@ -79,7 +84,8 @@ module Loofah | |
| 79 84 | 
             
                  #
         | 
| 80 85 | 
             
                  #  To use by default, call this in an application initializer:
         | 
| 81 86 | 
             
                  #
         | 
| 82 | 
            -
                  #    ActionView::Helpers::SanitizeHelper.safe_list_sanitizer =  | 
| 87 | 
            +
                  #    ActionView::Helpers::SanitizeHelper.safe_list_sanitizer = \
         | 
| 88 | 
            +
                  #      Loofah::Helpers::ActionView::SafeListSanitizer.new
         | 
| 83 89 | 
             
                  #
         | 
| 84 90 | 
             
                  #  Or, to generally opt-in to Loofah's view sanitizers:
         | 
| 85 91 | 
             
                  #
         | 
| @@ -87,11 +93,11 @@ module Loofah | |
| 87 93 | 
             
                  #
         | 
| 88 94 | 
             
                  class SafeListSanitizer
         | 
| 89 95 | 
             
                    def sanitize(html, *args)
         | 
| 90 | 
            -
                      Loofah::Helpers.sanitize | 
| 96 | 
            +
                      Loofah::Helpers.sanitize(html)
         | 
| 91 97 | 
             
                    end
         | 
| 92 98 |  | 
| 93 99 | 
             
                    def sanitize_css(style_string, *args)
         | 
| 94 | 
            -
                      Loofah::Helpers.sanitize_css | 
| 100 | 
            +
                      Loofah::Helpers.sanitize_css(style_string)
         | 
| 95 101 | 
             
                    end
         | 
| 96 102 | 
             
                  end
         | 
| 97 103 |  | 
| @@ -1,19 +1,17 @@ | |
| 1 1 | 
             
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 2 3 | 
             
            module Loofah
         | 
| 3 | 
            -
              module  | 
| 4 | 
            +
              module HTML4 # :nodoc:
         | 
| 4 5 | 
             
                #
         | 
| 5 | 
            -
                #  Subclass of Nokogiri:: | 
| 6 | 
            +
                #  Subclass of Nokogiri::HTML4::Document.
         | 
| 6 7 | 
             
                #
         | 
| 7 8 | 
             
                #  See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
         | 
| 8 9 | 
             
                #
         | 
| 9 | 
            -
                class Document < Nokogiri:: | 
| 10 | 
            +
                class Document < Nokogiri::HTML4::Document
         | 
| 10 11 | 
             
                  include Loofah::ScrubBehavior::Node
         | 
| 11 12 | 
             
                  include Loofah::DocumentDecorator
         | 
| 12 13 | 
             
                  include Loofah::TextBehavior
         | 
| 13 | 
            -
             | 
| 14 | 
            -
                  def serialize_root
         | 
| 15 | 
            -
                    at_xpath("/html/body")
         | 
| 16 | 
            -
                  end
         | 
| 14 | 
            +
                  include Loofah::HtmlDocumentBehavior
         | 
| 17 15 | 
             
                end
         | 
| 18 16 | 
             
              end
         | 
| 19 17 | 
             
            end
         | 
| @@ -0,0 +1,15 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Loofah
         | 
| 4 | 
            +
              module HTML4 # :nodoc:
         | 
| 5 | 
            +
                #
         | 
| 6 | 
            +
                #  Subclass of Nokogiri::HTML4::DocumentFragment.
         | 
| 7 | 
            +
                #
         | 
| 8 | 
            +
                #  See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
         | 
| 9 | 
            +
                #
         | 
| 10 | 
            +
                class DocumentFragment < Nokogiri::HTML4::DocumentFragment
         | 
| 11 | 
            +
                  include Loofah::TextBehavior
         | 
| 12 | 
            +
                  include Loofah::HtmlFragmentBehavior
         | 
| 13 | 
            +
                end
         | 
| 14 | 
            +
              end
         | 
| 15 | 
            +
            end
         | 
| @@ -0,0 +1,17 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Loofah
         | 
| 4 | 
            +
              module HTML5 # :nodoc:
         | 
| 5 | 
            +
                #
         | 
| 6 | 
            +
                #  Subclass of Nokogiri::HTML5::Document.
         | 
| 7 | 
            +
                #
         | 
| 8 | 
            +
                #  See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
         | 
| 9 | 
            +
                #
         | 
| 10 | 
            +
                class Document < Nokogiri::HTML5::Document
         | 
| 11 | 
            +
                  include Loofah::ScrubBehavior::Node
         | 
| 12 | 
            +
                  include Loofah::DocumentDecorator
         | 
| 13 | 
            +
                  include Loofah::TextBehavior
         | 
| 14 | 
            +
                  include Loofah::HtmlDocumentBehavior
         | 
| 15 | 
            +
                end
         | 
| 16 | 
            +
              end
         | 
| 17 | 
            +
            end
         | 
| @@ -0,0 +1,15 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Loofah
         | 
| 4 | 
            +
              module HTML5 # :nodoc:
         | 
| 5 | 
            +
                #
         | 
| 6 | 
            +
                #  Subclass of Nokogiri::HTML5::DocumentFragment.
         | 
| 7 | 
            +
                #
         | 
| 8 | 
            +
                #  See Loofah::ScrubBehavior and Loofah::TextBehavior for additional methods.
         | 
| 9 | 
            +
                #
         | 
| 10 | 
            +
                class DocumentFragment < Nokogiri::HTML5::DocumentFragment
         | 
| 11 | 
            +
                  include Loofah::TextBehavior
         | 
| 12 | 
            +
                  include Loofah::HtmlFragmentBehavior
         | 
| 13 | 
            +
                end
         | 
| 14 | 
            +
              end
         | 
| 15 | 
            +
            end
         | 
| @@ -1,5 +1,6 @@ | |
| 1 1 | 
             
            # coding: utf-8
         | 
| 2 2 | 
             
            # frozen_string_literal: true
         | 
| 3 | 
            +
             | 
| 3 4 | 
             
            require "set"
         | 
| 4 5 |  | 
| 5 6 | 
             
            module Loofah
         | 
| @@ -16,12 +17,12 @@ module Loofah | |
| 16 17 | 
             
                #
         | 
| 17 18 | 
             
                #  see comments about CVE-2018-8048 within the tests for more information
         | 
| 18 19 | 
             
                #
         | 
| 19 | 
            -
                BROKEN_ESCAPING_ATTRIBUTES = Set.new | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 20 | 
            +
                BROKEN_ESCAPING_ATTRIBUTES = Set.new([
         | 
| 21 | 
            +
                  "href",
         | 
| 22 | 
            +
                  "action",
         | 
| 23 | 
            +
                  "src",
         | 
| 24 | 
            +
                  "name",
         | 
| 25 | 
            +
                ])
         | 
| 25 26 | 
             
                BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = { "name" => "a" }
         | 
| 26 27 | 
             
              end
         | 
| 27 28 | 
             
            end
         |