loofah 2.3.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of loofah might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/CHANGELOG.md +95 -40
- data/Gemfile +9 -7
- data/Manifest.txt +0 -16
- data/README.md +3 -9
- data/Rakefile +20 -4
- data/benchmark/benchmark.rb +6 -1
- data/benchmark/helper.rb +15 -15
- data/lib/loofah.rb +21 -2
- data/lib/loofah/elements.rb +74 -73
- data/lib/loofah/helpers.rb +5 -4
- data/lib/loofah/html/document.rb +1 -0
- data/lib/loofah/html/document_fragment.rb +4 -2
- data/lib/loofah/html5/libxml2_workarounds.rb +8 -7
- data/lib/loofah/html5/safelist.rb +12 -5
- data/lib/loofah/html5/scrub.rb +21 -21
- data/lib/loofah/instance_methods.rb +5 -3
- data/lib/loofah/metahelpers.rb +2 -1
- data/lib/loofah/scrubber.rb +8 -7
- data/lib/loofah/scrubbers.rb +11 -10
- data/lib/loofah/xml/document.rb +1 -0
- data/lib/loofah/xml/document_fragment.rb +2 -1
- metadata +45 -34
- data/.gemtest +0 -0
- data/test/assets/msword.html +0 -63
- data/test/assets/testdata_sanitizer_tests1.dat +0 -502
- data/test/helper.rb +0 -18
- data/test/html5/test_sanitizer.rb +0 -401
- data/test/html5/test_scrub.rb +0 -10
- data/test/integration/test_ad_hoc.rb +0 -204
- data/test/integration/test_helpers.rb +0 -43
- data/test/integration/test_html.rb +0 -72
- data/test/integration/test_scrubbers.rb +0 -400
- data/test/integration/test_xml.rb +0 -55
- data/test/unit/test_api.rb +0 -142
- data/test/unit/test_encoding.rb +0 -20
- data/test/unit/test_helpers.rb +0 -62
- data/test/unit/test_scrubber.rb +0 -229
- data/test/unit/test_scrubbers.rb +0 -14
    
        data/benchmark/helper.rb
    CHANGED
    
    | @@ -1,13 +1,13 @@ | |
| 1 | 
            -
            require  | 
| 2 | 
            -
            require  | 
| 3 | 
            -
            require  | 
| 1 | 
            +
            require "rubygems"
         | 
| 2 | 
            +
            require "open-uri"
         | 
| 3 | 
            +
            require "hpricot"
         | 
| 4 4 | 
             
            require File.expand_path(File.dirname(__FILE__) + "/../lib/loofah")
         | 
| 5 | 
            -
            require  | 
| 5 | 
            +
            require "benchmark"
         | 
| 6 6 | 
             
            require "action_view"
         | 
| 7 7 | 
             
            require "action_controller/vendor/html-scanner"
         | 
| 8 8 | 
             
            require "sanitize"
         | 
| 9 | 
            -
            require  | 
| 10 | 
            -
            require  | 
| 9 | 
            +
            require "hitimes"
         | 
| 10 | 
            +
            require "htmlfilter"
         | 
| 11 11 |  | 
| 12 12 | 
             
            unless defined?(HTMLFilter)
         | 
| 13 13 | 
             
              HTMLFilter = HtmlFilter
         | 
| @@ -19,20 +19,20 @@ class RailsSanitize | |
| 19 19 | 
             
            end
         | 
| 20 20 |  | 
| 21 21 | 
             
            class HTML5libSanitize
         | 
| 22 | 
            -
              require  | 
| 23 | 
            -
              require  | 
| 24 | 
            -
              require  | 
| 25 | 
            -
              require  | 
| 26 | 
            -
              require  | 
| 27 | 
            -
              require  | 
| 22 | 
            +
              require "html5/html5parser"
         | 
| 23 | 
            +
              require "html5/liberalxmlparser"
         | 
| 24 | 
            +
              require "html5/treewalkers"
         | 
| 25 | 
            +
              require "html5/treebuilders"
         | 
| 26 | 
            +
              require "html5/serializer"
         | 
| 27 | 
            +
              require "html5/sanitizer"
         | 
| 28 28 |  | 
| 29 29 | 
             
              include HTML5
         | 
| 30 30 |  | 
| 31 31 | 
             
              def sanitize(html)
         | 
| 32 32 | 
             
                HTMLParser.parse_fragment(html, {
         | 
| 33 | 
            -
                  :tokenizer | 
| 34 | 
            -
                  :encoding | 
| 35 | 
            -
                  :tree | 
| 33 | 
            +
                  :tokenizer => HTMLSanitizer,
         | 
| 34 | 
            +
                  :encoding => "utf-8",
         | 
| 35 | 
            +
                  :tree => TreeBuilders::REXML::TreeBuilder,
         | 
| 36 36 | 
             
                }).to_s
         | 
| 37 37 | 
             
              end
         | 
| 38 38 | 
             
            end
         | 
    
        data/lib/loofah.rb
    CHANGED
    
    | @@ -1,3 +1,4 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 1 2 | 
             
            $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
         | 
| 2 3 |  | 
| 3 4 | 
             
            require "nokogiri"
         | 
| @@ -28,13 +29,13 @@ require "loofah/html/document_fragment" | |
| 28 29 | 
             
            #
         | 
| 29 30 | 
             
            module Loofah
         | 
| 30 31 | 
             
              # The version of Loofah you are using
         | 
| 31 | 
            -
              VERSION = "2. | 
| 32 | 
            +
              VERSION = "2.7.0"
         | 
| 32 33 |  | 
| 33 34 | 
             
              class << self
         | 
| 34 35 | 
             
                # Shortcut for Loofah::HTML::Document.parse
         | 
| 35 36 | 
             
                # This method accepts the same parameters as Nokogiri::HTML::Document.parse
         | 
| 36 37 | 
             
                def document(*args, &block)
         | 
| 37 | 
            -
                  Loofah::HTML::Document.parse(*args, &block)
         | 
| 38 | 
            +
                  remove_comments_before_html_element Loofah::HTML::Document.parse(*args, &block)
         | 
| 38 39 | 
             
                end
         | 
| 39 40 |  | 
| 40 41 | 
             
                # Shortcut for Loofah::HTML::DocumentFragment.parse
         | 
| @@ -79,5 +80,23 @@ module Loofah | |
| 79 80 | 
             
                def remove_extraneous_whitespace(string)
         | 
| 80 81 | 
             
                  string.gsub(/\n\s*\n\s*\n/, "\n\n")
         | 
| 81 82 | 
             
                end
         | 
| 83 | 
            +
             | 
| 84 | 
            +
                private
         | 
| 85 | 
            +
             | 
| 86 | 
            +
                # remove comments that exist outside of the HTML element.
         | 
| 87 | 
            +
                #
         | 
| 88 | 
            +
                # these comments are allowed by the HTML spec:
         | 
| 89 | 
            +
                #
         | 
| 90 | 
            +
                #    https://www.w3.org/TR/html401/struct/global.html#h-7.1
         | 
| 91 | 
            +
                #
         | 
| 92 | 
            +
                # but are not scrubbed by Loofah because these nodes don't meet
         | 
| 93 | 
            +
                # the contract that scrubbers expect of a node (e.g., it can be
         | 
| 94 | 
            +
                # replaced, sibling and children nodes can be created).
         | 
| 95 | 
            +
                def remove_comments_before_html_element(doc)
         | 
| 96 | 
            +
                  doc.children.each do |child|
         | 
| 97 | 
            +
                    child.unlink if child.comment?
         | 
| 98 | 
            +
                  end
         | 
| 99 | 
            +
                  doc
         | 
| 100 | 
            +
                end
         | 
| 82 101 | 
             
              end
         | 
| 83 102 | 
             
            end
         | 
    
        data/lib/loofah/elements.rb
    CHANGED
    
    | @@ -1,89 +1,90 @@ | |
| 1 | 
            -
             | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
            require "set"
         | 
| 2 3 |  | 
| 3 4 | 
             
            module Loofah
         | 
| 4 5 | 
             
              module Elements
         | 
| 5 6 | 
             
                STRICT_BLOCK_LEVEL_HTML4 = Set.new %w[
         | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
| 8 | 
            -
             | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 11 | 
            -
             | 
| 12 | 
            -
             | 
| 13 | 
            -
             | 
| 14 | 
            -
             | 
| 15 | 
            -
             | 
| 16 | 
            -
             | 
| 17 | 
            -
             | 
| 18 | 
            -
             | 
| 19 | 
            -
             | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 27 | 
            -
             | 
| 28 | 
            -
             | 
| 29 | 
            -
             | 
| 30 | 
            -
             | 
| 7 | 
            +
                                                     address
         | 
| 8 | 
            +
                                                     blockquote
         | 
| 9 | 
            +
                                                     center
         | 
| 10 | 
            +
                                                     dir
         | 
| 11 | 
            +
                                                     div
         | 
| 12 | 
            +
                                                     dl
         | 
| 13 | 
            +
                                                     fieldset
         | 
| 14 | 
            +
                                                     form
         | 
| 15 | 
            +
                                                     h1
         | 
| 16 | 
            +
                                                     h2
         | 
| 17 | 
            +
                                                     h3
         | 
| 18 | 
            +
                                                     h4
         | 
| 19 | 
            +
                                                     h5
         | 
| 20 | 
            +
                                                     h6
         | 
| 21 | 
            +
                                                     hr
         | 
| 22 | 
            +
                                                     isindex
         | 
| 23 | 
            +
                                                     menu
         | 
| 24 | 
            +
                                                     noframes
         | 
| 25 | 
            +
                                                     noscript
         | 
| 26 | 
            +
                                                     ol
         | 
| 27 | 
            +
                                                     p
         | 
| 28 | 
            +
                                                     pre
         | 
| 29 | 
            +
                                                     table
         | 
| 30 | 
            +
                                                     ul
         | 
| 31 | 
            +
                                                   ]
         | 
| 31 32 |  | 
| 32 33 | 
             
                # https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements
         | 
| 33 34 | 
             
                STRICT_BLOCK_LEVEL_HTML5 = Set.new %w[
         | 
| 34 | 
            -
             | 
| 35 | 
            -
             | 
| 36 | 
            -
             | 
| 37 | 
            -
             | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 41 | 
            -
             | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 44 | 
            -
             | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 47 | 
            -
             | 
| 48 | 
            -
             | 
| 49 | 
            -
             | 
| 50 | 
            -
             | 
| 51 | 
            -
             | 
| 52 | 
            -
             | 
| 53 | 
            -
             | 
| 54 | 
            -
             | 
| 55 | 
            -
             | 
| 56 | 
            -
             | 
| 57 | 
            -
             | 
| 58 | 
            -
             | 
| 59 | 
            -
             | 
| 60 | 
            -
             | 
| 61 | 
            -
             | 
| 62 | 
            -
             | 
| 63 | 
            -
             | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 66 | 
            -
             | 
| 67 | 
            -
             | 
| 68 | 
            -
             | 
| 69 | 
            -
             | 
| 70 | 
            -
             | 
| 35 | 
            +
                                                     address
         | 
| 36 | 
            +
                                                     article
         | 
| 37 | 
            +
                                                     aside
         | 
| 38 | 
            +
                                                     blockquote
         | 
| 39 | 
            +
                                                     canvas
         | 
| 40 | 
            +
                                                     dd
         | 
| 41 | 
            +
                                                     div
         | 
| 42 | 
            +
                                                     dl
         | 
| 43 | 
            +
                                                     dt
         | 
| 44 | 
            +
                                                     fieldset
         | 
| 45 | 
            +
                                                     figcaption
         | 
| 46 | 
            +
                                                     figure
         | 
| 47 | 
            +
                                                     footer
         | 
| 48 | 
            +
                                                     form
         | 
| 49 | 
            +
                                                     h1
         | 
| 50 | 
            +
                                                     h2
         | 
| 51 | 
            +
                                                     h3
         | 
| 52 | 
            +
                                                     h4
         | 
| 53 | 
            +
                                                     h5
         | 
| 54 | 
            +
                                                     h6
         | 
| 55 | 
            +
                                                     header
         | 
| 56 | 
            +
                                                     hgroup
         | 
| 57 | 
            +
                                                     hr
         | 
| 58 | 
            +
                                                     li
         | 
| 59 | 
            +
                                                     main
         | 
| 60 | 
            +
                                                     nav
         | 
| 61 | 
            +
                                                     noscript
         | 
| 62 | 
            +
                                                     ol
         | 
| 63 | 
            +
                                                     output
         | 
| 64 | 
            +
                                                     p
         | 
| 65 | 
            +
                                                     pre
         | 
| 66 | 
            +
                                                     section
         | 
| 67 | 
            +
                                                     table
         | 
| 68 | 
            +
                                                     tfoot
         | 
| 69 | 
            +
                                                     ul
         | 
| 70 | 
            +
                                                     video
         | 
| 71 | 
            +
                                                   ]
         | 
| 71 72 |  | 
| 72 73 | 
             
                STRICT_BLOCK_LEVEL = STRICT_BLOCK_LEVEL_HTML4 + STRICT_BLOCK_LEVEL_HTML5
         | 
| 73 74 |  | 
| 74 75 | 
             
                # The following elements may also be considered block-level
         | 
| 75 76 | 
             
                # elements since they may contain block-level elements
         | 
| 76 77 | 
             
                LOOSE_BLOCK_LEVEL = Set.new %w[dd
         | 
| 77 | 
            -
             | 
| 78 | 
            -
             | 
| 79 | 
            -
             | 
| 80 | 
            -
             | 
| 81 | 
            -
             | 
| 82 | 
            -
             | 
| 83 | 
            -
             | 
| 84 | 
            -
             | 
| 85 | 
            -
             | 
| 86 | 
            -
             | 
| 78 | 
            +
                                               dt
         | 
| 79 | 
            +
                                               frameset
         | 
| 80 | 
            +
                                               li
         | 
| 81 | 
            +
                                               tbody
         | 
| 82 | 
            +
                                               td
         | 
| 83 | 
            +
                                               tfoot
         | 
| 84 | 
            +
                                               th
         | 
| 85 | 
            +
                                               thead
         | 
| 86 | 
            +
                                               tr
         | 
| 87 | 
            +
                                            ]
         | 
| 87 88 |  | 
| 88 89 | 
             
                BLOCK_LEVEL = STRICT_BLOCK_LEVEL + LOOSE_BLOCK_LEVEL
         | 
| 89 90 | 
             
              end
         | 
    
        data/lib/loofah/helpers.rb
    CHANGED
    
    | @@ -1,3 +1,4 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 1 2 | 
             
            module Loofah
         | 
| 2 3 | 
             
              module Helpers
         | 
| 3 4 | 
             
                class << self
         | 
| @@ -27,7 +28,7 @@ module Loofah | |
| 27 28 | 
             
                  #
         | 
| 28 29 | 
             
                  #    Loofah::Helpers.sanitize_css("display:block;background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg)") # => "display: block;"
         | 
| 29 30 | 
             
                  #
         | 
| 30 | 
            -
                  def sanitize_css | 
| 31 | 
            +
                  def sanitize_css(style_string)
         | 
| 31 32 | 
             
                    ::Loofah::HTML5::Scrub.scrub_css style_string
         | 
| 32 33 | 
             
                  end
         | 
| 33 34 |  | 
| @@ -68,7 +69,7 @@ module Loofah | |
| 68 69 | 
             
                  #    Loofah::Helpers::ActionView.set_as_default_sanitizer
         | 
| 69 70 | 
             
                  #
         | 
| 70 71 | 
             
                  class FullSanitizer
         | 
| 71 | 
            -
                    def sanitize | 
| 72 | 
            +
                    def sanitize(html, *args)
         | 
| 72 73 | 
             
                      Loofah::Helpers.strip_tags html
         | 
| 73 74 | 
             
                    end
         | 
| 74 75 | 
             
                  end
         | 
| @@ -85,11 +86,11 @@ module Loofah | |
| 85 86 | 
             
                  #    Loofah::Helpers::ActionView.set_as_default_sanitizer
         | 
| 86 87 | 
             
                  #
         | 
| 87 88 | 
             
                  class SafeListSanitizer
         | 
| 88 | 
            -
                    def sanitize | 
| 89 | 
            +
                    def sanitize(html, *args)
         | 
| 89 90 | 
             
                      Loofah::Helpers.sanitize html
         | 
| 90 91 | 
             
                    end
         | 
| 91 92 |  | 
| 92 | 
            -
                    def sanitize_css | 
| 93 | 
            +
                    def sanitize_css(style_string, *args)
         | 
| 93 94 | 
             
                      Loofah::Helpers.sanitize_css style_string
         | 
| 94 95 | 
             
                    end
         | 
| 95 96 | 
             
                  end
         | 
    
        data/lib/loofah/html/document.rb
    CHANGED
    
    
| @@ -1,3 +1,4 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 1 2 | 
             
            module Loofah
         | 
| 2 3 | 
             
              module HTML # :nodoc:
         | 
| 3 4 | 
             
                #
         | 
| @@ -14,10 +15,10 @@ module Loofah | |
| 14 15 | 
             
                    #  constructor. Applications should use Loofah.fragment to
         | 
| 15 16 | 
             
                    #  parse a fragment.
         | 
| 16 17 | 
             
                    #
         | 
| 17 | 
            -
                    def parse | 
| 18 | 
            +
                    def parse(tags, encoding = nil)
         | 
| 18 19 | 
             
                      doc = Loofah::HTML::Document.new
         | 
| 19 20 |  | 
| 20 | 
            -
                      encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name :  | 
| 21 | 
            +
                      encoding ||= tags.respond_to?(:encoding) ? tags.encoding.name : "UTF-8"
         | 
| 21 22 | 
             
                      doc.encoding = encoding
         | 
| 22 23 |  | 
| 23 24 | 
             
                      new(doc, tags)
         | 
| @@ -30,6 +31,7 @@ module Loofah | |
| 30 31 | 
             
                  def to_s
         | 
| 31 32 | 
             
                    serialize_root.children.to_s
         | 
| 32 33 | 
             
                  end
         | 
| 34 | 
            +
             | 
| 33 35 | 
             
                  alias :serialize :to_s
         | 
| 34 36 |  | 
| 35 37 | 
             
                  def serialize_root
         | 
    
        data/lib/loofah/html5/libxml2_workarounds.rb
    CHANGED
    
    | @@ -1,5 +1,6 @@ | |
| 1 1 | 
             
            # coding: utf-8
         | 
| 2 | 
            -
             | 
| 2 | 
            +
            # frozen_string_literal: true
         | 
| 3 | 
            +
            require "set"
         | 
| 3 4 |  | 
| 4 5 | 
             
            module Loofah
         | 
| 5 6 | 
             
              #
         | 
| @@ -16,11 +17,11 @@ module Loofah | |
| 16 17 | 
             
                #  see comments about CVE-2018-8048 within the tests for more information
         | 
| 17 18 | 
             
                #
         | 
| 18 19 | 
             
                BROKEN_ESCAPING_ATTRIBUTES = Set.new %w[
         | 
| 19 | 
            -
             | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 24 | 
            -
                BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = {"name" => "a"}
         | 
| 20 | 
            +
                                                       href
         | 
| 21 | 
            +
                                                       action
         | 
| 22 | 
            +
                                                       src
         | 
| 23 | 
            +
                                                       name
         | 
| 24 | 
            +
                                                     ]
         | 
| 25 | 
            +
                BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG = { "name" => "a" }
         | 
| 25 26 | 
             
              end
         | 
| 26 27 | 
             
            end
         | 
    
        data/lib/loofah/html5/safelist.rb
    CHANGED
    
    | @@ -1,4 +1,5 @@ | |
| 1 | 
            -
             | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
            require "set"
         | 
| 2 3 |  | 
| 3 4 | 
             
            module Loofah
         | 
| 4 5 | 
             
              module HTML5 # :nodoc:
         | 
| @@ -45,7 +46,6 @@ module Loofah | |
| 45 46 | 
             
                #
         | 
| 46 47 | 
             
                # </html5_license>
         | 
| 47 48 | 
             
                module SafeList
         | 
| 48 | 
            -
             | 
| 49 49 | 
             
                  ACCEPTABLE_ELEMENTS = Set.new([
         | 
| 50 50 | 
             
                                                  "a",
         | 
| 51 51 | 
             
                                                  "abbr",
         | 
| @@ -361,7 +361,6 @@ module Loofah | |
| 361 361 | 
             
                                             "baseProfile",
         | 
| 362 362 | 
             
                                             "bbox",
         | 
| 363 363 | 
             
                                             "begin",
         | 
| 364 | 
            -
                                             "by",
         | 
| 365 364 | 
             
                                             "calcMode",
         | 
| 366 365 | 
             
                                             "cap-height",
         | 
| 367 366 | 
             
                                             "class",
         | 
| @@ -468,7 +467,6 @@ module Loofah | |
| 468 467 | 
             
                                             "systemLanguage",
         | 
| 469 468 | 
             
                                             "target",
         | 
| 470 469 | 
             
                                             "text-anchor",
         | 
| 471 | 
            -
                                             "to",
         | 
| 472 470 | 
             
                                             "transform",
         | 
| 473 471 | 
             
                                             "type",
         | 
| 474 472 | 
             
                                             "u1",
         | 
| @@ -478,7 +476,6 @@ module Loofah | |
| 478 476 | 
             
                                             "unicode",
         | 
| 479 477 | 
             
                                             "unicode-range",
         | 
| 480 478 | 
             
                                             "units-per-em",
         | 
| 481 | 
            -
                                             "values",
         | 
| 482 479 | 
             
                                             "version",
         | 
| 483 480 | 
             
                                             "viewBox",
         | 
| 484 481 | 
             
                                             "visibility",
         | 
| @@ -577,7 +574,11 @@ module Loofah | |
| 577 574 | 
             
                                                        "line-height",
         | 
| 578 575 | 
             
                                                        "list-style",
         | 
| 579 576 | 
             
                                                        "list-style-type",
         | 
| 577 | 
            +
                                                        "max-width",
         | 
| 580 578 | 
             
                                                        "overflow",
         | 
| 579 | 
            +
                                                        "page-break-after",
         | 
| 580 | 
            +
                                                        "page-break-before",
         | 
| 581 | 
            +
                                                        "page-break-inside",
         | 
| 581 582 | 
             
                                                        "pause",
         | 
| 582 583 | 
             
                                                        "pause-after",
         | 
| 583 584 | 
             
                                                        "pause-before",
         | 
| @@ -616,9 +617,13 @@ module Loofah | |
| 616 617 | 
             
                                                      "collapse",
         | 
| 617 618 | 
             
                                                      "dashed",
         | 
| 618 619 | 
             
                                                      "dotted",
         | 
| 620 | 
            +
                                                      "double",
         | 
| 619 621 | 
             
                                                      "fuchsia",
         | 
| 620 622 | 
             
                                                      "gray",
         | 
| 621 623 | 
             
                                                      "green",
         | 
| 624 | 
            +
                                                      "groove",
         | 
| 625 | 
            +
                                                      "hidden",
         | 
| 626 | 
            +
                                                      "inset",
         | 
| 622 627 | 
             
                                                      "italic",
         | 
| 623 628 | 
             
                                                      "left",
         | 
| 624 629 | 
             
                                                      "lime",
         | 
| @@ -629,9 +634,11 @@ module Loofah | |
| 629 634 | 
             
                                                      "normal",
         | 
| 630 635 | 
             
                                                      "nowrap",
         | 
| 631 636 | 
             
                                                      "olive",
         | 
| 637 | 
            +
                                                      "outset",
         | 
| 632 638 | 
             
                                                      "pointer",
         | 
| 633 639 | 
             
                                                      "purple",
         | 
| 634 640 | 
             
                                                      "red",
         | 
| 641 | 
            +
                                                      "ridge",
         | 
| 635 642 | 
             
                                                      "right",
         | 
| 636 643 | 
             
                                                      "silver",
         | 
| 637 644 | 
             
                                                      "solid",
         | 
    
        data/lib/loofah/html5/scrub.rb
    CHANGED
    
    | @@ -1,22 +1,22 @@ | |
| 1 | 
            -
             | 
| 2 | 
            -
            require  | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
            require "cgi"
         | 
| 3 | 
            +
            require "crass"
         | 
| 3 4 |  | 
| 4 5 | 
             
            module Loofah
         | 
| 5 6 | 
             
              module HTML5 # :nodoc:
         | 
| 6 7 | 
             
                module Scrub
         | 
| 7 | 
            -
             | 
| 8 8 | 
             
                  CONTROL_CHARACTERS = /[`\u0000-\u0020\u007f\u0080-\u0101]/
         | 
| 9 | 
            -
                  CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/
         | 
| 10 | 
            -
                  CRASS_SEMICOLON = {:node => :semicolon, :raw => ";"}
         | 
| 9 | 
            +
                  CSS_KEYWORDISH = /\A(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,3}\.?\d{0,10}(ch|cm|r?em|ex|in|lh|mm|pc|pt|px|Q|vmax|vmin|vw|vh|%|,|\))?)\z/
         | 
| 10 | 
            +
                  CRASS_SEMICOLON = { :node => :semicolon, :raw => ";" }
         | 
| 11 | 
            +
                  CSS_IMPORTANT = '!important'
         | 
| 11 12 |  | 
| 12 13 | 
             
                  class << self
         | 
| 13 | 
            -
             | 
| 14 | 
            -
                    def allowed_element? element_name
         | 
| 14 | 
            +
                    def allowed_element?(element_name)
         | 
| 15 15 | 
             
                      ::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include? element_name
         | 
| 16 16 | 
             
                    end
         | 
| 17 17 |  | 
| 18 18 | 
             
                    #  alternative implementation of the html5lib attribute scrubbing algorithm
         | 
| 19 | 
            -
                    def scrub_attributes | 
| 19 | 
            +
                    def scrub_attributes(node)
         | 
| 20 20 | 
             
                      node.attribute_nodes.each do |attr_node|
         | 
| 21 21 | 
             
                        attr_name = if attr_node.namespace
         | 
| 22 22 | 
             
                                      "#{attr_node.namespace.prefix}:#{attr_node.node_name}"
         | 
| @@ -35,14 +35,14 @@ module Loofah | |
| 35 35 |  | 
| 36 36 | 
             
                        if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
         | 
| 37 37 | 
             
                          # this block lifted nearly verbatim from HTML5 sanitization
         | 
| 38 | 
            -
                          val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, | 
| 39 | 
            -
                          if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! | 
| 38 | 
            +
                          val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
         | 
| 39 | 
            +
                          if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
         | 
| 40 40 | 
             
                            attr_node.remove
         | 
| 41 41 | 
             
                            next
         | 
| 42 | 
            -
                          elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] ==  | 
| 42 | 
            +
                          elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
         | 
| 43 43 | 
             
                            # permit only allowed data mediatypes
         | 
| 44 44 | 
             
                            mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
         | 
| 45 | 
            -
                            mediatype, _ = mediatype.split( | 
| 45 | 
            +
                            mediatype, _ = mediatype.split(";")[0..1] if mediatype
         | 
| 46 46 | 
             
                            if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
         | 
| 47 47 | 
             
                              attr_node.remove
         | 
| 48 48 | 
             
                              next
         | 
| @@ -50,9 +50,9 @@ module Loofah | |
| 50 50 | 
             
                          end
         | 
| 51 51 | 
             
                        end
         | 
| 52 52 | 
             
                        if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
         | 
| 53 | 
            -
                          attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m,  | 
| 53 | 
            +
                          attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, " ") if attr_node.value
         | 
| 54 54 | 
             
                        end
         | 
| 55 | 
            -
                        if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name ==  | 
| 55 | 
            +
                        if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
         | 
| 56 56 | 
             
                          attr_node.remove
         | 
| 57 57 | 
             
                          next
         | 
| 58 58 | 
             
                        end
         | 
| @@ -67,12 +67,12 @@ module Loofah | |
| 67 67 | 
             
                      force_correct_attribute_escaping! node
         | 
| 68 68 | 
             
                    end
         | 
| 69 69 |  | 
| 70 | 
            -
                    def scrub_css_attribute | 
| 71 | 
            -
                      style = node.attributes[ | 
| 70 | 
            +
                    def scrub_css_attribute(node)
         | 
| 71 | 
            +
                      style = node.attributes["style"]
         | 
| 72 72 | 
             
                      style.value = scrub_css(style.value) if style
         | 
| 73 73 | 
             
                    end
         | 
| 74 74 |  | 
| 75 | 
            -
                    def scrub_css | 
| 75 | 
            +
                    def scrub_css(style)
         | 
| 76 76 | 
             
                      style_tree = Crass.parse_properties style
         | 
| 77 77 | 
             
                      sanitized_tree = []
         | 
| 78 78 |  | 
| @@ -84,13 +84,14 @@ module Loofah | |
| 84 84 | 
             
                        name = node[:name].downcase
         | 
| 85 85 | 
             
                        if SafeList::ALLOWED_CSS_PROPERTIES.include?(name) || SafeList::ALLOWED_SVG_PROPERTIES.include?(name)
         | 
| 86 86 | 
             
                          sanitized_tree << node << CRASS_SEMICOLON
         | 
| 87 | 
            -
                        elsif SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split( | 
| 87 | 
            +
                        elsif SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first)
         | 
| 88 88 | 
             
                          value = node[:value].split.map do |keyword|
         | 
| 89 89 | 
             
                            if SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) || keyword =~ CSS_KEYWORDISH
         | 
| 90 90 | 
             
                              keyword
         | 
| 91 91 | 
             
                            end
         | 
| 92 92 | 
             
                          end.compact
         | 
| 93 93 | 
             
                          unless value.empty?
         | 
| 94 | 
            +
                            value << CSS_IMPORTANT if node[:important]
         | 
| 94 95 | 
             
                            propstring = sprintf "%s:%s", name, value.join(" ")
         | 
| 95 96 | 
             
                            sanitized_node = Crass.parse_properties(propstring).first
         | 
| 96 97 | 
             
                            sanitized_tree << sanitized_node << CRASS_SEMICOLON
         | 
| @@ -106,7 +107,7 @@ module Loofah | |
| 106 107 | 
             
                    #
         | 
| 107 108 | 
             
                    #  see comments about CVE-2018-8048 within the tests for more information
         | 
| 108 109 | 
             
                    #
         | 
| 109 | 
            -
                    def force_correct_attribute_escaping! | 
| 110 | 
            +
                    def force_correct_attribute_escaping!(node)
         | 
| 110 111 | 
             
                      return unless Nokogiri::VersionInfo.instance.libxml2?
         | 
| 111 112 |  | 
| 112 113 | 
             
                      node.attribute_nodes.each do |attr_node|
         | 
| @@ -122,11 +123,10 @@ module Loofah | |
| 122 123 | 
             
                        #
         | 
| 123 124 | 
             
                        encoding = attr_node.value.encoding
         | 
| 124 125 | 
             
                        attr_node.value = attr_node.value.gsub(/[ "]/) do |m|
         | 
| 125 | 
            -
                           | 
| 126 | 
            +
                          "%" + m.unpack("H2" * m.bytesize).join("%").upcase
         | 
| 126 127 | 
             
                        end.force_encoding(encoding)
         | 
| 127 128 | 
             
                      end
         | 
| 128 129 | 
             
                    end
         | 
| 129 | 
            -
             | 
| 130 130 | 
             
                  end
         | 
| 131 131 | 
             
                end
         | 
| 132 132 | 
             
              end
         |