sanitize 4.6.5 → 6.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/HISTORY.md +235 -16
- data/LICENSE +1 -1
- data/README.md +89 -76
- data/lib/sanitize/config/default.rb +15 -4
- data/lib/sanitize/config/relaxed.rb +1 -1
- data/lib/sanitize/css.rb +2 -2
- data/lib/sanitize/transformers/clean_comment.rb +1 -1
- data/lib/sanitize/transformers/clean_css.rb +3 -3
- data/lib/sanitize/transformers/clean_doctype.rb +1 -1
- data/lib/sanitize/transformers/clean_element.rb +105 -22
- data/lib/sanitize/version.rb +1 -1
- data/lib/sanitize.rb +53 -68
- data/test/common.rb +0 -31
- data/test/test_clean_comment.rb +16 -20
- data/test/test_clean_css.rb +6 -6
- data/test/test_clean_doctype.rb +22 -22
- data/test/test_clean_element.rb +200 -82
- data/test/test_config.rb +9 -9
- data/test/test_malicious_css.rb +7 -7
- data/test/test_malicious_html.rb +179 -32
- data/test/test_parser.rb +9 -38
- data/test/test_sanitize.rb +114 -29
- data/test/test_sanitize_css.rb +88 -61
- data/test/test_transformers.rb +52 -46
- metadata +17 -33
- data/test/test_unicode.rb +0 -95
    
        data/lib/sanitize.rb
    CHANGED
    
    | @@ -1,6 +1,6 @@ | |
| 1 1 | 
             
            # encoding: utf-8
         | 
| 2 2 |  | 
| 3 | 
            -
            require ' | 
| 3 | 
            +
            require 'nokogiri'
         | 
| 4 4 | 
             
            require 'set'
         | 
| 5 5 |  | 
| 6 6 | 
             
            require_relative 'sanitize/version'
         | 
| @@ -19,6 +19,20 @@ require_relative 'sanitize/transformers/clean_element' | |
| 19 19 | 
             
            class Sanitize
         | 
| 20 20 | 
             
              attr_reader :config
         | 
| 21 21 |  | 
| 22 | 
            +
              # Matches one or more control characters that should be removed from HTML
         | 
| 23 | 
            +
              # before parsing, as defined by the HTML living standard.
         | 
| 24 | 
            +
              #
         | 
| 25 | 
            +
              # -   https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
         | 
| 26 | 
            +
              # -   https://infra.spec.whatwg.org/#control
         | 
| 27 | 
            +
              REGEX_HTML_CONTROL_CHARACTERS = /[\u0001-\u0008\u000b\u000e-\u001f\u007f-\u009f]+/u
         | 
| 28 | 
            +
             | 
| 29 | 
            +
              # Matches one or more non-characters that should be removed from HTML before
         | 
| 30 | 
            +
              # parsing, as defined by the HTML living standard.
         | 
| 31 | 
            +
              #
         | 
| 32 | 
            +
              # -   https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
         | 
| 33 | 
            +
              # -   https://infra.spec.whatwg.org/#noncharacter
         | 
| 34 | 
            +
              REGEX_HTML_NON_CHARACTERS = /[\ufdd0-\ufdef\ufffe\uffff\u{1fffe}\u{1ffff}\u{2fffe}\u{2ffff}\u{3fffe}\u{3ffff}\u{4fffe}\u{4ffff}\u{5fffe}\u{5ffff}\u{6fffe}\u{6ffff}\u{7fffe}\u{7ffff}\u{8fffe}\u{8ffff}\u{9fffe}\u{9ffff}\u{afffe}\u{affff}\u{bfffe}\u{bffff}\u{cfffe}\u{cffff}\u{dfffe}\u{dffff}\u{efffe}\u{effff}\u{ffffe}\u{fffff}\u{10fffe}\u{10ffff}]+/u
         | 
| 35 | 
            +
             | 
| 22 36 | 
             
              # Matches an attribute value that could be treated by a browser as a URL
         | 
| 23 37 | 
             
              # with a protocol prefix, such as "http:" or "javascript:". Any string of zero
         | 
| 24 38 | 
             
              # or more characters followed by a colon is considered a match, even if the
         | 
| @@ -26,11 +40,12 @@ class Sanitize | |
| 26 40 | 
             
              # IE6 and Opera will still parse).
         | 
| 27 41 | 
             
              REGEX_PROTOCOL = /\A\s*([^\/#]*?)(?:\:|&#0*58|&#x0*3a)/i
         | 
| 28 42 |  | 
| 29 | 
            -
              # Matches  | 
| 30 | 
            -
              #  | 
| 43 | 
            +
              # Matches one or more characters that should be stripped from HTML before
         | 
| 44 | 
            +
              # parsing. This is a combination of `REGEX_HTML_CONTROL_CHARACTERS` and
         | 
| 45 | 
            +
              # `REGEX_HTML_NON_CHARACTERS`.
         | 
| 31 46 | 
             
              #
         | 
| 32 | 
            -
              #  | 
| 33 | 
            -
              REGEX_UNSUITABLE_CHARS = / | 
| 47 | 
            +
              # https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
         | 
| 48 | 
            +
              REGEX_UNSUITABLE_CHARS = /(?:#{REGEX_HTML_CONTROL_CHARACTERS}|#{REGEX_HTML_NON_CHARACTERS})/u
         | 
| 34 49 |  | 
| 35 50 | 
             
              #--
         | 
| 36 51 | 
             
              # Class Methods
         | 
| @@ -39,7 +54,7 @@ class Sanitize | |
| 39 54 | 
             
              # Returns a sanitized copy of the given full _html_ document, using the
         | 
| 40 55 | 
             
              # settings in _config_ if specified.
         | 
| 41 56 | 
             
              #
         | 
| 42 | 
            -
              # When sanitizing a document, the `<html>` element must be  | 
| 57 | 
            +
              # When sanitizing a document, the `<html>` element must be allowlisted or an
         | 
| 43 58 | 
             
              # error will be raised. If this is undesirable, you should probably use
         | 
| 44 59 | 
             
              # {#fragment} instead.
         | 
| 45 60 | 
             
              def self.document(html, config = {})
         | 
| @@ -96,17 +111,19 @@ class Sanitize | |
| 96 111 |  | 
| 97 112 | 
             
                @transformers << Transformers::CleanDoctype
         | 
| 98 113 | 
             
                @transformers << Transformers::CleanCDATA
         | 
| 114 | 
            +
             | 
| 115 | 
            +
                @transformer_config = { config: @config }
         | 
| 99 116 | 
             
              end
         | 
| 100 117 |  | 
| 101 118 | 
             
              # Returns a sanitized copy of the given _html_ document.
         | 
| 102 119 | 
             
              #
         | 
| 103 | 
            -
              # When sanitizing a document, the `<html>` element must be  | 
| 120 | 
            +
              # When sanitizing a document, the `<html>` element must be allowlisted or an
         | 
| 104 121 | 
             
              # error will be raised. If this is undesirable, you should probably use
         | 
| 105 122 | 
             
              # {#fragment} instead.
         | 
| 106 123 | 
             
              def document(html)
         | 
| 107 124 | 
             
                return '' unless html
         | 
| 108 125 |  | 
| 109 | 
            -
                doc = Nokogiri::HTML5.parse(preprocess(html))
         | 
| 126 | 
            +
                doc = Nokogiri::HTML5.parse(preprocess(html), **@config[:parser_options])
         | 
| 110 127 | 
             
                node!(doc)
         | 
| 111 128 | 
             
                to_html(doc)
         | 
| 112 129 | 
             
              end
         | 
| @@ -118,20 +135,7 @@ class Sanitize | |
| 118 135 | 
             
              def fragment(html)
         | 
| 119 136 | 
             
                return '' unless html
         | 
| 120 137 |  | 
| 121 | 
            -
                 | 
| 122 | 
            -
                doc  = Nokogiri::HTML5.parse("<html><body>#{html}")
         | 
| 123 | 
            -
             | 
| 124 | 
            -
                # Hack to allow fragments containing <body>. Borrowed from
         | 
| 125 | 
            -
                # Nokogiri::HTML::DocumentFragment.
         | 
| 126 | 
            -
                if html =~ /\A<body(?:\s|>)/i
         | 
| 127 | 
            -
                  path = '/html/body'
         | 
| 128 | 
            -
                else
         | 
| 129 | 
            -
                  path = '/html/body/node()'
         | 
| 130 | 
            -
                end
         | 
| 131 | 
            -
             | 
| 132 | 
            -
                frag = doc.fragment
         | 
| 133 | 
            -
                frag << doc.xpath(path)
         | 
| 134 | 
            -
             | 
| 138 | 
            +
                frag = Nokogiri::HTML5.fragment(preprocess(html), **@config[:parser_options])
         | 
| 135 139 | 
             
                node!(frag)
         | 
| 136 140 | 
             
                to_html(frag)
         | 
| 137 141 | 
             
              end
         | 
| @@ -143,20 +147,20 @@ class Sanitize | |
| 143 147 | 
             
              # in place.
         | 
| 144 148 | 
             
              #
         | 
| 145 149 | 
             
              # If _node_ is a `Nokogiri::XML::Document`, the `<html>` element must be
         | 
| 146 | 
            -
              #  | 
| 150 | 
            +
              # allowlisted or an error will be raised.
         | 
| 147 151 | 
             
              def node!(node)
         | 
| 148 152 | 
             
                raise ArgumentError unless node.is_a?(Nokogiri::XML::Node)
         | 
| 149 153 |  | 
| 150 154 | 
             
                if node.is_a?(Nokogiri::XML::Document)
         | 
| 151 155 | 
             
                  unless @config[:elements].include?('html')
         | 
| 152 | 
            -
                    raise Error, 'When sanitizing a document, "<html>" must be  | 
| 156 | 
            +
                    raise Error, 'When sanitizing a document, "<html>" must be allowlisted.'
         | 
| 153 157 | 
             
                  end
         | 
| 154 158 | 
             
                end
         | 
| 155 159 |  | 
| 156 | 
            -
                 | 
| 160 | 
            +
                node_allowlist = Set.new
         | 
| 157 161 |  | 
| 158 162 | 
             
                traverse(node) do |n|
         | 
| 159 | 
            -
                  transform_node!(n,  | 
| 163 | 
            +
                  transform_node!(n, node_allowlist)
         | 
| 160 164 | 
             
                end
         | 
| 161 165 |  | 
| 162 166 | 
             
                node
         | 
| @@ -182,51 +186,32 @@ class Sanitize | |
| 182 186 | 
             
              end
         | 
| 183 187 |  | 
| 184 188 | 
             
              def to_html(node)
         | 
| 185 | 
            -
                 | 
| 186 | 
            -
             | 
| 187 | 
            -
                # Hacky workaround for a libxml2 bug that adds an undesired Content-Type
         | 
| 188 | 
            -
                # meta tag to all serialized HTML documents.
         | 
| 189 | 
            -
                #
         | 
| 190 | 
            -
                # https://github.com/sparklemotion/nokogiri/issues/1008
         | 
| 191 | 
            -
                if node.type == Nokogiri::XML::Node::DOCUMENT_NODE ||
         | 
| 192 | 
            -
                    node.type == Nokogiri::XML::Node::HTML_DOCUMENT_NODE
         | 
| 193 | 
            -
             | 
| 194 | 
            -
                  regex_meta   = %r|(<html[^>]*>\s*<head[^>]*>\s*)<meta http-equiv="Content-Type" content="text/html; charset=utf-8">|i
         | 
| 195 | 
            -
             | 
| 196 | 
            -
                  # Only replace the content-type meta tag if <meta> isn't whitelisted or
         | 
| 197 | 
            -
                  # the original document didn't actually include a content-type meta tag.
         | 
| 198 | 
            -
                  replace_meta = !@config[:elements].include?('meta') ||
         | 
| 199 | 
            -
                    node.xpath('/html/head/meta[@http-equiv]').none? do |meta|
         | 
| 200 | 
            -
                      meta['http-equiv'].casecmp('content-type').zero?
         | 
| 201 | 
            -
                    end
         | 
| 202 | 
            -
                end
         | 
| 203 | 
            -
             | 
| 204 | 
            -
                so = Nokogiri::XML::Node::SaveOptions
         | 
| 205 | 
            -
             | 
| 206 | 
            -
                # Serialize to HTML without any formatting to prevent Nokogiri from adding
         | 
| 207 | 
            -
                # newlines after certain tags.
         | 
| 208 | 
            -
                html = node.to_html(
         | 
| 209 | 
            -
                  :encoding  => 'utf-8',
         | 
| 210 | 
            -
                  :indent    => 0,
         | 
| 211 | 
            -
                  :save_with => so::NO_DECLARATION | so::NO_EMPTY_TAGS | so::AS_HTML
         | 
| 212 | 
            -
                )
         | 
| 213 | 
            -
             | 
| 214 | 
            -
                html.gsub!(regex_meta, '\1') if replace_meta
         | 
| 215 | 
            -
                html
         | 
| 189 | 
            +
                node.to_html(preserve_newline: true)
         | 
| 216 190 | 
             
              end
         | 
| 217 191 |  | 
| 218 | 
            -
              def transform_node!(node,  | 
| 192 | 
            +
              def transform_node!(node, node_allowlist)
         | 
| 219 193 | 
             
                @transformers.each do |transformer|
         | 
| 220 | 
            -
                   | 
| 221 | 
            -
             | 
| 222 | 
            -
             | 
| 223 | 
            -
             | 
| 224 | 
            -
             | 
| 225 | 
            -
             | 
| 226 | 
            -
                   | 
| 227 | 
            -
             | 
| 228 | 
            -
                   | 
| 229 | 
            -
             | 
| 194 | 
            +
                  # Since transform_node! may be called in a tight loop to process thousands
         | 
| 195 | 
            +
                  # of items, we can optimize both memory and CPU performance by:
         | 
| 196 | 
            +
                  #
         | 
| 197 | 
            +
                  # 1. Reusing the same config hash for each transformer
         | 
| 198 | 
            +
                  # 2. Directly assigning values to hash instead of using merge!. Not only
         | 
| 199 | 
            +
                  # does merge! create a new hash, it is also 2.6x slower:
         | 
| 200 | 
            +
                  # https://github.com/JuanitoFatas/fast-ruby#hashmerge-vs-hashmerge-code
         | 
| 201 | 
            +
                  config = @transformer_config
         | 
| 202 | 
            +
                  config[:is_allowlisted] = config[:is_whitelisted] = node_allowlist.include?(node)
         | 
| 203 | 
            +
                  config[:node] = node
         | 
| 204 | 
            +
                  config[:node_name] = node.name.downcase
         | 
| 205 | 
            +
                  config[:node_allowlist] = config[:node_whitelist] = node_allowlist
         | 
| 206 | 
            +
             | 
| 207 | 
            +
                  result = transformer.call(**config)
         | 
| 208 | 
            +
             | 
| 209 | 
            +
                  if result.is_a?(Hash)
         | 
| 210 | 
            +
                    result_allowlist = result[:node_allowlist] || result[:node_whitelist]
         | 
| 211 | 
            +
             | 
| 212 | 
            +
                    if result_allowlist.respond_to?(:each)
         | 
| 213 | 
            +
                      node_allowlist.merge(result_allowlist)
         | 
| 214 | 
            +
                    end
         | 
| 230 215 | 
             
                  end
         | 
| 231 216 | 
             
                end
         | 
| 232 217 |  | 
    
        data/test/common.rb
    CHANGED
    
    | @@ -1,34 +1,3 @@ | |
| 1 1 | 
             
            # encoding: utf-8
         | 
| 2 | 
            -
            gem 'minitest'
         | 
| 3 2 | 
             
            require 'minitest/autorun'
         | 
| 4 | 
            -
             | 
| 5 3 | 
             
            require_relative '../lib/sanitize'
         | 
| 6 | 
            -
             | 
| 7 | 
            -
            # Helper to stub an instance method. Shamelessly stolen from
         | 
| 8 | 
            -
            # https://github.com/codeodor/minitest-stub_any_instance/
         | 
| 9 | 
            -
            class Object
         | 
| 10 | 
            -
              def self.stub_instance(name, value, &block)
         | 
| 11 | 
            -
                old_method = "__stubbed_method_#{name}__"
         | 
| 12 | 
            -
             | 
| 13 | 
            -
                class_eval do
         | 
| 14 | 
            -
                  alias_method old_method, name
         | 
| 15 | 
            -
             | 
| 16 | 
            -
                  define_method(name) do |*args|
         | 
| 17 | 
            -
                    if value.respond_to?(:call) then
         | 
| 18 | 
            -
                      value.call(*args)
         | 
| 19 | 
            -
                    else
         | 
| 20 | 
            -
                      value
         | 
| 21 | 
            -
                    end
         | 
| 22 | 
            -
                  end
         | 
| 23 | 
            -
                end
         | 
| 24 | 
            -
             | 
| 25 | 
            -
                yield
         | 
| 26 | 
            -
             | 
| 27 | 
            -
              ensure
         | 
| 28 | 
            -
                class_eval do
         | 
| 29 | 
            -
                  undef_method name
         | 
| 30 | 
            -
                  alias_method name, old_method
         | 
| 31 | 
            -
                  undef_method old_method
         | 
| 32 | 
            -
                end
         | 
| 33 | 
            -
              end
         | 
| 34 | 
            -
            end
         | 
    
        data/test/test_clean_comment.rb
    CHANGED
    
    | @@ -11,18 +11,18 @@ describe 'Sanitize::Transformers::CleanComment' do | |
| 11 11 | 
             
                end
         | 
| 12 12 |  | 
| 13 13 | 
             
                it 'should remove comments' do
         | 
| 14 | 
            -
                  @s.fragment('foo <!-- comment --> bar').must_equal 'foo  bar'
         | 
| 15 | 
            -
                  @s.fragment('foo <!-- ').must_equal 'foo '
         | 
| 16 | 
            -
                  @s.fragment('foo <!-- - -> bar').must_equal 'foo '
         | 
| 17 | 
            -
                  @s.fragment("foo <!--\n\n\n\n-->bar").must_equal 'foo bar'
         | 
| 18 | 
            -
                  @s.fragment("foo <!-- <!-- <!-- --> --> -->bar").must_equal 'foo  --> -->bar'
         | 
| 19 | 
            -
                  @s.fragment("foo <div <!-- comment -->>bar</div>").must_equal 'foo <div>>bar</div>'
         | 
| 14 | 
            +
                  _(@s.fragment('foo <!-- comment --> bar')).must_equal 'foo  bar'
         | 
| 15 | 
            +
                  _(@s.fragment('foo <!-- ')).must_equal 'foo '
         | 
| 16 | 
            +
                  _(@s.fragment('foo <!-- - -> bar')).must_equal 'foo '
         | 
| 17 | 
            +
                  _(@s.fragment("foo <!--\n\n\n\n-->bar")).must_equal 'foo bar'
         | 
| 18 | 
            +
                  _(@s.fragment("foo <!-- <!-- <!-- --> --> -->bar")).must_equal 'foo  --> -->bar'
         | 
| 19 | 
            +
                  _(@s.fragment("foo <div <!-- comment -->>bar</div>")).must_equal 'foo <div>>bar</div>'
         | 
| 20 20 |  | 
| 21 21 | 
             
                  # Special case: the comment markup is inside a <script>, which makes it
         | 
| 22 22 | 
             
                  # text content and not an actual HTML comment.
         | 
| 23 | 
            -
                  @s.fragment("<script><!-- comment --></script>").must_equal ' | 
| 23 | 
            +
                  _(@s.fragment("<script><!-- comment --></script>")).must_equal ''
         | 
| 24 24 |  | 
| 25 | 
            -
                  Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => false, :elements => ['script'])
         | 
| 25 | 
            +
                  _(Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => false, :elements => ['script']))
         | 
| 26 26 | 
             
                    .must_equal '<script><!-- comment --></script>'
         | 
| 27 27 | 
             
                end
         | 
| 28 28 | 
             
              end
         | 
| @@ -33,18 +33,14 @@ describe 'Sanitize::Transformers::CleanComment' do | |
| 33 33 | 
             
                end
         | 
| 34 34 |  | 
| 35 35 | 
             
                it 'should allow comments' do
         | 
| 36 | 
            -
                  @s.fragment('foo <!-- comment --> bar').must_equal 'foo <!-- comment --> bar'
         | 
| 37 | 
            -
                  @s.fragment('foo <!-- ').must_equal 'foo <!-- -->'
         | 
| 38 | 
            -
                  @s.fragment('foo <!-- - -> bar').must_equal 'foo <!-- - -> bar-->'
         | 
| 39 | 
            -
                  @s.fragment("foo <!--\n\n\n\n-->bar").must_equal "foo <!--\n\n\n\n-->bar"
         | 
| 40 | 
            -
                  @s.fragment("foo <!-- <!-- <!-- --> --> -->bar").must_equal 'foo <!-- <!-- <!-- --> --> -->bar'
         | 
| 41 | 
            -
                  @s.fragment("foo <div <!-- comment -->>bar</div>").must_equal 'foo <div>>bar</div>'
         | 
| 42 | 
            -
             | 
| 43 | 
            -
                   | 
| 44 | 
            -
                  # text content and not an actual HTML comment.
         | 
| 45 | 
            -
                  @s.fragment("<script><!-- comment --></script>").must_equal '<!-- comment -->'
         | 
| 46 | 
            -
             | 
| 47 | 
            -
                  Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => true, :elements => ['script'])
         | 
| 36 | 
            +
                  _(@s.fragment('foo <!-- comment --> bar')).must_equal 'foo <!-- comment --> bar'
         | 
| 37 | 
            +
                  _(@s.fragment('foo <!-- ')).must_equal 'foo <!-- -->'
         | 
| 38 | 
            +
                  _(@s.fragment('foo <!-- - -> bar')).must_equal 'foo <!-- - -> bar-->'
         | 
| 39 | 
            +
                  _(@s.fragment("foo <!--\n\n\n\n-->bar")).must_equal "foo <!--\n\n\n\n-->bar"
         | 
| 40 | 
            +
                  _(@s.fragment("foo <!-- <!-- <!-- --> --> -->bar")).must_equal 'foo <!-- <!-- <!-- --> --> -->bar'
         | 
| 41 | 
            +
                  _(@s.fragment("foo <div <!-- comment -->>bar</div>")).must_equal 'foo <div>>bar</div>'
         | 
| 42 | 
            +
             | 
| 43 | 
            +
                  _(Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => true, :elements => ['script']))
         | 
| 48 44 | 
             
                    .must_equal '<script><!-- comment --></script>'
         | 
| 49 45 | 
             
                end
         | 
| 50 46 | 
             
              end
         | 
    
        data/test/test_clean_css.rb
    CHANGED
    
    | @@ -10,15 +10,15 @@ describe 'Sanitize::Transformers::CSS::CleanAttribute' do | |
| 10 10 | 
             
              end
         | 
| 11 11 |  | 
| 12 12 | 
             
              it 'should sanitize CSS properties in style attributes' do
         | 
| 13 | 
            -
                @s.fragment(%[
         | 
| 13 | 
            +
                _(@s.fragment(%[
         | 
| 14 14 | 
             
                  <div style="color: #fff; width: expression(alert(1)); /* <-- evil! */"></div>
         | 
| 15 | 
            -
                ].strip).must_equal %[
         | 
| 16 | 
            -
                  <div style="color: #fff;  /*  | 
| 15 | 
            +
                ].strip)).must_equal %[
         | 
| 16 | 
            +
                  <div style="color: #fff;  /* <-- evil! */"></div>
         | 
| 17 17 | 
             
                ].strip
         | 
| 18 18 | 
             
              end
         | 
| 19 19 |  | 
| 20 20 | 
             
              it 'should remove the style attribute if the sanitized CSS is empty' do
         | 
| 21 | 
            -
                @s.fragment('<div style="width: expression(alert(1))"></div>').
         | 
| 21 | 
            +
                _(@s.fragment('<div style="width: expression(alert(1))"></div>')).
         | 
| 22 22 | 
             
                  must_equal '<div></div>'
         | 
| 23 23 | 
             
              end
         | 
| 24 24 | 
             
            end
         | 
| @@ -46,7 +46,7 @@ describe 'Sanitize::Transformers::CSS::CleanElement' do | |
| 46 46 | 
             
                  </style>
         | 
| 47 47 | 
             
                ].strip
         | 
| 48 48 |  | 
| 49 | 
            -
                @s.fragment(html).must_equal %[
         | 
| 49 | 
            +
                _(@s.fragment(html)).must_equal %[
         | 
| 50 50 | 
             
                  <style>
         | 
| 51 51 | 
             
                  /* Yay CSS! */
         | 
| 52 52 | 
             
                  .foo { color: #fff; }
         | 
| @@ -62,6 +62,6 @@ describe 'Sanitize::Transformers::CSS::CleanElement' do | |
| 62 62 | 
             
              end
         | 
| 63 63 |  | 
| 64 64 | 
             
              it 'should remove the <style> element if the sanitized CSS is empty' do
         | 
| 65 | 
            -
                @s.fragment('<style></style>').must_equal ''
         | 
| 65 | 
            +
                _(@s.fragment('<style></style>')).must_equal ''
         | 
| 66 66 | 
             
              end
         | 
| 67 67 | 
             
            end
         | 
    
        data/test/test_clean_doctype.rb
    CHANGED
    
    | @@ -11,18 +11,18 @@ describe 'Sanitize::Transformers::CleanDoctype' do | |
| 11 11 | 
             
                end
         | 
| 12 12 |  | 
| 13 13 | 
             
                it 'should remove doctype declarations' do
         | 
| 14 | 
            -
                  @s.document('<!DOCTYPE html><html>foo</html>').must_equal "<html>foo</html | 
| 15 | 
            -
                  @s.fragment('<!DOCTYPE html>foo').must_equal 'foo'
         | 
| 14 | 
            +
                  _(@s.document('<!DOCTYPE html><html>foo</html>')).must_equal "<html>foo</html>"
         | 
| 15 | 
            +
                  _(@s.fragment('<!DOCTYPE html>foo')).must_equal 'foo'
         | 
| 16 16 | 
             
                end
         | 
| 17 17 |  | 
| 18 18 | 
             
                it 'should not allow doctype definitions in fragments' do
         | 
| 19 | 
            -
                  @s.fragment('<!DOCTYPE html><html>foo</html>')
         | 
| 19 | 
            +
                  _(@s.fragment('<!DOCTYPE html><html>foo</html>'))
         | 
| 20 20 | 
             
                    .must_equal "foo"
         | 
| 21 21 |  | 
| 22 | 
            -
                  @s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
         | 
| 22 | 
            +
                  _(@s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>'))
         | 
| 23 23 | 
             
                    .must_equal "foo"
         | 
| 24 24 |  | 
| 25 | 
            -
                  @s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
         | 
| 25 | 
            +
                  _(@s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>"))
         | 
| 26 26 | 
             
                    .must_equal "foo"
         | 
| 27 27 | 
             
                end
         | 
| 28 28 | 
             
              end
         | 
| @@ -33,38 +33,38 @@ describe 'Sanitize::Transformers::CleanDoctype' do | |
| 33 33 | 
             
                end
         | 
| 34 34 |  | 
| 35 35 | 
             
                it 'should allow doctype declarations in documents' do
         | 
| 36 | 
            -
                  @s.document('<!DOCTYPE html><html>foo</html>')
         | 
| 37 | 
            -
                    .must_equal "<!DOCTYPE html | 
| 36 | 
            +
                  _(@s.document('<!DOCTYPE html><html>foo</html>'))
         | 
| 37 | 
            +
                    .must_equal "<!DOCTYPE html><html>foo</html>"
         | 
| 38 38 |  | 
| 39 | 
            -
                  @s.document('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
         | 
| 40 | 
            -
                    .must_equal "<!DOCTYPE html | 
| 39 | 
            +
                  _(@s.document('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>'))
         | 
| 40 | 
            +
                    .must_equal "<!DOCTYPE html><html>foo</html>"
         | 
| 41 41 |  | 
| 42 | 
            -
                  @s.document("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
         | 
| 43 | 
            -
                    .must_equal "<!DOCTYPE html | 
| 42 | 
            +
                  _(@s.document("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>"))
         | 
| 43 | 
            +
                    .must_equal "<!DOCTYPE html><html>foo</html>"
         | 
| 44 44 | 
             
                end
         | 
| 45 45 |  | 
| 46 46 | 
             
                it 'should not allow obviously invalid doctype declarations in documents' do
         | 
| 47 | 
            -
                  @s.document('<!DOCTYPE blah blah blah><html>foo</html>')
         | 
| 48 | 
            -
                    .must_equal "<!DOCTYPE html | 
| 47 | 
            +
                  _(@s.document('<!DOCTYPE blah blah blah><html>foo</html>'))
         | 
| 48 | 
            +
                    .must_equal "<!DOCTYPE html><html>foo</html>"
         | 
| 49 49 |  | 
| 50 | 
            -
                  @s.document('<!DOCTYPE blah><html>foo</html>')
         | 
| 51 | 
            -
                    .must_equal "<!DOCTYPE html | 
| 50 | 
            +
                  _(@s.document('<!DOCTYPE blah><html>foo</html>'))
         | 
| 51 | 
            +
                    .must_equal "<!DOCTYPE html><html>foo</html>"
         | 
| 52 52 |  | 
| 53 | 
            -
                  @s.document('<!DOCTYPE html BLAH "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
         | 
| 54 | 
            -
                    .must_equal "<!DOCTYPE html | 
| 53 | 
            +
                  _(@s.document('<!DOCTYPE html BLAH "-//W3C//DTD HTML 4.01//EN"><html>foo</html>'))
         | 
| 54 | 
            +
                    .must_equal "<!DOCTYPE html><html>foo</html>"
         | 
| 55 55 |  | 
| 56 | 
            -
                  @s.document('<!whatever><html>foo</html>')
         | 
| 57 | 
            -
                    .must_equal "<html>foo</html | 
| 56 | 
            +
                  _(@s.document('<!whatever><html>foo</html>'))
         | 
| 57 | 
            +
                    .must_equal "<html>foo</html>"
         | 
| 58 58 | 
             
                end
         | 
| 59 59 |  | 
| 60 60 | 
             
                it 'should not allow doctype definitions in fragments' do
         | 
| 61 | 
            -
                  @s.fragment('<!DOCTYPE html><html>foo</html>')
         | 
| 61 | 
            +
                  _(@s.fragment('<!DOCTYPE html><html>foo</html>'))
         | 
| 62 62 | 
             
                    .must_equal "foo"
         | 
| 63 63 |  | 
| 64 | 
            -
                  @s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
         | 
| 64 | 
            +
                  _(@s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>'))
         | 
| 65 65 | 
             
                    .must_equal "foo"
         | 
| 66 66 |  | 
| 67 | 
            -
                  @s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
         | 
| 67 | 
            +
                  _(@s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>"))
         | 
| 68 68 | 
             
                    .must_equal "foo"
         | 
| 69 69 | 
             
                end
         | 
| 70 70 | 
             
              end
         |