sanitize 6.1.3 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/{HISTORY.md → CHANGELOG.md} +32 -14
- data/LICENSE +3 -1
- data/README.md +120 -238
- data/lib/sanitize/config/basic.rb +15 -15
- data/lib/sanitize/config/default.rb +45 -45
- data/lib/sanitize/config/relaxed.rb +136 -32
- data/lib/sanitize/config/restricted.rb +2 -2
- data/lib/sanitize/config.rb +12 -14
- data/lib/sanitize/css.rb +308 -308
- data/lib/sanitize/transformers/clean_cdata.rb +9 -9
- data/lib/sanitize/transformers/clean_comment.rb +9 -9
- data/lib/sanitize/transformers/clean_css.rb +59 -55
- data/lib/sanitize/transformers/clean_doctype.rb +15 -15
- data/lib/sanitize/transformers/clean_element.rb +220 -237
- data/lib/sanitize/version.rb +3 -1
- data/lib/sanitize.rb +38 -38
- data/test/common.rb +4 -3
- data/test/test_clean_comment.rb +26 -25
- data/test/test_clean_css.rb +14 -13
- data/test/test_clean_doctype.rb +21 -20
- data/test/test_clean_element.rb +258 -273
- data/test/test_config.rb +22 -21
- data/test/test_malicious_css.rb +20 -19
- data/test/test_malicious_html.rb +100 -99
- data/test/test_parser.rb +26 -25
- data/test/test_sanitize.rb +70 -69
- data/test/test_sanitize_css.rb +149 -114
- data/test/test_transformers.rb +81 -83
- metadata +14 -43
    
        data/test/test_config.rb
    CHANGED
    
    | @@ -1,7 +1,8 @@ | |
| 1 | 
            -
            #  | 
| 2 | 
            -
            require_relative 'common'
         | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 3 2 |  | 
| 4 | 
            -
             | 
| 3 | 
            +
            require_relative "common"
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            describe "Config" do
         | 
| 5 6 | 
             
              make_my_diffs_pretty!
         | 
| 6 7 | 
             
              parallelize_me!
         | 
| 7 8 |  | 
| @@ -9,22 +10,22 @@ describe 'Config' do | |
| 9 10 | 
             
                _(config).must_be :frozen?
         | 
| 10 11 |  | 
| 11 12 | 
             
                if Hash === config
         | 
| 12 | 
            -
                  config.each_value {|v| verify_deeply_frozen(v) }
         | 
| 13 | 
            +
                  config.each_value { |v| verify_deeply_frozen(v) }
         | 
| 13 14 | 
             
                elsif Set === config || Array === config
         | 
| 14 | 
            -
                  config.each {|v| verify_deeply_frozen(v) }
         | 
| 15 | 
            +
                  config.each { |v| verify_deeply_frozen(v) }
         | 
| 15 16 | 
             
                end
         | 
| 16 17 | 
             
              end
         | 
| 17 18 |  | 
| 18 | 
            -
              it  | 
| 19 | 
            +
              it "built-in configs should be deeply frozen" do
         | 
| 19 20 | 
             
                verify_deeply_frozen Sanitize::Config::DEFAULT
         | 
| 20 21 | 
             
                verify_deeply_frozen Sanitize::Config::BASIC
         | 
| 21 22 | 
             
                verify_deeply_frozen Sanitize::Config::RELAXED
         | 
| 22 23 | 
             
                verify_deeply_frozen Sanitize::Config::RESTRICTED
         | 
| 23 24 | 
             
              end
         | 
| 24 25 |  | 
| 25 | 
            -
              describe  | 
| 26 | 
            -
                it  | 
| 27 | 
            -
                  a = {: | 
| 26 | 
            +
              describe ".freeze_config" do
         | 
| 27 | 
            +
                it "should deeply freeze and return a configuration Hash" do
         | 
| 28 | 
            +
                  a = {one: {one_one: [0, "1", :a], one_two: false, one_three: Set.new([:a, :b, :c])}}
         | 
| 28 29 | 
             
                  b = Sanitize::Config.freeze_config(a)
         | 
| 29 30 |  | 
| 30 31 | 
             
                  _(b).must_be_same_as a
         | 
| @@ -32,11 +33,11 @@ describe 'Config' do | |
| 32 33 | 
             
                end
         | 
| 33 34 | 
             
              end
         | 
| 34 35 |  | 
| 35 | 
            -
              describe  | 
| 36 | 
            -
                it  | 
| 36 | 
            +
              describe ".merge" do
         | 
| 37 | 
            +
                it "should deeply merge a configuration Hash" do
         | 
| 37 38 | 
             
                  # Freeze to ensure that we get an error if either Hash is modified.
         | 
| 38 | 
            -
                  a = Sanitize::Config.freeze_config({: | 
| 39 | 
            -
                  b = Sanitize::Config.freeze_config({: | 
| 39 | 
            +
                  a = Sanitize::Config.freeze_config({one: {one_one: [0, "1", :a], one_two: false, one_three: Set.new([:a, :b, :c])}})
         | 
| 40 | 
            +
                  b = Sanitize::Config.freeze_config({one: {one_two: true, one_three: 3}, two: 2})
         | 
| 40 41 |  | 
| 41 42 | 
             
                  c = Sanitize::Config.merge(a, b)
         | 
| 42 43 |  | 
| @@ -44,22 +45,22 @@ describe 'Config' do | |
| 44 45 | 
             
                  _(c).wont_be_same_as b
         | 
| 45 46 |  | 
| 46 47 | 
             
                  _(c).must_equal(
         | 
| 47 | 
            -
                    : | 
| 48 | 
            -
                      : | 
| 49 | 
            -
                      : | 
| 50 | 
            -
                      : | 
| 48 | 
            +
                    one: {
         | 
| 49 | 
            +
                      one_one: [0, "1", :a],
         | 
| 50 | 
            +
                      one_two: true,
         | 
| 51 | 
            +
                      one_three: 3
         | 
| 51 52 | 
             
                    },
         | 
| 52 53 |  | 
| 53 | 
            -
                    : | 
| 54 | 
            +
                    two: 2
         | 
| 54 55 | 
             
                  )
         | 
| 55 56 |  | 
| 56 57 | 
             
                  _(c[:one]).wont_be_same_as a[:one]
         | 
| 57 58 | 
             
                  _(c[:one][:one_one]).wont_be_same_as a[:one][:one_one]
         | 
| 58 59 | 
             
                end
         | 
| 59 60 |  | 
| 60 | 
            -
                it  | 
| 61 | 
            -
                  _(proc { Sanitize::Config.merge( | 
| 62 | 
            -
                  _(proc { Sanitize::Config.merge({},  | 
| 61 | 
            +
                it "should raise an ArgumentError if either argument is not a Hash" do
         | 
| 62 | 
            +
                  _(proc { Sanitize::Config.merge("foo", {}) }).must_raise ArgumentError
         | 
| 63 | 
            +
                  _(proc { Sanitize::Config.merge({}, "foo") }).must_raise ArgumentError
         | 
| 63 64 | 
             
                end
         | 
| 64 65 | 
             
              end
         | 
| 65 66 | 
             
            end
         | 
    
        data/test/test_malicious_css.rb
    CHANGED
    
    | @@ -1,5 +1,6 @@ | |
| 1 | 
            -
            #  | 
| 2 | 
            -
             | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require_relative "common"
         | 
| 3 4 |  | 
| 4 5 | 
             
            # Miscellaneous attempts to sneak maliciously crafted CSS past Sanitize. Some of
         | 
| 5 6 | 
             
            # these are courtesy of (or inspired by) the OWASP XSS Filter Evasion Cheat
         | 
| @@ -7,7 +8,7 @@ require_relative 'common' | |
| 7 8 | 
             
            #
         | 
| 8 9 | 
             
            # https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
         | 
| 9 10 |  | 
| 10 | 
            -
            describe  | 
| 11 | 
            +
            describe "Malicious CSS" do
         | 
| 11 12 | 
             
              make_my_diffs_pretty!
         | 
| 12 13 | 
             
              parallelize_me!
         | 
| 13 14 |  | 
| @@ -15,37 +16,37 @@ describe 'Malicious CSS' do | |
| 15 16 | 
             
                @s = Sanitize::CSS.new(Sanitize::Config::RELAXED)
         | 
| 16 17 | 
             
              end
         | 
| 17 18 |  | 
| 18 | 
            -
              it  | 
| 19 | 
            -
                _(@s.properties(%[width:expr/*XSS*/ession(alert('XSS'))])) | 
| 20 | 
            -
                  must_equal  | 
| 19 | 
            +
              it "should not be possible to inject an expression by munging it with a comment" do
         | 
| 20 | 
            +
                _(@s.properties(%[width:expr/*XSS*/ession(alert('XSS'))]))
         | 
| 21 | 
            +
                  .must_equal ""
         | 
| 21 22 |  | 
| 22 | 
            -
                _(@s.properties(%[width:ex/*XSS*//*/*/pression(alert("XSS"))])) | 
| 23 | 
            -
                  must_equal  | 
| 23 | 
            +
                _(@s.properties(%[width:ex/*XSS*//*/*/pression(alert("XSS"))]))
         | 
| 24 | 
            +
                  .must_equal ""
         | 
| 24 25 | 
             
              end
         | 
| 25 26 |  | 
| 26 | 
            -
              it  | 
| 27 | 
            -
                _(@s.properties(%[width:\nexpression(alert('XSS'));])) | 
| 28 | 
            -
                  must_equal  | 
| 27 | 
            +
              it "should not be possible to inject an expression by munging it with a newline" do
         | 
| 28 | 
            +
                _(@s.properties(%[width:\nexpression(alert('XSS'));]))
         | 
| 29 | 
            +
                  .must_equal ""
         | 
| 29 30 | 
             
              end
         | 
| 30 31 |  | 
| 31 | 
            -
              it  | 
| 32 | 
            -
                _(@s.properties(%[background-image:url("javascript:alert('XSS')");])) | 
| 33 | 
            -
                  must_equal  | 
| 32 | 
            +
              it "should not allow the javascript protocol" do
         | 
| 33 | 
            +
                _(@s.properties(%[background-image:url("javascript:alert('XSS')");]))
         | 
| 34 | 
            +
                  .must_equal ""
         | 
| 34 35 |  | 
| 35 36 | 
             
                _(Sanitize.fragment(%[<div style="background-image: url(javascript:alert('XSS'))">],
         | 
| 36 | 
            -
                  Sanitize::Config::RELAXED)).must_equal  | 
| 37 | 
            +
                  Sanitize::Config::RELAXED)).must_equal "<div></div>"
         | 
| 37 38 | 
             
              end
         | 
| 38 39 |  | 
| 39 | 
            -
              it  | 
| 40 | 
            -
                _(@s.properties(%[behavior: url(xss.htc);])).must_equal  | 
| 40 | 
            +
              it "should not allow behaviors" do
         | 
| 41 | 
            +
                _(@s.properties(%[behavior: url(xss.htc);])).must_equal ""
         | 
| 41 42 | 
             
              end
         | 
| 42 43 |  | 
| 43 | 
            -
              describe  | 
| 44 | 
            +
              describe "sanitization bypass via CSS at-rule in HTML <style> element" do
         | 
| 44 45 | 
             
                before do
         | 
| 45 46 | 
             
                  @s = Sanitize.new(Sanitize::Config::RELAXED)
         | 
| 46 47 | 
             
                end
         | 
| 47 48 |  | 
| 48 | 
            -
                it  | 
| 49 | 
            +
                it "is not possible to prematurely end a <style> element" do
         | 
| 49 50 | 
             
                  assert_equal(
         | 
| 50 51 | 
             
                    %[<style>@media<\\/style><iframe srcdoc='<script>alert(document.domain)<\\/script>'>{}</style>],
         | 
| 51 52 | 
             
                    @s.fragment(%[<style>@media</sty/**/le><iframe srcdoc='<script>alert(document.domain)</script>'></style>])
         | 
    
        data/test/test_malicious_html.rb
    CHANGED
    
    | @@ -1,5 +1,6 @@ | |
| 1 | 
            -
            #  | 
| 2 | 
            -
             | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require_relative "common"
         | 
| 3 4 |  | 
| 4 5 | 
             
            # Miscellaneous attempts to sneak maliciously crafted HTML past Sanitize. Many
         | 
| 5 6 | 
             
            # of these are courtesy of (or inspired by) the OWASP XSS Filter Evasion Cheat
         | 
| @@ -7,7 +8,7 @@ require_relative 'common' | |
| 7 8 | 
             
            #
         | 
| 8 9 | 
             
            # https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
         | 
| 9 10 |  | 
| 10 | 
            -
            describe  | 
| 11 | 
            +
            describe "Malicious HTML" do
         | 
| 11 12 | 
             
              make_my_diffs_pretty!
         | 
| 12 13 | 
             
              parallelize_me!
         | 
| 13 14 |  | 
| @@ -15,114 +16,114 @@ describe 'Malicious HTML' do | |
| 15 16 | 
             
                @s = Sanitize.new(Sanitize::Config::RELAXED)
         | 
| 16 17 | 
             
              end
         | 
| 17 18 |  | 
| 18 | 
            -
              describe  | 
| 19 | 
            -
                it  | 
| 20 | 
            -
                  _(@s.fragment(%[<!--[if gte IE 4]>\n<script>alert('XSS');</script>\n<![endif]-->])) | 
| 21 | 
            -
                    must_equal  | 
| 19 | 
            +
              describe "comments" do
         | 
| 20 | 
            +
                it "should not allow script injection via conditional comments" do
         | 
| 21 | 
            +
                  _(@s.fragment(%[<!--[if gte IE 4]>\n<script>alert('XSS');</script>\n<![endif]-->]))
         | 
| 22 | 
            +
                    .must_equal ""
         | 
| 22 23 | 
             
                end
         | 
| 23 24 | 
             
              end
         | 
| 24 25 |  | 
| 25 | 
            -
              describe  | 
| 26 | 
            -
                it  | 
| 27 | 
            -
                  _(@s.fragment( | 
| 28 | 
            -
                    must_equal  | 
| 26 | 
            +
              describe "interpolation (ERB, PHP, etc.)" do
         | 
| 27 | 
            +
                it "should escape ERB-style tags" do
         | 
| 28 | 
            +
                  _(@s.fragment("<% naughty_ruby_code %>"))
         | 
| 29 | 
            +
                    .must_equal "&lt;% naughty_ruby_code %&gt;"
         | 
| 29 30 |  | 
| 30 | 
            -
                  _(@s.fragment( | 
| 31 | 
            -
                    must_equal  | 
| 31 | 
            +
                  _(@s.fragment("<%= naughty_ruby_code %>"))
         | 
| 32 | 
            +
                    .must_equal "&lt;%= naughty_ruby_code %&gt;"
         | 
| 32 33 | 
             
                end
         | 
| 33 34 |  | 
| 34 | 
            -
                it  | 
| 35 | 
            -
                  _(@s.fragment( | 
| 36 | 
            -
                    must_equal  | 
| 35 | 
            +
                it "should remove PHP-style tags" do
         | 
| 36 | 
            +
                  _(@s.fragment("<? naughtyPHPCode(); ?>"))
         | 
| 37 | 
            +
                    .must_equal ""
         | 
| 37 38 |  | 
| 38 | 
            -
                  _(@s.fragment( | 
| 39 | 
            -
                    must_equal  | 
| 39 | 
            +
                  _(@s.fragment("<?= naughtyPHPCode(); ?>"))
         | 
| 40 | 
            +
                    .must_equal ""
         | 
| 40 41 | 
             
                end
         | 
| 41 42 | 
             
              end
         | 
| 42 43 |  | 
| 43 | 
            -
              describe  | 
| 44 | 
            -
                it  | 
| 45 | 
            -
                  _(@s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>')) | 
| 46 | 
            -
                    must_equal "<html><head></head><body></body></html>"
         | 
| 44 | 
            +
              describe "<body>" do
         | 
| 45 | 
            +
                it "should not be possible to inject JS via a malformed event attribute" do
         | 
| 46 | 
            +
                  _(@s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>'))
         | 
| 47 | 
            +
                    .must_equal "<html><head></head><body></body></html>"
         | 
| 47 48 | 
             
                end
         | 
| 48 49 | 
             
              end
         | 
| 49 50 |  | 
| 50 | 
            -
              describe  | 
| 51 | 
            -
                it  | 
| 52 | 
            -
                  _(@s.fragment(% | 
| 53 | 
            -
                    must_equal  | 
| 51 | 
            +
              describe "<iframe>" do
         | 
| 52 | 
            +
                it "should not be possible to inject an iframe using an improperly closed tag" do
         | 
| 53 | 
            +
                  _(@s.fragment(%(<iframe src=http://ha.ckers.org/scriptlet.html <)))
         | 
| 54 | 
            +
                    .must_equal ""
         | 
| 54 55 | 
             
                end
         | 
| 55 56 | 
             
              end
         | 
| 56 57 |  | 
| 57 | 
            -
              describe  | 
| 58 | 
            -
                it  | 
| 59 | 
            -
                  _(@s.fragment("<img src=javascript:alert('XSS')>")).must_equal  | 
| 58 | 
            +
              describe "<img>" do
         | 
| 59 | 
            +
                it "should not be possible to inject JS via an unquoted <img> src attribute" do
         | 
| 60 | 
            +
                  _(@s.fragment("<img src=javascript:alert('XSS')>")).must_equal "<img>"
         | 
| 60 61 | 
             
                end
         | 
| 61 62 |  | 
| 62 | 
            -
                it  | 
| 63 | 
            -
                  _(@s.fragment("<img src=`javascript:alert('XSS')`>")).must_equal  | 
| 63 | 
            +
                it "should not be possible to inject JS using grave accents as <img> src delimiters" do
         | 
| 64 | 
            +
                  _(@s.fragment("<img src=`javascript:alert('XSS')`>")).must_equal "<img>"
         | 
| 64 65 | 
             
                end
         | 
| 65 66 |  | 
| 66 | 
            -
                it  | 
| 67 | 
            -
                  _(@s.fragment('<img """><script>alert("XSS")</script>">')) | 
| 68 | 
            -
                    must_equal '<img>">'
         | 
| 67 | 
            +
                it "should not be possible to inject <script> via a malformed <img> tag" do
         | 
| 68 | 
            +
                  _(@s.fragment('<img """><script>alert("XSS")</script>">'))
         | 
| 69 | 
            +
                    .must_equal '<img>">'
         | 
| 69 70 | 
             
                end
         | 
| 70 71 |  | 
| 71 | 
            -
                it  | 
| 72 | 
            -
                  _(@s.fragment( | 
| 73 | 
            -
                    must_equal  | 
| 72 | 
            +
                it "should not be possible to inject protocol-based JS" do
         | 
| 73 | 
            +
                  _(@s.fragment("<img src=javascript:alert('XSS')>"))
         | 
| 74 | 
            +
                    .must_equal "<img>"
         | 
| 74 75 |  | 
| 75 | 
            -
                  _(@s.fragment( | 
| 76 | 
            -
                    must_equal  | 
| 76 | 
            +
                  _(@s.fragment("<img src=javascript:alert('XSS')>"))
         | 
| 77 | 
            +
                    .must_equal "<img>"
         | 
| 77 78 |  | 
| 78 | 
            -
                  _(@s.fragment( | 
| 79 | 
            -
                    must_equal  | 
| 79 | 
            +
                  _(@s.fragment("<img src=javascript:alert('XSS')>"))
         | 
| 80 | 
            +
                    .must_equal "<img>"
         | 
| 80 81 |  | 
| 81 82 | 
             
                  # Encoded tab character.
         | 
| 82 | 
            -
                  _(@s.fragment(%[<img src="jav&#x09;ascript:alert('XSS');">])) | 
| 83 | 
            -
                    must_equal  | 
| 83 | 
            +
                  _(@s.fragment(%[<img src="jav&#x09;ascript:alert('XSS');">]))
         | 
| 84 | 
            +
                    .must_equal "<img>"
         | 
| 84 85 |  | 
| 85 86 | 
             
                  # Encoded newline.
         | 
| 86 | 
            -
                  _(@s.fragment(%[<img src="jav&#x0A;ascript:alert('XSS');">])) | 
| 87 | 
            -
                    must_equal  | 
| 87 | 
            +
                  _(@s.fragment(%[<img src="jav&#x0A;ascript:alert('XSS');">]))
         | 
| 88 | 
            +
                    .must_equal "<img>"
         | 
| 88 89 |  | 
| 89 90 | 
             
                  # Encoded carriage return.
         | 
| 90 | 
            -
                  _(@s.fragment(%[<img src="jav&#x0D;ascript:alert('XSS');">])) | 
| 91 | 
            -
                    must_equal  | 
| 91 | 
            +
                  _(@s.fragment(%[<img src="jav&#x0D;ascript:alert('XSS');">]))
         | 
| 92 | 
            +
                    .must_equal "<img>"
         | 
| 92 93 |  | 
| 93 94 | 
             
                  # Null byte.
         | 
| 94 | 
            -
                  _(@s.fragment(%[<img src=java\0script:alert("XSS")>])) | 
| 95 | 
            -
                    must_equal  | 
| 95 | 
            +
                  _(@s.fragment(%[<img src=java\0script:alert("XSS")>]))
         | 
| 96 | 
            +
                    .must_equal "<img>"
         | 
| 96 97 |  | 
| 97 98 | 
             
                  # Spaces plus meta char.
         | 
| 98 | 
            -
                  _(@s.fragment(%[<img src="   javascript:alert('XSS');">])) | 
| 99 | 
            -
                    must_equal  | 
| 99 | 
            +
                  _(@s.fragment(%[<img src="   javascript:alert('XSS');">]))
         | 
| 100 | 
            +
                    .must_equal "<img>"
         | 
| 100 101 |  | 
| 101 102 | 
             
                  # Mixed spaces and tabs.
         | 
| 102 | 
            -
                  _(@s.fragment(%[<img src="j\na v\tascript://alert('XSS');">])) | 
| 103 | 
            -
                    must_equal  | 
| 103 | 
            +
                  _(@s.fragment(%[<img src="j\na v\tascript://alert('XSS');">]))
         | 
| 104 | 
            +
                    .must_equal "<img>"
         | 
| 104 105 | 
             
                end
         | 
| 105 106 |  | 
| 106 | 
            -
                it  | 
| 107 | 
            -
                  _(@s.fragment(%[<img src="jav\tascript:alert('XSS');">])) | 
| 108 | 
            -
                    must_equal  | 
| 107 | 
            +
                it "should not be possible to inject protocol-based JS via whitespace" do
         | 
| 108 | 
            +
                  _(@s.fragment(%[<img src="jav\tascript:alert('XSS');">]))
         | 
| 109 | 
            +
                    .must_equal "<img>"
         | 
| 109 110 | 
             
                end
         | 
| 110 111 |  | 
| 111 | 
            -
                it  | 
| 112 | 
            -
                  _(@s.fragment(%[<img src="javascript:alert('XSS')"])) | 
| 113 | 
            -
                    must_equal  | 
| 112 | 
            +
                it "should not be possible to inject JS using a half-open <img> tag" do
         | 
| 113 | 
            +
                  _(@s.fragment(%[<img src="javascript:alert('XSS')"]))
         | 
| 114 | 
            +
                    .must_equal ""
         | 
| 114 115 | 
             
                end
         | 
| 115 116 | 
             
              end
         | 
| 116 117 |  | 
| 117 | 
            -
              describe  | 
| 118 | 
            -
                it  | 
| 119 | 
            -
                  _(@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>])) | 
| 120 | 
            -
                    must_equal  | 
| 118 | 
            +
              describe "<script>" do
         | 
| 119 | 
            +
                it "should not be possible to inject <script> using a malformed non-alphanumeric tag name" do
         | 
| 120 | 
            +
                  _(@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>]))
         | 
| 121 | 
            +
                    .must_equal ""
         | 
| 121 122 | 
             
                end
         | 
| 122 123 |  | 
| 123 | 
            -
                it  | 
| 124 | 
            -
                  _(@s.fragment(%[<<script>alert("XSS");//<</script>])) | 
| 125 | 
            -
                    must_equal  | 
| 124 | 
            +
                it "should not be possible to inject <script> via extraneous open brackets" do
         | 
| 125 | 
            +
                  _(@s.fragment(%[<<script>alert("XSS");//<</script>]))
         | 
| 126 | 
            +
                    .must_equal "&lt;"
         | 
| 126 127 | 
             
                end
         | 
| 127 128 | 
             
              end
         | 
| 128 129 |  | 
| @@ -134,29 +135,29 @@ describe 'Malicious HTML' do | |
| 134 135 | 
             
              #
         | 
| 135 136 | 
             
              # The relevant libxml2 code is here:
         | 
| 136 137 | 
             
              # <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
         | 
| 137 | 
            -
              describe  | 
| 138 | 
            +
              describe "unsafe libxml2 server-side includes in attributes" do
         | 
| 138 139 | 
             
                using_unpatched_libxml2 = Nokogiri::VersionInfo.instance.libxml2_using_system?
         | 
| 139 140 |  | 
| 140 141 | 
             
                tag_configs = [
         | 
| 141 142 | 
             
                  {
         | 
| 142 | 
            -
                    tag_name:  | 
| 143 | 
            -
                    escaped_attrs: %w[ | 
| 143 | 
            +
                    tag_name: "a",
         | 
| 144 | 
            +
                    escaped_attrs: %w[action href src name],
         | 
| 144 145 | 
             
                    unescaped_attrs: []
         | 
| 145 146 | 
             
                  },
         | 
| 146 147 |  | 
| 147 148 | 
             
                  {
         | 
| 148 | 
            -
                    tag_name:  | 
| 149 | 
            -
                    escaped_attrs: %w[ | 
| 150 | 
            -
                    unescaped_attrs: %w[ | 
| 149 | 
            +
                    tag_name: "div",
         | 
| 150 | 
            +
                    escaped_attrs: %w[action href src],
         | 
| 151 | 
            +
                    unescaped_attrs: %w[name]
         | 
| 151 152 | 
             
                  }
         | 
| 152 153 | 
             
                ]
         | 
| 153 154 |  | 
| 154 155 | 
             
                before do
         | 
| 155 156 | 
             
                  @s = Sanitize.new({
         | 
| 156 | 
            -
                    elements: %w[ | 
| 157 | 
            +
                    elements: %w[a div],
         | 
| 157 158 |  | 
| 158 159 | 
             
                    attributes: {
         | 
| 159 | 
            -
                      all: %w[ | 
| 160 | 
            +
                      all: %w[action href src name]
         | 
| 160 161 | 
             
                    }
         | 
| 161 162 | 
             
                  })
         | 
| 162 163 | 
             
                end
         | 
| @@ -167,13 +168,13 @@ describe 'Malicious HTML' do | |
| 167 168 | 
             
                  tag_config[:escaped_attrs].each do |attr_name|
         | 
| 168 169 | 
             
                    input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
         | 
| 169 170 |  | 
| 170 | 
            -
                    it  | 
| 171 | 
            +
                    it "should escape unsafe characters in attributes" do
         | 
| 171 172 | 
             
                      skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2
         | 
| 172 173 |  | 
| 173 174 | 
             
                      # This uses Nokogumbo's HTML-compliant serializer rather than
         | 
| 174 175 | 
             
                      # libxml2's.
         | 
| 175 | 
            -
                      _(@s.fragment(input)) | 
| 176 | 
            -
                        must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
         | 
| 176 | 
            +
                      _(@s.fragment(input))
         | 
| 177 | 
            +
                        .must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
         | 
| 177 178 |  | 
| 178 179 | 
             
                      # This uses the not-quite-standards-compliant libxml2 serializer via
         | 
| 179 180 | 
             
                      # Nokogiri, so the output may be a little different as of Nokogiri
         | 
| @@ -181,11 +182,11 @@ describe 'Malicious HTML' do | |
| 181 182 | 
             
                      # https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
         | 
| 182 183 | 
             
                      fragment = Nokogiri::HTML.fragment(input)
         | 
| 183 184 | 
             
                      @s.node!(fragment)
         | 
| 184 | 
            -
                      _(fragment.to_html) | 
| 185 | 
            -
                        must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
         | 
| 185 | 
            +
                      _(fragment.to_html)
         | 
| 186 | 
            +
                        .must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
         | 
| 186 187 | 
             
                    end
         | 
| 187 188 |  | 
| 188 | 
            -
                    it  | 
| 189 | 
            +
                    it "should round-trip to the same output" do
         | 
| 189 190 | 
             
                      output = @s.fragment(input)
         | 
| 190 191 | 
             
                      _(@s.fragment(output)).must_equal(output)
         | 
| 191 192 | 
             
                    end
         | 
| @@ -194,13 +195,13 @@ describe 'Malicious HTML' do | |
| 194 195 | 
             
                  tag_config[:unescaped_attrs].each do |attr_name|
         | 
| 195 196 | 
             
                    input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
         | 
| 196 197 |  | 
| 197 | 
            -
                    it  | 
| 198 | 
            +
                    it "should not escape characters unnecessarily" do
         | 
| 198 199 | 
             
                      skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2
         | 
| 199 200 |  | 
| 200 201 | 
             
                      # This uses Nokogumbo's HTML-compliant serializer rather than
         | 
| 201 202 | 
             
                      # libxml2's.
         | 
| 202 | 
            -
                      _(@s.fragment(input)) | 
| 203 | 
            -
                        must_equal(%[<#{tag_name} #{attr_name}="examp<!--&quot; onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
         | 
| 203 | 
            +
                      _(@s.fragment(input))
         | 
| 204 | 
            +
                        .must_equal(%[<#{tag_name} #{attr_name}="examp<!--&quot; onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
         | 
| 204 205 |  | 
| 205 206 | 
             
                      # This uses the not-quite-standards-compliant libxml2 serializer via
         | 
| 206 207 | 
             
                      # Nokogiri, so the output may be a little different as of Nokogiri
         | 
| @@ -208,11 +209,11 @@ describe 'Malicious HTML' do | |
| 208 209 | 
             
                      # https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
         | 
| 209 210 | 
             
                      fragment = Nokogiri::HTML.fragment(input)
         | 
| 210 211 | 
             
                      @s.node!(fragment)
         | 
| 211 | 
            -
                      _(fragment.to_html) | 
| 212 | 
            -
                        must_equal(%[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>])
         | 
| 212 | 
            +
                      _(fragment.to_html)
         | 
| 213 | 
            +
                        .must_equal(%[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>])
         | 
| 213 214 | 
             
                    end
         | 
| 214 215 |  | 
| 215 | 
            -
                    it  | 
| 216 | 
            +
                    it "should round-trip to the same output" do
         | 
| 216 217 | 
             
                      output = @s.fragment(input)
         | 
| 217 218 | 
             
                      _(@s.fragment(output)).must_equal(output)
         | 
| 218 219 | 
             
                    end
         | 
| @@ -221,14 +222,14 @@ describe 'Malicious HTML' do | |
| 221 222 | 
             
              end
         | 
| 222 223 |  | 
| 223 224 | 
             
              # https://github.com/rgrove/sanitize/security/advisories/GHSA-p4x4-rw2p-8j8m
         | 
| 224 | 
            -
              describe  | 
| 225 | 
            -
                it  | 
| 225 | 
            +
              describe "foreign content bypass in relaxed config" do
         | 
| 226 | 
            +
                it "prevents a sanitization bypass via carefully crafted foreign content" do
         | 
| 226 227 | 
             
                  %w[iframe noembed noframes noscript plaintext script style xmp].each do |tag_name|
         | 
| 227 | 
            -
                    _(@s.fragment(%[<math><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/])) | 
| 228 | 
            -
                      must_equal  | 
| 228 | 
            +
                    _(@s.fragment(%[<math><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]))
         | 
| 229 | 
            +
                      .must_equal ""
         | 
| 229 230 |  | 
| 230 | 
            -
                    _(@s.fragment(%[<svg><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/])) | 
| 231 | 
            -
                      must_equal  | 
| 231 | 
            +
                    _(@s.fragment(%[<svg><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]))
         | 
| 232 | 
            +
                      .must_equal ""
         | 
| 232 233 | 
             
                  end
         | 
| 233 234 | 
             
                end
         | 
| 234 235 | 
             
              end
         | 
| @@ -240,7 +241,7 @@ describe 'Malicious HTML' do | |
| 240 241 | 
             
              #
         | 
| 241 242 | 
             
              # Do not use the custom configs you see in these tests! If you do, you may be
         | 
| 242 243 | 
             
              # creating XSS vulnerabilities in your application.
         | 
| 243 | 
            -
              describe  | 
| 244 | 
            +
              describe "foreign content bypass in unsafe custom config that allows MathML or SVG" do
         | 
| 244 245 | 
             
                unescaped_content_elements = %w[
         | 
| 245 246 | 
             
                  noembed
         | 
| 246 247 | 
             
                  noframes
         | 
| @@ -305,33 +306,33 @@ describe 'Malicious HTML' do | |
| 305 306 | 
             
                removed_elements.each do |name|
         | 
| 306 307 | 
             
                  it "removes `<#{name}>` elements in a MathML namespace" do
         | 
| 307 308 | 
             
                    assert_equal(
         | 
| 308 | 
            -
                       | 
| 309 | 
            +
                      "<math></math>",
         | 
| 309 310 | 
             
                      @s.fragment("<math><#{name}><img src=x onerror=alert(1)></#{name}>")
         | 
| 310 311 | 
             
                    )
         | 
| 311 312 | 
             
                  end
         | 
| 312 313 |  | 
| 313 314 | 
             
                  it "removes `<#{name}>` elements in an SVG namespace" do
         | 
| 314 315 | 
             
                    assert_equal(
         | 
| 315 | 
            -
                       | 
| 316 | 
            +
                      "<svg></svg>",
         | 
| 316 317 | 
             
                      @s.fragment("<svg><#{name}><img src=x onerror=alert(1)></#{name}>")
         | 
| 317 318 | 
             
                    )
         | 
| 318 319 | 
             
                  end
         | 
| 319 320 | 
             
                end
         | 
| 320 321 | 
             
              end
         | 
| 321 322 |  | 
| 322 | 
            -
              describe  | 
| 323 | 
            +
              describe "sanitization bypass by exploiting scripting-disabled <noscript> behavior" do
         | 
| 323 324 | 
             
                before do
         | 
| 324 325 | 
             
                  @s = Sanitize.new(
         | 
| 325 326 | 
             
                    Sanitize::Config.merge(
         | 
| 326 327 | 
             
                      Sanitize::Config::RELAXED,
         | 
| 327 | 
            -
                      elements: Sanitize::Config::RELAXED[:elements] + [ | 
| 328 | 
            +
                      elements: Sanitize::Config::RELAXED[:elements] + ["noscript"]
         | 
| 328 329 | 
             
                    )
         | 
| 329 330 | 
             
                  )
         | 
| 330 331 | 
             
                end
         | 
| 331 332 |  | 
| 332 | 
            -
                it  | 
| 333 | 
            +
                it "is prevented by removing `<noscript>` elements regardless of the allowlist" do
         | 
| 333 334 | 
             
                  assert_equal(
         | 
| 334 | 
            -
                     | 
| 335 | 
            +
                    "",
         | 
| 335 336 | 
             
                    @s.fragment(%[<noscript><div id='</noscript><img src=x onerror=alert(1)> '>])
         | 
| 336 337 | 
             
                  )
         | 
| 337 338 | 
             
                end
         | 
    
        data/test/test_parser.rb
    CHANGED
    
    | @@ -1,36 +1,37 @@ | |
| 1 | 
            -
            #  | 
| 2 | 
            -
            require_relative 'common'
         | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 3 2 |  | 
| 4 | 
            -
             | 
| 3 | 
            +
            require_relative "common"
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            describe "Parser" do
         | 
| 5 6 | 
             
              make_my_diffs_pretty!
         | 
| 6 7 | 
             
              parallelize_me!
         | 
| 7 8 |  | 
| 8 | 
            -
              it  | 
| 9 | 
            +
              it "should translate valid entities into characters" do
         | 
| 9 10 | 
             
                _(Sanitize.fragment("&apos;&eacute;&amp;")).must_equal("'é&amp;")
         | 
| 10 11 | 
             
              end
         | 
| 11 12 |  | 
| 12 | 
            -
              it  | 
| 13 | 
            -
                _(Sanitize.fragment( | 
| 13 | 
            +
              it "should translate orphaned ampersands into entities" do
         | 
| 14 | 
            +
                _(Sanitize.fragment("at&t")).must_equal("at&amp;t")
         | 
| 14 15 | 
             
              end
         | 
| 15 16 |  | 
| 16 | 
            -
              it  | 
| 17 | 
            -
                _(Sanitize.fragment("<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>", : | 
| 17 | 
            +
              it "should not add newlines after tags when serializing a fragment" do
         | 
| 18 | 
            +
                _(Sanitize.fragment("<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>", elements: ["div", "p"]))
         | 
| 18 19 | 
             
                  .must_equal "<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>"
         | 
| 19 20 | 
             
              end
         | 
| 20 21 |  | 
| 21 | 
            -
              it  | 
| 22 | 
            -
                _(Sanitize.fragment( | 
| 23 | 
            -
                _(Sanitize.fragment( | 
| 22 | 
            +
              it "should not have the Nokogiri 1.4.2+ unterminated script/style element bug" do
         | 
| 23 | 
            +
                _(Sanitize.fragment("foo <script>bar")).must_equal "foo "
         | 
| 24 | 
            +
                _(Sanitize.fragment("foo <style>bar")).must_equal "foo "
         | 
| 24 25 | 
             
              end
         | 
| 25 26 |  | 
| 26 27 | 
             
              it 'ambiguous non-tag brackets like "1 > 2 and 2 < 1" should be parsed correctly' do
         | 
| 27 | 
            -
                _(Sanitize.fragment( | 
| 28 | 
            -
                _(Sanitize.fragment( | 
| 28 | 
            +
                _(Sanitize.fragment("1 > 2 and 2 < 1")).must_equal "1 &gt; 2 and 2 &lt; 1"
         | 
| 29 | 
            +
                _(Sanitize.fragment("OMG HAPPY BIRTHDAY! *<:-D")).must_equal "OMG HAPPY BIRTHDAY! *&lt;:-D"
         | 
| 29 30 | 
             
              end
         | 
| 30 31 |  | 
| 31 | 
            -
              describe  | 
| 32 | 
            -
                it  | 
| 33 | 
            -
                  html = % | 
| 32 | 
            +
              describe "when siblings are added after a node during traversal" do
         | 
| 33 | 
            +
                it "the added siblings should be traversed" do
         | 
| 34 | 
            +
                  html = %(
         | 
| 34 35 | 
             
                    <div id="one">
         | 
| 35 36 | 
             
                        <div id="one_one">
         | 
| 36 37 | 
             
                            <div id="one_one_one"></div>
         | 
| @@ -42,20 +43,20 @@ describe 'Parser' do | |
| 42 43 | 
             
                        <div id="two_two"></div>
         | 
| 43 44 | 
             
                    </div>
         | 
| 44 45 | 
             
                    <div id="three"></div>
         | 
| 45 | 
            -
                   | 
| 46 | 
            +
                  )
         | 
| 46 47 |  | 
| 47 48 | 
             
                  siblings = []
         | 
| 48 49 |  | 
| 49 | 
            -
                  Sanitize.fragment(html, : | 
| 50 | 
            -
             | 
| 50 | 
            +
                  Sanitize.fragment(html, transformers: ->(env) {
         | 
| 51 | 
            +
                    name = env[:node].name
         | 
| 51 52 |  | 
| 52 | 
            -
             | 
| 53 | 
            -
             | 
| 54 | 
            -
             | 
| 55 | 
            -
             | 
| 56 | 
            -
             | 
| 53 | 
            +
                    if name == "div"
         | 
| 54 | 
            +
                      env[:node].add_next_sibling('<b id="added_' + env[:node]["id"] + '">')
         | 
| 55 | 
            +
                    elsif name == "b"
         | 
| 56 | 
            +
                      siblings << env[:node][:id]
         | 
| 57 | 
            +
                    end
         | 
| 57 58 |  | 
| 58 | 
            -
             | 
| 59 | 
            +
                    {node_allowlist: [env[:node]]}
         | 
| 59 60 | 
             
                  })
         | 
| 60 61 |  | 
| 61 62 | 
             
                  # All siblings should be traversed, and in the order added.
         |