sanitize 4.6.6 → 5.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/HISTORY.md +147 -16
- data/README.md +61 -41
- data/lib/sanitize.rb +37 -61
- data/lib/sanitize/config/default.rb +10 -4
- data/lib/sanitize/css.rb +2 -2
- data/lib/sanitize/transformers/clean_comment.rb +1 -1
- data/lib/sanitize/transformers/clean_css.rb +3 -3
- data/lib/sanitize/transformers/clean_doctype.rb +1 -1
- data/lib/sanitize/transformers/clean_element.rb +54 -13
- data/lib/sanitize/version.rb +1 -1
- data/test/common.rb +0 -31
- data/test/test_clean_comment.rb +1 -5
- data/test/test_clean_css.rb +1 -1
- data/test/test_clean_doctype.rb +8 -8
- data/test/test_clean_element.rb +121 -26
- data/test/test_malicious_html.rb +50 -7
- data/test/test_parser.rb +3 -32
- data/test/test_sanitize.rb +103 -18
- data/test/test_sanitize_css.rb +43 -16
- data/test/test_transformers.rb +29 -23
- metadata +16 -18
- data/test/test_unicode.rb +0 -95
    
        data/test/test_clean_css.rb
    CHANGED
    
    | @@ -13,7 +13,7 @@ describe 'Sanitize::Transformers::CSS::CleanAttribute' do | |
| 13 13 | 
             
                @s.fragment(%[
         | 
| 14 14 | 
             
                  <div style="color: #fff; width: expression(alert(1)); /* <-- evil! */"></div>
         | 
| 15 15 | 
             
                ].strip).must_equal %[
         | 
| 16 | 
            -
                  <div style="color: #fff;  /*  | 
| 16 | 
            +
                  <div style="color: #fff;  /* <-- evil! */"></div>
         | 
| 17 17 | 
             
                ].strip
         | 
| 18 18 | 
             
              end
         | 
| 19 19 |  | 
    
        data/test/test_clean_doctype.rb
    CHANGED
    
    | @@ -11,7 +11,7 @@ describe 'Sanitize::Transformers::CleanDoctype' do | |
| 11 11 | 
             
                end
         | 
| 12 12 |  | 
| 13 13 | 
             
                it 'should remove doctype declarations' do
         | 
| 14 | 
            -
                  @s.document('<!DOCTYPE html><html>foo</html>').must_equal "<html>foo</html | 
| 14 | 
            +
                  @s.document('<!DOCTYPE html><html>foo</html>').must_equal "<html>foo</html>"
         | 
| 15 15 | 
             
                  @s.fragment('<!DOCTYPE html>foo').must_equal 'foo'
         | 
| 16 16 | 
             
                end
         | 
| 17 17 |  | 
| @@ -34,27 +34,27 @@ describe 'Sanitize::Transformers::CleanDoctype' do | |
| 34 34 |  | 
| 35 35 | 
             
                it 'should allow doctype declarations in documents' do
         | 
| 36 36 | 
             
                  @s.document('<!DOCTYPE html><html>foo</html>')
         | 
| 37 | 
            -
                    .must_equal "<!DOCTYPE html | 
| 37 | 
            +
                    .must_equal "<!DOCTYPE html><html>foo</html>"
         | 
| 38 38 |  | 
| 39 39 | 
             
                  @s.document('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
         | 
| 40 | 
            -
                    .must_equal "<!DOCTYPE html | 
| 40 | 
            +
                    .must_equal "<!DOCTYPE html><html>foo</html>"
         | 
| 41 41 |  | 
| 42 42 | 
             
                  @s.document("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n    \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
         | 
| 43 | 
            -
                    .must_equal "<!DOCTYPE html | 
| 43 | 
            +
                    .must_equal "<!DOCTYPE html><html>foo</html>"
         | 
| 44 44 | 
             
                end
         | 
| 45 45 |  | 
| 46 46 | 
             
                it 'should not allow obviously invalid doctype declarations in documents' do
         | 
| 47 47 | 
             
                  @s.document('<!DOCTYPE blah blah blah><html>foo</html>')
         | 
| 48 | 
            -
                    .must_equal "<!DOCTYPE html | 
| 48 | 
            +
                    .must_equal "<!DOCTYPE html><html>foo</html>"
         | 
| 49 49 |  | 
| 50 50 | 
             
                  @s.document('<!DOCTYPE blah><html>foo</html>')
         | 
| 51 | 
            -
                    .must_equal "<!DOCTYPE html | 
| 51 | 
            +
                    .must_equal "<!DOCTYPE html><html>foo</html>"
         | 
| 52 52 |  | 
| 53 53 | 
             
                  @s.document('<!DOCTYPE html BLAH "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
         | 
| 54 | 
            -
                    .must_equal "<!DOCTYPE html | 
| 54 | 
            +
                    .must_equal "<!DOCTYPE html><html>foo</html>"
         | 
| 55 55 |  | 
| 56 56 | 
             
                  @s.document('<!whatever><html>foo</html>')
         | 
| 57 | 
            -
                    .must_equal "<html>foo</html | 
| 57 | 
            +
                    .must_equal "<html>foo</html>"
         | 
| 58 58 | 
             
                end
         | 
| 59 59 |  | 
| 60 60 | 
             
                it 'should not allow doctype definitions in fragments' do
         | 
    
        data/test/test_clean_element.rb
    CHANGED
    
    | @@ -8,25 +8,22 @@ describe 'Sanitize::Transformers::CleanElement' do | |
| 8 8 | 
             
              strings = {
         | 
| 9 9 | 
             
                :basic => {
         | 
| 10 10 | 
             
                  :html       => '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <style>.foo { color: #fff; }</style> <script>alert("hello world");</script>',
         | 
| 11 | 
            -
             | 
| 12 | 
            -
                  : | 
| 13 | 
            -
                  : | 
| 14 | 
            -
                  : | 
| 15 | 
            -
                  :relaxed    => '<b>Lorem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <style>.foo { color: #fff; }</style> alert("hello world");'
         | 
| 11 | 
            +
                  :default    => 'Lorem ipsum dolor sit amet  ',
         | 
| 12 | 
            +
                  :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet  ',
         | 
| 13 | 
            +
                  :basic      => '<b>Lorem</b> <a href="pants" rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet  ',
         | 
| 14 | 
            +
                  :relaxed    => '<b>Lorem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <style>.foo { color: #fff; }</style> '
         | 
| 16 15 | 
             
                },
         | 
| 17 16 |  | 
| 18 17 | 
             
                :malformed => {
         | 
| 19 18 | 
             
                  :html       => 'Lo<!-- comment -->rem</b> <a href=pants title="foo>ipsum <a href="http://foo.com/"><strong>dolor</a></strong> sit<br/>amet <script>alert("hello world");',
         | 
| 20 | 
            -
             | 
| 21 | 
            -
                  : | 
| 22 | 
            -
                  : | 
| 23 | 
            -
                  : | 
| 24 | 
            -
                  :relaxed    => 'Lorem <a href="pants" title="foo>ipsum <a href="><strong>dolor</strong></a> sit<br>amet alert("hello world");',
         | 
| 19 | 
            +
                  :default    => 'Lorem dolor sit amet ',
         | 
| 20 | 
            +
                  :restricted => 'Lorem <strong>dolor</strong> sit amet ',
         | 
| 21 | 
            +
                  :basic      => 'Lorem <a href="pants" rel="nofollow"><strong>dolor</strong></a> sit<br>amet ',
         | 
| 22 | 
            +
                  :relaxed    => 'Lorem <a href="pants" title="foo>ipsum <a href="><strong>dolor</strong></a> sit<br>amet ',
         | 
| 25 23 | 
             
                },
         | 
| 26 24 |  | 
| 27 25 | 
             
                :unclosed => {
         | 
| 28 26 | 
             
                  :html       => '<p>a</p><blockquote>b',
         | 
| 29 | 
            -
             | 
| 30 27 | 
             
                  :default    => ' a  b ',
         | 
| 31 28 | 
             
                  :restricted => ' a  b ',
         | 
| 32 29 | 
             
                  :basic      => '<p>a</p><blockquote>b</blockquote>',
         | 
| @@ -35,7 +32,6 @@ describe 'Sanitize::Transformers::CleanElement' do | |
| 35 32 |  | 
| 36 33 | 
             
                :malicious => {
         | 
| 37 34 | 
             
                  :html       => '<b>Lo<!-- comment -->rem</b> <a href="javascript:pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert("hello world");</script>',
         | 
| 38 | 
            -
             | 
| 39 35 | 
             
                  :default    => 'Lorem ipsum dolor sit amet <script>alert("hello world");',
         | 
| 40 36 | 
             
                  :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet <script>alert("hello world");',
         | 
| 41 37 | 
             
                  :basic      => '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet <script>alert("hello world");',
         | 
| @@ -166,15 +162,15 @@ describe 'Sanitize::Transformers::CleanElement' do | |
| 166 162 | 
             
              }
         | 
| 167 163 |  | 
| 168 164 | 
             
              describe 'Default config' do
         | 
| 169 | 
            -
                it 'should remove non- | 
| 165 | 
            +
                it 'should remove non-allowlisted elements, leaving safe contents behind' do
         | 
| 170 166 | 
             
                  Sanitize.fragment('foo <b>bar</b> <strong><a href="#a">baz</a></strong> quux')
         | 
| 171 167 | 
             
                    .must_equal 'foo bar baz quux'
         | 
| 172 168 |  | 
| 173 169 | 
             
                  Sanitize.fragment('<script>alert("<xss>");</script>')
         | 
| 174 | 
            -
                    .must_equal ' | 
| 170 | 
            +
                    .must_equal ''
         | 
| 175 171 |  | 
| 176 172 | 
             
                  Sanitize.fragment('<<script>script>alert("<xss>");</<script>>')
         | 
| 177 | 
            -
                    .must_equal '< | 
| 173 | 
            +
                    .must_equal '<'
         | 
| 178 174 |  | 
| 179 175 | 
             
                  Sanitize.fragment('< script <>> alert("<xss>");</script>')
         | 
| 180 176 | 
             
                    .must_equal '< script <>> alert("");'
         | 
| @@ -196,6 +192,56 @@ describe 'Sanitize::Transformers::CleanElement' do | |
| 196 192 | 
             
                    .must_equal ''
         | 
| 197 193 | 
             
                end
         | 
| 198 194 |  | 
| 195 | 
            +
                it 'should not preserve the content of removed `iframe` elements' do
         | 
| 196 | 
            +
                  Sanitize.fragment('<iframe>hello! <script>alert(0)</script></iframe>')
         | 
| 197 | 
            +
                    .must_equal ''
         | 
| 198 | 
            +
                end
         | 
| 199 | 
            +
             | 
| 200 | 
            +
                it 'should not preserve the content of removed `math` elements' do
         | 
| 201 | 
            +
                  Sanitize.fragment('<math>hello! <script>alert(0)</script></math>')
         | 
| 202 | 
            +
                    .must_equal ''
         | 
| 203 | 
            +
                end
         | 
| 204 | 
            +
             | 
| 205 | 
            +
                it 'should not preserve the content of removed `noembed` elements' do
         | 
| 206 | 
            +
                  Sanitize.fragment('<noembed>hello! <script>alert(0)</script></noembed>')
         | 
| 207 | 
            +
                    .must_equal ''
         | 
| 208 | 
            +
                end
         | 
| 209 | 
            +
             | 
| 210 | 
            +
                it 'should not preserve the content of removed `noframes` elements' do
         | 
| 211 | 
            +
                  Sanitize.fragment('<noframes>hello! <script>alert(0)</script></noframes>')
         | 
| 212 | 
            +
                    .must_equal ''
         | 
| 213 | 
            +
                end
         | 
| 214 | 
            +
             | 
| 215 | 
            +
                it 'should not preserve the content of removed `noscript` elements' do
         | 
| 216 | 
            +
                  Sanitize.fragment('<noscript>hello! <script>alert(0)</script></noscript>')
         | 
| 217 | 
            +
                    .must_equal ''
         | 
| 218 | 
            +
                end
         | 
| 219 | 
            +
             | 
| 220 | 
            +
                it 'should not preserve the content of removed `plaintext` elements' do
         | 
| 221 | 
            +
                  Sanitize.fragment('<plaintext>hello! <script>alert(0)</script>')
         | 
| 222 | 
            +
                    .must_equal ''
         | 
| 223 | 
            +
                end
         | 
| 224 | 
            +
             | 
| 225 | 
            +
                it 'should not preserve the content of removed `script` elements' do
         | 
| 226 | 
            +
                  Sanitize.fragment('<script>hello! <script>alert(0)</script></script>')
         | 
| 227 | 
            +
                    .must_equal ''
         | 
| 228 | 
            +
                end
         | 
| 229 | 
            +
             | 
| 230 | 
            +
                it 'should not preserve the content of removed `style` elements' do
         | 
| 231 | 
            +
                  Sanitize.fragment('<style>hello! <script>alert(0)</script></style>')
         | 
| 232 | 
            +
                    .must_equal ''
         | 
| 233 | 
            +
                end
         | 
| 234 | 
            +
             | 
| 235 | 
            +
                it 'should not preserve the content of removed `svg` elements' do
         | 
| 236 | 
            +
                  Sanitize.fragment('<svg>hello! <script>alert(0)</script></svg>')
         | 
| 237 | 
            +
                    .must_equal ''
         | 
| 238 | 
            +
                end
         | 
| 239 | 
            +
             | 
| 240 | 
            +
                it 'should not preserve the content of removed `xmp` elements' do
         | 
| 241 | 
            +
                  Sanitize.fragment('<xmp>hello! <script>alert(0)</script></xmp>')
         | 
| 242 | 
            +
                    .must_equal ''
         | 
| 243 | 
            +
                end
         | 
| 244 | 
            +
             | 
| 199 245 | 
             
                strings.each do |name, data|
         | 
| 200 246 | 
             
                  it "should clean #{name} HTML" do
         | 
| 201 247 | 
             
                    Sanitize.fragment(data[:html]).must_equal(data[:default])
         | 
| @@ -234,7 +280,7 @@ describe 'Sanitize::Transformers::CleanElement' do | |
| 234 280 |  | 
| 235 281 | 
             
                it 'should not choke on valueless attributes' do
         | 
| 236 282 | 
             
                  @s.fragment('foo <a href>foo</a> bar')
         | 
| 237 | 
            -
                    .must_equal 'foo <a href rel="nofollow">foo</a> bar'
         | 
| 283 | 
            +
                    .must_equal 'foo <a href="" rel="nofollow">foo</a> bar'
         | 
| 238 284 | 
             
                end
         | 
| 239 285 |  | 
| 240 286 | 
             
                it 'should downcase attribute names' do
         | 
| @@ -262,7 +308,7 @@ describe 'Sanitize::Transformers::CleanElement' do | |
| 262 308 |  | 
| 263 309 | 
             
                it 'should encode special chars in attribute values' do
         | 
| 264 310 | 
             
                  @s.fragment('<a href="http://example.com" title="<b>éxamples</b> & things">foo</a>')
         | 
| 265 | 
            -
                    .must_equal '<a href="http://example.com" title=" | 
| 311 | 
            +
                    .must_equal '<a href="http://example.com" title="<b>éxamples</b> & things">foo</a>'
         | 
| 266 312 | 
             
                end
         | 
| 267 313 |  | 
| 268 314 | 
             
                strings.each do |name, data|
         | 
| @@ -279,7 +325,7 @@ describe 'Sanitize::Transformers::CleanElement' do | |
| 279 325 | 
             
              end
         | 
| 280 326 |  | 
| 281 327 | 
             
              describe 'Custom configs' do
         | 
| 282 | 
            -
                it 'should allow attributes on all elements if  | 
| 328 | 
            +
                it 'should allow attributes on all elements if allowlisted under :all' do
         | 
| 283 329 | 
             
                  input = '<p class="foo">bar</p>'
         | 
| 284 330 |  | 
| 285 331 | 
             
                  Sanitize.fragment(input).must_equal ' bar '
         | 
| @@ -300,7 +346,7 @@ describe 'Sanitize::Transformers::CleanElement' do | |
| 300 346 | 
             
                  }).must_equal input
         | 
| 301 347 | 
             
                end
         | 
| 302 348 |  | 
| 303 | 
            -
                it "should not allow relative URLs when relative URLs aren't  | 
| 349 | 
            +
                it "should not allow relative URLs when relative URLs aren't allowlisted" do
         | 
| 304 350 | 
             
                  input = '<a href="/foo/bar">Link</a>'
         | 
| 305 351 |  | 
| 306 352 | 
             
                  Sanitize.fragment(input,
         | 
| @@ -344,16 +390,30 @@ describe 'Sanitize::Transformers::CleanElement' do | |
| 344 390 | 
             
                  ).must_equal 'foo bar   '
         | 
| 345 391 | 
             
                end
         | 
| 346 392 |  | 
| 347 | 
            -
                it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as strings' do
         | 
| 348 | 
            -
                  Sanitize.fragment('foo bar <div>baz<span>quux</span><script>alert("hello!");</script></div>',
         | 
| 393 | 
            +
                it 'should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as strings' do
         | 
| 394 | 
            +
                  Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
         | 
| 349 395 | 
             
                    :remove_contents => ['script', 'span']
         | 
| 350 | 
            -
                  ).must_equal 'foo bar  baz '
         | 
| 396 | 
            +
                  ).must_equal 'foo bar  baz hi '
         | 
| 397 | 
            +
             | 
| 398 | 
            +
                  Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
         | 
| 399 | 
            +
                    :remove_contents => Set.new(['script', 'span'])
         | 
| 400 | 
            +
                  ).must_equal 'foo bar  baz hi '
         | 
| 351 401 | 
             
                end
         | 
| 352 402 |  | 
| 353 | 
            -
                it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as symbols' do
         | 
| 354 | 
            -
                  Sanitize.fragment('foo bar <div>baz<span>quux</span><script>alert("hello!");</script></div>',
         | 
| 403 | 
            +
                it 'should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as symbols' do
         | 
| 404 | 
            +
                  Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
         | 
| 355 405 | 
             
                    :remove_contents => [:script, :span]
         | 
| 356 | 
            -
                  ).must_equal 'foo bar  baz '
         | 
| 406 | 
            +
                  ).must_equal 'foo bar  baz hi '
         | 
| 407 | 
            +
             | 
| 408 | 
            +
                  Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
         | 
| 409 | 
            +
                    :remove_contents => Set.new([:script, :span])
         | 
| 410 | 
            +
                  ).must_equal 'foo bar  baz hi '
         | 
| 411 | 
            +
                end
         | 
| 412 | 
            +
             | 
| 413 | 
            +
                it 'should remove the contents of allowlisted iframes' do
         | 
| 414 | 
            +
                  Sanitize.fragment('<iframe>hi <script>hello</script></iframe>',
         | 
| 415 | 
            +
                    :elements => ['iframe']
         | 
| 416 | 
            +
                  ).must_equal '<iframe></iframe>'
         | 
| 357 417 | 
             
                end
         | 
| 358 418 |  | 
| 359 419 | 
             
                it 'should not allow arbitrary HTML5 data attributes by default' do
         | 
| @@ -413,7 +473,7 @@ describe 'Sanitize::Transformers::CleanElement' do | |
| 413 473 | 
             
                  s.fragment('foo<br>bar<br>baz').must_equal "foo\nbar\nbaz"
         | 
| 414 474 | 
             
                end
         | 
| 415 475 |  | 
| 416 | 
            -
                it ' | 
| 476 | 
            +
                it 'should handle protocols correctly regardless of case' do
         | 
| 417 477 | 
             
                  input = '<a href="hTTpS://foo.com/">Text</a>'
         | 
| 418 478 |  | 
| 419 479 | 
             
                  Sanitize.fragment(input, {
         | 
| @@ -430,5 +490,40 @@ describe 'Sanitize::Transformers::CleanElement' do | |
| 430 490 | 
             
                    :protocols  => {'a' => {'href' => ['https']}}
         | 
| 431 491 | 
             
                  }).must_equal "<a>Text</a>"
         | 
| 432 492 | 
             
                end
         | 
| 493 | 
            +
             | 
| 494 | 
            +
                it 'should prevent `<meta>` tags from being used to set a non-UTF-8 charset' do
         | 
| 495 | 
            +
                  Sanitize.document('<html><head><meta charset="utf-8"></head><body>Howdy!</body></html>',
         | 
| 496 | 
            +
                    :elements   => %w[html head meta body],
         | 
| 497 | 
            +
                    :attributes => {'meta' => ['charset']}
         | 
| 498 | 
            +
                  ).must_equal "<html><head><meta charset=\"utf-8\"></head><body>Howdy!</body></html>"
         | 
| 499 | 
            +
             | 
| 500 | 
            +
                  Sanitize.document('<html><meta charset="utf-8">Howdy!</html>',
         | 
| 501 | 
            +
                    :elements   => %w[html meta],
         | 
| 502 | 
            +
                    :attributes => {'meta' => ['charset']}
         | 
| 503 | 
            +
                  ).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
         | 
| 504 | 
            +
             | 
| 505 | 
            +
                  Sanitize.document('<html><meta charset="us-ascii">Howdy!</html>',
         | 
| 506 | 
            +
                    :elements   => %w[html meta],
         | 
| 507 | 
            +
                    :attributes => {'meta' => ['charset']}
         | 
| 508 | 
            +
                  ).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
         | 
| 509 | 
            +
             | 
| 510 | 
            +
                  Sanitize.document('<html><meta http-equiv="content-type" content=" text/html; charset=us-ascii">Howdy!</html>',
         | 
| 511 | 
            +
                    :elements   => %w[html meta],
         | 
| 512 | 
            +
                    :attributes => {'meta' => %w[content http-equiv]}
         | 
| 513 | 
            +
                  ).must_equal "<html><meta http-equiv=\"content-type\" content=\" text/html;charset=utf-8\">Howdy!</html>"
         | 
| 514 | 
            +
             | 
| 515 | 
            +
                  Sanitize.document('<html><meta http-equiv="Content-Type" content="text/plain;charset = us-ascii">Howdy!</html>',
         | 
| 516 | 
            +
                    :elements   => %w[html meta],
         | 
| 517 | 
            +
                    :attributes => {'meta' => %w[content http-equiv]}
         | 
| 518 | 
            +
                  ).must_equal "<html><meta http-equiv=\"Content-Type\" content=\"text/plain;charset=utf-8\">Howdy!</html>"
         | 
| 519 | 
            +
                end
         | 
| 520 | 
            +
             | 
| 521 | 
            +
                it 'should not modify `<meta>` tags that already set a UTF-8 charset' do
         | 
| 522 | 
            +
                  Sanitize.document('<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>Howdy!</body></html>',
         | 
| 523 | 
            +
                    :elements   => %w[html head meta body],
         | 
| 524 | 
            +
                    :attributes => {'meta' => %w[content http-equiv]}
         | 
| 525 | 
            +
                  ).must_equal "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"></head><body>Howdy!</body></html>"
         | 
| 526 | 
            +
                end
         | 
| 527 | 
            +
             | 
| 433 528 | 
             
              end
         | 
| 434 529 | 
             
            end
         | 
    
        data/test/test_malicious_html.rb
    CHANGED
    
    | @@ -43,7 +43,7 @@ describe 'Malicious HTML' do | |
| 43 43 | 
             
              describe '<body>' do
         | 
| 44 44 | 
             
                it 'should not be possible to inject JS via a malformed event attribute' do
         | 
| 45 45 | 
             
                  @s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>').
         | 
| 46 | 
            -
                    must_equal "<html><head></head><body></body></html | 
| 46 | 
            +
                    must_equal "<html><head></head><body></body></html>"
         | 
| 47 47 | 
             
                end
         | 
| 48 48 | 
             
              end
         | 
| 49 49 |  | 
| @@ -65,7 +65,7 @@ describe 'Malicious HTML' do | |
| 65 65 |  | 
| 66 66 | 
             
                it 'should not be possible to inject <script> via a malformed <img> tag' do
         | 
| 67 67 | 
             
                  @s.fragment('<img """><script>alert("XSS")</script>">').
         | 
| 68 | 
            -
                    must_equal '<img> | 
| 68 | 
            +
                    must_equal '<img>">'
         | 
| 69 69 | 
             
                end
         | 
| 70 70 |  | 
| 71 71 | 
             
                it 'should not be possible to inject protocol-based JS' do
         | 
| @@ -117,24 +117,26 @@ describe 'Malicious HTML' do | |
| 117 117 | 
             
              describe '<script>' do
         | 
| 118 118 | 
             
                it 'should not be possible to inject <script> using a malformed non-alphanumeric tag name' do
         | 
| 119 119 | 
             
                  @s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>]).
         | 
| 120 | 
            -
                    must_equal ' | 
| 120 | 
            +
                    must_equal ''
         | 
| 121 121 | 
             
                end
         | 
| 122 122 |  | 
| 123 123 | 
             
                it 'should not be possible to inject <script> via extraneous open brackets' do
         | 
| 124 124 | 
             
                  @s.fragment(%[<<script>alert("XSS");//<</script>]).
         | 
| 125 | 
            -
                    must_equal '< | 
| 125 | 
            +
                    must_equal '<'
         | 
| 126 126 | 
             
                end
         | 
| 127 127 | 
             
              end
         | 
| 128 128 |  | 
| 129 129 | 
             
              # libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
         | 
| 130 130 | 
             
              # attempt to preserve server-side includes. This can result in XSS since an
         | 
| 131 | 
            -
              # unescaped double quote can allow an attacker to inject a non- | 
| 131 | 
            +
              # unescaped double quote can allow an attacker to inject a non-allowlisted
         | 
| 132 132 | 
             
              # attribute. Sanitize works around this by implementing its own escaping for
         | 
| 133 133 | 
             
              # affected attributes.
         | 
| 134 134 | 
             
              #
         | 
| 135 135 | 
             
              # The relevant libxml2 code is here:
         | 
| 136 136 | 
             
              # <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
         | 
| 137 137 | 
             
              describe 'unsafe libxml2 server-side includes in attributes' do
         | 
| 138 | 
            +
                using_unpatched_libxml2 = Nokogiri::VersionInfo.instance.libxml2_using_system?
         | 
| 139 | 
            +
             | 
| 138 140 | 
             
                tag_configs = [
         | 
| 139 141 | 
             
                  {
         | 
| 140 142 | 
             
                    tag_name: 'a',
         | 
| @@ -166,7 +168,21 @@ describe 'Malicious HTML' do | |
| 166 168 | 
             
                    input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
         | 
| 167 169 |  | 
| 168 170 | 
             
                    it 'should escape unsafe characters in attributes' do
         | 
| 169 | 
            -
                       | 
| 171 | 
            +
                      skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2
         | 
| 172 | 
            +
             | 
| 173 | 
            +
                      # This uses Nokogumbo's HTML-compliant serializer rather than
         | 
| 174 | 
            +
                      # libxml2's.
         | 
| 175 | 
            +
                      @s.fragment(input).
         | 
| 176 | 
            +
                        must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
         | 
| 177 | 
            +
             | 
| 178 | 
            +
                      # This uses the not-quite-standards-compliant libxml2 serializer via
         | 
| 179 | 
            +
                      # Nokogiri, so the output may be a little different as of Nokogiri
         | 
| 180 | 
            +
                      # 1.10.2 when using Nokogiri's vendored libxml2 due to this patch:
         | 
| 181 | 
            +
                      # https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
         | 
| 182 | 
            +
                      fragment = Nokogiri::HTML.fragment(input)
         | 
| 183 | 
            +
                      @s.node!(fragment)
         | 
| 184 | 
            +
                      fragment.to_html.
         | 
| 185 | 
            +
                        must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
         | 
| 170 186 | 
             
                    end
         | 
| 171 187 |  | 
| 172 188 | 
             
                    it 'should round-trip to the same output' do
         | 
| @@ -179,7 +195,21 @@ describe 'Malicious HTML' do | |
| 179 195 | 
             
                    input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
         | 
| 180 196 |  | 
| 181 197 | 
             
                    it 'should not escape characters unnecessarily' do
         | 
| 182 | 
            -
                       | 
| 198 | 
            +
                      skip "behavior should only exist in nokogiri's patched libxml" if using_unpatched_libxml2
         | 
| 199 | 
            +
             | 
| 200 | 
            +
                      # This uses Nokogumbo's HTML-compliant serializer rather than
         | 
| 201 | 
            +
                      # libxml2's.
         | 
| 202 | 
            +
                      @s.fragment(input).
         | 
| 203 | 
            +
                        must_equal(%[<#{tag_name} #{attr_name}="examp<!--&quot; onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
         | 
| 204 | 
            +
             | 
| 205 | 
            +
                      # This uses the not-quite-standards-compliant libxml2 serializer via
         | 
| 206 | 
            +
                      # Nokogiri, so the output may be a little different as of Nokogiri
         | 
| 207 | 
            +
                      # 1.10.2 when using Nokogiri's vendored libxml2 due to this patch:
         | 
| 208 | 
            +
                      # https://github.com/sparklemotion/nokogiri/commit/4852e43cb6039e26d8c51af78621e539cbf46c5d
         | 
| 209 | 
            +
                      fragment = Nokogiri::HTML.fragment(input)
         | 
| 210 | 
            +
                      @s.node!(fragment)
         | 
| 211 | 
            +
                      fragment.to_html.
         | 
| 212 | 
            +
                        must_equal(%[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>])
         | 
| 183 213 | 
             
                    end
         | 
| 184 214 |  | 
| 185 215 | 
             
                    it 'should round-trip to the same output' do
         | 
| @@ -189,4 +219,17 @@ describe 'Malicious HTML' do | |
| 189 219 | 
             
                  end
         | 
| 190 220 | 
             
                end
         | 
| 191 221 | 
             
              end
         | 
| 222 | 
            +
             | 
| 223 | 
            +
              # https://github.com/rgrove/sanitize/security/advisories/GHSA-p4x4-rw2p-8j8m
         | 
| 224 | 
            +
              describe 'foreign content bypass in relaxed config' do
         | 
| 225 | 
            +
                it 'prevents a sanitization bypass via carefully crafted foreign content' do
         | 
| 226 | 
            +
                  %w[iframe noembed noframes noscript plaintext script style xmp].each do |tag_name|
         | 
| 227 | 
            +
                    @s.fragment(%[<math><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]).
         | 
| 228 | 
            +
                      must_equal ''
         | 
| 229 | 
            +
             | 
| 230 | 
            +
                    @s.fragment(%[<svg><#{tag_name}>/*</#{tag_name}><img src onerror=alert(1)>*/]).
         | 
| 231 | 
            +
                      must_equal ''
         | 
| 232 | 
            +
                  end
         | 
| 233 | 
            +
                end
         | 
| 234 | 
            +
              end
         | 
| 192 235 | 
             
            end
         | 
    
        data/test/test_parser.rb
    CHANGED
    
    | @@ -19,8 +19,8 @@ describe 'Parser' do | |
| 19 19 | 
             
              end
         | 
| 20 20 |  | 
| 21 21 | 
             
              it 'should not have the Nokogiri 1.4.2+ unterminated script/style element bug' do
         | 
| 22 | 
            -
                Sanitize.fragment('foo <script>bar').must_equal 'foo bar'
         | 
| 23 | 
            -
                Sanitize.fragment('foo <style>bar').must_equal 'foo bar'
         | 
| 22 | 
            +
                Sanitize.fragment('foo <script>bar').must_equal 'foo '
         | 
| 23 | 
            +
                Sanitize.fragment('foo <style>bar').must_equal 'foo '
         | 
| 24 24 | 
             
              end
         | 
| 25 25 |  | 
| 26 26 | 
             
              it 'ambiguous non-tag brackets like "1 > 2 and 2 < 1" should be parsed correctly' do
         | 
| @@ -28,35 +28,6 @@ describe 'Parser' do | |
| 28 28 | 
             
                Sanitize.fragment('OMG HAPPY BIRTHDAY! *<:-D').must_equal 'OMG HAPPY BIRTHDAY! *<:-D'
         | 
| 29 29 | 
             
              end
         | 
| 30 30 |  | 
| 31 | 
            -
              # https://github.com/sparklemotion/nokogiri/issues/1008
         | 
| 32 | 
            -
              it 'should work around the libxml2 content-type meta tag bug' do
         | 
| 33 | 
            -
                Sanitize.document('<html><head></head><body>Howdy!</body></html>',
         | 
| 34 | 
            -
                  :elements => %w[html head body]
         | 
| 35 | 
            -
                ).must_equal "<html><head></head><body>Howdy!</body></html>\n"
         | 
| 36 | 
            -
             | 
| 37 | 
            -
                Sanitize.document('<html><head></head><body>Howdy!</body></html>',
         | 
| 38 | 
            -
                  :elements => %w[html head meta body]
         | 
| 39 | 
            -
                ).must_equal "<html><head></head><body>Howdy!</body></html>\n"
         | 
| 40 | 
            -
             | 
| 41 | 
            -
                Sanitize.document('<html><head><meta charset="utf-8"></head><body>Howdy!</body></html>',
         | 
| 42 | 
            -
                  :elements   => %w[html head meta body],
         | 
| 43 | 
            -
                  :attributes => {'meta' => ['charset']}
         | 
| 44 | 
            -
                ).must_equal "<html><head><meta charset=\"utf-8\"></head><body>Howdy!</body></html>\n"
         | 
| 45 | 
            -
             | 
| 46 | 
            -
                Sanitize.document('<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>Howdy!</body></html>',
         | 
| 47 | 
            -
                  :elements   => %w[html head meta body],
         | 
| 48 | 
            -
                  :attributes => {'meta' => %w[charset content http-equiv]}
         | 
| 49 | 
            -
                ).must_equal "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"></head><body>Howdy!</body></html>\n"
         | 
| 50 | 
            -
             | 
| 51 | 
            -
                # Edge case: an existing content-type meta tag with a non-UTF-8 content type
         | 
| 52 | 
            -
                # will be converted to UTF-8, since that's the only output encoding we
         | 
| 53 | 
            -
                # support.
         | 
| 54 | 
            -
                Sanitize.document('<html><head><meta http-equiv="content-type" content="text/html;charset=us-ascii"></head><body>Howdy!</body></html>',
         | 
| 55 | 
            -
                  :elements   => %w[html head meta body],
         | 
| 56 | 
            -
                  :attributes => {'meta' => %w[charset content http-equiv]}
         | 
| 57 | 
            -
                ).must_equal "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\"></head><body>Howdy!</body></html>\n"
         | 
| 58 | 
            -
              end
         | 
| 59 | 
            -
             | 
| 60 31 | 
             
              describe 'when siblings are added after a node during traversal' do
         | 
| 61 32 | 
             
                it 'the added siblings should be traversed' do
         | 
| 62 33 | 
             
                  html = %[
         | 
| @@ -84,7 +55,7 @@ describe 'Parser' do | |
| 84 55 | 
             
                        siblings << env[:node][:id]
         | 
| 85 56 | 
             
                      end
         | 
| 86 57 |  | 
| 87 | 
            -
                      return {:node_whitelist => [env[:node]]}
         | 
| 58 | 
            +
                      return {:node_allowlist => [env[:node]]}
         | 
| 88 59 | 
             
                  })
         | 
| 89 60 |  | 
| 90 61 | 
             
                  # All siblings should be traversed, and in the order added.
         |