sanitize 2.1.1 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +5 -5
- data/HISTORY.md +93 -14
- data/README.md +346 -134
- data/lib/sanitize.rb +177 -132
- data/lib/sanitize/config.rb +53 -79
- data/lib/sanitize/config/basic.rb +12 -32
- data/lib/sanitize/config/default.rb +103 -0
- data/lib/sanitize/config/relaxed.rb +517 -52
- data/lib/sanitize/config/restricted.rb +3 -23
- data/lib/sanitize/css.rb +218 -0
- data/lib/sanitize/transformers/clean_cdata.rb +3 -3
- data/lib/sanitize/transformers/clean_comment.rb +6 -3
- data/lib/sanitize/transformers/clean_css.rb +57 -0
- data/lib/sanitize/transformers/clean_doctype.rb +13 -0
- data/lib/sanitize/transformers/clean_element.rb +99 -129
- data/lib/sanitize/version.rb +3 -1
- data/test/common.rb +34 -0
- data/test/test_clean_comment.rb +51 -0
- data/test/test_clean_css.rb +66 -0
- data/test/test_clean_doctype.rb +71 -0
- data/test/test_clean_element.rb +399 -0
- data/test/test_config.rb +65 -0
- data/test/test_malicious_css.rb +42 -0
- data/test/test_malicious_html.rb +128 -0
- data/test/test_parser.rb +104 -0
- data/test/test_sanitize.rb +65 -693
- data/test/test_sanitize_css.rb +222 -0
- data/test/test_transformers.rb +144 -0
- data/test/test_unicode.rb +84 -0
- metadata +56 -8
data/test/test_config.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative 'common'
|
3
|
+
|
4
|
+
describe 'Config' do
|
5
|
+
make_my_diffs_pretty!
|
6
|
+
parallelize_me!
|
7
|
+
|
8
|
+
def verify_deeply_frozen(config)
|
9
|
+
config.must_be :frozen?
|
10
|
+
|
11
|
+
if Hash === config
|
12
|
+
config.each_value {|v| verify_deeply_frozen(v) }
|
13
|
+
elsif Set === config || Array === config
|
14
|
+
config.each {|v| verify_deeply_frozen(v) }
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'built-in configs should be deeply frozen' do
|
19
|
+
verify_deeply_frozen Sanitize::Config::DEFAULT
|
20
|
+
verify_deeply_frozen Sanitize::Config::BASIC
|
21
|
+
verify_deeply_frozen Sanitize::Config::RELAXED
|
22
|
+
verify_deeply_frozen Sanitize::Config::RESTRICTED
|
23
|
+
end
|
24
|
+
|
25
|
+
describe '.freeze_config' do
|
26
|
+
it 'should deeply freeze and return a configuration Hash' do
|
27
|
+
a = {:one => {:one_one => [0, '1', :a], :one_two => false, :one_three => Set.new([:a, :b, :c])}}
|
28
|
+
b = Sanitize::Config.freeze_config(a)
|
29
|
+
|
30
|
+
b.must_be_same_as a
|
31
|
+
verify_deeply_frozen a
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
describe '.merge' do
|
36
|
+
it 'should deeply merge a configuration Hash' do
|
37
|
+
# Freeze to ensure that we get an error if either Hash is modified.
|
38
|
+
a = Sanitize::Config.freeze_config({:one => {:one_one => [0, '1', :a], :one_two => false, :one_three => Set.new([:a, :b, :c])}})
|
39
|
+
b = Sanitize::Config.freeze_config({:one => {:one_two => true, :one_three => 3}, :two => 2})
|
40
|
+
|
41
|
+
c = Sanitize::Config.merge(a, b)
|
42
|
+
|
43
|
+
c.wont_be_same_as a
|
44
|
+
c.wont_be_same_as b
|
45
|
+
|
46
|
+
c.must_equal(
|
47
|
+
:one => {
|
48
|
+
:one_one => [0, '1', :a],
|
49
|
+
:one_two => true,
|
50
|
+
:one_three => 3
|
51
|
+
},
|
52
|
+
|
53
|
+
:two => 2
|
54
|
+
)
|
55
|
+
|
56
|
+
c[:one].wont_be_same_as a[:one]
|
57
|
+
c[:one][:one_one].wont_be_same_as a[:one][:one_one]
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'should raise an ArgumentError if either argument is not a Hash' do
|
61
|
+
proc { Sanitize::Config.merge('foo', {}) }.must_raise ArgumentError
|
62
|
+
proc { Sanitize::Config.merge({}, 'foo') }.must_raise ArgumentError
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
data/test/test_malicious_css.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative 'common'
|
3
|
+
|
4
|
+
# Miscellaneous attempts to sneak maliciously crafted CSS past Sanitize. Some of
|
5
|
+
# these are courtesy of (or inspired by) the OWASP XSS Filter Evasion Cheat
|
6
|
+
# Sheet.
|
7
|
+
#
|
8
|
+
# https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
|
9
|
+
|
10
|
+
describe 'Malicious CSS' do
|
11
|
+
make_my_diffs_pretty!
|
12
|
+
parallelize_me!
|
13
|
+
|
14
|
+
before do
|
15
|
+
@s = Sanitize::CSS.new(Sanitize::Config::RELAXED)
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'should not be possible to inject an expression by munging it with a comment' do
|
19
|
+
@s.properties(%[width:expr/*XSS*/ession(alert('XSS'))]).
|
20
|
+
must_equal ''
|
21
|
+
|
22
|
+
@s.properties(%[width:ex/*XSS*//*/*/pression(alert("XSS"))]).
|
23
|
+
must_equal ''
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'should not be possible to inject an expression by munging it with a newline' do
|
27
|
+
@s.properties(%[width:\nexpression(alert('XSS'));]).
|
28
|
+
must_equal ''
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'should not allow the javascript protocol' do
|
32
|
+
@s.properties(%[background-image:url("javascript:alert('XSS')");]).
|
33
|
+
must_equal ''
|
34
|
+
|
35
|
+
Sanitize.fragment(%[<div style="background-image: url(javascript:alert('XSS'))">],
|
36
|
+
Sanitize::Config::RELAXED).must_equal '<div></div>'
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'should not allow behaviors' do
|
40
|
+
@s.properties(%[behavior: url(xss.htc);]).must_equal ''
|
41
|
+
end
|
42
|
+
end
|
data/test/test_malicious_html.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative 'common'
|
3
|
+
|
4
|
+
# Miscellaneous attempts to sneak maliciously crafted HTML past Sanitize. Many
|
5
|
+
# of these are courtesy of (or inspired by) the OWASP XSS Filter Evasion Cheat
|
6
|
+
# Sheet.
|
7
|
+
#
|
8
|
+
# https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
|
9
|
+
|
10
|
+
describe 'Malicious HTML' do
|
11
|
+
make_my_diffs_pretty!
|
12
|
+
parallelize_me!
|
13
|
+
|
14
|
+
before do
|
15
|
+
@s = Sanitize.new(Sanitize::Config::RELAXED)
|
16
|
+
end
|
17
|
+
|
18
|
+
describe 'comments' do
|
19
|
+
it 'should not allow script injection via conditional comments' do
|
20
|
+
@s.fragment(%[<!--[if gte IE 4]>\n<script>alert('XSS');</script>\n<![endif]-->]).
|
21
|
+
must_equal ''
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe 'interpolation (ERB, PHP, etc.)' do
|
26
|
+
it 'should escape ERB-style tags' do
|
27
|
+
@s.fragment('<% naughty_ruby_code %>').
|
28
|
+
must_equal '&lt;% naughty_ruby_code %&gt;'
|
29
|
+
|
30
|
+
@s.fragment('<%= naughty_ruby_code %>').
|
31
|
+
must_equal '&lt;%= naughty_ruby_code %&gt;'
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'should remove PHP-style tags' do
|
35
|
+
@s.fragment('<? naughtyPHPCode(); ?>').
|
36
|
+
must_equal ''
|
37
|
+
|
38
|
+
@s.fragment('<?= naughtyPHPCode(); ?>').
|
39
|
+
must_equal ''
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
describe '<body>' do
|
44
|
+
it 'should not be possible to inject JS via a malformed event attribute' do
|
45
|
+
@s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>').
|
46
|
+
must_equal "<html><head></head><body></body></html>\n"
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
describe '<iframe>' do
|
51
|
+
it 'should not be possible to inject an iframe using an improperly closed tag' do
|
52
|
+
@s.fragment(%[<iframe src=http://ha.ckers.org/scriptlet.html <]).
|
53
|
+
must_equal ''
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
describe '<img>' do
|
58
|
+
it 'should not be possible to inject JS via an unquoted <img> src attribute' do
|
59
|
+
@s.fragment("<img src=javascript:alert('XSS')>").must_equal '<img>'
|
60
|
+
end
|
61
|
+
|
62
|
+
it 'should not be possible to inject JS using grave accents as <img> src delimiters' do
|
63
|
+
@s.fragment("<img src=`javascript:alert('XSS')`>").must_equal '<img>'
|
64
|
+
end
|
65
|
+
|
66
|
+
it 'should not be possible to inject <script> via a malformed <img> tag' do
|
67
|
+
@s.fragment('<img """><script>alert("XSS")</script>">').
|
68
|
+
must_equal '<img>alert("XSS")"&gt;'
|
69
|
+
end
|
70
|
+
|
71
|
+
it 'should not be possible to inject protocol-based JS' do
|
72
|
+
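@s.fragment('<img src=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>').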
|
73
|
+
must_equal '<img>'
|
74
|
+
|
75
|
+
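@s.fragment('<img src=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>').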
|
76
|
+
must_equal '<img>'
|
77
|
+
|
78
|
+
@s.fragment('<img src=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>').
|
79
|
+
must_equal '<img>'
|
80
|
+
|
81
|
+
# Encoded tab character.
|
82
|
+
@s.fragment(%[<img src="jav&#x09;ascript:alert('XSS');">]).
|
83
|
+
must_equal '<img>'
|
84
|
+
|
85
|
+
# Encoded newline.
|
86
|
+
@s.fragment(%[<img src="jav&#x0A;ascript:alert('XSS');">]).
|
87
|
+
must_equal '<img>'
|
88
|
+
|
89
|
+
# Encoded carriage return.
|
90
|
+
@s.fragment(%[<img src="jav&#x0D;ascript:alert('XSS');">]).
|
91
|
+
must_equal '<img>'
|
92
|
+
|
93
|
+
# Null byte.
|
94
|
+
@s.fragment(%[<img src=java\0script:alert("XSS")>]).
|
95
|
+
must_equal '<img>'
|
96
|
+
|
97
|
+
# Spaces plus meta char.
|
98
|
+
@s.fragment(%[<img src=" &#14;  javascript:alert('XSS');">]).
|
99
|
+
must_equal '<img>'
|
100
|
+
|
101
|
+
# Mixed spaces and tabs.
|
102
|
+
@s.fragment(%[<img src="j\na v\tascript://alert('XSS');">]).
|
103
|
+
must_equal '<img>'
|
104
|
+
end
|
105
|
+
|
106
|
+
it 'should not be possible to inject protocol-based JS via whitespace' do
|
107
|
+
@s.fragment(%[<img src="jav\tascript:alert('XSS');">]).
|
108
|
+
must_equal '<img>'
|
109
|
+
end
|
110
|
+
|
111
|
+
it 'should not be possible to inject JS using a half-open <img> tag' do
|
112
|
+
@s.fragment(%[<img src="javascript:alert('XSS')"]).
|
113
|
+
must_equal ''
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
describe '<script>' do
|
118
|
+
it 'should not be possible to inject <script> using a malformed non-alphanumeric tag name' do
|
119
|
+
@s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>]).
|
120
|
+
must_equal 'alert(1)'
|
121
|
+
end
|
122
|
+
|
123
|
+
it 'should not be possible to inject <script> via extraneous open brackets' do
|
124
|
+
@s.fragment(%[<<script>alert("XSS");//<</script>]).
|
125
|
+
must_equal '&lt;alert("XSS");//&lt;'
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
data/test/test_parser.rb
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative 'common'
|
3
|
+
|
4
|
+
describe 'Parser' do
|
5
|
+
make_my_diffs_pretty!
|
6
|
+
parallelize_me!
|
7
|
+
|
8
|
+
it 'should translate valid entities into characters' do
|
9
|
+
Sanitize.fragment("&apos;&eacute;&amp;").must_equal("'é&amp;")
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'should translate orphaned ampersands into entities' do
|
13
|
+
Sanitize.fragment('at&t').must_equal('at&amp;t')
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should not add newlines after tags when serializing a fragment' do
|
17
|
+
Sanitize.fragment("<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>", :elements => ['div', 'p'])
|
18
|
+
.must_equal "<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>"
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'should not have the Nokogiri 1.4.2+ unterminated script/style element bug' do
|
22
|
+
Sanitize.fragment('foo <script>bar').must_equal 'foo bar'
|
23
|
+
Sanitize.fragment('foo <style>bar').must_equal 'foo bar'
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'ambiguous non-tag brackets like "1 > 2 and 2 < 1" should be parsed correctly' do
|
27
|
+
Sanitize.fragment('1 > 2 and 2 < 1').must_equal '1 &gt; 2 and 2 &lt; 1'
|
28
|
+
Sanitize.fragment('OMG HAPPY BIRTHDAY! *<:-D').must_equal 'OMG HAPPY BIRTHDAY! *&lt;:-D'
|
29
|
+
end
|
30
|
+
|
31
|
+
# https://github.com/sparklemotion/nokogiri/issues/1008
|
32
|
+
it 'should work around the libxml2 content-type meta tag bug' do
|
33
|
+
Sanitize.document('<html><head></head><body>Howdy!</body></html>',
|
34
|
+
:elements => %w[html head body]
|
35
|
+
).must_equal "<html><head></head><body>Howdy!</body></html>\n"
|
36
|
+
|
37
|
+
Sanitize.document('<html><head></head><body>Howdy!</body></html>',
|
38
|
+
:elements => %w[html head meta body]
|
39
|
+
).must_equal "<html><head></head><body>Howdy!</body></html>\n"
|
40
|
+
|
41
|
+
Sanitize.document('<html><head><meta charset="utf-8"></head><body>Howdy!</body></html>',
|
42
|
+
:elements => %w[html head meta body],
|
43
|
+
:attributes => {'meta' => ['charset']}
|
44
|
+
).must_equal "<html><head><meta charset=\"utf-8\"></head><body>Howdy!</body></html>\n"
|
45
|
+
|
46
|
+
Sanitize.document('<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>Howdy!</body></html>',
|
47
|
+
:elements => %w[html head meta body],
|
48
|
+
:attributes => {'meta' => %w[charset content http-equiv]}
|
49
|
+
).must_equal "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"></head><body>Howdy!</body></html>\n"
|
50
|
+
|
51
|
+
# Edge case: an existing content-type meta tag with a non-UTF-8 content type
|
52
|
+
# will be converted to UTF-8, since that's the only output encoding we
|
53
|
+
# support.
|
54
|
+
Sanitize.document('<html><head><meta http-equiv="content-type" content="text/html;charset=us-ascii"></head><body>Howdy!</body></html>',
|
55
|
+
:elements => %w[html head meta body],
|
56
|
+
:attributes => {'meta' => %w[charset content http-equiv]}
|
57
|
+
).must_equal "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\"></head><body>Howdy!</body></html>\n"
|
58
|
+
end
|
59
|
+
|
60
|
+
describe 'when siblings are added after a node during traversal' do
|
61
|
+
it 'the added siblings should be traversed' do
|
62
|
+
html = %[
|
63
|
+
<div id="one">
|
64
|
+
<div id="one_one">
|
65
|
+
<div id="one_one_one"></div>
|
66
|
+
</div>
|
67
|
+
<div id="one_two"></div>
|
68
|
+
</div>
|
69
|
+
<div id="two">
|
70
|
+
<div id="two_one"><div id="two_one_one"></div></div>
|
71
|
+
<div id="two_two"></div>
|
72
|
+
</div>
|
73
|
+
<div id="three"></div>
|
74
|
+
]
|
75
|
+
|
76
|
+
siblings = []
|
77
|
+
|
78
|
+
Sanitize.fragment(html, :transformers => ->(env) {
|
79
|
+
name = env[:node].name
|
80
|
+
|
81
|
+
if name == 'div'
|
82
|
+
env[:node].add_next_sibling('<b id="added_' + env[:node]['id'] + '">')
|
83
|
+
elsif name == 'b'
|
84
|
+
siblings << env[:node][:id]
|
85
|
+
end
|
86
|
+
|
87
|
+
return {:node_whitelist => [env[:node]]}
|
88
|
+
})
|
89
|
+
|
90
|
+
# All siblings should be traversed, and in the order added.
|
91
|
+
siblings.must_equal [
|
92
|
+
"added_one_one_one",
|
93
|
+
"added_one_one",
|
94
|
+
"added_one_two",
|
95
|
+
"added_one",
|
96
|
+
"added_two_one_one",
|
97
|
+
"added_two_one",
|
98
|
+
"added_two_two",
|
99
|
+
"added_two",
|
100
|
+
"added_three"
|
101
|
+
]
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
data/test/test_sanitize.rb
CHANGED
@@ -1,721 +1,93 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
-
|
3
|
-
# Copyright (c) 2013 Ryan Grove <ryan@wonko.com>
|
4
|
-
#
|
5
|
-
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
-
# of this software and associated documentation files (the 'Software'), to deal
|
7
|
-
# in the Software without restriction, including without limitation the rights
|
8
|
-
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
-
# copies of the Software, and to permit persons to whom the Software is
|
10
|
-
# furnished to do so, subject to the following conditions:
|
11
|
-
#
|
12
|
-
# The above copyright notice and this permission notice shall be included in all
|
13
|
-
# copies or substantial portions of the Software.
|
14
|
-
#
|
15
|
-
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
-
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
-
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
-
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
-
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
-
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
-
# SOFTWARE.
|
22
|
-
#++
|
2
|
+
require_relative 'common'
|
23
3
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
require 'sanitize'
|
29
|
-
|
30
|
-
strings = {
|
31
|
-
:basic => {
|
32
|
-
:html => '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>',
|
33
|
-
:default => 'Lorem ipsum dolor sit amet alert("hello world");',
|
34
|
-
:restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet alert("hello world");',
|
35
|
-
:basic => '<b>Lorem</b> <a href="pants" rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet alert("hello world");',
|
36
|
-
:relaxed => '<b>Lorem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet alert("hello world");'
|
37
|
-
},
|
38
|
-
|
39
|
-
:malformed => {
|
40
|
-
:html => 'Lo<!-- comment -->rem</b> <a href=pants title="foo>ipsum <a href="http://foo.com/"><strong>dolor</a></strong> sit<br/>amet <script>alert("hello world");',
|
41
|
-
:default => 'Lorem dolor sit amet alert("hello world");',
|
42
|
-
:restricted => 'Lorem <strong>dolor</strong> sit amet alert("hello world");',
|
43
|
-
:basic => 'Lorem <a href="pants" rel="nofollow"><strong>dolor</strong></a> sit<br>amet alert("hello world");',
|
44
|
-
:relaxed => 'Lorem <a href="pants" title="foo>ipsum <a href="><strong>dolor</strong></a> sit<br>amet alert("hello world");',
|
45
|
-
:document => ' Lorem dolor sit amet alert("hello world"); '
|
46
|
-
},
|
47
|
-
|
48
|
-
:unclosed => {
|
49
|
-
:html => '<p>a</p><blockquote>b',
|
50
|
-
:default => ' a b ',
|
51
|
-
:restricted => ' a b ',
|
52
|
-
:basic => '<p>a</p><blockquote>b</blockquote>',
|
53
|
-
:relaxed => '<p>a</p><blockquote>b</blockquote>'
|
54
|
-
},
|
55
|
-
|
56
|
-
:malicious => {
|
57
|
-
:html => '<b>Lo<!-- comment -->rem</b> <a href="javascript:pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert("hello world");</script>',
|
58
|
-
:default => 'Lorem ipsum dolor sit amet <script>alert("hello world");',
|
59
|
-
:restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet <script>alert("hello world");',
|
60
|
-
:basic => '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet <script>alert("hello world");',
|
61
|
-
:relaxed => '<b>Lorem</b> <a title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <script>alert("hello world");'
|
62
|
-
},
|
63
|
-
|
64
|
-
:raw_comment => {
|
65
|
-
:html => '<!-- comment -->Hello',
|
66
|
-
:default => 'Hello',
|
67
|
-
:restricted => 'Hello',
|
68
|
-
:basic => 'Hello',
|
69
|
-
:relaxed => 'Hello',
|
70
|
-
:document => ' Hello ',
|
71
|
-
}
|
72
|
-
}
|
73
|
-
|
74
|
-
tricky = {
|
75
|
-
'protocol-based JS injection: simple, no spaces' => {
|
76
|
-
:html => '<a href="javascript:alert(\'XSS\');">foo</a>',
|
77
|
-
:default => 'foo',
|
78
|
-
:restricted => 'foo',
|
79
|
-
:basic => '<a rel="nofollow">foo</a>',
|
80
|
-
:relaxed => '<a>foo</a>'
|
81
|
-
},
|
82
|
-
|
83
|
-
'protocol-based JS injection: simple, spaces before' => {
|
84
|
-
:html => '<a href="javascript :alert(\'XSS\');">foo</a>',
|
85
|
-
:default => 'foo',
|
86
|
-
:restricted => 'foo',
|
87
|
-
:basic => '<a rel="nofollow">foo</a>',
|
88
|
-
:relaxed => '<a>foo</a>'
|
89
|
-
},
|
90
|
-
|
91
|
-
'protocol-based JS injection: simple, spaces after' => {
|
92
|
-
:html => '<a href="javascript: alert(\'XSS\');">foo</a>',
|
93
|
-
:default => 'foo',
|
94
|
-
:restricted => 'foo',
|
95
|
-
:basic => '<a rel="nofollow">foo</a>',
|
96
|
-
:relaxed => '<a>foo</a>'
|
97
|
-
},
|
98
|
-
|
99
|
-
'protocol-based JS injection: simple, spaces before and after' => {
|
100
|
-
:html => '<a href="javascript : alert(\'XSS\');">foo</a>',
|
101
|
-
:default => 'foo',
|
102
|
-
:restricted => 'foo',
|
103
|
-
:basic => '<a rel="nofollow">foo</a>',
|
104
|
-
:relaxed => '<a>foo</a>'
|
105
|
-
},
|
106
|
-
|
107
|
-
'protocol-based JS injection: preceding colon' => {
|
108
|
-
:html => '<a href=":javascript:alert(\'XSS\');">foo</a>',
|
109
|
-
:default => 'foo',
|
110
|
-
:restricted => 'foo',
|
111
|
-
:basic => '<a rel="nofollow">foo</a>',
|
112
|
-
:relaxed => '<a>foo</a>'
|
113
|
-
},
|
114
|
-
|
115
|
-
'protocol-based JS injection: UTF-8 encoding' => {
|
116
|
-
:html => '<a href="javascript:">foo</a>',
|
117
|
-
:default => 'foo',
|
118
|
-
:restricted => 'foo',
|
119
|
-
:basic => '<a rel="nofollow">foo</a>',
|
120
|
-
:relaxed => '<a>foo</a>'
|
121
|
-
},
|
122
|
-
|
123
|
-
'protocol-based JS injection: long UTF-8 encoding' => {
|
124
|
-
:html => '<a href="javascript:">foo</a>',
|
125
|
-
:default => 'foo',
|
126
|
-
:restricted => 'foo',
|
127
|
-
:basic => '<a rel="nofollow">foo</a>',
|
128
|
-
:relaxed => '<a>foo</a>'
|
129
|
-
},
|
130
|
-
|
131
|
-
'protocol-based JS injection: long UTF-8 encoding without semicolons' => {
|
132
|
-
:html => '<a href=javascript:alert('XSS')>foo</a>',
|
133
|
-
:default => 'foo',
|
134
|
-
:restricted => 'foo',
|
135
|
-
:basic => '<a rel="nofollow">foo</a>',
|
136
|
-
:relaxed => '<a>foo</a>'
|
137
|
-
},
|
138
|
-
|
139
|
-
'protocol-based JS injection: hex encoding' => {
|
140
|
-
:html => '<a href="javascript:">foo</a>',
|
141
|
-
:default => 'foo',
|
142
|
-
:restricted => 'foo',
|
143
|
-
:basic => '<a rel="nofollow">foo</a>',
|
144
|
-
:relaxed => '<a>foo</a>'
|
145
|
-
},
|
146
|
-
|
147
|
-
'protocol-based JS injection: long hex encoding' => {
|
148
|
-
:html => '<a href="javascript:">foo</a>',
|
149
|
-
:default => 'foo',
|
150
|
-
:restricted => 'foo',
|
151
|
-
:basic => '<a rel="nofollow">foo</a>',
|
152
|
-
:relaxed => '<a>foo</a>'
|
153
|
-
},
|
154
|
-
|
155
|
-
'protocol-based JS injection: hex encoding without semicolons' => {
|
156
|
-
:html => '<a href=javascript:alert('XSS')>foo</a>',
|
157
|
-
:default => 'foo',
|
158
|
-
:restricted => 'foo',
|
159
|
-
:basic => '<a rel="nofollow">foo</a>',
|
160
|
-
:relaxed => '<a>foo</a>'
|
161
|
-
},
|
162
|
-
|
163
|
-
'protocol-based JS injection: null char' => {
|
164
|
-
:html => "<img src=java\0script:alert(\"XSS\")>",
|
165
|
-
:default => '',
|
166
|
-
:restricted => '',
|
167
|
-
:basic => '',
|
168
|
-
:relaxed => '<img src="java">' # everything following the null char gets stripped, and URL is considered relative
|
169
|
-
},
|
170
|
-
|
171
|
-
'protocol-based JS injection: invalid URL char' => {
|
172
|
-
:html => '<img src=java\script:alert("XSS")>',
|
173
|
-
:default => '',
|
174
|
-
:restricted => '',
|
175
|
-
:basic => '',
|
176
|
-
:relaxed => '<img>'
|
177
|
-
},
|
178
|
-
|
179
|
-
'protocol-based JS injection: spaces and entities' => {
|
180
|
-
:html => '<img src="  javascript:alert(\'XSS\');">',
|
181
|
-
:default => '',
|
182
|
-
:restricted => '',
|
183
|
-
:basic => '',
|
184
|
-
:relaxed => '<img src>'
|
185
|
-
}
|
186
|
-
}
|
187
|
-
|
188
|
-
describe 'Config::DEFAULT' do
|
189
|
-
it 'should translate valid HTML entities' do
|
190
|
-
Sanitize.clean("Don&apos;t tas&eacute; me &amp; bro!").must_equal("Don't tasé me &amp; bro!")
|
191
|
-
end
|
192
|
-
|
193
|
-
it 'should translate valid HTML entities while encoding unencoded ampersands' do
|
194
|
-
Sanitize.clean("cookies&sup2; & &frac14; cr&eacute;me").must_equal("cookies² &amp; ¼ créme")
|
195
|
-
end
|
196
|
-
|
197
|
-
it 'should never output &apos;' do
|
198
|
-
Sanitize.clean("<a href='&apos;' class=\"' &#39;\">IE6 isn't a real browser</a>").wont_match(/&apos;/)
|
199
|
-
end
|
200
|
-
|
201
|
-
it 'should not choke on several instances of the same element in a row' do
|
202
|
-
Sanitize.clean('<img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif">').must_equal('')
|
203
|
-
end
|
204
|
-
|
205
|
-
it 'should surround the contents of :whitespace_elements with space characters when removing the element' do
|
206
|
-
Sanitize.clean('foo<div>bar</div>baz').must_equal('foo bar baz')
|
207
|
-
Sanitize.clean('foo<br>bar<br>baz').must_equal('foo bar baz')
|
208
|
-
Sanitize.clean('foo<hr>bar<hr>baz').must_equal('foo bar baz')
|
209
|
-
end
|
210
|
-
|
211
|
-
strings.each do |name, data|
|
212
|
-
it "should clean #{name} HTML" do
|
213
|
-
Sanitize.clean(data[:html]).must_equal(data[:default])
|
214
|
-
end
|
215
|
-
end
|
216
|
-
|
217
|
-
tricky.each do |name, data|
|
218
|
-
it "should not allow #{name}" do
|
219
|
-
Sanitize.clean(data[:html]).must_equal(data[:default])
|
220
|
-
end
|
221
|
-
end
|
222
|
-
end
|
223
|
-
|
224
|
-
describe 'Config::RESTRICTED' do
|
225
|
-
before { @s = Sanitize.new(Sanitize::Config::RESTRICTED) }
|
226
|
-
|
227
|
-
strings.each do |name, data|
|
228
|
-
it "should clean #{name} HTML" do
|
229
|
-
@s.clean(data[:html]).must_equal(data[:restricted])
|
230
|
-
end
|
231
|
-
end
|
232
|
-
|
233
|
-
tricky.each do |name, data|
|
234
|
-
it "should not allow #{name}" do
|
235
|
-
@s.clean(data[:html]).must_equal(data[:restricted])
|
4
|
+
describe 'Sanitize' do
|
5
|
+
describe 'instance methods' do
|
6
|
+
before do
|
7
|
+
@s = Sanitize.new
|
236
8
|
end
|
237
|
-
end
|
238
|
-
end
|
239
|
-
|
240
|
-
describe 'Config::BASIC' do
|
241
|
-
before { @s = Sanitize.new(Sanitize::Config::BASIC) }
|
242
9
|
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
it 'should downcase attribute names' do
|
248
|
-
@s.clean('<a HREF="javascript:alert(\'foo\')">bar</a>').must_equal('<a rel="nofollow">bar</a>')
|
249
|
-
end
|
10
|
+
describe '#document' do
|
11
|
+
before do
|
12
|
+
@s = Sanitize.new(:elements => ['html'])
|
13
|
+
end
|
250
14
|
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
end
|
15
|
+
it 'should sanitize an HTML document' do
|
16
|
+
@s.document('<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>')
|
17
|
+
.must_equal "<html>Lorem ipsum dolor sit amet alert(\"hello world\");</html>\n"
|
18
|
+
end
|
256
19
|
|
257
|
-
|
258
|
-
|
259
|
-
|
20
|
+
it 'should not modify the input string' do
|
21
|
+
input = '<!DOCTYPE html><b>foo</b>'
|
22
|
+
@s.document(input)
|
23
|
+
input.must_equal('<!DOCTYPE html><b>foo</b>')
|
24
|
+
end
|
260
25
|
end
|
261
|
-
end
|
262
|
-
end
|
263
|
-
|
264
|
-
describe 'Config::RELAXED' do
|
265
|
-
before { @s = Sanitize.new(Sanitize::Config::RELAXED) }
|
266
26
|
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
27
|
+
describe '#fragment' do
|
28
|
+
it 'should sanitize an HTML fragment' do
|
29
|
+
@s.fragment('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')
|
30
|
+
.must_equal 'Lorem ipsum dolor sit amet alert("hello world");'
|
31
|
+
end
|
272
32
|
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
33
|
+
it 'should not modify the input string' do
|
34
|
+
input = '<b>foo</b>'
|
35
|
+
@s.fragment(input)
|
36
|
+
input.must_equal '<b>foo</b>'
|
37
|
+
end
|
278
38
|
|
279
|
-
|
280
|
-
|
281
|
-
|
39
|
+
it 'should not choke on fragments containing <html> or <body>' do
|
40
|
+
@s.fragment('<html><b>foo</b></html>').must_equal 'foo'
|
41
|
+
@s.fragment('<body><b>foo</b></body>').must_equal 'foo'
|
42
|
+
@s.fragment('<html><body><b>foo</b></body></html>').must_equal 'foo'
|
43
|
+
@s.fragment('<!DOCTYPE html><html><body><b>foo</b></body></html>').must_equal 'foo'
|
44
|
+
end
|
282
45
|
end
|
283
|
-
end
|
284
|
-
end
|
285
|
-
|
286
|
-
describe 'Full Document parser (using clean_document)' do
|
287
|
-
before {
|
288
|
-
@s = Sanitize.new({:elements => %w[!DOCTYPE html]})
|
289
|
-
@default_doctype = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">"
|
290
|
-
}
|
291
46
|
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
end
|
297
|
-
|
298
|
-
it 'should NOT require HTML element to be whitelisted if remove_contents is true' do
|
299
|
-
output = '<!DOCTYPE html><html>foo</html>'
|
300
|
-
Sanitize.clean_document!(output, {:remove_contents => true}).must_equal "<!DOCTYPE html>\n\n"
|
301
|
-
end
|
47
|
+
describe '#node!' do
|
48
|
+
it 'should sanitize a Nokogiri::XML::Node' do
|
49
|
+
doc = Nokogiri::HTML5.parse('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')
|
50
|
+
frag = doc.fragment
|
302
51
|
|
303
|
-
|
304
|
-
@s.clean_document('').must_equal("#{@default_doctype}\n\n")
|
305
|
-
end
|
52
|
+
doc.xpath('/html/body/node()').each {|node| frag << node }
|
306
53
|
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
end
|
311
|
-
|
312
|
-
strings.each do |name, data|
|
313
|
-
it "should wrap #{name} with DOCTYPE and HTML tag" do
|
314
|
-
output = data[:document] || data[:default]
|
315
|
-
@s.clean_document(data[:html]).must_equal("#{@default_doctype}\n<html>#{output}</html>\n")
|
316
|
-
end
|
317
|
-
end
|
54
|
+
@s.node!(frag)
|
55
|
+
frag.to_html.must_equal 'Lorem ipsum dolor sit amet alert("hello world");'
|
56
|
+
end
|
318
57
|
|
319
|
-
|
320
|
-
|
321
|
-
|
58
|
+
describe "when the given node is a document and <html> isn't whitelisted" do
|
59
|
+
it 'should raise a Sanitize::Error' do
|
60
|
+
doc = Nokogiri::HTML5.parse('foo')
|
61
|
+
proc { @s.node!(doc) }.must_raise Sanitize::Error
|
62
|
+
end
|
63
|
+
end
|
322
64
|
end
|
323
65
|
end
|
324
|
-
end
|
325
|
-
|
326
|
-
describe 'Custom configs' do
|
327
|
-
it 'should allow attributes on all elements if whitelisted under :all' do
|
328
|
-
input = '<p class="foo">bar</p>'
|
329
|
-
|
330
|
-
Sanitize.clean(input).must_equal(' bar ')
|
331
|
-
Sanitize.clean(input, {:elements => ['p'], :attributes => {:all => ['class']}}).must_equal(input)
|
332
|
-
Sanitize.clean(input, {:elements => ['p'], :attributes => {'div' => ['class']}}).must_equal('<p>bar</p>')
|
333
|
-
Sanitize.clean(input, {:elements => ['p'], :attributes => {'p' => ['title'], :all => ['class']}}).must_equal(input)
|
334
|
-
end
|
335
|
-
|
336
|
-
it 'should allow comments when :allow_comments == true' do
|
337
|
-
input = 'foo <!-- bar --> baz'
|
338
|
-
Sanitize.clean(input).must_equal('foo baz')
|
339
|
-
Sanitize.clean(input, :allow_comments => true).must_equal(input)
|
340
|
-
end
|
341
|
-
|
342
|
-
it 'should allow relative URLs containing colons where the colon is not in the first path segment' do
|
343
|
-
input = '<a href="/wiki/Special:Random">Random Page</a>'
|
344
|
-
Sanitize.clean(input, { :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => { 'a' => { 'href' => [:relative] }} }).must_equal(input)
|
345
|
-
end
|
346
|
-
|
347
|
-
it 'should allow relative URLs containing colons where the colon is part of an anchor' do
|
348
|
-
input = '<a href="#fn:1">Footnote 1</a>'
|
349
|
-
Sanitize.clean(input, { :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => { 'a' => { 'href' => [:relative] }} }).must_equal(input)
|
350
|
-
end
|
351
|
-
|
352
|
-
it 'should allow relative URLs containing colons where the colon is part of an anchor' do
|
353
|
-
input = '<a href="somepage#fn:1">Footnote 1</a>'
|
354
|
-
Sanitize.clean(input, { :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => { 'a' => { 'href' => [:relative] }} }).must_equal(input)
|
355
|
-
end
|
356
|
-
|
357
|
-
it 'should output HTML when :output == :html' do
|
358
|
-
input = 'foo<br/>bar<br>baz'
|
359
|
-
Sanitize.clean(input, :elements => ['br'], :output => :html).must_equal('foo<br>bar<br>baz')
|
360
|
-
end
|
361
|
-
|
362
|
-
it 'should remove the contents of filtered nodes when :remove_contents == true' do
|
363
|
-
Sanitize.clean('foo bar <div>baz<span>quux</span></div>', :remove_contents => true).must_equal('foo bar ')
|
364
|
-
end
|
365
|
-
|
366
|
-
it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as strings' do
|
367
|
-
Sanitize.clean('foo bar <div>baz<span>quux</span><script>alert("hello!");</script></div>', :remove_contents => ['script', 'span']).must_equal('foo bar baz ')
|
368
|
-
end
|
369
|
-
|
370
|
-
it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as symbols' do
|
371
|
-
Sanitize.clean('foo bar <div>baz<span>quux</span><script>alert("hello!");</script></div>', :remove_contents => [:script, :span]).must_equal('foo bar baz ')
|
372
|
-
end
|
373
|
-
|
374
|
-
it 'should support encodings other than utf-8' do
|
375
|
-
html = 'foo&nbsp;bar'
|
376
|
-
Sanitize.clean(html).must_equal("foo\302\240bar")
|
377
|
-
Sanitize.clean(html, :output_encoding => 'ASCII').must_equal("foo&#160;bar")
|
378
|
-
end
|
379
|
-
|
380
|
-
it 'should not allow arbitrary HTML5 data attributes by default' do
|
381
|
-
config = {
|
382
|
-
:elements => ['b']
|
383
|
-
}
|
384
|
-
|
385
|
-
Sanitize.clean('<b data-foo="bar"></b>', config)
|
386
|
-
.must_equal('<b></b>')
|
387
|
-
|
388
|
-
config[:attributes] = {'b' => ['class']}
|
389
|
-
|
390
|
-
Sanitize.clean('<b class="foo" data-foo="bar"></b>', config)
|
391
|
-
.must_equal('<b class="foo"></b>')
|
392
|
-
end
|
393
|
-
|
394
|
-
it 'should allow arbitrary HTML5 data attributes when the :attributes config includes :data' do
|
395
|
-
config = {
|
396
|
-
:attributes => {'b' => [:data]},
|
397
|
-
:elements => ['b']
|
398
|
-
}
|
399
|
-
|
400
|
-
Sanitize.clean('<b data-foo="valid" data-bar="valid"></b>', config)
|
401
|
-
.must_equal('<b data-foo="valid" data-bar="valid"></b>')
|
402
|
-
|
403
|
-
Sanitize.clean('<b data-="invalid"></b>', config)
|
404
|
-
.must_equal('<b></b>')
|
405
|
-
|
406
|
-
Sanitize.clean('<b data-="invalid"></b>', config)
|
407
|
-
.must_equal('<b></b>')
|
408
|
-
|
409
|
-
Sanitize.clean('<b data-xml="invalid"></b>', config)
|
410
|
-
.must_equal('<b></b>')
|
411
|
-
|
412
|
-
Sanitize.clean('<b data-xmlfoo="invalid"></b>', config)
|
413
|
-
.must_equal('<b></b>')
|
414
|
-
|
415
|
-
Sanitize.clean('<b data-f:oo="valid"></b>', config)
|
416
|
-
.must_equal('<b></b>')
|
417
|
-
|
418
|
-
Sanitize.clean('<b data-f/oo="partial"></b>', config)
|
419
|
-
.must_equal('<b data-f></b>') # Nokogiri quirk; not ideal, but harmless
|
420
|
-
|
421
|
-
Sanitize.clean('<b data-éfoo="valid"></b>', config)
|
422
|
-
.must_equal('<b></b>') # Another annoying Nokogiri quirk.
|
423
|
-
end
|
424
|
-
end
|
425
|
-
|
426
|
-
describe 'Sanitize.clean' do
|
427
|
-
it 'should not modify the input string' do
|
428
|
-
input = '<b>foo</b>'
|
429
|
-
Sanitize.clean(input)
|
430
|
-
input.must_equal('<b>foo</b>')
|
431
|
-
end
|
432
|
-
|
433
|
-
it 'should return a new string' do
|
434
|
-
input = '<b>foo</b>'
|
435
|
-
Sanitize.clean(input).must_equal('foo')
|
436
|
-
end
|
437
|
-
end
|
438
|
-
|
439
|
-
describe 'Sanitize.clean!' do
|
440
|
-
it 'should modify the input string' do
|
441
|
-
input = '<b>foo</b>'
|
442
|
-
Sanitize.clean!(input)
|
443
|
-
input.must_equal('foo')
|
444
|
-
end
|
445
|
-
|
446
|
-
it 'should return the string if it was modified' do
|
447
|
-
input = '<b>foo</b>'
|
448
|
-
Sanitize.clean!(input).must_equal('foo')
|
449
|
-
end
|
450
|
-
|
451
|
-
it 'should return nil if the string was not modified' do
|
452
|
-
input = 'foo'
|
453
|
-
Sanitize.clean!(input).must_equal(nil)
|
454
|
-
end
|
455
|
-
end
|
456
|
-
|
457
|
-
describe 'Sanitize.clean_document' do
|
458
|
-
before { @config = { :elements => ['html', 'p'] } }
|
459
|
-
|
460
|
-
it 'should be idempotent' do
|
461
|
-
input = '<!DOCTYPE html><html><p>foo</p></html>'
|
462
|
-
first = Sanitize.clean_document(input, @config)
|
463
|
-
second = Sanitize.clean_document(first, @config)
|
464
|
-
second.must_equal first
|
465
|
-
second.wont_be_nil
|
466
|
-
end
|
467
|
-
|
468
|
-
it 'should handle nil without raising' do
|
469
|
-
Sanitize.clean_document(nil).must_equal nil
|
470
|
-
end
|
471
|
-
|
472
|
-
it 'should not modify the input string' do
|
473
|
-
input = '<!DOCTYPE html><b>foo</b>'
|
474
|
-
Sanitize.clean_document(input, @config)
|
475
|
-
input.must_equal('<!DOCTYPE html><b>foo</b>')
|
476
|
-
end
|
477
|
-
|
478
|
-
it 'should return a new string' do
|
479
|
-
input = '<!DOCTYPE html><b>foo</b>'
|
480
|
-
Sanitize.clean_document(input, @config).must_equal("<!DOCTYPE html>\n<html>foo</html>\n")
|
481
|
-
end
|
482
|
-
end
|
483
|
-
|
484
|
-
describe 'Sanitize.clean_document!' do
|
485
|
-
before { @config = { :elements => ['html'] } }
|
486
|
-
|
487
|
-
it 'should modify the input string' do
|
488
|
-
input = '<!DOCTYPE html><html><body><b>foo</b></body></html>'
|
489
|
-
Sanitize.clean_document!(input, @config)
|
490
|
-
input.must_equal("<!DOCTYPE html>\n<html>foo</html>\n")
|
491
|
-
end
|
492
|
-
|
493
|
-
it 'should return the string if it was modified' do
|
494
|
-
input = '<!DOCTYPE html><html><body><b>foo</b></body></html>'
|
495
|
-
Sanitize.clean_document!(input, @config).must_equal("<!DOCTYPE html>\n<html>foo</html>\n")
|
496
|
-
end
|
497
|
-
|
498
|
-
it 'should return nil if the string was not modified' do
|
499
|
-
input = "<!DOCTYPE html>\n<html></html>\n"
|
500
|
-
Sanitize.clean_document!(input, @config).must_equal(nil)
|
501
|
-
end
|
502
|
-
end
|
503
|
-
|
504
|
-
describe 'transformers' do
|
505
|
-
# YouTube embed transformer.
|
506
|
-
youtube = lambda do |env|
|
507
|
-
node = env[:node]
|
508
|
-
node_name = env[:node_name]
|
509
|
-
|
510
|
-
# Don't continue if this node is already whitelisted or is not an element.
|
511
|
-
return if env[:is_whitelisted] || !node.element?
|
512
|
-
|
513
|
-
# Don't continue unless the node is an iframe.
|
514
|
-
return unless node_name == 'iframe'
|
515
|
-
|
516
|
-
# Verify that the video URL is actually a valid YouTube video URL.
|
517
|
-
return unless node['src'] =~ /\Ahttps?:\/\/(?:www\.)?youtube(?:-nocookie)?\.com\//
|
518
|
-
|
519
|
-
# We're now certain that this is a YouTube embed, but we still need to run
|
520
|
-
# it through a special Sanitize step to ensure that no unwanted elements or
|
521
|
-
# attributes that don't belong in a YouTube embed can sneak in.
|
522
|
-
Sanitize.clean_node!(node, {
|
523
|
-
:elements => %w[iframe],
|
524
|
-
|
525
|
-
:attributes => {
|
526
|
-
'iframe' => %w[allowfullscreen frameborder height src width]
|
527
|
-
}
|
528
|
-
})
|
529
|
-
|
530
|
-
# Now that we're sure that this is a valid YouTube embed and that there are
|
531
|
-
# no unwanted elements or attributes hidden inside it, we can tell Sanitize
|
532
|
-
# to whitelist the current node.
|
533
|
-
{:node_whitelist => [node]}
|
534
|
-
end
|
535
|
-
|
536
|
-
it 'should receive a complete env Hash as input' do
|
537
|
-
Sanitize.clean!('<SPAN>foo</SPAN>', :foo => :bar, :transformers => lambda {|env|
|
538
|
-
return unless env[:node].element?
|
539
|
-
|
540
|
-
env[:config][:foo].must_equal(:bar)
|
541
|
-
env[:is_whitelisted].must_equal(false)
|
542
|
-
env[:node].must_be_kind_of(Nokogiri::XML::Node)
|
543
|
-
env[:node_name].must_equal('span')
|
544
|
-
env[:node_whitelist].must_be_kind_of(Set)
|
545
|
-
env[:node_whitelist].must_be_empty
|
546
|
-
})
|
547
|
-
end
|
548
|
-
|
549
|
-
it 'should traverse all node types, including the fragment itself' do
|
550
|
-
nodes = []
|
551
|
-
|
552
|
-
Sanitize.clean!('<div>foo</div><!--bar--><script>cdata!</script>', :transformers => proc {|env|
|
553
|
-
nodes << env[:node_name]
|
554
|
-
})
|
555
66
|
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
nodes = []
|
563
|
-
|
564
|
-
Sanitize.clean!('<div><span>foo</span></div><p>bar</p>', :transformers => proc {|env|
|
565
|
-
env[:traversal_mode].must_equal(:depth)
|
566
|
-
nodes << env[:node_name] if env[:node].element?
|
567
|
-
})
|
568
|
-
|
569
|
-
nodes.must_equal(['span', 'div', 'p'])
|
570
|
-
end
|
571
|
-
|
572
|
-
it 'should traverse in breadth-first mode when using :transformers_breadth' do
|
573
|
-
nodes = []
|
574
|
-
|
575
|
-
Sanitize.clean!('<div><span>foo</span></div><p>bar</p>', :transformers_breadth => proc {|env|
|
576
|
-
env[:traversal_mode].must_equal(:breadth)
|
577
|
-
nodes << env[:node_name] if env[:node].element?
|
578
|
-
})
|
579
|
-
|
580
|
-
nodes.must_equal(['div', 'span', 'p'])
|
581
|
-
end
|
582
|
-
|
583
|
-
it 'should whitelist nodes in the node whitelist' do
|
584
|
-
Sanitize.clean!('<div class="foo">foo</div><span>bar</span>', :transformers => [
|
585
|
-
proc {|env|
|
586
|
-
{:node_whitelist => [env[:node]]} if env[:node_name] == 'div'
|
587
|
-
},
|
588
|
-
|
589
|
-
proc {|env|
|
590
|
-
env[:is_whitelisted].must_equal(false) unless env[:node_name] == 'div'
|
591
|
-
env[:is_whitelisted].must_equal(true) if env[:node_name] == 'div'
|
592
|
-
env[:node_whitelist].must_include(env[:node]) if env[:node_name] == 'div'
|
593
|
-
}
|
594
|
-
]).must_equal('<div class="foo">foo</div>bar')
|
595
|
-
end
|
596
|
-
|
597
|
-
it 'should clear the node whitelist after each fragment' do
|
598
|
-
called = false
|
599
|
-
|
600
|
-
Sanitize.clean!('<div>foo</div>', :transformers => proc {|env|
|
601
|
-
{:node_whitelist => [env[:node]]}
|
602
|
-
})
|
603
|
-
|
604
|
-
Sanitize.clean!('<div>foo</div>', :transformers => proc {|env|
|
605
|
-
called = true
|
606
|
-
env[:is_whitelisted].must_equal(false)
|
607
|
-
env[:node_whitelist].must_be_empty
|
608
|
-
})
|
609
|
-
|
610
|
-
called.must_equal(true)
|
611
|
-
end
|
612
|
-
|
613
|
-
it 'should allow youtube video embeds via the youtube transformer' do
|
614
|
-
input = '<iframe width="420" height="315" src="http://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
|
615
|
-
output = Nokogiri::HTML::DocumentFragment.parse('<iframe width="420" height="315" src="http://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen>alert()</iframe>').to_html(:encoding => 'utf-8', :indent => 0)
|
616
|
-
|
617
|
-
Sanitize.clean!(input, :transformers => youtube).must_equal(output)
|
618
|
-
end
|
619
|
-
|
620
|
-
it 'should allow https youtube video embeds via the youtube transformer' do
|
621
|
-
input = '<iframe width="420" height="315" src="https://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
|
622
|
-
output = Nokogiri::HTML::DocumentFragment.parse('<iframe width="420" height="315" src="https://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen>alert()</iframe>').to_html(:encoding => 'utf-8', :indent => 0)
|
623
|
-
|
624
|
-
Sanitize.clean!(input, :transformers => youtube).must_equal(output)
|
625
|
-
end
|
626
|
-
|
627
|
-
it 'should allow privacy-enhanced youtube video embeds via the youtube transformer' do
|
628
|
-
input = '<iframe width="420" height="315" src="http://www.youtube-nocookie.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
|
629
|
-
output = Nokogiri::HTML::DocumentFragment.parse('<iframe width="420" height="315" src="http://www.youtube-nocookie.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen>alert()</iframe>').to_html(:encoding => 'utf-8', :indent => 0)
|
630
|
-
|
631
|
-
Sanitize.clean!(input, :transformers => youtube).must_equal(output)
|
632
|
-
end
|
633
|
-
|
634
|
-
it 'should not allow non-youtube video embeds via the youtube transformer' do
|
635
|
-
input = '<iframe width="420" height="315" src="http://www.fake-youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen></iframe>'
|
636
|
-
output = ''
|
637
|
-
|
638
|
-
Sanitize.clean!(input, :transformers => youtube).must_equal(output)
|
639
|
-
end
|
640
|
-
end
|
641
|
-
|
642
|
-
describe 'bugs' do
|
643
|
-
it 'should not have Nokogiri 1.4.2+ unterminated script/style element bug' do
|
644
|
-
Sanitize.clean!('foo <script>bar').must_equal('foo bar')
|
645
|
-
Sanitize.clean!('foo <style>bar').must_equal('foo bar')
|
646
|
-
end
|
647
|
-
end
|
648
|
-
|
649
|
-
describe 'Malicious HTML' do
|
650
|
-
make_my_diffs_pretty!
|
651
|
-
parallelize_me!
|
652
|
-
|
653
|
-
before do
|
654
|
-
@s = Sanitize.new(Sanitize::Config::RELAXED)
|
655
|
-
end
|
656
|
-
|
657
|
-
# libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
|
658
|
-
# attempt to preserve server-side includes. This can result in XSS since an
|
659
|
-
# unescaped double quote can allow an attacker to inject a non-whitelisted
|
660
|
-
# attribute. Sanitize works around this by implementing its own escaping for
|
661
|
-
# affected attributes.
|
662
|
-
#
|
663
|
-
# The relevant libxml2 code is here:
|
664
|
-
# <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
|
665
|
-
describe 'unsafe libxml2 server-side includes in attributes' do
|
666
|
-
tag_configs = [
|
667
|
-
{
|
668
|
-
tag_name: 'a',
|
669
|
-
escaped_attrs: %w[ action href src name ],
|
670
|
-
unescaped_attrs: []
|
671
|
-
},
|
672
|
-
|
673
|
-
{
|
674
|
-
tag_name: 'div',
|
675
|
-
escaped_attrs: %w[ action href src ],
|
676
|
-
unescaped_attrs: %w[ name ]
|
677
|
-
}
|
678
|
-
]
|
679
|
-
|
680
|
-
before do
|
681
|
-
@s = Sanitize.new({
|
682
|
-
elements: %w[ a div ],
|
683
|
-
|
684
|
-
attributes: {
|
685
|
-
all: %w[ action href src name ]
|
686
|
-
}
|
687
|
-
})
|
688
|
-
end
|
689
|
-
|
690
|
-
tag_configs.each do |tag_config|
|
691
|
-
tag_name = tag_config[:tag_name]
|
692
|
-
|
693
|
-
tag_config[:escaped_attrs].each do |attr_name|
|
694
|
-
input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
|
695
|
-
|
696
|
-
it 'should escape unsafe characters in attributes' do
|
697
|
-
@s.clean(input).must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
|
698
|
-
end
|
699
|
-
|
700
|
-
it 'should round-trip to the same output' do
|
701
|
-
output = @s.clean(input)
|
702
|
-
@s.clean(output).must_equal(output)
|
67
|
+
describe 'class methods' do
|
68
|
+
describe '.document' do
|
69
|
+
it 'should call #document' do
|
70
|
+
Sanitize.stub_instance(:document, proc {|html| html + ' called' }) do
|
71
|
+
Sanitize.document('<html>foo</html>')
|
72
|
+
.must_equal '<html>foo</html> called'
|
703
73
|
end
|
704
74
|
end
|
75
|
+
end
|
705
76
|
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
@s.clean(input).must_equal(input)
|
77
|
+
describe '.fragment' do
|
78
|
+
it 'should call #fragment' do
|
79
|
+
Sanitize.stub_instance(:fragment, proc {|html| html + ' called' }) do
|
80
|
+
Sanitize.fragment('<b>foo</b>').must_equal '<b>foo</b> called'
|
711
81
|
end
|
82
|
+
end
|
83
|
+
end
|
712
84
|
|
713
|
-
|
714
|
-
|
715
|
-
|
85
|
+
describe '.node!' do
|
86
|
+
it 'should call #node!' do
|
87
|
+
Sanitize.stub_instance(:node!, proc {|input| input + ' called' }) do
|
88
|
+
Sanitize.node!('not really a node').must_equal 'not really a node called'
|
716
89
|
end
|
717
90
|
end
|
718
91
|
end
|
719
92
|
end
|
720
93
|
end
|
721
|
-
|