sanitize 2.1.1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sanitize might be problematic. Click here for more details.

@@ -0,0 +1,65 @@
1
+ # encoding: utf-8
2
+ require_relative 'common'
3
+
4
+ describe 'Config' do
5
+ make_my_diffs_pretty!
6
+ parallelize_me!
7
+
8
+ def verify_deeply_frozen(config)
9
+ config.must_be :frozen?
10
+
11
+ if Hash === config
12
+ config.each_value {|v| verify_deeply_frozen(v) }
13
+ elsif Set === config || Array === config
14
+ config.each {|v| verify_deeply_frozen(v) }
15
+ end
16
+ end
17
+
18
+ it 'built-in configs should be deeply frozen' do
19
+ verify_deeply_frozen Sanitize::Config::DEFAULT
20
+ verify_deeply_frozen Sanitize::Config::BASIC
21
+ verify_deeply_frozen Sanitize::Config::RELAXED
22
+ verify_deeply_frozen Sanitize::Config::RESTRICTED
23
+ end
24
+
25
+ describe '.freeze_config' do
26
+ it 'should deeply freeze and return a configuration Hash' do
27
+ a = {:one => {:one_one => [0, '1', :a], :one_two => false, :one_three => Set.new([:a, :b, :c])}}
28
+ b = Sanitize::Config.freeze_config(a)
29
+
30
+ b.must_be_same_as a
31
+ verify_deeply_frozen a
32
+ end
33
+ end
34
+
35
+ describe '.merge' do
36
+ it 'should deeply merge a configuration Hash' do
37
+ # Freeze to ensure that we get an error if either Hash is modified.
38
+ a = Sanitize::Config.freeze_config({:one => {:one_one => [0, '1', :a], :one_two => false, :one_three => Set.new([:a, :b, :c])}})
39
+ b = Sanitize::Config.freeze_config({:one => {:one_two => true, :one_three => 3}, :two => 2})
40
+
41
+ c = Sanitize::Config.merge(a, b)
42
+
43
+ c.wont_be_same_as a
44
+ c.wont_be_same_as b
45
+
46
+ c.must_equal(
47
+ :one => {
48
+ :one_one => [0, '1', :a],
49
+ :one_two => true,
50
+ :one_three => 3
51
+ },
52
+
53
+ :two => 2
54
+ )
55
+
56
+ c[:one].wont_be_same_as a[:one]
57
+ c[:one][:one_one].wont_be_same_as a[:one][:one_one]
58
+ end
59
+
60
+ it 'should raise an ArgumentError if either argument is not a Hash' do
61
+ proc { Sanitize::Config.merge('foo', {}) }.must_raise ArgumentError
62
+ proc { Sanitize::Config.merge({}, 'foo') }.must_raise ArgumentError
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,42 @@
1
+ # encoding: utf-8
2
+ require_relative 'common'
3
+
4
+ # Miscellaneous attempts to sneak maliciously crafted CSS past Sanitize. Some of
5
+ # these are courtesy of (or inspired by) the OWASP XSS Filter Evasion Cheat
6
+ # Sheet.
7
+ #
8
+ # https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
9
+
10
+ describe 'Malicious CSS' do
11
+ make_my_diffs_pretty!
12
+ parallelize_me!
13
+
14
+ before do
15
+ @s = Sanitize::CSS.new(Sanitize::Config::RELAXED)
16
+ end
17
+
18
+ it 'should not be possible to inject an expression by munging it with a comment' do
19
+ @s.properties(%[width:expr/*XSS*/ession(alert('XSS'))]).
20
+ must_equal ''
21
+
22
+ @s.properties(%[width:ex/*XSS*//*/*/pression(alert("XSS"))]).
23
+ must_equal ''
24
+ end
25
+
26
+ it 'should not be possible to inject an expression by munging it with a newline' do
27
+ @s.properties(%[width:\nexpression(alert('XSS'));]).
28
+ must_equal ''
29
+ end
30
+
31
+ it 'should not allow the javascript protocol' do
32
+ @s.properties(%[background-image:url("javascript:alert('XSS')");]).
33
+ must_equal ''
34
+
35
+ Sanitize.fragment(%[<div style="background-image: url(&#1;javascript:alert('XSS'))">],
36
+ Sanitize::Config::RELAXED).must_equal '<div></div>'
37
+ end
38
+
39
+ it 'should not allow behaviors' do
40
+ @s.properties(%[behavior: url(xss.htc);]).must_equal ''
41
+ end
42
+ end
@@ -0,0 +1,128 @@
1
+ # encoding: utf-8
2
+ require_relative 'common'
3
+
4
+ # Miscellaneous attempts to sneak maliciously crafted HTML past Sanitize. Many
5
+ # of these are courtesy of (or inspired by) the OWASP XSS Filter Evasion Cheat
6
+ # Sheet.
7
+ #
8
+ # https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
9
+
10
+ describe 'Malicious HTML' do
11
+ make_my_diffs_pretty!
12
+ parallelize_me!
13
+
14
+ before do
15
+ @s = Sanitize.new(Sanitize::Config::RELAXED)
16
+ end
17
+
18
+ describe 'comments' do
19
+ it 'should not allow script injection via conditional comments' do
20
+ @s.fragment(%[<!--[if gte IE 4]>\n<script>alert('XSS');</script>\n<![endif]-->]).
21
+ must_equal ''
22
+ end
23
+ end
24
+
25
+ describe 'interpolation (ERB, PHP, etc.)' do
26
+ it 'should escape ERB-style tags' do
27
+ @s.fragment('<% naughty_ruby_code %>').
28
+ must_equal '&lt;% naughty_ruby_code %&gt;'
29
+
30
+ @s.fragment('<%= naughty_ruby_code %>').
31
+ must_equal '&lt;%= naughty_ruby_code %&gt;'
32
+ end
33
+
34
+ it 'should remove PHP-style tags' do
35
+ @s.fragment('<? naughtyPHPCode(); ?>').
36
+ must_equal ''
37
+
38
+ @s.fragment('<?= naughtyPHPCode(); ?>').
39
+ must_equal ''
40
+ end
41
+ end
42
+
43
+ describe '<body>' do
44
+ it 'should not be possible to inject JS via a malformed event attribute' do
45
+ @s.document('<html><head></head><body onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert("XSS")></body></html>').
46
+ must_equal "<html><head></head><body></body></html>\n"
47
+ end
48
+ end
49
+
50
+ describe '<iframe>' do
51
+ it 'should not be possible to inject an iframe using an improperly closed tag' do
52
+ @s.fragment(%[<iframe src=http://ha.ckers.org/scriptlet.html <]).
53
+ must_equal ''
54
+ end
55
+ end
56
+
57
+ describe '<img>' do
58
+ it 'should not be possible to inject JS via an unquoted <img> src attribute' do
59
+ @s.fragment("<img src=javascript:alert('XSS')>").must_equal '<img>'
60
+ end
61
+
62
+ it 'should not be possible to inject JS using grave accents as <img> src delimiters' do
63
+ @s.fragment("<img src=`javascript:alert('XSS')`>").must_equal '<img>'
64
+ end
65
+
66
+ it 'should not be possible to inject <script> via a malformed <img> tag' do
67
+ @s.fragment('<img """><script>alert("XSS")</script>">').
68
+ must_equal '<img>alert("XSS")"&gt;'
69
+ end
70
+
71
+ it 'should not be possible to inject protocol-based JS' do
72
+ @s.fragment('<img src=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>').
73
+ must_equal '<img>'
74
+
75
+ @s.fragment('<img src=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>').
76
+ must_equal '<img>'
77
+
78
+ @s.fragment('<img src=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>').
79
+ must_equal '<img>'
80
+
81
+ # Encoded tab character.
82
+ @s.fragment(%[<img src="jav&#x09;ascript:alert('XSS');">]).
83
+ must_equal '<img>'
84
+
85
+ # Encoded newline.
86
+ @s.fragment(%[<img src="jav&#x0A;ascript:alert('XSS');">]).
87
+ must_equal '<img>'
88
+
89
+ # Encoded carriage return.
90
+ @s.fragment(%[<img src="jav&#x0D;ascript:alert('XSS');">]).
91
+ must_equal '<img>'
92
+
93
+ # Null byte.
94
+ @s.fragment(%[<img src=java\0script:alert("XSS")>]).
95
+ must_equal '<img>'
96
+
97
+ # Spaces plus meta char.
98
+ @s.fragment(%[<img src=" &#14; javascript:alert('XSS');">]).
99
+ must_equal '<img>'
100
+
101
+ # Mixed spaces and tabs.
102
+ @s.fragment(%[<img src="j\na v\tascript://alert('XSS');">]).
103
+ must_equal '<img>'
104
+ end
105
+
106
+ it 'should not be possible to inject protocol-based JS via whitespace' do
107
+ @s.fragment(%[<img src="jav\tascript:alert('XSS');">]).
108
+ must_equal '<img>'
109
+ end
110
+
111
+ it 'should not be possible to inject JS using a half-open <img> tag' do
112
+ @s.fragment(%[<img src="javascript:alert('XSS')"]).
113
+ must_equal ''
114
+ end
115
+ end
116
+
117
+ describe '<script>' do
118
+ it 'should not be possible to inject <script> using a malformed non-alphanumeric tag name' do
119
+ @s.fragment(%[<script/xss src="http://ha.ckers.org/xss.js">alert(1)</script>]).
120
+ must_equal 'alert(1)'
121
+ end
122
+
123
+ it 'should not be possible to inject <script> via extraneous open brackets' do
124
+ @s.fragment(%[<<script>alert("XSS");//<</script>]).
125
+ must_equal '&lt;alert("XSS");//&lt;'
126
+ end
127
+ end
128
+ end
@@ -0,0 +1,104 @@
1
+ # encoding: utf-8
2
+ require_relative 'common'
3
+
4
+ describe 'Parser' do
5
+ make_my_diffs_pretty!
6
+ parallelize_me!
7
+
8
+ it 'should translate valid entities into characters' do
9
+ Sanitize.fragment("&apos;&eacute;&amp;").must_equal("'é&amp;")
10
+ end
11
+
12
+ it 'should translate orphaned ampersands into entities' do
13
+ Sanitize.fragment('at&t').must_equal('at&amp;t')
14
+ end
15
+
16
+ it 'should not add newlines after tags when serializing a fragment' do
17
+ Sanitize.fragment("<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>", :elements => ['div', 'p'])
18
+ .must_equal "<div>foo\n\n<p>bar</p><div>\nbaz</div></div><div>quux</div>"
19
+ end
20
+
21
+ it 'should not have the Nokogiri 1.4.2+ unterminated script/style element bug' do
22
+ Sanitize.fragment('foo <script>bar').must_equal 'foo bar'
23
+ Sanitize.fragment('foo <style>bar').must_equal 'foo bar'
24
+ end
25
+
26
+ it 'ambiguous non-tag brackets like "1 > 2 and 2 < 1" should be parsed correctly' do
27
+ Sanitize.fragment('1 > 2 and 2 < 1').must_equal '1 &gt; 2 and 2 &lt; 1'
28
+ Sanitize.fragment('OMG HAPPY BIRTHDAY! *<:-D').must_equal 'OMG HAPPY BIRTHDAY! *&lt;:-D'
29
+ end
30
+
31
+ # https://github.com/sparklemotion/nokogiri/issues/1008
32
+ it 'should work around the libxml2 content-type meta tag bug' do
33
+ Sanitize.document('<html><head></head><body>Howdy!</body></html>',
34
+ :elements => %w[html head body]
35
+ ).must_equal "<html><head></head><body>Howdy!</body></html>\n"
36
+
37
+ Sanitize.document('<html><head></head><body>Howdy!</body></html>',
38
+ :elements => %w[html head meta body]
39
+ ).must_equal "<html><head></head><body>Howdy!</body></html>\n"
40
+
41
+ Sanitize.document('<html><head><meta charset="utf-8"></head><body>Howdy!</body></html>',
42
+ :elements => %w[html head meta body],
43
+ :attributes => {'meta' => ['charset']}
44
+ ).must_equal "<html><head><meta charset=\"utf-8\"></head><body>Howdy!</body></html>\n"
45
+
46
+ Sanitize.document('<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>Howdy!</body></html>',
47
+ :elements => %w[html head meta body],
48
+ :attributes => {'meta' => %w[charset content http-equiv]}
49
+ ).must_equal "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"></head><body>Howdy!</body></html>\n"
50
+
51
+ # Edge case: an existing content-type meta tag with a non-UTF-8 content type
52
+ # will be converted to UTF-8, since that's the only output encoding we
53
+ # support.
54
+ Sanitize.document('<html><head><meta http-equiv="content-type" content="text/html;charset=us-ascii"></head><body>Howdy!</body></html>',
55
+ :elements => %w[html head meta body],
56
+ :attributes => {'meta' => %w[charset content http-equiv]}
57
+ ).must_equal "<html><head><meta http-equiv=\"content-type\" content=\"text/html; charset=utf-8\"></head><body>Howdy!</body></html>\n"
58
+ end
59
+
60
+ describe 'when siblings are added after a node during traversal' do
61
+ it 'the added siblings should be traversed' do
62
+ html = %[
63
+ <div id="one">
64
+ <div id="one_one">
65
+ <div id="one_one_one"></div>
66
+ </div>
67
+ <div id="one_two"></div>
68
+ </div>
69
+ <div id="two">
70
+ <div id="two_one"><div id="two_one_one"></div></div>
71
+ <div id="two_two"></div>
72
+ </div>
73
+ <div id="three"></div>
74
+ ]
75
+
76
+ siblings = []
77
+
78
+ Sanitize.fragment(html, :transformers => ->(env) {
79
+ name = env[:node].name
80
+
81
+ if name == 'div'
82
+ env[:node].add_next_sibling('<b id="added_' + env[:node]['id'] + '">')
83
+ elsif name == 'b'
84
+ siblings << env[:node][:id]
85
+ end
86
+
87
+ return {:node_whitelist => [env[:node]]}
88
+ })
89
+
90
+ # All siblings should be traversed, and in the order added.
91
+ siblings.must_equal [
92
+ "added_one_one_one",
93
+ "added_one_one",
94
+ "added_one_two",
95
+ "added_one",
96
+ "added_two_one_one",
97
+ "added_two_one",
98
+ "added_two_two",
99
+ "added_two",
100
+ "added_three"
101
+ ]
102
+ end
103
+ end
104
+ end
@@ -1,721 +1,93 @@
1
1
  # encoding: utf-8
2
- #--
3
- # Copyright (c) 2013 Ryan Grove <ryan@wonko.com>
4
- #
5
- # Permission is hereby granted, free of charge, to any person obtaining a copy
6
- # of this software and associated documentation files (the 'Software'), to deal
7
- # in the Software without restriction, including without limitation the rights
8
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- # copies of the Software, and to permit persons to whom the Software is
10
- # furnished to do so, subject to the following conditions:
11
- #
12
- # The above copyright notice and this permission notice shall be included in all
13
- # copies or substantial portions of the Software.
14
- #
15
- # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- # SOFTWARE.
22
- #++
2
+ require_relative 'common'
23
3
 
24
- require 'rubygems'
25
- gem 'minitest'
26
-
27
- require 'minitest/autorun'
28
- require 'sanitize'
29
-
30
- strings = {
31
- :basic => {
32
- :html => '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>',
33
- :default => 'Lorem ipsum dolor sit amet alert("hello world");',
34
- :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet alert("hello world");',
35
- :basic => '<b>Lorem</b> <a href="pants" rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet alert("hello world");',
36
- :relaxed => '<b>Lorem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet alert("hello world");'
37
- },
38
-
39
- :malformed => {
40
- :html => 'Lo<!-- comment -->rem</b> <a href=pants title="foo>ipsum <a href="http://foo.com/"><strong>dolor</a></strong> sit<br/>amet <script>alert("hello world");',
41
- :default => 'Lorem dolor sit amet alert("hello world");',
42
- :restricted => 'Lorem <strong>dolor</strong> sit amet alert("hello world");',
43
- :basic => 'Lorem <a href="pants" rel="nofollow"><strong>dolor</strong></a> sit<br>amet alert("hello world");',
44
- :relaxed => 'Lorem <a href="pants" title="foo&gt;ipsum &lt;a href="><strong>dolor</strong></a> sit<br>amet alert("hello world");',
45
- :document => ' Lorem dolor sit amet alert("hello world"); '
46
- },
47
-
48
- :unclosed => {
49
- :html => '<p>a</p><blockquote>b',
50
- :default => ' a b ',
51
- :restricted => ' a b ',
52
- :basic => '<p>a</p><blockquote>b</blockquote>',
53
- :relaxed => '<p>a</p><blockquote>b</blockquote>'
54
- },
55
-
56
- :malicious => {
57
- :html => '<b>Lo<!-- comment -->rem</b> <a href="javascript:pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert("hello world");</script>',
58
- :default => 'Lorem ipsum dolor sit amet &lt;script&gt;alert("hello world");',
59
- :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet &lt;script&gt;alert("hello world");',
60
- :basic => '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert("hello world");',
61
- :relaxed => '<b>Lorem</b> <a title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert("hello world");'
62
- },
63
-
64
- :raw_comment => {
65
- :html => '<!-- comment -->Hello',
66
- :default => 'Hello',
67
- :restricted => 'Hello',
68
- :basic => 'Hello',
69
- :relaxed => 'Hello',
70
- :document => ' Hello ',
71
- }
72
- }
73
-
74
- tricky = {
75
- 'protocol-based JS injection: simple, no spaces' => {
76
- :html => '<a href="javascript:alert(\'XSS\');">foo</a>',
77
- :default => 'foo',
78
- :restricted => 'foo',
79
- :basic => '<a rel="nofollow">foo</a>',
80
- :relaxed => '<a>foo</a>'
81
- },
82
-
83
- 'protocol-based JS injection: simple, spaces before' => {
84
- :html => '<a href="javascript :alert(\'XSS\');">foo</a>',
85
- :default => 'foo',
86
- :restricted => 'foo',
87
- :basic => '<a rel="nofollow">foo</a>',
88
- :relaxed => '<a>foo</a>'
89
- },
90
-
91
- 'protocol-based JS injection: simple, spaces after' => {
92
- :html => '<a href="javascript: alert(\'XSS\');">foo</a>',
93
- :default => 'foo',
94
- :restricted => 'foo',
95
- :basic => '<a rel="nofollow">foo</a>',
96
- :relaxed => '<a>foo</a>'
97
- },
98
-
99
- 'protocol-based JS injection: simple, spaces before and after' => {
100
- :html => '<a href="javascript : alert(\'XSS\');">foo</a>',
101
- :default => 'foo',
102
- :restricted => 'foo',
103
- :basic => '<a rel="nofollow">foo</a>',
104
- :relaxed => '<a>foo</a>'
105
- },
106
-
107
- 'protocol-based JS injection: preceding colon' => {
108
- :html => '<a href=":javascript:alert(\'XSS\');">foo</a>',
109
- :default => 'foo',
110
- :restricted => 'foo',
111
- :basic => '<a rel="nofollow">foo</a>',
112
- :relaxed => '<a>foo</a>'
113
- },
114
-
115
- 'protocol-based JS injection: UTF-8 encoding' => {
116
- :html => '<a href="javascript&#58;">foo</a>',
117
- :default => 'foo',
118
- :restricted => 'foo',
119
- :basic => '<a rel="nofollow">foo</a>',
120
- :relaxed => '<a>foo</a>'
121
- },
122
-
123
- 'protocol-based JS injection: long UTF-8 encoding' => {
124
- :html => '<a href="javascript&#0058;">foo</a>',
125
- :default => 'foo',
126
- :restricted => 'foo',
127
- :basic => '<a rel="nofollow">foo</a>',
128
- :relaxed => '<a>foo</a>'
129
- },
130
-
131
- 'protocol-based JS injection: long UTF-8 encoding without semicolons' => {
132
- :html => '<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>foo</a>',
133
- :default => 'foo',
134
- :restricted => 'foo',
135
- :basic => '<a rel="nofollow">foo</a>',
136
- :relaxed => '<a>foo</a>'
137
- },
138
-
139
- 'protocol-based JS injection: hex encoding' => {
140
- :html => '<a href="javascript&#x3A;">foo</a>',
141
- :default => 'foo',
142
- :restricted => 'foo',
143
- :basic => '<a rel="nofollow">foo</a>',
144
- :relaxed => '<a>foo</a>'
145
- },
146
-
147
- 'protocol-based JS injection: long hex encoding' => {
148
- :html => '<a href="javascript&#x003A;">foo</a>',
149
- :default => 'foo',
150
- :restricted => 'foo',
151
- :basic => '<a rel="nofollow">foo</a>',
152
- :relaxed => '<a>foo</a>'
153
- },
154
-
155
- 'protocol-based JS injection: hex encoding without semicolons' => {
156
- :html => '<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>foo</a>',
157
- :default => 'foo',
158
- :restricted => 'foo',
159
- :basic => '<a rel="nofollow">foo</a>',
160
- :relaxed => '<a>foo</a>'
161
- },
162
-
163
- 'protocol-based JS injection: null char' => {
164
- :html => "<img src=java\0script:alert(\"XSS\")>",
165
- :default => '',
166
- :restricted => '',
167
- :basic => '',
168
- :relaxed => '<img src="java">' # everything following the null char gets stripped, and URL is considered relative
169
- },
170
-
171
- 'protocol-based JS injection: invalid URL char' => {
172
- :html => '<img src=java\script:alert("XSS")>',
173
- :default => '',
174
- :restricted => '',
175
- :basic => '',
176
- :relaxed => '<img>'
177
- },
178
-
179
- 'protocol-based JS injection: spaces and entities' => {
180
- :html => '<img src=" &#14; javascript:alert(\'XSS\');">',
181
- :default => '',
182
- :restricted => '',
183
- :basic => '',
184
- :relaxed => '<img src>'
185
- }
186
- }
187
-
188
- describe 'Config::DEFAULT' do
189
- it 'should translate valid HTML entities' do
190
- Sanitize.clean("Don&apos;t tas&eacute; me &amp; bro!").must_equal("Don't tasé me &amp; bro!")
191
- end
192
-
193
- it 'should translate valid HTML entities while encoding unencoded ampersands' do
194
- Sanitize.clean("cookies&sup2; & &frac14; cr&eacute;me").must_equal("cookies² &amp; ¼ créme")
195
- end
196
-
197
- it 'should never output &apos;' do
198
- Sanitize.clean("<a href='&apos;' class=\"' &#39;\">IE6 isn't a real browser</a>").wont_match(/&apos;/)
199
- end
200
-
201
- it 'should not choke on several instances of the same element in a row' do
202
- Sanitize.clean('<img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif">').must_equal('')
203
- end
204
-
205
- it 'should surround the contents of :whitespace_elements with space characters when removing the element' do
206
- Sanitize.clean('foo<div>bar</div>baz').must_equal('foo bar baz')
207
- Sanitize.clean('foo<br>bar<br>baz').must_equal('foo bar baz')
208
- Sanitize.clean('foo<hr>bar<hr>baz').must_equal('foo bar baz')
209
- end
210
-
211
- strings.each do |name, data|
212
- it "should clean #{name} HTML" do
213
- Sanitize.clean(data[:html]).must_equal(data[:default])
214
- end
215
- end
216
-
217
- tricky.each do |name, data|
218
- it "should not allow #{name}" do
219
- Sanitize.clean(data[:html]).must_equal(data[:default])
220
- end
221
- end
222
- end
223
-
224
- describe 'Config::RESTRICTED' do
225
- before { @s = Sanitize.new(Sanitize::Config::RESTRICTED) }
226
-
227
- strings.each do |name, data|
228
- it "should clean #{name} HTML" do
229
- @s.clean(data[:html]).must_equal(data[:restricted])
230
- end
231
- end
232
-
233
- tricky.each do |name, data|
234
- it "should not allow #{name}" do
235
- @s.clean(data[:html]).must_equal(data[:restricted])
4
+ describe 'Sanitize' do
5
+ describe 'instance methods' do
6
+ before do
7
+ @s = Sanitize.new
236
8
  end
237
- end
238
- end
239
-
240
- describe 'Config::BASIC' do
241
- before { @s = Sanitize.new(Sanitize::Config::BASIC) }
242
9
 
243
- it 'should not choke on valueless attributes' do
244
- @s.clean('foo <a href>foo</a> bar').must_equal('foo <a href rel="nofollow">foo</a> bar')
245
- end
246
-
247
- it 'should downcase attribute names' do
248
- @s.clean('<a HREF="javascript:alert(\'foo\')">bar</a>').must_equal('<a rel="nofollow">bar</a>')
249
- end
10
+ describe '#document' do
11
+ before do
12
+ @s = Sanitize.new(:elements => ['html'])
13
+ end
250
14
 
251
- strings.each do |name, data|
252
- it "should clean #{name} HTML" do
253
- @s.clean(data[:html]).must_equal(data[:basic])
254
- end
255
- end
15
+ it 'should sanitize an HTML document' do
16
+ @s.document('<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>')
17
+ .must_equal "<html>Lorem ipsum dolor sit amet alert(\"hello world\");</html>\n"
18
+ end
256
19
 
257
- tricky.each do |name, data|
258
- it "should not allow #{name}" do
259
- @s.clean(data[:html]).must_equal(data[:basic])
20
+ it 'should not modify the input string' do
21
+ input = '<!DOCTYPE html><b>foo</b>'
22
+ @s.document(input)
23
+ input.must_equal('<!DOCTYPE html><b>foo</b>')
24
+ end
260
25
  end
261
- end
262
- end
263
-
264
- describe 'Config::RELAXED' do
265
- before { @s = Sanitize.new(Sanitize::Config::RELAXED) }
266
26
 
267
- it 'should encode special chars in attribute values' do
268
- input = '<a href="http://example.com" title="<b>&eacute;xamples</b> & things">foo</a>'
269
- output = Nokogiri::HTML.fragment('<a href="http://example.com" title="&lt;b&gt;éxamples&lt;/b&gt; &amp; things">foo</a>').to_xhtml(:encoding => 'utf-8', :indent => 0, :save_with => Nokogiri::XML::Node::SaveOptions::AS_XHTML)
270
- @s.clean(input).must_equal(output)
271
- end
27
+ describe '#fragment' do
28
+ it 'should sanitize an HTML fragment' do
29
+ @s.fragment('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')
30
+ .must_equal 'Lorem ipsum dolor sit amet alert("hello world");'
31
+ end
272
32
 
273
- strings.each do |name, data|
274
- it "should clean #{name} HTML" do
275
- @s.clean(data[:html]).must_equal(data[:relaxed])
276
- end
277
- end
33
+ it 'should not modify the input string' do
34
+ input = '<b>foo</b>'
35
+ @s.fragment(input)
36
+ input.must_equal '<b>foo</b>'
37
+ end
278
38
 
279
- tricky.each do |name, data|
280
- it "should not allow #{name}" do
281
- @s.clean(data[:html]).must_equal(data[:relaxed])
39
+ it 'should not choke on fragments containing <html> or <body>' do
40
+ @s.fragment('<html><b>foo</b></html>').must_equal 'foo'
41
+ @s.fragment('<body><b>foo</b></body>').must_equal 'foo'
42
+ @s.fragment('<html><body><b>foo</b></body></html>').must_equal 'foo'
43
+ @s.fragment('<!DOCTYPE html><html><body><b>foo</b></body></html>').must_equal 'foo'
44
+ end
282
45
  end
283
- end
284
- end
285
-
286
- describe 'Full Document parser (using clean_document)' do
287
- before {
288
- @s = Sanitize.new({:elements => %w[!DOCTYPE html]})
289
- @default_doctype = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">"
290
- }
291
46
 
292
- it 'should require HTML element is whitelisted to prevent parser errors' do
293
- assert_raises(RuntimeError, 'You must have the HTML element whitelisted') {
294
- Sanitize.clean_document!('', {:elements => [], :remove_contents => false})
295
- }
296
- end
297
-
298
- it 'should NOT require HTML element to be whitelisted if remove_contents is true' do
299
- output = '<!DOCTYPE html><html>foo</html>'
300
- Sanitize.clean_document!(output, {:remove_contents => true}).must_equal "<!DOCTYPE html>\n\n"
301
- end
47
+ describe '#node!' do
48
+ it 'should sanitize a Nokogiri::XML::Node' do
49
+ doc = Nokogiri::HTML5.parse('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')
50
+ frag = doc.fragment
302
51
 
303
- it 'adds a doctype tag if not included' do
304
- @s.clean_document('').must_equal("#{@default_doctype}\n\n")
305
- end
52
+ doc.xpath('/html/body/node()').each {|node| frag << node }
306
53
 
307
- it 'should apply whitelist filtering to HTML element' do
308
- output = "<!DOCTYPE html>\n<html anything='false'></html>\n\n"
309
- @s.clean_document(output).must_equal("<!DOCTYPE html>\n<html></html>\n")
310
- end
311
-
312
- strings.each do |name, data|
313
- it "should wrap #{name} with DOCTYPE and HTML tag" do
314
- output = data[:document] || data[:default]
315
- @s.clean_document(data[:html]).must_equal("#{@default_doctype}\n<html>#{output}</html>\n")
316
- end
317
- end
54
+ @s.node!(frag)
55
+ frag.to_html.must_equal 'Lorem ipsum dolor sit amet alert("hello world");'
56
+ end
318
57
 
319
- tricky.each do |name, data|
320
- it "should wrap #{name} with DOCTYPE and HTML tag" do
321
- @s.clean_document(data[:html]).must_equal("#{@default_doctype}\n<html>#{data[:default]}</html>\n")
58
+ describe "when the given node is a document and <html> isn't whitelisted" do
59
+ it 'should raise a Sanitize::Error' do
60
+ doc = Nokogiri::HTML5.parse('foo')
61
+ proc { @s.node!(doc) }.must_raise Sanitize::Error
62
+ end
63
+ end
322
64
  end
323
65
  end
324
- end
325
-
326
- describe 'Custom configs' do
327
- it 'should allow attributes on all elements if whitelisted under :all' do
328
- input = '<p class="foo">bar</p>'
329
-
330
- Sanitize.clean(input).must_equal(' bar ')
331
- Sanitize.clean(input, {:elements => ['p'], :attributes => {:all => ['class']}}).must_equal(input)
332
- Sanitize.clean(input, {:elements => ['p'], :attributes => {'div' => ['class']}}).must_equal('<p>bar</p>')
333
- Sanitize.clean(input, {:elements => ['p'], :attributes => {'p' => ['title'], :all => ['class']}}).must_equal(input)
334
- end
335
-
336
- it 'should allow comments when :allow_comments == true' do
337
- input = 'foo <!-- bar --> baz'
338
- Sanitize.clean(input).must_equal('foo baz')
339
- Sanitize.clean(input, :allow_comments => true).must_equal(input)
340
- end
341
-
342
- it 'should allow relative URLs containing colons where the colon is not in the first path segment' do
343
- input = '<a href="/wiki/Special:Random">Random Page</a>'
344
- Sanitize.clean(input, { :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => { 'a' => { 'href' => [:relative] }} }).must_equal(input)
345
- end
346
-
347
- it 'should allow relative URLs containing colons where the colon is part of an anchor' do
348
- input = '<a href="#fn:1">Footnote 1</a>'
349
- Sanitize.clean(input, { :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => { 'a' => { 'href' => [:relative] }} }).must_equal(input)
350
- end
351
-
352
- it 'should allow relative URLs containing colons where the colon is part of an anchor' do
353
- input = '<a href="somepage#fn:1">Footnote 1</a>'
354
- Sanitize.clean(input, { :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => { 'a' => { 'href' => [:relative] }} }).must_equal(input)
355
- end
356
-
357
- it 'should output HTML when :output == :html' do
358
- input = 'foo<br/>bar<br>baz'
359
- Sanitize.clean(input, :elements => ['br'], :output => :html).must_equal('foo<br>bar<br>baz')
360
- end
361
-
362
- it 'should remove the contents of filtered nodes when :remove_contents == true' do
363
- Sanitize.clean('foo bar <div>baz<span>quux</span></div>', :remove_contents => true).must_equal('foo bar ')
364
- end
365
-
366
- it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as strings' do
367
- Sanitize.clean('foo bar <div>baz<span>quux</span><script>alert("hello!");</script></div>', :remove_contents => ['script', 'span']).must_equal('foo bar baz ')
368
- end
369
-
370
- it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as symbols' do
371
- Sanitize.clean('foo bar <div>baz<span>quux</span><script>alert("hello!");</script></div>', :remove_contents => [:script, :span]).must_equal('foo bar baz ')
372
- end
373
-
374
- it 'should support encodings other than utf-8' do
375
- html = 'foo&nbsp;bar'
376
- Sanitize.clean(html).must_equal("foo\302\240bar")
377
- Sanitize.clean(html, :output_encoding => 'ASCII').must_equal("foo&#160;bar")
378
- end
379
-
380
- it 'should not allow arbitrary HTML5 data attributes by default' do
381
- config = {
382
- :elements => ['b']
383
- }
384
-
385
- Sanitize.clean('<b data-foo="bar"></b>', config)
386
- .must_equal('<b></b>')
387
-
388
- config[:attributes] = {'b' => ['class']}
389
-
390
- Sanitize.clean('<b class="foo" data-foo="bar"></b>', config)
391
- .must_equal('<b class="foo"></b>')
392
- end
393
-
394
- it 'should allow arbitrary HTML5 data attributes when the :attributes config includes :data' do
395
- config = {
396
- :attributes => {'b' => [:data]},
397
- :elements => ['b']
398
- }
399
-
400
- Sanitize.clean('<b data-foo="valid" data-bar="valid"></b>', config)
401
- .must_equal('<b data-foo="valid" data-bar="valid"></b>')
402
-
403
- Sanitize.clean('<b data-="invalid"></b>', config)
404
- .must_equal('<b></b>')
405
-
406
- Sanitize.clean('<b data-="invalid"></b>', config)
407
- .must_equal('<b></b>')
408
-
409
- Sanitize.clean('<b data-xml="invalid"></b>', config)
410
- .must_equal('<b></b>')
411
-
412
- Sanitize.clean('<b data-xmlfoo="invalid"></b>', config)
413
- .must_equal('<b></b>')
414
-
415
- Sanitize.clean('<b data-f:oo="valid"></b>', config)
416
- .must_equal('<b></b>')
417
-
418
- Sanitize.clean('<b data-f/oo="partial"></b>', config)
419
- .must_equal('<b data-f></b>') # Nokogiri quirk; not ideal, but harmless
420
-
421
- Sanitize.clean('<b data-éfoo="valid"></b>', config)
422
- .must_equal('<b></b>') # Another annoying Nokogiri quirk.
423
- end
424
- end
425
-
426
- describe 'Sanitize.clean' do
427
- it 'should not modify the input string' do
428
- input = '<b>foo</b>'
429
- Sanitize.clean(input)
430
- input.must_equal('<b>foo</b>')
431
- end
432
-
433
- it 'should return a new string' do
434
- input = '<b>foo</b>'
435
- Sanitize.clean(input).must_equal('foo')
436
- end
437
- end
438
-
439
- describe 'Sanitize.clean!' do
440
- it 'should modify the input string' do
441
- input = '<b>foo</b>'
442
- Sanitize.clean!(input)
443
- input.must_equal('foo')
444
- end
445
-
446
- it 'should return the string if it was modified' do
447
- input = '<b>foo</b>'
448
- Sanitize.clean!(input).must_equal('foo')
449
- end
450
-
451
- it 'should return nil if the string was not modified' do
452
- input = 'foo'
453
- Sanitize.clean!(input).must_equal(nil)
454
- end
455
- end
456
-
457
- describe 'Sanitize.clean_document' do
458
- before { @config = { :elements => ['html', 'p'] } }
459
-
460
- it 'should be idempotent' do
461
- input = '<!DOCTYPE html><html><p>foo</p></html>'
462
- first = Sanitize.clean_document(input, @config)
463
- second = Sanitize.clean_document(first, @config)
464
- second.must_equal first
465
- second.wont_be_nil
466
- end
467
-
468
- it 'should handle nil without raising' do
469
- Sanitize.clean_document(nil).must_equal nil
470
- end
471
-
472
- it 'should not modify the input string' do
473
- input = '<!DOCTYPE html><b>foo</b>'
474
- Sanitize.clean_document(input, @config)
475
- input.must_equal('<!DOCTYPE html><b>foo</b>')
476
- end
477
-
478
- it 'should return a new string' do
479
- input = '<!DOCTYPE html><b>foo</b>'
480
- Sanitize.clean_document(input, @config).must_equal("<!DOCTYPE html>\n<html>foo</html>\n")
481
- end
482
- end
483
-
484
- describe 'Sanitize.clean_document!' do
485
- before { @config = { :elements => ['html'] } }
486
-
487
- it 'should modify the input string' do
488
- input = '<!DOCTYPE html><html><body><b>foo</b></body></html>'
489
- Sanitize.clean_document!(input, @config)
490
- input.must_equal("<!DOCTYPE html>\n<html>foo</html>\n")
491
- end
492
-
493
- it 'should return the string if it was modified' do
494
- input = '<!DOCTYPE html><html><body><b>foo</b></body></html>'
495
- Sanitize.clean_document!(input, @config).must_equal("<!DOCTYPE html>\n<html>foo</html>\n")
496
- end
497
-
498
- it 'should return nil if the string was not modified' do
499
- input = "<!DOCTYPE html>\n<html></html>\n"
500
- Sanitize.clean_document!(input, @config).must_equal(nil)
501
- end
502
- end
503
-
504
- describe 'transformers' do
505
- # YouTube embed transformer.
506
- youtube = lambda do |env|
507
- node = env[:node]
508
- node_name = env[:node_name]
509
-
510
- # Don't continue if this node is already whitelisted or is not an element.
511
- return if env[:is_whitelisted] || !node.element?
512
-
513
- # Don't continue unless the node is an iframe.
514
- return unless node_name == 'iframe'
515
-
516
- # Verify that the video URL is actually a valid YouTube video URL.
517
- return unless node['src'] =~ /\Ahttps?:\/\/(?:www\.)?youtube(?:-nocookie)?\.com\//
518
-
519
- # We're now certain that this is a YouTube embed, but we still need to run
520
- # it through a special Sanitize step to ensure that no unwanted elements or
521
- # attributes that don't belong in a YouTube embed can sneak in.
522
- Sanitize.clean_node!(node, {
523
- :elements => %w[iframe],
524
-
525
- :attributes => {
526
- 'iframe' => %w[allowfullscreen frameborder height src width]
527
- }
528
- })
529
-
530
- # Now that we're sure that this is a valid YouTube embed and that there are
531
- # no unwanted elements or attributes hidden inside it, we can tell Sanitize
532
- # to whitelist the current node.
533
- {:node_whitelist => [node]}
534
- end
535
-
536
- it 'should receive a complete env Hash as input' do
537
- Sanitize.clean!('<SPAN>foo</SPAN>', :foo => :bar, :transformers => lambda {|env|
538
- return unless env[:node].element?
539
-
540
- env[:config][:foo].must_equal(:bar)
541
- env[:is_whitelisted].must_equal(false)
542
- env[:node].must_be_kind_of(Nokogiri::XML::Node)
543
- env[:node_name].must_equal('span')
544
- env[:node_whitelist].must_be_kind_of(Set)
545
- env[:node_whitelist].must_be_empty
546
- })
547
- end
548
-
549
- it 'should traverse all node types, including the fragment itself' do
550
- nodes = []
551
-
552
- Sanitize.clean!('<div>foo</div><!--bar--><script>cdata!</script>', :transformers => proc {|env|
553
- nodes << env[:node_name]
554
- })
555
66
 
556
- nodes.must_equal(%w[
557
- text div comment #cdata-section script #document-fragment
558
- ])
559
- end
560
-
561
- it 'should traverse in depth-first mode by default' do
562
- nodes = []
563
-
564
- Sanitize.clean!('<div><span>foo</span></div><p>bar</p>', :transformers => proc {|env|
565
- env[:traversal_mode].must_equal(:depth)
566
- nodes << env[:node_name] if env[:node].element?
567
- })
568
-
569
- nodes.must_equal(['span', 'div', 'p'])
570
- end
571
-
572
- it 'should traverse in breadth-first mode when using :transformers_breadth' do
573
- nodes = []
574
-
575
- Sanitize.clean!('<div><span>foo</span></div><p>bar</p>', :transformers_breadth => proc {|env|
576
- env[:traversal_mode].must_equal(:breadth)
577
- nodes << env[:node_name] if env[:node].element?
578
- })
579
-
580
- nodes.must_equal(['div', 'span', 'p'])
581
- end
582
-
583
- it 'should whitelist nodes in the node whitelist' do
584
- Sanitize.clean!('<div class="foo">foo</div><span>bar</span>', :transformers => [
585
- proc {|env|
586
- {:node_whitelist => [env[:node]]} if env[:node_name] == 'div'
587
- },
588
-
589
- proc {|env|
590
- env[:is_whitelisted].must_equal(false) unless env[:node_name] == 'div'
591
- env[:is_whitelisted].must_equal(true) if env[:node_name] == 'div'
592
- env[:node_whitelist].must_include(env[:node]) if env[:node_name] == 'div'
593
- }
594
- ]).must_equal('<div class="foo">foo</div>bar')
595
- end
596
-
597
- it 'should clear the node whitelist after each fragment' do
598
- called = false
599
-
600
- Sanitize.clean!('<div>foo</div>', :transformers => proc {|env|
601
- {:node_whitelist => [env[:node]]}
602
- })
603
-
604
- Sanitize.clean!('<div>foo</div>', :transformers => proc {|env|
605
- called = true
606
- env[:is_whitelisted].must_equal(false)
607
- env[:node_whitelist].must_be_empty
608
- })
609
-
610
- called.must_equal(true)
611
- end
612
-
613
- it 'should allow youtube video embeds via the youtube transformer' do
614
- input = '<iframe width="420" height="315" src="http://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
615
- output = Nokogiri::HTML::DocumentFragment.parse('<iframe width="420" height="315" src="http://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen>alert()</iframe>').to_html(:encoding => 'utf-8', :indent => 0)
616
-
617
- Sanitize.clean!(input, :transformers => youtube).must_equal(output)
618
- end
619
-
620
- it 'should allow https youtube video embeds via the youtube transformer' do
621
- input = '<iframe width="420" height="315" src="https://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
622
- output = Nokogiri::HTML::DocumentFragment.parse('<iframe width="420" height="315" src="https://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen>alert()</iframe>').to_html(:encoding => 'utf-8', :indent => 0)
623
-
624
- Sanitize.clean!(input, :transformers => youtube).must_equal(output)
625
- end
626
-
627
- it 'should allow privacy-enhanced youtube video embeds via the youtube transformer' do
628
- input = '<iframe width="420" height="315" src="http://www.youtube-nocookie.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
629
- output = Nokogiri::HTML::DocumentFragment.parse('<iframe width="420" height="315" src="http://www.youtube-nocookie.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen>alert()</iframe>').to_html(:encoding => 'utf-8', :indent => 0)
630
-
631
- Sanitize.clean!(input, :transformers => youtube).must_equal(output)
632
- end
633
-
634
- it 'should not allow non-youtube video embeds via the youtube transformer' do
635
- input = '<iframe width="420" height="315" src="http://www.fake-youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen></iframe>'
636
- output = ''
637
-
638
- Sanitize.clean!(input, :transformers => youtube).must_equal(output)
639
- end
640
- end
641
-
642
- describe 'bugs' do
643
- it 'should not have Nokogiri 1.4.2+ unterminated script/style element bug' do
644
- Sanitize.clean!('foo <script>bar').must_equal('foo bar')
645
- Sanitize.clean!('foo <style>bar').must_equal('foo bar')
646
- end
647
- end
648
-
649
- describe 'Malicious HTML' do
650
- make_my_diffs_pretty!
651
- parallelize_me!
652
-
653
- before do
654
- @s = Sanitize.new(Sanitize::Config::RELAXED)
655
- end
656
-
657
- # libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
658
- # attempt to preserve server-side includes. This can result in XSS since an
659
- # unescaped double quote can allow an attacker to inject a non-whitelisted
660
- # attribute. Sanitize works around this by implementing its own escaping for
661
- # affected attributes.
662
- #
663
- # The relevant libxml2 code is here:
664
- # <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
665
- describe 'unsafe libxml2 server-side includes in attributes' do
666
- tag_configs = [
667
- {
668
- tag_name: 'a',
669
- escaped_attrs: %w[ action href src name ],
670
- unescaped_attrs: []
671
- },
672
-
673
- {
674
- tag_name: 'div',
675
- escaped_attrs: %w[ action href src ],
676
- unescaped_attrs: %w[ name ]
677
- }
678
- ]
679
-
680
- before do
681
- @s = Sanitize.new({
682
- elements: %w[ a div ],
683
-
684
- attributes: {
685
- all: %w[ action href src name ]
686
- }
687
- })
688
- end
689
-
690
- tag_configs.each do |tag_config|
691
- tag_name = tag_config[:tag_name]
692
-
693
- tag_config[:escaped_attrs].each do |attr_name|
694
- input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
695
-
696
- it 'should escape unsafe characters in attributes' do
697
- @s.clean(input).must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
698
- end
699
-
700
- it 'should round-trip to the same output' do
701
- output = @s.clean(input)
702
- @s.clean(output).must_equal(output)
67
+ describe 'class methods' do
68
+ describe '.document' do
69
+ it 'should call #document' do
70
+ Sanitize.stub_instance(:document, proc {|html| html + ' called' }) do
71
+ Sanitize.document('<html>foo</html>')
72
+ .must_equal '<html>foo</html> called'
703
73
  end
704
74
  end
75
+ end
705
76
 
706
- tag_config[:unescaped_attrs].each do |attr_name|
707
- input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
708
-
709
- it 'should not escape characters unnecessarily' do
710
- @s.clean(input).must_equal(input)
77
+ describe '.fragment' do
78
+ it 'should call #fragment' do
79
+ Sanitize.stub_instance(:fragment, proc {|html| html + ' called' }) do
80
+ Sanitize.fragment('<b>foo</b>').must_equal '<b>foo</b> called'
711
81
  end
82
+ end
83
+ end
712
84
 
713
- it 'should round-trip to the same output' do
714
- output = @s.clean(input)
715
- @s.clean(output).must_equal(output)
85
+ describe '.node!' do
86
+ it 'should call #node!' do
87
+ Sanitize.stub_instance(:node!, proc {|input| input + ' called' }) do
88
+ Sanitize.node!('not really a node').must_equal 'not really a node called'
716
89
  end
717
90
  end
718
91
  end
719
92
  end
720
93
  end
721
-