sanitize 2.1.1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of sanitize might be problematic. Click here for more details.

@@ -1,3 +1,5 @@
1
+ # encoding: utf-8
2
+
1
3
  class Sanitize
2
- VERSION = '2.1.1'
4
+ VERSION = '3.0.0'
3
5
  end
data/test/common.rb ADDED
@@ -0,0 +1,34 @@
1
+ # encoding: utf-8
2
+ gem 'minitest'
3
+ require 'minitest/autorun'
4
+
5
+ require_relative '../lib/sanitize'
6
+
7
# Helper to stub an instance method. Adapted from
# https://github.com/codeodor/minitest-stub_any_instance/
class Object
  # Temporarily replaces the instance method +name+ on the receiving class
  # while the given block runs, then puts the class back the way it was.
  #
  # The stub is installed on the class itself, so every instance of the class
  # sees it until the block exits.
  #
  # name  - Symbol or String naming an existing instance method.
  # value - The stubbed result. When it responds to +call+ it is invoked with
  #         the arguments (and block, if any) given to the stubbed method and
  #         its result is returned; otherwise +value+ itself is returned.
  #
  # Raises NameError if +name+ is not an instance method of the class; in that
  # case nothing is modified.
  # Returns whatever the block returns.
  def self.stub_instance(name, value, &block)
    old_method = "__stubbed_method_#{name}__"
    installed  = false

    # Look the method up first: this fails fast with a single NameError when
    # the method is missing, and tells us whether the class owns the method or
    # merely inherits it (which decides how it has to be restored).
    inherited = instance_method(name).owner != self

    alias_method old_method, name

    define_method(name) do |*args, &blk|
      value.respond_to?(:call) ? value.call(*args, &blk) : value
    end

    installed = true

    yield
  ensure
    # Only undo what was actually done; otherwise a failure above would be
    # masked by a second error raised from the cleanup.
    if installed
      remove_method name

      # An inherited method becomes visible again once the stub is removed.
      # Re-aliasing it would leave a private copy on this class that shadows
      # the superclass from then on, so only restore methods the class owned.
      alias_method name, old_method unless inherited

      remove_method old_method
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # encoding: utf-8
2
+ require_relative 'common'
3
+
4
+ describe 'Sanitize::Transformers::CleanComment' do
5
+ make_my_diffs_pretty!
6
+ parallelize_me!
7
+
8
+ describe 'when :allow_comments is false' do
9
+ before do
10
+ @s = Sanitize.new(:allow_comments => false, :elements => ['div'])
11
+ end
12
+
13
+ it 'should remove comments' do
14
+ @s.fragment('foo <!-- comment --> bar').must_equal 'foo bar'
15
+ @s.fragment('foo <!-- ').must_equal 'foo '
16
+ @s.fragment('foo <!-- - -> bar').must_equal 'foo '
17
+ @s.fragment("foo <!--\n\n\n\n-->bar").must_equal 'foo bar'
18
+ @s.fragment("foo <!-- <!-- <!-- --> --> -->bar").must_equal 'foo --&gt; --&gt;bar'
19
+ @s.fragment("foo <div <!-- comment -->>bar</div>").must_equal 'foo <div>&gt;bar</div>'
20
+
21
+ # Special case: the comment markup is inside a <script>, which makes it
22
+ # text content and not an actual HTML comment.
23
+ @s.fragment("<script><!-- comment --></script>").must_equal '&lt;!-- comment --&gt;'
24
+
25
+ Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => false, :elements => ['script'])
26
+ .must_equal '<script><!-- comment --></script>'
27
+ end
28
+ end
29
+
30
+ describe 'when :allow_comments is true' do
31
+ before do
32
+ @s = Sanitize.new(:allow_comments => true, :elements => ['div'])
33
+ end
34
+
35
+ it 'should allow comments' do
36
+ @s.fragment('foo <!-- comment --> bar').must_equal 'foo <!-- comment --> bar'
37
+ @s.fragment('foo <!-- ').must_equal 'foo <!-- -->'
38
+ @s.fragment('foo <!-- - -> bar').must_equal 'foo <!-- - -> bar-->'
39
+ @s.fragment("foo <!--\n\n\n\n-->bar").must_equal "foo <!--\n\n\n\n-->bar"
40
+ @s.fragment("foo <!-- <!-- <!-- --> --> -->bar").must_equal 'foo <!-- <!-- <!-- --> --&gt; --&gt;bar'
41
+ @s.fragment("foo <div <!-- comment -->>bar</div>").must_equal 'foo <div>&gt;bar</div>'
42
+
43
+ # Special case: the comment markup is inside a <script>, which makes it
44
+ # text content and not an actual HTML comment.
45
+ @s.fragment("<script><!-- comment --></script>").must_equal '&lt;!-- comment --&gt;'
46
+
47
+ Sanitize.fragment("<script><!-- comment --></script>", :allow_comments => true, :elements => ['script'])
48
+ .must_equal '<script><!-- comment --></script>'
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,66 @@
1
+ # encoding: utf-8
2
+ require_relative 'common'
3
+
4
+ describe 'Sanitize::Transformers::CSS::CleanAttribute' do
5
+ make_my_diffs_pretty!
6
+ parallelize_me!
7
+
8
+ before do
9
+ @s = Sanitize.new(Sanitize::Config::RELAXED)
10
+ end
11
+
12
+ it 'should sanitize CSS properties in style attributes' do
13
+ @s.fragment(%[
14
+ <div style="color: #fff; width: expression(alert(1)); /* <-- evil! */"></div>
15
+ ].strip).must_equal %[
16
+ <div style="color: #fff; /* &lt;-- evil! */"></div>
17
+ ].strip
18
+ end
19
+
20
+ it 'should remove the style attribute if the sanitized CSS is empty' do
21
+ @s.fragment('<div style="width: expression(alert(1))"></div>').
22
+ must_equal '<div></div>'
23
+ end
24
+ end
25
+
26
+ describe 'Sanitize::Transformers::CSS::CleanElement' do
27
+ make_my_diffs_pretty!
28
+ parallelize_me!
29
+
30
+ before do
31
+ @s = Sanitize.new(Sanitize::Config::RELAXED)
32
+ end
33
+
34
+ it 'should sanitize CSS stylesheets in <style> elements' do
35
+ html = %[
36
+ <style>@import url(evil.css);
37
+ /* Yay CSS! */
38
+ .foo { color: #fff; }
39
+ #bar { background: url(yay.jpg); bogus: wtf; }
40
+ .evil { width: expression(xss()); }
41
+
42
+ @media screen (max-width:480px) {
43
+ .foo { width: 400px; }
44
+ #bar:not(.baz) { height: 100px; }
45
+ }
46
+ </style>
47
+ ].strip
48
+
49
+ @s.fragment(html).must_equal %[
50
+ <style>
51
+ /* Yay CSS! */
52
+ .foo { color: #fff; }
53
+ #bar { background: url(yay.jpg); }
54
+ .evil { }
55
+
56
+ @media screen (max-width:480px) {
57
+ .foo { width: 400px; }
58
+ #bar:not(.baz) { height: 100px; }
59
+ }
60
+ </style>
61
+ ].strip
62
+ end
63
+
64
+ it 'should remove the <style> element if the sanitized CSS is empty' do
65
+ end
66
+ end
@@ -0,0 +1,71 @@
1
+ # encoding: utf-8
2
+ require_relative 'common'
3
+
4
+ describe 'Sanitize::Transformers::CleanDoctype' do
5
+ make_my_diffs_pretty!
6
+ parallelize_me!
7
+
8
+ describe 'when :allow_doctype is false' do
9
+ before do
10
+ @s = Sanitize.new(:allow_doctype => false, :elements => ['html'])
11
+ end
12
+
13
+ it 'should remove doctype declarations' do
14
+ @s.document('<!DOCTYPE html><html>foo</html>').must_equal "<html>foo</html>\n"
15
+ @s.fragment('<!DOCTYPE html>foo').must_equal 'foo'
16
+ end
17
+
18
+ it 'should not allow doctype definitions in fragments' do
19
+ @s.fragment('<!DOCTYPE html><html>foo</html>')
20
+ .must_equal "foo"
21
+
22
+ @s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
23
+ .must_equal "foo"
24
+
25
+ @s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
26
+ .must_equal "foo"
27
+ end
28
+ end
29
+
30
+ describe 'when :allow_doctype is true' do
31
+ before do
32
+ @s = Sanitize.new(:allow_doctype => true, :elements => ['html'])
33
+ end
34
+
35
+ it 'should allow doctype declarations in documents' do
36
+ @s.document('<!DOCTYPE html><html>foo</html>')
37
+ .must_equal "<!DOCTYPE html>\n<html>foo</html>\n"
38
+
39
+ @s.document('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
40
+ .must_equal "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n<html>foo</html>\n"
41
+
42
+ @s.document("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
43
+ .must_equal "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html>foo</html>\n"
44
+ end
45
+
46
+ it 'should not allow obviously invalid doctype declarations in documents' do
47
+ @s.document('<!DOCTYPE blah blah blah><html>foo</html>')
48
+ .must_equal "<!DOCTYPE html>\n<html>foo</html>\n"
49
+
50
+ @s.document('<!DOCTYPE blah><html>foo</html>')
51
+ .must_equal "<!DOCTYPE html>\n<html>foo</html>\n"
52
+
53
+ @s.document('<!DOCTYPE html BLAH "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
54
+ .must_equal "<!DOCTYPE html>\n<html>foo</html>\n"
55
+
56
+ @s.document('<!whatever><html>foo</html>')
57
+ .must_equal "<html>foo</html>\n"
58
+ end
59
+
60
+ it 'should not allow doctype definitions in fragments' do
61
+ @s.fragment('<!DOCTYPE html><html>foo</html>')
62
+ .must_equal "foo"
63
+
64
+ @s.fragment('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"><html>foo</html>')
65
+ .must_equal "foo"
66
+
67
+ @s.fragment("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\"\n \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\"><html>foo</html>")
68
+ .must_equal "foo"
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,399 @@
1
+ # encoding: utf-8
2
+ require_relative 'common'
3
+
4
+ describe 'Sanitize::Transformers::CleanElement' do
5
+ make_my_diffs_pretty!
6
+ parallelize_me!
7
+
8
+ strings = {
9
+ :basic => {
10
+ :html => '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <style>.foo { color: #fff; }</style> <script>alert("hello world");</script>',
11
+
12
+ :default => 'Lorem ipsum dolor sit amet .foo { color: #fff; } alert("hello world");',
13
+ :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet .foo { color: #fff; } alert("hello world");',
14
+ :basic => '<b>Lorem</b> <a href="pants" rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet .foo { color: #fff; } alert("hello world");',
15
+ :relaxed => '<b>Lorem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <style>.foo { color: #fff; }</style> alert("hello world");'
16
+ },
17
+
18
+ :malformed => {
19
+ :html => 'Lo<!-- comment -->rem</b> <a href=pants title="foo>ipsum <a href="http://foo.com/"><strong>dolor</a></strong> sit<br/>amet <script>alert("hello world");',
20
+
21
+ :default => 'Lorem dolor sit amet alert("hello world");',
22
+ :restricted => 'Lorem <strong>dolor</strong> sit amet alert("hello world");',
23
+ :basic => 'Lorem <a href="pants" rel="nofollow"><strong>dolor</strong></a> sit<br>amet alert("hello world");',
24
+ :relaxed => 'Lorem <a href="pants" title="foo&gt;ipsum &lt;a href="><strong>dolor</strong></a> sit<br>amet alert("hello world");',
25
+ },
26
+
27
+ :unclosed => {
28
+ :html => '<p>a</p><blockquote>b',
29
+
30
+ :default => ' a b ',
31
+ :restricted => ' a b ',
32
+ :basic => '<p>a</p><blockquote>b</blockquote>',
33
+ :relaxed => '<p>a</p><blockquote>b</blockquote>'
34
+ },
35
+
36
+ :malicious => {
37
+ :html => '<b>Lo<!-- comment -->rem</b> <a href="javascript:pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert("hello world");</script>',
38
+
39
+ :default => 'Lorem ipsum dolor sit amet &lt;script&gt;alert("hello world");',
40
+ :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet &lt;script&gt;alert("hello world");',
41
+ :basic => '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert("hello world");',
42
+ :relaxed => '<b>Lorem</b> <a title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet &lt;script&gt;alert("hello world");'
43
+ }
44
+ }
45
+
46
+ protocols = {
47
+ 'protocol-based JS injection: simple, no spaces' => {
48
+ :html => '<a href="javascript:alert(\'XSS\');">foo</a>',
49
+ :default => 'foo',
50
+ :restricted => 'foo',
51
+ :basic => '<a rel="nofollow">foo</a>',
52
+ :relaxed => '<a>foo</a>'
53
+ },
54
+
55
+ 'protocol-based JS injection: simple, spaces before' => {
56
+ :html => '<a href="javascript :alert(\'XSS\');">foo</a>',
57
+ :default => 'foo',
58
+ :restricted => 'foo',
59
+ :basic => '<a rel="nofollow">foo</a>',
60
+ :relaxed => '<a>foo</a>'
61
+ },
62
+
63
+ 'protocol-based JS injection: simple, spaces after' => {
64
+ :html => '<a href="javascript: alert(\'XSS\');">foo</a>',
65
+ :default => 'foo',
66
+ :restricted => 'foo',
67
+ :basic => '<a rel="nofollow">foo</a>',
68
+ :relaxed => '<a>foo</a>'
69
+ },
70
+
71
+ 'protocol-based JS injection: simple, spaces before and after' => {
72
+ :html => '<a href="javascript : alert(\'XSS\');">foo</a>',
73
+ :default => 'foo',
74
+ :restricted => 'foo',
75
+ :basic => '<a rel="nofollow">foo</a>',
76
+ :relaxed => '<a>foo</a>'
77
+ },
78
+
79
+ 'protocol-based JS injection: preceding colon' => {
80
+ :html => '<a href=":javascript:alert(\'XSS\');">foo</a>',
81
+ :default => 'foo',
82
+ :restricted => 'foo',
83
+ :basic => '<a rel="nofollow">foo</a>',
84
+ :relaxed => '<a>foo</a>'
85
+ },
86
+
87
+ 'protocol-based JS injection: UTF-8 encoding' => {
88
+ :html => '<a href="javascript&#58;">foo</a>',
89
+ :default => 'foo',
90
+ :restricted => 'foo',
91
+ :basic => '<a rel="nofollow">foo</a>',
92
+ :relaxed => '<a>foo</a>'
93
+ },
94
+
95
+ 'protocol-based JS injection: long UTF-8 encoding' => {
96
+ :html => '<a href="javascript&#0058;">foo</a>',
97
+ :default => 'foo',
98
+ :restricted => 'foo',
99
+ :basic => '<a rel="nofollow">foo</a>',
100
+ :relaxed => '<a>foo</a>'
101
+ },
102
+
103
+ 'protocol-based JS injection: long UTF-8 encoding without semicolons' => {
104
+ :html => '<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>foo</a>',
105
+ :default => 'foo',
106
+ :restricted => 'foo',
107
+ :basic => '<a rel="nofollow">foo</a>',
108
+ :relaxed => '<a>foo</a>'
109
+ },
110
+
111
+ 'protocol-based JS injection: hex encoding' => {
112
+ :html => '<a href="javascript&#x3A;">foo</a>',
113
+ :default => 'foo',
114
+ :restricted => 'foo',
115
+ :basic => '<a rel="nofollow">foo</a>',
116
+ :relaxed => '<a>foo</a>'
117
+ },
118
+
119
+ 'protocol-based JS injection: long hex encoding' => {
120
+ :html => '<a href="javascript&#x003A;">foo</a>',
121
+ :default => 'foo',
122
+ :restricted => 'foo',
123
+ :basic => '<a rel="nofollow">foo</a>',
124
+ :relaxed => '<a>foo</a>'
125
+ },
126
+
127
+ 'protocol-based JS injection: hex encoding without semicolons' => {
128
+ :html => '<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>foo</a>',
129
+ :default => 'foo',
130
+ :restricted => 'foo',
131
+ :basic => '<a rel="nofollow">foo</a>',
132
+ :relaxed => '<a>foo</a>'
133
+ },
134
+
135
+ 'protocol-based JS injection: null char' => {
136
+ :html => "<img src=java\0script:alert(\"XSS\")>",
137
+ :default => '',
138
+ :restricted => '',
139
+ :basic => '',
140
+ :relaxed => '<img>'
141
+ },
142
+
143
+ 'protocol-based JS injection: invalid URL char' => {
144
+ :html => '<img src=java\script:alert("XSS")>',
145
+ :default => '',
146
+ :restricted => '',
147
+ :basic => '',
148
+ :relaxed => '<img>'
149
+ },
150
+
151
+ 'protocol-based JS injection: spaces and entities' => {
152
+ :html => '<img src=" &#14; javascript:alert(\'XSS\');">',
153
+ :default => '',
154
+ :restricted => '',
155
+ :basic => '',
156
+ :relaxed => '<img>'
157
+ }
158
+ }
159
+
160
+ describe 'Default config' do
161
+ it 'should remove non-whitelisted elements, leaving safe contents behind' do
162
+ Sanitize.fragment('foo <b>bar</b> <strong><a href="#a">baz</a></strong> quux')
163
+ .must_equal 'foo bar baz quux'
164
+
165
+ Sanitize.fragment('<script>alert("<xss>");</script>')
166
+ .must_equal 'alert("&lt;xss&gt;");'
167
+
168
+ Sanitize.fragment('<<script>script>alert("<xss>");</<script>>')
169
+ .must_equal '&lt;script&gt;alert("&lt;xss&gt;");&lt;/&lt;script&gt;&gt;'
170
+
171
+ Sanitize.fragment('< script <>> alert("<xss>");</script>')
172
+ .must_equal '&lt; script &lt;&gt;&gt; alert("");'
173
+ end
174
+
175
+ it 'should surround the contents of :whitespace_elements with space characters when removing the element' do
176
+ Sanitize.fragment('foo<div>bar</div>baz')
177
+ .must_equal 'foo bar baz'
178
+
179
+ Sanitize.fragment('foo<br>bar<br>baz')
180
+ .must_equal 'foo bar baz'
181
+
182
+ Sanitize.fragment('foo<hr>bar<hr>baz')
183
+ .must_equal 'foo bar baz'
184
+ end
185
+
186
+ it 'should not choke on several instances of the same element in a row' do
187
+ Sanitize.fragment('<img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif">')
188
+ .must_equal ''
189
+ end
190
+
191
+ strings.each do |name, data|
192
+ it "should clean #{name} HTML" do
193
+ Sanitize.fragment(data[:html]).must_equal(data[:default])
194
+ end
195
+ end
196
+
197
+ protocols.each do |name, data|
198
+ it "should not allow #{name}" do
199
+ Sanitize.fragment(data[:html]).must_equal(data[:default])
200
+ end
201
+ end
202
+ end
203
+
204
+ describe 'Restricted config' do
205
+ before do
206
+ @s = Sanitize.new(Sanitize::Config::RESTRICTED)
207
+ end
208
+
209
+ strings.each do |name, data|
210
+ it "should clean #{name} HTML" do
211
+ @s.fragment(data[:html]).must_equal(data[:restricted])
212
+ end
213
+ end
214
+
215
+ protocols.each do |name, data|
216
+ it "should not allow #{name}" do
217
+ @s.fragment(data[:html]).must_equal(data[:restricted])
218
+ end
219
+ end
220
+ end
221
+
222
+ describe 'Basic config' do
223
+ before do
224
+ @s = Sanitize.new(Sanitize::Config::BASIC)
225
+ end
226
+
227
+ it 'should not choke on valueless attributes' do
228
+ @s.fragment('foo <a href>foo</a> bar')
229
+ .must_equal 'foo <a href="" rel="nofollow">foo</a> bar'
230
+ end
231
+
232
+ it 'should downcase attribute names' do
233
+ @s.fragment('<a HREF="javascript:alert(\'foo\')">bar</a>')
234
+ .must_equal '<a rel="nofollow">bar</a>'
235
+ end
236
+
237
+ strings.each do |name, data|
238
+ it "should clean #{name} HTML" do
239
+ @s.fragment(data[:html]).must_equal(data[:basic])
240
+ end
241
+ end
242
+
243
+ protocols.each do |name, data|
244
+ it "should not allow #{name}" do
245
+ @s.fragment(data[:html]).must_equal(data[:basic])
246
+ end
247
+ end
248
+ end
249
+
250
+ describe 'Relaxed config' do
251
+ before do
252
+ @s = Sanitize.new(Sanitize::Config::RELAXED)
253
+ end
254
+
255
+ it 'should encode special chars in attribute values' do
256
+ @s.fragment('<a href="http://example.com" title="<b>&eacute;xamples</b> & things">foo</a>')
257
+ .must_equal '<a href="http://example.com" title="&lt;b&gt;éxamples&lt;/b&gt; &amp; things">foo</a>'
258
+ end
259
+
260
+ strings.each do |name, data|
261
+ it "should clean #{name} HTML" do
262
+ @s.fragment(data[:html]).must_equal(data[:relaxed])
263
+ end
264
+ end
265
+
266
+ protocols.each do |name, data|
267
+ it "should not allow #{name}" do
268
+ @s.fragment(data[:html]).must_equal(data[:relaxed])
269
+ end
270
+ end
271
+ end
272
+
273
+ describe 'Custom configs' do
274
+ it 'should allow attributes on all elements if whitelisted under :all' do
275
+ input = '<p class="foo">bar</p>'
276
+
277
+ Sanitize.fragment(input).must_equal ' bar '
278
+
279
+ Sanitize.fragment(input, {
280
+ :elements => ['p'],
281
+ :attributes => {:all => ['class']}
282
+ }).must_equal input
283
+
284
+ Sanitize.fragment(input, {
285
+ :elements => ['p'],
286
+ :attributes => {'div' => ['class']}
287
+ }).must_equal '<p>bar</p>'
288
+
289
+ Sanitize.fragment(input, {
290
+ :elements => ['p'],
291
+ :attributes => {'p' => ['title'], :all => ['class']}
292
+ }).must_equal input
293
+ end
294
+
295
+ it 'should allow relative URLs containing colons when the colon is not in the first path segment' do
296
+ input = '<a href="/wiki/Special:Random">Random Page</a>'
297
+
298
+ Sanitize.fragment(input, {
299
+ :elements => ['a'],
300
+ :attributes => {'a' => ['href']},
301
+ :protocols => {'a' => {'href' => [:relative]}}
302
+ }).must_equal input
303
+ end
304
+
305
+ it 'should allow relative URLs containing colons when the colon is part of an anchor' do
306
+ input = '<a href="#fn:1">Footnote 1</a>'
307
+
308
+ Sanitize.fragment(input, {
309
+ :elements => ['a'],
310
+ :attributes => {'a' => ['href']},
311
+ :protocols => {'a' => {'href' => [:relative]}}
312
+ }).must_equal input
313
+
314
+ input = '<a href="somepage#fn:1">Footnote 1</a>'
315
+
316
+ Sanitize.fragment(input, {
317
+ :elements => ['a'],
318
+ :attributes => {'a' => ['href']},
319
+ :protocols => {'a' => {'href' => [:relative]}}
320
+ }).must_equal input
321
+ end
322
+
323
+ it 'should remove the contents of filtered nodes when :remove_contents is true' do
324
+ Sanitize.fragment('foo bar <div>baz<span>quux</span></div>',
325
+ :remove_contents => true
326
+ ).must_equal 'foo bar '
327
+ end
328
+
329
+ it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as strings' do
330
+ Sanitize.fragment('foo bar <div>baz<span>quux</span><script>alert("hello!");</script></div>',
331
+ :remove_contents => ['script', 'span']
332
+ ).must_equal 'foo bar baz '
333
+ end
334
+
335
+ it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as symbols' do
336
+ Sanitize.fragment('foo bar <div>baz<span>quux</span><script>alert("hello!");</script></div>',
337
+ :remove_contents => [:script, :span]
338
+ ).must_equal 'foo bar baz '
339
+ end
340
+
341
+ it 'should not allow arbitrary HTML5 data attributes by default' do
342
+ Sanitize.fragment('<b data-foo="bar"></b>',
343
+ :elements => ['b']
344
+ ).must_equal '<b></b>'
345
+
346
+ Sanitize.fragment('<b class="foo" data-foo="bar"></b>',
347
+ :attributes => {'b' => ['class']},
348
+ :elements => ['b']
349
+ ).must_equal '<b class="foo"></b>'
350
+ end
351
+
352
+ it 'should allow arbitrary HTML5 data attributes when the :attributes config includes :data' do
353
+ s = Sanitize.new(
354
+ :attributes => {'b' => [:data]},
355
+ :elements => ['b']
356
+ )
357
+
358
+ s.fragment('<b data-foo="valid" data-bar="valid"></b>')
359
+ .must_equal '<b data-foo="valid" data-bar="valid"></b>'
360
+
361
+ s.fragment('<b data-="invalid"></b>')
362
+ .must_equal '<b></b>'
363
+
364
+ s.fragment('<b data-="invalid"></b>')
365
+ .must_equal '<b></b>'
366
+
367
+ s.fragment('<b data-xml="invalid"></b>')
368
+ .must_equal '<b></b>'
369
+
370
+ s.fragment('<b data-xmlfoo="invalid"></b>')
371
+ .must_equal '<b></b>'
372
+
373
+ s.fragment('<b data-f:oo="valid"></b>')
374
+ .must_equal '<b></b>'
375
+
376
+ s.fragment('<b data-f/oo="partial"></b>')
377
+ .must_equal '<b data-f=""></b>' # Nokogiri quirk; not ideal, but harmless
378
+
379
+ s.fragment('<b data-éfoo="valid"></b>')
380
+ .must_equal '<b></b>' # Another annoying Nokogiri quirk.
381
+ end
382
+
383
+ it 'should replace whitespace_elements with configured :before and :after values' do
384
+ s = Sanitize.new(
385
+ :whitespace_elements => {
386
+ 'p' => { :before => "\n", :after => "\n" },
387
+ 'div' => { :before => "\n", :after => "\n" },
388
+ 'br' => { :before => "\n", :after => "\n" },
389
+ }
390
+ )
391
+
392
+ s.fragment('<p>foo</p>').must_equal "\nfoo\n"
393
+ s.fragment('<p>foo</p><p>bar</p>').must_equal "\nfoo\n\nbar\n"
394
+ s.fragment('foo<div>bar</div>baz').must_equal "foo\nbar\nbaz"
395
+ s.fragment('foo<br>bar<br>baz').must_equal "foo\nbar\nbaz"
396
+ end
397
+ end
398
+
399
+ end