sanitize 2.1.1 → 6.0.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of sanitize might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/HISTORY.md +520 -55
- data/LICENSE +1 -1
- data/README.md +438 -168
- data/lib/sanitize/config/basic.rb +12 -32
- data/lib/sanitize/config/default.rb +118 -0
- data/lib/sanitize/config/relaxed.rb +716 -53
- data/lib/sanitize/config/restricted.rb +3 -23
- data/lib/sanitize/config.rb +53 -79
- data/lib/sanitize/css.rb +348 -0
- data/lib/sanitize/transformers/clean_cdata.rb +3 -3
- data/lib/sanitize/transformers/clean_comment.rb +6 -3
- data/lib/sanitize/transformers/clean_css.rb +57 -0
- data/lib/sanitize/transformers/clean_doctype.rb +19 -0
- data/lib/sanitize/transformers/clean_element.rb +192 -124
- data/lib/sanitize/version.rb +3 -1
- data/lib/sanitize.rb +172 -143
- data/test/common.rb +3 -0
- data/test/test_clean_comment.rb +47 -0
- data/test/test_clean_css.rb +67 -0
- data/test/test_clean_doctype.rb +71 -0
- data/test/test_clean_element.rb +545 -0
- data/test/test_config.rb +65 -0
- data/test/test_malicious_css.rb +42 -0
- data/test/test_malicious_html.rb +235 -0
- data/test/test_parser.rb +75 -0
- data/test/test_sanitize.rb +151 -675
- data/test/test_sanitize_css.rb +424 -0
- data/test/test_transformers.rb +230 -0
- metadata +44 -41
@@ -0,0 +1,545 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative 'common'
|
3
|
+
|
4
|
+
describe 'Sanitize::Transformers::CleanElement' do
|
5
|
+
make_my_diffs_pretty!
|
6
|
+
parallelize_me!
|
7
|
+
|
8
|
+
strings = {
|
9
|
+
:basic => {
|
10
|
+
:html => '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <style>.foo { color: #fff; }</style> <script>alert("hello world");</script>',
|
11
|
+
:default => 'Lorem ipsum dolor sit amet ',
|
12
|
+
:restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet ',
|
13
|
+
:basic => '<b>Lorem</b> <a href="pants" rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet ',
|
14
|
+
:relaxed => '<b>Lorem</b> <a href="pants" title="foo" style="text-decoration: underline;">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <style>.foo { color: #fff; }</style> '
|
15
|
+
},
|
16
|
+
|
17
|
+
:malformed => {
|
18
|
+
:html => 'Lo<!-- comment -->rem</b> <a href=pants title="foo>ipsum <a href="http://foo.com/"><strong>dolor</a></strong> sit<br/>amet <script>alert("hello world");',
|
19
|
+
:default => 'Lorem dolor sit amet ',
|
20
|
+
:restricted => 'Lorem <strong>dolor</strong> sit amet ',
|
21
|
+
:basic => 'Lorem <a href="pants" rel="nofollow"><strong>dolor</strong></a> sit<br>amet ',
|
22
|
+
:relaxed => 'Lorem <a href="pants" title="foo>ipsum <a href="><strong>dolor</strong></a> sit<br>amet ',
|
23
|
+
},
|
24
|
+
|
25
|
+
:unclosed => {
|
26
|
+
:html => '<p>a</p><blockquote>b',
|
27
|
+
:default => ' a b ',
|
28
|
+
:restricted => ' a b ',
|
29
|
+
:basic => '<p>a</p><blockquote>b</blockquote>',
|
30
|
+
:relaxed => '<p>a</p><blockquote>b</blockquote>'
|
31
|
+
},
|
32
|
+
|
33
|
+
:malicious => {
|
34
|
+
:html => '<b>Lo<!-- comment -->rem</b> <a href="javascript:pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert("hello world");</script>',
|
35
|
+
:default => 'Lorem ipsum dolor sit amet <script>alert("hello world");',
|
36
|
+
:restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet <script>alert("hello world");',
|
37
|
+
:basic => '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet <script>alert("hello world");',
|
38
|
+
:relaxed => '<b>Lorem</b> <a title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <script>alert("hello world");'
|
39
|
+
}
|
40
|
+
}
|
41
|
+
|
42
|
+
protocols = {
|
43
|
+
'protocol-based JS injection: simple, no spaces' => {
|
44
|
+
:html => '<a href="javascript:alert(\'XSS\');">foo</a>',
|
45
|
+
:default => 'foo',
|
46
|
+
:restricted => 'foo',
|
47
|
+
:basic => '<a rel="nofollow">foo</a>',
|
48
|
+
:relaxed => '<a>foo</a>'
|
49
|
+
},
|
50
|
+
|
51
|
+
'protocol-based JS injection: simple, spaces before' => {
|
52
|
+
:html => '<a href="javascript :alert(\'XSS\');">foo</a>',
|
53
|
+
:default => 'foo',
|
54
|
+
:restricted => 'foo',
|
55
|
+
:basic => '<a rel="nofollow">foo</a>',
|
56
|
+
:relaxed => '<a>foo</a>'
|
57
|
+
},
|
58
|
+
|
59
|
+
'protocol-based JS injection: simple, spaces after' => {
|
60
|
+
:html => '<a href="javascript: alert(\'XSS\');">foo</a>',
|
61
|
+
:default => 'foo',
|
62
|
+
:restricted => 'foo',
|
63
|
+
:basic => '<a rel="nofollow">foo</a>',
|
64
|
+
:relaxed => '<a>foo</a>'
|
65
|
+
},
|
66
|
+
|
67
|
+
'protocol-based JS injection: simple, spaces before and after' => {
|
68
|
+
:html => '<a href="javascript : alert(\'XSS\');">foo</a>',
|
69
|
+
:default => 'foo',
|
70
|
+
:restricted => 'foo',
|
71
|
+
:basic => '<a rel="nofollow">foo</a>',
|
72
|
+
:relaxed => '<a>foo</a>'
|
73
|
+
},
|
74
|
+
|
75
|
+
'protocol-based JS injection: preceding colon' => {
|
76
|
+
:html => '<a href=":javascript:alert(\'XSS\');">foo</a>',
|
77
|
+
:default => 'foo',
|
78
|
+
:restricted => 'foo',
|
79
|
+
:basic => '<a rel="nofollow">foo</a>',
|
80
|
+
:relaxed => '<a>foo</a>'
|
81
|
+
},
|
82
|
+
|
83
|
+
'protocol-based JS injection: UTF-8 encoding' => {
|
84
|
+
:html => '<a href="javascript&#58;">foo</a>',
|
85
|
+
:default => 'foo',
|
86
|
+
:restricted => 'foo',
|
87
|
+
:basic => '<a rel="nofollow">foo</a>',
|
88
|
+
:relaxed => '<a>foo</a>'
|
89
|
+
},
|
90
|
+
|
91
|
+
'protocol-based JS injection: long UTF-8 encoding' => {
|
92
|
+
:html => '<a href="javascript&#0058;">foo</a>',
|
93
|
+
:default => 'foo',
|
94
|
+
:restricted => 'foo',
|
95
|
+
:basic => '<a rel="nofollow">foo</a>',
|
96
|
+
:relaxed => '<a>foo</a>'
|
97
|
+
},
|
98
|
+
|
99
|
+
'protocol-based JS injection: long UTF-8 encoding without semicolons' => {
|
100
|
+
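:html => '<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>foo</a>',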
|
101
|
+
:default => 'foo',
|
102
|
+
:restricted => 'foo',
|
103
|
+
:basic => '<a rel="nofollow">foo</a>',
|
104
|
+
:relaxed => '<a>foo</a>'
|
105
|
+
},
|
106
|
+
|
107
|
+
'protocol-based JS injection: hex encoding' => {
|
108
|
+
:html => '<a href="javascript&#x3A;">foo</a>',
|
109
|
+
:default => 'foo',
|
110
|
+
:restricted => 'foo',
|
111
|
+
:basic => '<a rel="nofollow">foo</a>',
|
112
|
+
:relaxed => '<a>foo</a>'
|
113
|
+
},
|
114
|
+
|
115
|
+
'protocol-based JS injection: long hex encoding' => {
|
116
|
+
:html => '<a href="javascript&#x003A;">foo</a>',
|
117
|
+
:default => 'foo',
|
118
|
+
:restricted => 'foo',
|
119
|
+
:basic => '<a rel="nofollow">foo</a>',
|
120
|
+
:relaxed => '<a>foo</a>'
|
121
|
+
},
|
122
|
+
|
123
|
+
'protocol-based JS injection: hex encoding without semicolons' => {
|
124
|
+
:html => '<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>foo</a>',
|
125
|
+
:default => 'foo',
|
126
|
+
:restricted => 'foo',
|
127
|
+
:basic => '<a rel="nofollow">foo</a>',
|
128
|
+
:relaxed => '<a>foo</a>'
|
129
|
+
},
|
130
|
+
|
131
|
+
'protocol-based JS injection: null char' => {
|
132
|
+
:html => "<img src=java\0script:alert(\"XSS\")>",
|
133
|
+
:default => '',
|
134
|
+
:restricted => '',
|
135
|
+
:basic => '',
|
136
|
+
:relaxed => '<img>'
|
137
|
+
},
|
138
|
+
|
139
|
+
'protocol-based JS injection: invalid URL char' => {
|
140
|
+
:html => '<img src=java\script:alert("XSS")>',
|
141
|
+
:default => '',
|
142
|
+
:restricted => '',
|
143
|
+
:basic => '',
|
144
|
+
:relaxed => '<img>'
|
145
|
+
},
|
146
|
+
|
147
|
+
'protocol-based JS injection: spaces and entities' => {
|
148
|
+
:html => '<img src=" &#14;  javascript:alert(\'XSS\');">',
|
149
|
+
:default => '',
|
150
|
+
:restricted => '',
|
151
|
+
:basic => '',
|
152
|
+
:relaxed => '<img>'
|
153
|
+
},
|
154
|
+
|
155
|
+
'protocol whitespace' => {
|
156
|
+
:html => '<a href=" http://example.com/"></a>',
|
157
|
+
:default => '',
|
158
|
+
:restricted => '',
|
159
|
+
:basic => '<a href="http://example.com/" rel="nofollow"></a>',
|
160
|
+
:relaxed => '<a href="http://example.com/"></a>'
|
161
|
+
}
|
162
|
+
}
|
163
|
+
|
164
|
+
describe 'Default config' do
|
165
|
+
it 'should remove non-allowlisted elements, leaving safe contents behind' do
|
166
|
+
Sanitize.fragment('foo <b>bar</b> <strong><a href="#a">baz</a></strong> quux')
|
167
|
+
.must_equal 'foo bar baz quux'
|
168
|
+
|
169
|
+
Sanitize.fragment('<script>alert("<xss>");</script>')
|
170
|
+
.must_equal ''
|
171
|
+
|
172
|
+
Sanitize.fragment('<<script>script>alert("<xss>");</<script>>')
|
173
|
+
.must_equal '<'
|
174
|
+
|
175
|
+
Sanitize.fragment('< script <>> alert("<xss>");</script>')
|
176
|
+
.must_equal '< script <>> alert("");'
|
177
|
+
end
|
178
|
+
|
179
|
+
it 'should surround the contents of :whitespace_elements with space characters when removing the element' do
|
180
|
+
Sanitize.fragment('foo<div>bar</div>baz')
|
181
|
+
.must_equal 'foo bar baz'
|
182
|
+
|
183
|
+
Sanitize.fragment('foo<br>bar<br>baz')
|
184
|
+
.must_equal 'foo bar baz'
|
185
|
+
|
186
|
+
Sanitize.fragment('foo<hr>bar<hr>baz')
|
187
|
+
.must_equal 'foo bar baz'
|
188
|
+
end
|
189
|
+
|
190
|
+
it 'should not choke on several instances of the same element in a row' do
|
191
|
+
Sanitize.fragment('<img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif">')
|
192
|
+
.must_equal ''
|
193
|
+
end
|
194
|
+
|
195
|
+
it 'should not preserve the content of removed `iframe` elements' do
|
196
|
+
Sanitize.fragment('<iframe>hello! <script>alert(0)</script></iframe>')
|
197
|
+
.must_equal ''
|
198
|
+
end
|
199
|
+
|
200
|
+
it 'should not preserve the content of removed `math` elements' do
|
201
|
+
Sanitize.fragment('<math>hello! <script>alert(0)</script></math>')
|
202
|
+
.must_equal ''
|
203
|
+
end
|
204
|
+
|
205
|
+
it 'should not preserve the content of removed `noembed` elements' do
|
206
|
+
Sanitize.fragment('<noembed>hello! <script>alert(0)</script></noembed>')
|
207
|
+
.must_equal ''
|
208
|
+
end
|
209
|
+
|
210
|
+
it 'should not preserve the content of removed `noframes` elements' do
|
211
|
+
Sanitize.fragment('<noframes>hello! <script>alert(0)</script></noframes>')
|
212
|
+
.must_equal ''
|
213
|
+
end
|
214
|
+
|
215
|
+
it 'should not preserve the content of removed `noscript` elements' do
|
216
|
+
Sanitize.fragment('<noscript>hello! <script>alert(0)</script></noscript>')
|
217
|
+
.must_equal ''
|
218
|
+
end
|
219
|
+
|
220
|
+
it 'should not preserve the content of removed `plaintext` elements' do
|
221
|
+
Sanitize.fragment('<plaintext>hello! <script>alert(0)</script>')
|
222
|
+
.must_equal ''
|
223
|
+
end
|
224
|
+
|
225
|
+
it 'should not preserve the content of removed `script` elements' do
|
226
|
+
Sanitize.fragment('<script>hello! <script>alert(0)</script></script>')
|
227
|
+
.must_equal ''
|
228
|
+
end
|
229
|
+
|
230
|
+
it 'should not preserve the content of removed `style` elements' do
|
231
|
+
Sanitize.fragment('<style>hello! <script>alert(0)</script></style>')
|
232
|
+
.must_equal ''
|
233
|
+
end
|
234
|
+
|
235
|
+
it 'should not preserve the content of removed `svg` elements' do
|
236
|
+
Sanitize.fragment('<svg>hello! <script>alert(0)</script></svg>')
|
237
|
+
.must_equal ''
|
238
|
+
end
|
239
|
+
|
240
|
+
it 'should not preserve the content of removed `xmp` elements' do
|
241
|
+
Sanitize.fragment('<xmp>hello! <script>alert(0)</script></xmp>')
|
242
|
+
.must_equal ''
|
243
|
+
end
|
244
|
+
|
245
|
+
strings.each do |name, data|
|
246
|
+
it "should clean #{name} HTML" do
|
247
|
+
Sanitize.fragment(data[:html]).must_equal(data[:default])
|
248
|
+
end
|
249
|
+
end
|
250
|
+
|
251
|
+
protocols.each do |name, data|
|
252
|
+
it "should not allow #{name}" do
|
253
|
+
Sanitize.fragment(data[:html]).must_equal(data[:default])
|
254
|
+
end
|
255
|
+
end
|
256
|
+
end
|
257
|
+
|
258
|
+
describe 'Restricted config' do
|
259
|
+
before do
|
260
|
+
@s = Sanitize.new(Sanitize::Config::RESTRICTED)
|
261
|
+
end
|
262
|
+
|
263
|
+
strings.each do |name, data|
|
264
|
+
it "should clean #{name} HTML" do
|
265
|
+
@s.fragment(data[:html]).must_equal(data[:restricted])
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
269
|
+
protocols.each do |name, data|
|
270
|
+
it "should not allow #{name}" do
|
271
|
+
@s.fragment(data[:html]).must_equal(data[:restricted])
|
272
|
+
end
|
273
|
+
end
|
274
|
+
end
|
275
|
+
|
276
|
+
describe 'Basic config' do
|
277
|
+
before do
|
278
|
+
@s = Sanitize.new(Sanitize::Config::BASIC)
|
279
|
+
end
|
280
|
+
|
281
|
+
it 'should not choke on valueless attributes' do
|
282
|
+
@s.fragment('foo <a href>foo</a> bar')
|
283
|
+
.must_equal 'foo <a href="" rel="nofollow">foo</a> bar'
|
284
|
+
end
|
285
|
+
|
286
|
+
it 'should downcase attribute names' do
|
287
|
+
@s.fragment('<a HREF="javascript:alert(\'foo\')">bar</a>')
|
288
|
+
.must_equal '<a rel="nofollow">bar</a>'
|
289
|
+
end
|
290
|
+
|
291
|
+
strings.each do |name, data|
|
292
|
+
it "should clean #{name} HTML" do
|
293
|
+
@s.fragment(data[:html]).must_equal(data[:basic])
|
294
|
+
end
|
295
|
+
end
|
296
|
+
|
297
|
+
protocols.each do |name, data|
|
298
|
+
it "should not allow #{name}" do
|
299
|
+
@s.fragment(data[:html]).must_equal(data[:basic])
|
300
|
+
end
|
301
|
+
end
|
302
|
+
end
|
303
|
+
|
304
|
+
describe 'Relaxed config' do
|
305
|
+
before do
|
306
|
+
@s = Sanitize.new(Sanitize::Config::RELAXED)
|
307
|
+
end
|
308
|
+
|
309
|
+
it 'should encode special chars in attribute values' do
|
310
|
+
@s.fragment('<a href="http://example.com" title="<b>éxamples</b> & things">foo</a>')
|
311
|
+
.must_equal '<a href="http://example.com" title="<b>éxamples</b> &amp; things">foo</a>'
|
312
|
+
end
|
313
|
+
|
314
|
+
strings.each do |name, data|
|
315
|
+
it "should clean #{name} HTML" do
|
316
|
+
@s.fragment(data[:html]).must_equal(data[:relaxed])
|
317
|
+
end
|
318
|
+
end
|
319
|
+
|
320
|
+
protocols.each do |name, data|
|
321
|
+
it "should not allow #{name}" do
|
322
|
+
@s.fragment(data[:html]).must_equal(data[:relaxed])
|
323
|
+
end
|
324
|
+
end
|
325
|
+
end
|
326
|
+
|
327
|
+
describe 'Custom configs' do
|
328
|
+
it 'should allow attributes on all elements if allowlisted under :all' do
|
329
|
+
input = '<p class="foo">bar</p>'
|
330
|
+
|
331
|
+
Sanitize.fragment(input).must_equal ' bar '
|
332
|
+
|
333
|
+
Sanitize.fragment(input, {
|
334
|
+
:elements => ['p'],
|
335
|
+
:attributes => {:all => ['class']}
|
336
|
+
}).must_equal input
|
337
|
+
|
338
|
+
Sanitize.fragment(input, {
|
339
|
+
:elements => ['p'],
|
340
|
+
:attributes => {'div' => ['class']}
|
341
|
+
}).must_equal '<p>bar</p>'
|
342
|
+
|
343
|
+
Sanitize.fragment(input, {
|
344
|
+
:elements => ['p'],
|
345
|
+
:attributes => {'p' => ['title'], :all => ['class']}
|
346
|
+
}).must_equal input
|
347
|
+
end
|
348
|
+
|
349
|
+
it "should not allow relative URLs when relative URLs aren't allowlisted" do
|
350
|
+
input = '<a href="/foo/bar">Link</a>'
|
351
|
+
|
352
|
+
Sanitize.fragment(input,
|
353
|
+
:elements => ['a'],
|
354
|
+
:attributes => {'a' => ['href']},
|
355
|
+
:protocols => {'a' => {'href' => ['http']}}
|
356
|
+
).must_equal '<a>Link</a>'
|
357
|
+
end
|
358
|
+
|
359
|
+
it 'should allow relative URLs containing colons when the colon is not in the first path segment' do
|
360
|
+
input = '<a href="/wiki/Special:Random">Random Page</a>'
|
361
|
+
|
362
|
+
Sanitize.fragment(input, {
|
363
|
+
:elements => ['a'],
|
364
|
+
:attributes => {'a' => ['href']},
|
365
|
+
:protocols => {'a' => {'href' => [:relative]}}
|
366
|
+
}).must_equal input
|
367
|
+
end
|
368
|
+
|
369
|
+
it 'should allow relative URLs containing colons when the colon is part of an anchor' do
|
370
|
+
input = '<a href="#fn:1">Footnote 1</a>'
|
371
|
+
|
372
|
+
Sanitize.fragment(input, {
|
373
|
+
:elements => ['a'],
|
374
|
+
:attributes => {'a' => ['href']},
|
375
|
+
:protocols => {'a' => {'href' => [:relative]}}
|
376
|
+
}).must_equal input
|
377
|
+
|
378
|
+
input = '<a href="somepage#fn:1">Footnote 1</a>'
|
379
|
+
|
380
|
+
Sanitize.fragment(input, {
|
381
|
+
:elements => ['a'],
|
382
|
+
:attributes => {'a' => ['href']},
|
383
|
+
:protocols => {'a' => {'href' => [:relative]}}
|
384
|
+
}).must_equal input
|
385
|
+
end
|
386
|
+
|
387
|
+
it 'should remove the contents of filtered nodes when :remove_contents is true' do
|
388
|
+
Sanitize.fragment('foo bar <div>baz<span>quux</span></div>',
|
389
|
+
:remove_contents => true
|
390
|
+
).must_equal 'foo bar '
|
391
|
+
end
|
392
|
+
|
393
|
+
it 'should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as strings' do
|
394
|
+
Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
|
395
|
+
:remove_contents => ['script', 'span']
|
396
|
+
).must_equal 'foo bar baz hi '
|
397
|
+
|
398
|
+
Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
|
399
|
+
:remove_contents => Set.new(['script', 'span'])
|
400
|
+
).must_equal 'foo bar baz hi '
|
401
|
+
end
|
402
|
+
|
403
|
+
it 'should remove the contents of specified nodes when :remove_contents is an Array or Set of element names as symbols' do
|
404
|
+
Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
|
405
|
+
:remove_contents => [:script, :span]
|
406
|
+
).must_equal 'foo bar baz hi '
|
407
|
+
|
408
|
+
Sanitize.fragment('foo bar <div>baz<span>quux</span> <b>hi</b><script>alert("hello!");</script></div>',
|
409
|
+
:remove_contents => Set.new([:script, :span])
|
410
|
+
).must_equal 'foo bar baz hi '
|
411
|
+
end
|
412
|
+
|
413
|
+
it 'should remove the contents of allowlisted iframes' do
|
414
|
+
Sanitize.fragment('<iframe>hi <script>hello</script></iframe>',
|
415
|
+
:elements => ['iframe']
|
416
|
+
).must_equal '<iframe></iframe>'
|
417
|
+
end
|
418
|
+
|
419
|
+
it 'should not allow arbitrary HTML5 data attributes by default' do
|
420
|
+
Sanitize.fragment('<b data-foo="bar"></b>',
|
421
|
+
:elements => ['b']
|
422
|
+
).must_equal '<b></b>'
|
423
|
+
|
424
|
+
Sanitize.fragment('<b class="foo" data-foo="bar"></b>',
|
425
|
+
:attributes => {'b' => ['class']},
|
426
|
+
:elements => ['b']
|
427
|
+
).must_equal '<b class="foo"></b>'
|
428
|
+
end
|
429
|
+
|
430
|
+
it 'should allow arbitrary HTML5 data attributes when the :attributes config includes :data' do
|
431
|
+
s = Sanitize.new(
|
432
|
+
:attributes => {'b' => [:data]},
|
433
|
+
:elements => ['b']
|
434
|
+
)
|
435
|
+
|
436
|
+
s.fragment('<b data-foo="valid" data-bar="valid"></b>')
|
437
|
+
.must_equal '<b data-foo="valid" data-bar="valid"></b>'
|
438
|
+
|
439
|
+
s.fragment('<b data-="invalid"></b>')
|
440
|
+
.must_equal '<b></b>'
|
441
|
+
|
442
|
+
s.fragment('<b data-="invalid"></b>')
|
443
|
+
.must_equal '<b></b>'
|
444
|
+
|
445
|
+
s.fragment('<b data-xml="invalid"></b>')
|
446
|
+
.must_equal '<b></b>'
|
447
|
+
|
448
|
+
s.fragment('<b data-xmlfoo="invalid"></b>')
|
449
|
+
.must_equal '<b></b>'
|
450
|
+
|
451
|
+
s.fragment('<b data-f:oo="valid"></b>')
|
452
|
+
.must_equal '<b></b>'
|
453
|
+
|
454
|
+
s.fragment('<b data-f/oo="partial"></b>')
|
455
|
+
.must_equal '<b data-f=""></b>' # Nokogiri quirk; not ideal, but harmless
|
456
|
+
|
457
|
+
s.fragment('<b data-éfoo="valid"></b>')
|
458
|
+
.must_equal '<b></b>' # Another annoying Nokogiri quirk.
|
459
|
+
end
|
460
|
+
|
461
|
+
it 'should replace whitespace_elements with configured :before and :after values' do
|
462
|
+
s = Sanitize.new(
|
463
|
+
:whitespace_elements => {
|
464
|
+
'p' => { :before => "\n", :after => "\n" },
|
465
|
+
'div' => { :before => "\n", :after => "\n" },
|
466
|
+
'br' => { :before => "\n", :after => "\n" },
|
467
|
+
}
|
468
|
+
)
|
469
|
+
|
470
|
+
s.fragment('<p>foo</p>').must_equal "\nfoo\n"
|
471
|
+
s.fragment('<p>foo</p><p>bar</p>').must_equal "\nfoo\n\nbar\n"
|
472
|
+
s.fragment('foo<div>bar</div>baz').must_equal "foo\nbar\nbaz"
|
473
|
+
s.fragment('foo<br>bar<br>baz').must_equal "foo\nbar\nbaz"
|
474
|
+
end
|
475
|
+
|
476
|
+
it 'should handle protocols correctly regardless of case' do
|
477
|
+
input = '<a href="hTTpS://foo.com/">Text</a>'
|
478
|
+
|
479
|
+
Sanitize.fragment(input, {
|
480
|
+
:elements => ['a'],
|
481
|
+
:attributes => {'a' => ['href']},
|
482
|
+
:protocols => {'a' => {'href' => ['https']}}
|
483
|
+
}).must_equal input
|
484
|
+
|
485
|
+
input = '<a href="mailto:someone@example.com?Subject=Hello">Text</a>'
|
486
|
+
|
487
|
+
Sanitize.fragment(input, {
|
488
|
+
:elements => ['a'],
|
489
|
+
:attributes => {'a' => ['href']},
|
490
|
+
:protocols => {'a' => {'href' => ['https']}}
|
491
|
+
}).must_equal "<a>Text</a>"
|
492
|
+
end
|
493
|
+
|
494
|
+
it 'should sanitize protocols in data attributes even if data attributes are generically allowed' do
|
495
|
+
input = '<a data-url="mailto:someone@example.com">Text</a>'
|
496
|
+
|
497
|
+
Sanitize.fragment(input, {
|
498
|
+
:elements => ['a'],
|
499
|
+
:attributes => {'a' => [:data]},
|
500
|
+
:protocols => {'a' => {'data-url' => ['https']}}
|
501
|
+
}).must_equal "<a>Text</a>"
|
502
|
+
|
503
|
+
Sanitize.fragment(input, {
|
504
|
+
:elements => ['a'],
|
505
|
+
:attributes => {'a' => [:data]},
|
506
|
+
:protocols => {'a' => {'data-url' => ['mailto']}}
|
507
|
+
}).must_equal input
|
508
|
+
end
|
509
|
+
|
510
|
+
it 'should prevent `<meta>` tags from being used to set a non-UTF-8 charset' do
|
511
|
+
Sanitize.document('<html><head><meta charset="utf-8"></head><body>Howdy!</body></html>',
|
512
|
+
:elements => %w[html head meta body],
|
513
|
+
:attributes => {'meta' => ['charset']}
|
514
|
+
).must_equal "<html><head><meta charset=\"utf-8\"></head><body>Howdy!</body></html>"
|
515
|
+
|
516
|
+
Sanitize.document('<html><meta charset="utf-8">Howdy!</html>',
|
517
|
+
:elements => %w[html meta],
|
518
|
+
:attributes => {'meta' => ['charset']}
|
519
|
+
).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
|
520
|
+
|
521
|
+
Sanitize.document('<html><meta charset="us-ascii">Howdy!</html>',
|
522
|
+
:elements => %w[html meta],
|
523
|
+
:attributes => {'meta' => ['charset']}
|
524
|
+
).must_equal "<html><meta charset=\"utf-8\">Howdy!</html>"
|
525
|
+
|
526
|
+
Sanitize.document('<html><meta http-equiv="content-type" content=" text/html; charset=us-ascii">Howdy!</html>',
|
527
|
+
:elements => %w[html meta],
|
528
|
+
:attributes => {'meta' => %w[content http-equiv]}
|
529
|
+
).must_equal "<html><meta http-equiv=\"content-type\" content=\" text/html;charset=utf-8\">Howdy!</html>"
|
530
|
+
|
531
|
+
Sanitize.document('<html><meta http-equiv="Content-Type" content="text/plain;charset = us-ascii">Howdy!</html>',
|
532
|
+
:elements => %w[html meta],
|
533
|
+
:attributes => {'meta' => %w[content http-equiv]}
|
534
|
+
).must_equal "<html><meta http-equiv=\"Content-Type\" content=\"text/plain;charset=utf-8\">Howdy!</html>"
|
535
|
+
end
|
536
|
+
|
537
|
+
it 'should not modify `<meta>` tags that already set a UTF-8 charset' do
|
538
|
+
Sanitize.document('<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8"></head><body>Howdy!</body></html>',
|
539
|
+
:elements => %w[html head meta body],
|
540
|
+
:attributes => {'meta' => %w[content http-equiv]}
|
541
|
+
).must_equal "<html><head><meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\"></head><body>Howdy!</body></html>"
|
542
|
+
end
|
543
|
+
|
544
|
+
end
|
545
|
+
end
|
data/test/test_config.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative 'common'
|
3
|
+
|
4
|
+
describe 'Config' do
|
5
|
+
make_my_diffs_pretty!
|
6
|
+
parallelize_me!
|
7
|
+
|
8
|
+
# Test helper: asserts that +config+ is frozen, then recurses so nested
# containers are checked too. For a Hash only the values are visited (via
# each_value); for a Set or Array every element is visited. Any other object
# is only checked for frozenness itself.
def verify_deeply_frozen(config)
|
9
|
+
config.must_be :frozen?
|
10
|
+
|
11
|
+
if Hash === config
|
12
|
+
config.each_value {|v| verify_deeply_frozen(v) }
|
13
|
+
elsif Set === config || Array === config
|
14
|
+
config.each {|v| verify_deeply_frozen(v) }
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'built-in configs should be deeply frozen' do
|
19
|
+
verify_deeply_frozen Sanitize::Config::DEFAULT
|
20
|
+
verify_deeply_frozen Sanitize::Config::BASIC
|
21
|
+
verify_deeply_frozen Sanitize::Config::RELAXED
|
22
|
+
verify_deeply_frozen Sanitize::Config::RESTRICTED
|
23
|
+
end
|
24
|
+
|
25
|
+
describe '.freeze_config' do
|
26
|
+
it 'should deeply freeze and return a configuration Hash' do
|
27
|
+
a = {:one => {:one_one => [0, '1', :a], :one_two => false, :one_three => Set.new([:a, :b, :c])}}
|
28
|
+
b = Sanitize::Config.freeze_config(a)
|
29
|
+
|
30
|
+
b.must_be_same_as a
|
31
|
+
verify_deeply_frozen a
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
describe '.merge' do
|
36
|
+
it 'should deeply merge a configuration Hash' do
|
37
|
+
# Freeze to ensure that we get an error if either Hash is modified.
|
38
|
+
a = Sanitize::Config.freeze_config({:one => {:one_one => [0, '1', :a], :one_two => false, :one_three => Set.new([:a, :b, :c])}})
|
39
|
+
b = Sanitize::Config.freeze_config({:one => {:one_two => true, :one_three => 3}, :two => 2})
|
40
|
+
|
41
|
+
c = Sanitize::Config.merge(a, b)
|
42
|
+
|
43
|
+
c.wont_be_same_as a
|
44
|
+
c.wont_be_same_as b
|
45
|
+
|
46
|
+
c.must_equal(
|
47
|
+
:one => {
|
48
|
+
:one_one => [0, '1', :a],
|
49
|
+
:one_two => true,
|
50
|
+
:one_three => 3
|
51
|
+
},
|
52
|
+
|
53
|
+
:two => 2
|
54
|
+
)
|
55
|
+
|
56
|
+
c[:one].wont_be_same_as a[:one]
|
57
|
+
c[:one][:one_one].wont_be_same_as a[:one][:one_one]
|
58
|
+
end
|
59
|
+
|
60
|
+
it 'should raise an ArgumentError if either argument is not a Hash' do
|
61
|
+
proc { Sanitize::Config.merge('foo', {}) }.must_raise ArgumentError
|
62
|
+
proc { Sanitize::Config.merge({}, 'foo') }.must_raise ArgumentError
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require_relative 'common'
|
3
|
+
|
4
|
+
# Miscellaneous attempts to sneak maliciously crafted CSS past Sanitize. Some of
|
5
|
+
# these are courtesy of (or inspired by) the OWASP XSS Filter Evasion Cheat
|
6
|
+
# Sheet.
|
7
|
+
#
|
8
|
+
# https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet
|
9
|
+
|
10
|
+
describe 'Malicious CSS' do
|
11
|
+
make_my_diffs_pretty!
|
12
|
+
parallelize_me!
|
13
|
+
|
14
|
+
before do
|
15
|
+
@s = Sanitize::CSS.new(Sanitize::Config::RELAXED)
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'should not be possible to inject an expression by munging it with a comment' do
|
19
|
+
@s.properties(%[width:expr/*XSS*/ession(alert('XSS'))]).
|
20
|
+
must_equal ''
|
21
|
+
|
22
|
+
@s.properties(%[width:ex/*XSS*//*/*/pression(alert("XSS"))]).
|
23
|
+
must_equal ''
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'should not be possible to inject an expression by munging it with a newline' do
|
27
|
+
@s.properties(%[width:\nexpression(alert('XSS'));]).
|
28
|
+
must_equal ''
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'should not allow the javascript protocol' do
|
32
|
+
@s.properties(%[background-image:url("javascript:alert('XSS')");]).
|
33
|
+
must_equal ''
|
34
|
+
|
35
|
+
Sanitize.fragment(%[<div style="background-image: url(javascript:alert('XSS'))">],
|
36
|
+
Sanitize::Config::RELAXED).must_equal '<div></div>'
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'should not allow behaviors' do
|
40
|
+
@s.properties(%[behavior: url(xss.htc);]).must_equal ''
|
41
|
+
end
|
42
|
+
end
|